In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torchvision.transforms as transformers
from torchvision.datasets import ImageFolder
from tqdm.notebook import tqdm
import sklearn
from sklearn.preprocessing import normalize
from tqdm import tqdm

import glob
import time

import math
import os
from sklearn.model_selection import train_test_split

In [2]:
initial_states = []

for dir, sub_dir, files in os.walk("input_data/"):
    for file in sorted(files):
        print(file)
        temp = pd.read_csv(("input_data/"+file),index_col=None, header=0)
        initial_states.append(temp)
#(initial_states)
initial_states_df = pd.concat(initial_states,axis=0,ignore_index=True)
#initial_states_df

hold = initial_states_df
x = initial_states_df.iloc[:,2:].values
x = normalize(x,norm='l2')
hold = pd.concat([hold['File ID'],pd.DataFrame(x)],axis=1)
initial_states_normalized = hold
#initial_states_normalized

00000_to_02284-initial_states.csv
02285_to_02357-initial_states.csv
02358_to_04264-initial_states.csv
04265_to_05570-initial_states.csv
05571_to_05614-initial_states.csv
05615_to_06671-initial_states.csv
06672_to_08118-initial_states.csv


In [3]:
initial_states = []

path = "input_data/"

for dir, sub_dir, files in os.walk(path):
    for file in sorted(files):
        #print(file)
        temp = pd.read_csv((path+file),index_col=None, header=0)
        initial_states.append(temp)

initial_states_df = pd.concat(initial_states,axis=0,ignore_index=True)

hold = initial_states_df
x = initial_states_df.iloc[:,2:].values
x = normalize(x,norm='l2')
hold = pd.concat([hold['File ID'],pd.DataFrame(x)],axis=1)
initial_states_normalized = hold
#'2000-08-02 04:50:33'
timestamps = initial_states_df['Timestamp']
initial_states_df

Unnamed: 0,File ID,Timestamp,Semi-major Axis (km),Eccentricity,Inclination (deg),RAAN (deg),Argument of Perigee (deg),True Anomaly (deg),Latitude (deg),Longitude (deg),Altitude (km)
0,0,2000-08-02 04:50:33,6826.387247,0.003882,87.275306,144.135111,257.314389,102.383270,43.637815,-62.543128,466.448890
1,1,2000-08-03 19:51:01,6826.327748,0.003879,87.275694,143.529694,250.438806,109.273118,43.444458,70.139709,463.435053
2,2,2000-08-05 05:40:05,6819.634802,0.004114,87.268611,142.972111,244.549389,115.138737,19.764250,104.521278,471.625453
3,3,2000-08-06 05:02:20,6819.606603,0.004134,87.268194,142.608389,241.172000,118.545161,12.450738,112.239558,470.385914
4,4,2000-08-08 20:54:57,6819.425918,0.004178,87.264611,141.605111,228.779611,130.982981,-8.776992,-130.559634,468.911226
...,...,...,...,...,...,...,...,...,...,...,...
8114,8114,2019-12-25 00:00:00,6765.013678,0.005730,87.863978,102.587920,240.608198,187.758342,69.535173,108.291937,443.930606
8115,8115,2019-12-27 00:00:00,6801.130577,0.002172,90.690901,99.760357,152.602156,226.350702,20.225336,99.507926,435.492910
8116,8116,2019-12-28 00:00:00,6805.864837,0.001925,91.053632,96.918243,43.442569,131.160787,4.265831,-83.003782,436.652863
8117,8117,2019-12-30 00:00:00,6774.300973,0.004785,88.598951,92.814340,320.652681,164.621884,53.716936,-89.084995,441.438958


In [4]:
class FullDataset(Dataset):
    def __init__(self, initial_states_df, density_length=432, goes_length=86400, omni2_length=1440, density_dir='data/dataset/test/sat_density', goes_dir="data/dataset/test/goes",
                 omni2_dir="data/dataset/test/omni2"):
        self.data = initial_states_df.reset_index(drop=True)
        self.density_dir = density_dir
        self.goes_dir = goes_dir
        self.omni2_dir = omni2_dir
        self.density_length = density_length
        self.goes_length = goes_length
        self.omni2_length = omni2_length
        #self.timestamps = timestamps
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        #timestamp = row['Timestamp']
        static_input = torch.tensor(row.drop('File ID').fillna(0.0).values, dtype=torch.float32)
        
        file_id = str(int(row['File ID'])).zfill(5)
        
        density_file = glob.glob(os.path.join(self.density_dir, f"*-{file_id}-*.csv"))
        goes_file = glob.glob(os.path.join(self.goes_dir, f"*-{file_id}-*.csv"))
        omni2_file = glob.glob(os.path.join(self.omni2_dir, f"*-{file_id}-*.csv"))

        #pos = len(self.density_dir)+1
        #density_sat = density_file[0][pos:pos+6]

        density_df = ((pd.read_csv(density_file[0])))
        density_df['Orbit Mean Density (kg/m^3)'] = np.where(density_df['Orbit Mean Density (kg/m^3)']>=1,np.nan,density_df['Orbit Mean Density (kg/m^3)'])
        if density_df.shape[0] > self.density_length:
            density_df = density_df[:self.density_length]
        elif density_df.shape[0] < self.density_length:
            padding = pd.DataFrame(np.empty((self.density_length-density_df.shape[0],2)),columns=density_df.columns)
            padding[:] = np.nan
            density_df = pd.concat((density_df,padding),ignore_index=True)
        density_df_mask = (pd.notnull(density_df)).astype(int)
        density_tensor = torch.tensor(density_df['Orbit Mean Density (kg/m^3)'].fillna(0.0).values, dtype=torch.float32)
        density_df_mask_tensor = torch.tensor(density_df_mask.iloc[:,1].values, dtype=torch.float32)

        goes_df = pd.read_csv(goes_file[0])
        if goes_df.shape[0] > self.goes_length:
            goes_df = goes_df[goes_df.shape[0]-self.goes_length:goes_df.shape[0]]
        elif goes_df.shape[0] < self.goes_length:
            padding = pd.DataFrame(np.empty((self.goes_length-goes_df.shape[0],43)),columns=goes_df.columns)
            padding[:] = np.nan
            goes_df = pd.concat((padding,goes_df),ignore_index=True)
        goes_mask = (~pd.isnull(goes_df)).astype(int)
        goes_valid_mask = ((goes_df['xrsa_flag'] == 0.0) & (goes_df['xrsb_flag'] == 0.0)).astype(int)
        goes_mask = goes_mask.mul(goes_valid_mask.values,axis=0)
        goes_tensor = torch.tensor(normalize(goes_df.iloc[:, 1:].fillna(0.0).values, norm='l2'), dtype=torch.float32)
        goes_mask_tensor = torch.tensor(goes_mask.iloc[:, 1:].values, dtype=torch.float32)
        
        omni2_df = pd.read_csv(omni2_file[0])
        if omni2_df.shape[0] > self.omni2_length:
            omni2_df = omni2_df[omni2_df.shape[0]-self.omni2_length:omni2_df.shape[0]]
        elif goes_df.shape[0] < self.omni2_length:
            padding = pd.DataFrame(np.empty((self.omni2_length-omni2_df.shape[0],58)),columns=omni2_df.columns)
            padding[:] = np.nan
            omni2_df = pd.concat((padding,omni2_df),ignore_index=True)
        omni2_tensor = torch.tensor(normalize(omni2_df.iloc[:, :57].fillna(0.0).values.astype(float), norm='l2'), dtype=torch.float32)
        omni2_mask = (~pd.isnull(omni2_df)).astype(int)
        omni2_mask_tensor = torch.tensor(omni2_mask.iloc[:, :57].values, dtype=torch.float32) 

        return static_input, density_tensor, density_df_mask_tensor, goes_tensor, goes_mask_tensor, omni2_tensor, omni2_mask_tensor,5#, self.timestamps[idx], idx



In [5]:
# -----------------------------------
# Positional Encoding for Sequences
# -----------------------------------
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=10000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len).unsqueeze(1)
        div_term = torch.exp(-torch.arange(0, d_model, 2) * math.log(10000.0) / d_model)
        pe[:, 0::2] = torch.sin(pos * div_term)
        pe[:, 1::2] = torch.cos(pos * div_term)
        self.pe = pe.unsqueeze(0)

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :].to(x.device)

# -----------------------------------
# STORMTransformer with Mask Handling
# -----------------------------------
class STORMTransformer(nn.Module):
    def __init__(self,
                 static_dim=9,
                 omni2_dim=57,
                 goes_dim=42,
                 d_model=128,
                 output_len=432,
                 nhead=4,
                 num_layers=2,
                 dropout=0.1):
        super().__init__()

        self.static_encoder = nn.Sequential(
            nn.Linear(static_dim, d_model),
            nn.ReLU(),
            nn.LayerNorm(d_model)
        )

        self.omni2_proj = nn.Linear(omni2_dim, d_model)
        self.goes_proj = nn.Linear(goes_dim, d_model)

        self.omni2_pos = PositionalEncoding(d_model)
        self.goes_pos = PositionalEncoding(d_model)

        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout, batch_first=True)
        self.omni2_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.goes_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.fusion = nn.Sequential(
            nn.Linear(d_model * 3, 256),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(256, output_len)
        )

    def forward(self, static_input, omni2_seq, goes_seq, omni2_mask=None, goes_mask=None):
        B = static_input.size(0)

        static_embed = self.static_encoder(static_input)

        omni2_embed = self.omni2_proj(omni2_seq)
        omni2_embed = self.omni2_pos(omni2_embed)
        omni2_key_mask = (~omni2_mask.bool()).any(dim=-1) if omni2_mask is not None else None
        omni2_out = self.omni2_encoder(omni2_embed, src_key_padding_mask=omni2_key_mask)
        omni2_summary = omni2_out.mean(dim=1)

        if goes_seq.shape[1] > 1024:
            step = goes_seq.shape[1] // 1024
            goes_seq = goes_seq[:, ::step, :]
            goes_mask = goes_mask[:, ::step, :] if goes_mask is not None else None
        global tester_mask
        global tester_seq
        tester_seq = goes_seq
        tester_mask = goes_mask
        #print(goes_seq,"\n",goes_mask)
        #print(goes_mask.sum())
        goes_embed = self.goes_proj(goes_seq)
        goes_embed = self.goes_pos(goes_embed)
        goes_key_mask = (~goes_mask.bool()).any(dim=-1) if goes_mask is not None else None
        global tester_key_mask
        global tester_embed
        tester_embed = goes_embed
        tester_key_mask = goes_key_mask
        goes_out = self.goes_encoder(goes_embed, src_key_padding_mask=goes_key_mask)
        goes_summary = goes_out.mean(dim=1)

        combined = torch.cat((static_embed, omni2_summary, goes_summary), dim=-1)
        return self.fusion(combined)

# -----------------------------------
# Masked MSE Loss
# -----------------------------------
def masked_mse_loss(preds, targets, mask, eps=1e-8):
    preds = torch.nan_to_num(preds, nan=0.0, posinf=1e3, neginf=0.0)
    targets = torch.nan_to_num(targets, nan=0.0, posinf=1e3, neginf=0.0)
    loss = (preds - targets) ** 2 * mask
    return loss.sum() / (mask.sum() + eps)

# -----------------------------------
# Full Training Loop with FullDataset
# -----------------------------------

        

In [6]:
def train_storm_transformer(initial_states_df, num_epochs=10, batch_size=8, lr=1e-4, device=None):
    # from full_dataset import FullDataset
    # from storm_transformer import STORMTransformer, masked_mse_loss

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    os.makedirs("checkpoints", exist_ok=True)

    # 🔀 Train/validation split
    train_df, val_df = train_test_split(initial_states_df, test_size=0.05, random_state=42)

    train_dataset = FullDataset(train_df)
    val_dataset = FullDataset(val_df)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    model = STORMTransformer().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    checkpoint_path = "checkpoints/storm_last.pt"
    best_model_path = "checkpoints/storm_best.pt"
    start_epoch = 0
    best_val_loss = float("inf")

    # 🔁 Resume support
    if os.path.exists(checkpoint_path):
        print(f"🔁 Resuming from checkpoint: {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint.get('epoch', 0)
        best_val_loss = checkpoint.get('val_loss', float("inf"))

    # 🚀 Training loop
    for epoch in range(start_epoch, num_epochs):
        model.train()
        total_train_loss = 0.0

        print(f"\n🚀 Epoch {epoch + 1}/{num_epochs}")
        for batch in tqdm(train_loader):
            static_input, density, density_mask, goes, goes_mask, omni2, omni2_mask, _ = batch

            static_input = static_input.to(device)
            density = density.to(device)
            density_mask = density_mask.to(device)
            goes = goes.to(device)
            goes_mask = goes_mask.to(device)
            omni2 = omni2.to(device)
            omni2_mask = omni2_mask.to(device)

            # if (omni2_mask.any(dim=-1).sum(dim=1) == 0).any() or (goes_mask.any(dim=-1).sum(dim=1) == 0).any():
            #     print("⚠️ Skipping batch with fully masked inputs")
            #     continue

            optimizer.zero_grad()
            preds = model(static_input, omni2, goes, omni2_mask, goes_mask)
            print (preds)
            loss = masked_mse_loss(preds, density, density_mask)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            total_train_loss += loss.item()

        avg_train_loss = total_train_loss / len(train_loader)

        # 🧪 Validation
        model.eval()
        total_val_loss = 0.0
        with torch.no_grad():
            for batch_idx, batch in enumerate(val_loader):
                static_input, density, density_mask, goes, goes_mask, omni2, omni2_mask, _ = batch

                static_input = static_input.to(device)
                density = density.to(device)
                density_mask = density_mask.to(device)
                goes = goes.to(device)
                goes_mask = goes_mask.to(device)
                omni2 = omni2.to(device)
                omni2_mask = omni2_mask.to(device)

                if (omni2_mask.any(dim=-1).sum(dim=1) == 0).any() or (goes_mask.any(dim=-1).sum(dim=1) == 0).any():
                    continue

                preds = model(static_input, omni2, goes, omni2_mask, goes_mask)
                loss = masked_mse_loss(preds, density, density_mask)
                total_val_loss += loss.item()

                # 🧠 Mask diagnostics
                goes_mask_sum = goes_mask.sum().item()
                omni2_mask_sum = omni2_mask.sum().item()
                density_mask_sum = density_mask.sum().item()

                print(f"🧪 Eval Batch {batch_idx+1}/{len(val_loader)} — "
                      f"OMNI2 Mask Sum: {omni2_mask_sum} | "
                      f"GOES Mask Sum: {goes_mask_sum} | "
                      f"Density Mask Sum: {density_mask_sum}")

                # ⚠️ Alert if any mask has < 10% coverage
                if omni2_mask_sum < 0.1 * omni2_mask.numel():
                    print("⚠️ Low OMNI2 coverage in this batch!")
                if goes_mask_sum < 0.1 * goes_mask.numel():
                    print("⚠️ Low GOES coverage in this batch!")
                if density_mask_sum < 0.1 * density_mask.numel():
                    print("⚠️ Low density mask coverage in this batch!")

        avg_val_loss = total_val_loss / len(val_loader)
        print(f"\n📊 Epoch {epoch+1}/{num_epochs} — "
              f"Train Loss: {avg_train_loss:.20f} | Val Loss: {avg_val_loss:.20f}")

        # 💾 Save full checkpoint
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_loss': avg_val_loss
        }, checkpoint_path)

        # 💎 Save best model
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), best_model_path)
            print("✅ Best model updated.")


In [None]:
train_storm_transformer(initial_states_normalized)


🚀 Epoch 1/10


  0%|          | 1/965 [00:01<28:14,  1.76s/it]

tensor([[-0.0359,  0.2330, -0.1040,  ...,  0.2040,  0.2030,  0.0286],
        [-0.2077,  0.2940, -0.1466,  ...,  0.2644,  0.0705,  0.1318],
        [-0.0200,  0.3347, -0.2198,  ...,  0.1540,  0.1337,  0.1093],
        ...,
        [-0.1639,  0.1841, -0.1524,  ...,  0.2383,  0.2127,  0.1916],
        [-0.0739,  0.2650, -0.1223,  ...,  0.2164,  0.1508,  0.0219],
        [ 0.0059,  0.0614, -0.1875,  ...,  0.1573,  0.0833,  0.0939]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  0%|          | 2/965 [00:03<27:59,  1.74s/it]

tensor([[-0.1404,  0.1686, -0.0758,  ...,  0.1756,  0.0217,  0.0082],
        [-0.0542,  0.1866, -0.1739,  ...,  0.1843,  0.0252, -0.0126],
        [-0.1307,  0.0871, -0.2445,  ...,  0.1416,  0.0530,  0.0011],
        ...,
        [-0.0622,  0.2543, -0.1952,  ...,  0.1878,  0.1099,  0.0053],
        [ 0.0244,  0.2647, -0.1403,  ...,  0.1631,  0.0372,  0.0942],
        [-0.0537,  0.2841, -0.1623,  ...,  0.1798, -0.0171, -0.0472]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  0%|          | 3/965 [00:05<28:05,  1.75s/it]

tensor([[-0.1042,  0.2317, -0.0934,  ...,  0.1254, -0.0698, -0.0847],
        [-0.0501,  0.1804, -0.0774,  ...,  0.1799,  0.0929, -0.0671],
        [-0.0794,  0.2711, -0.0537,  ...,  0.1366, -0.0040, -0.0912],
        ...,
        [-0.1264,  0.1777, -0.1344,  ...,  0.1391, -0.0293, -0.0629],
        [-0.1506,  0.2270, -0.0983,  ...,  0.1484, -0.0442, -0.0416],
        [-0.0466,  0.3084, -0.0940,  ...,  0.1342,  0.0700, -0.1749]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  0%|          | 4/965 [00:07<28:32,  1.78s/it]

tensor([[-0.0606,  0.1650, -0.0544,  ...,  0.1028, -0.0380, -0.0525],
        [ 0.0057,  0.1436, -0.0182,  ...,  0.0210,  0.0427, -0.0225],
        [ 0.0111,  0.2575, -0.1135,  ...,  0.0505,  0.0417, -0.1025],
        ...,
        [ 0.0341,  0.1149, -0.0036,  ...,  0.0868, -0.0390, -0.0485],
        [-0.0681,  0.1554, -0.0356,  ...,  0.0904,  0.0190, -0.1513],
        [-0.0974,  0.2320, -0.0622,  ...,  0.0367, -0.0880, -0.1007]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 5/965 [00:08<28:22,  1.77s/it]

tensor([[-0.0991,  0.2286, -0.0779,  ...,  0.0264, -0.0640, -0.1821],
        [-0.0433,  0.1651, -0.0426,  ...,  0.0709,  0.0301, -0.1386],
        [-0.1294,  0.1962, -0.0294,  ...,  0.0824, -0.0490, -0.0753],
        ...,
        [-0.0446,  0.1707, -0.0386,  ...,  0.0744, -0.0078, -0.1156],
        [-0.0687,  0.1841,  0.0114,  ...,  0.0171, -0.0940, -0.2588],
        [-0.0683,  0.1551, -0.0591,  ...,  0.0493, -0.0324, -0.1702]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 6/965 [00:10<29:02,  1.82s/it]

tensor([[ 0.0010,  0.1315,  0.0292,  ...,  0.0311, -0.0708, -0.1424],
        [-0.0320,  0.1684, -0.0175,  ...,  0.0714, -0.0350, -0.1656],
        [-0.0321,  0.1532, -0.0441,  ...,  0.0368, -0.0596, -0.2087],
        ...,
        [-0.0178,  0.1440,  0.0199,  ...,  0.0040, -0.0881, -0.1981],
        [-0.0164,  0.1086,  0.0028,  ...,  0.0454, -0.0567, -0.1690],
        [-0.1455,  0.2299, -0.0477,  ...,  0.1813,  0.0630, -0.1974]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 7/965 [00:12<28:32,  1.79s/it]

tensor([[-0.0340,  0.1129,  0.0147,  ..., -0.0072, -0.0584, -0.1051],
        [-0.0193,  0.1416,  0.0314,  ...,  0.0256, -0.0599, -0.1724],
        [-0.0508,  0.1062,  0.0186,  ...,  0.0324, -0.0380, -0.1724],
        ...,
        [-0.0346,  0.1322,  0.0064,  ...,  0.0547, -0.0770, -0.1424],
        [ 0.0130,  0.1847, -0.0051,  ...,  0.0707, -0.0493, -0.2384],
        [-0.0431,  0.1445, -0.0396,  ...,  0.0350, -0.0821, -0.1801]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 8/965 [00:14<28:25,  1.78s/it]

tensor([[ 0.0040,  0.1235,  0.0186,  ...,  0.0297, -0.0828, -0.1441],
        [-0.0017,  0.0995,  0.0312,  ...,  0.0438, -0.0817, -0.1543],
        [-0.0107,  0.1875, -0.0442,  ...,  0.0400, -0.0205, -0.1457],
        ...,
        [-0.0568,  0.1140, -0.0400,  ...,  0.0078, -0.0701, -0.1124],
        [ 0.0324,  0.0682,  0.0042,  ...,  0.0060, -0.0709, -0.1215],
        [ 0.0059,  0.0842,  0.0210,  ...,  0.0059, -0.0811, -0.1272]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 9/965 [00:16<29:02,  1.82s/it]

tensor([[ 0.0090,  0.1167,  0.0149,  ..., -0.0275, -0.1169, -0.0898],
        [-0.0101,  0.1061,  0.0063,  ...,  0.0534, -0.0221, -0.1353],
        [ 0.0746,  0.1060,  0.0223,  ...,  0.0475, -0.0602, -0.1241],
        ...,
        [-0.0123,  0.1104,  0.0078,  ..., -0.0085, -0.0864, -0.0816],
        [-0.0183,  0.0647, -0.0025,  ..., -0.0409, -0.0813, -0.1084],
        [-0.0130,  0.1286,  0.0184,  ...,  0.0058, -0.0751, -0.0845]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 10/965 [00:17<28:24,  1.78s/it]

tensor([[ 0.0145,  0.0982,  0.0257,  ..., -0.0281, -0.0883, -0.0526],
        [ 0.0105,  0.1035,  0.0080,  ...,  0.0043, -0.1096, -0.0633],
        [ 0.0095,  0.0530,  0.0069,  ..., -0.0154, -0.0871, -0.0796],
        ...,
        [-0.0161,  0.0783, -0.0218,  ..., -0.0086, -0.0941, -0.0863],
        [ 0.0439,  0.0403,  0.0508,  ..., -0.0441, -0.0812, -0.0303],
        [ 0.0370,  0.1436, -0.0040,  ...,  0.0221, -0.0810, -0.1090]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 11/965 [00:19<27:25,  1.73s/it]

tensor([[ 0.0186,  0.0789,  0.0403,  ..., -0.0101, -0.0855, -0.0589],
        [ 0.0147,  0.0421,  0.0363,  ..., -0.0127, -0.1003, -0.0232],
        [ 0.0361,  0.0360,  0.0124,  ..., -0.0359, -0.0968, -0.0827],
        ...,
        [ 0.0315,  0.0507,  0.0325,  ..., -0.0137, -0.0961, -0.0530],
        [ 0.0007,  0.0509,  0.0094,  ..., -0.0282, -0.1120, -0.0641],
        [ 0.0184,  0.0455,  0.0254,  ..., -0.0095, -0.0978, -0.0709]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|          | 12/965 [00:21<27:33,  1.74s/it]

tensor([[ 0.0220,  0.0452,  0.0297,  ..., -0.0352, -0.0804, -0.0380],
        [ 0.0265,  0.0412,  0.0255,  ..., -0.0271, -0.0667, -0.0353],
        [ 0.0862,  0.0428, -0.0225,  ...,  0.0272, -0.0365, -0.0911],
        ...,
        [ 0.0078,  0.0614,  0.0191,  ..., -0.0052, -0.0901, -0.0305],
        [ 0.0180,  0.0419,  0.0172,  ..., -0.0491, -0.0725, -0.0360],
        [-0.0092,  0.0235,  0.0310,  ..., -0.0253, -0.0793, -0.0244]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|▏         | 13/965 [00:23<29:08,  1.84s/it]

tensor([[ 0.0194,  0.0342,  0.0227,  ..., -0.0054, -0.0774, -0.0172],
        [-0.0029,  0.0432,  0.0173,  ..., -0.0252, -0.0745, -0.0167],
        [-0.0049,  0.0082,  0.0378,  ..., -0.0301, -0.0717, -0.0056],
        ...,
        [ 0.0115,  0.0453,  0.0381,  ..., -0.0273, -0.0801, -0.0084],
        [-0.0283,  0.0491, -0.0165,  ..., -0.0119, -0.0697, -0.0134],
        [ 0.0146,  0.0257,  0.0210,  ..., -0.0407, -0.0720, -0.0238]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  1%|▏         | 14/965 [00:25<30:46,  1.94s/it]

tensor([[ 0.0361,  0.0224,  0.0085,  ..., -0.0058, -0.0236, -0.0270],
        [ 0.0251,  0.0232,  0.0006,  ...,  0.0129, -0.0337, -0.0230],
        [ 0.0347,  0.0254,  0.0250,  ..., -0.0333, -0.0664, -0.0121],
        ...,
        [ 0.0006,  0.0380,  0.0304,  ..., -0.0243, -0.0673, -0.0082],
        [ 0.0025,  0.0388,  0.0068,  ..., -0.0348, -0.0626, -0.0111],
        [ 0.0012,  0.0197,  0.0119,  ..., -0.0184, -0.0787, -0.0184]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 15/965 [00:27<31:53,  2.01s/it]

tensor([[ 9.4634e-03,  4.4586e-02, -2.3023e-03,  ..., -2.0201e-02,
         -3.5764e-02,  1.0482e-02],
        [-1.0087e-02,  3.1041e-02, -1.5028e-02,  ..., -8.8982e-05,
         -6.1880e-02, -9.4101e-03],
        [ 2.2837e-02,  4.6546e-02, -3.6749e-04,  ..., -2.9909e-02,
         -4.3969e-02, -1.3499e-02],
        ...,
        [ 3.2140e-03,  2.6835e-02, -6.6248e-03,  ...,  6.9988e-03,
         -7.8751e-02, -4.1201e-03],
        [ 1.8185e-03,  1.9303e-02, -4.0860e-03,  ..., -3.5538e-02,
         -7.6686e-02, -8.6785e-03],
        [-1.4285e-02,  1.7164e-02,  1.9627e-03,  ..., -1.5313e-02,
         -6.5539e-02, -1.8452e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 16/965 [00:29<33:21,  2.11s/it]

tensor([[ 3.0965e-02,  1.7976e-02, -2.6896e-02,  ...,  8.0724e-03,
         -2.7831e-02, -4.9499e-03],
        [ 1.6068e-03,  2.5506e-02,  8.2062e-03,  ..., -2.3738e-02,
         -6.8389e-02,  5.0205e-03],
        [-7.6565e-03,  4.3186e-02, -9.8081e-03,  ..., -1.9796e-02,
         -8.2957e-02,  2.1072e-02],
        ...,
        [ 5.1033e-03,  2.9319e-02, -1.7960e-02,  ...,  7.1031e-03,
         -3.5889e-02,  8.4657e-05],
        [ 1.5598e-03,  3.0676e-02, -3.5414e-03,  ..., -1.9603e-02,
         -7.0680e-02,  1.6123e-02],
        [-7.2142e-03,  2.7493e-02, -5.2740e-03,  ..., -1.8675e-02,
         -6.4696e-02,  2.6697e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 17/965 [00:31<32:54,  2.08s/it]

tensor([[ 0.0174,  0.0213, -0.0092,  ..., -0.0182, -0.0225, -0.0101],
        [ 0.0210,  0.0408, -0.0336,  ..., -0.0142, -0.0350,  0.0063],
        [ 0.0073,  0.0406, -0.0040,  ..., -0.0192, -0.0612,  0.0122],
        ...,
        [-0.0070,  0.0375, -0.0109,  ..., -0.0224, -0.0686,  0.0235],
        [ 0.0087,  0.0255, -0.0209,  ..., -0.0098, -0.0457,  0.0076],
        [-0.0183,  0.0329, -0.0112,  ..., -0.0319, -0.0743,  0.0246]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 18/965 [00:33<32:20,  2.05s/it]

tensor([[ 0.0010,  0.0383, -0.0148,  ..., -0.0229, -0.0538,  0.0118],
        [-0.0283,  0.0490, -0.0110,  ..., -0.0289, -0.0629,  0.0326],
        [-0.0267,  0.0411, -0.0191,  ..., -0.0161, -0.0679,  0.0247],
        ...,
        [-0.0190,  0.0422, -0.0089,  ..., -0.0284, -0.0817,  0.0266],
        [-0.0211,  0.0338, -0.0127,  ..., -0.0232, -0.0842,  0.0280],
        [-0.0284,  0.0423, -0.0152,  ..., -0.0198, -0.0603,  0.0212]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 19/965 [00:36<33:57,  2.15s/it]

tensor([[-0.0262,  0.0441, -0.0232,  ..., -0.0303, -0.0674,  0.0300],
        [-0.0258,  0.0515, -0.0231,  ..., -0.0326, -0.0736,  0.0344],
        [-0.0061,  0.0318, -0.0232,  ..., -0.0177, -0.0422,  0.0115],
        ...,
        [-0.0229,  0.0424, -0.0188,  ..., -0.0343, -0.0716,  0.0291],
        [-0.0304,  0.0515, -0.0216,  ..., -0.0354, -0.0637,  0.0370],
        [-0.0089,  0.0157, -0.0369,  ..., -0.0183, -0.0361,  0.0252]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 20/965 [00:38<34:11,  2.17s/it]

tensor([[-0.0405,  0.0430, -0.0298,  ..., -0.0405, -0.0327,  0.0158],
        [-0.0394,  0.0323, -0.0318,  ..., -0.0136, -0.0375,  0.0309],
        [-0.0375,  0.0503, -0.0205,  ..., -0.0315, -0.0615,  0.0312],
        ...,
        [-0.0249,  0.0503, -0.0212,  ..., -0.0274, -0.0697,  0.0280],
        [-0.0355,  0.0522, -0.0127,  ..., -0.0302, -0.0694,  0.0308],
        [-0.0214,  0.0433, -0.0263,  ..., -0.0227, -0.0392,  0.0149]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 21/965 [00:40<31:27,  2.00s/it]

tensor([[-0.0407,  0.0462, -0.0061,  ..., -0.0351, -0.0675,  0.0249],
        [-0.0337,  0.0519, -0.0134,  ..., -0.0314, -0.0703,  0.0269],
        [-0.0244,  0.0198, -0.0485,  ..., -0.0136, -0.0457,  0.0148],
        ...,
        [-0.0358,  0.0432, -0.0067,  ..., -0.0326, -0.0669,  0.0237],
        [-0.0321,  0.0464, -0.0136,  ..., -0.0315, -0.0698,  0.0257],
        [-0.0377,  0.0465, -0.0120,  ..., -0.0362, -0.0685,  0.0251]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 22/965 [00:41<29:57,  1.91s/it]

tensor([[-0.0401,  0.0475, -0.0040,  ..., -0.0323, -0.0663,  0.0235],
        [-0.0342,  0.0284,  0.0068,  ..., -0.0271, -0.0478,  0.0098],
        [-0.0371,  0.0478, -0.0066,  ..., -0.0354, -0.0646,  0.0244],
        ...,
        [-0.0366,  0.0469, -0.0053,  ..., -0.0330, -0.0659,  0.0234],
        [-0.0182,  0.0270, -0.0349,  ..., -0.0284, -0.0311,  0.0124],
        [-0.0416,  0.0511, -0.0087,  ..., -0.0342, -0.0633,  0.0236]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 23/965 [00:44<31:17,  1.99s/it]

tensor([[-0.0267,  0.0484, -0.0323,  ..., -0.0258, -0.0595,  0.0222],
        [-0.0287,  0.0334, -0.0221,  ..., -0.0119, -0.0318,  0.0040],
        [-0.0235,  0.0534, -0.0318,  ..., -0.0402, -0.0670,  0.0207],
        ...,
        [-0.0387,  0.0467, -0.0034,  ..., -0.0292, -0.0679,  0.0233],
        [-0.0315,  0.0462,  0.0064,  ..., -0.0350, -0.0702,  0.0159],
        [-0.0403,  0.0426,  0.0043,  ..., -0.0262, -0.0698,  0.0157]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  2%|▏         | 24/965 [00:45<29:14,  1.86s/it]

tensor([[-0.0358,  0.0401,  0.0040,  ..., -0.0352, -0.0606,  0.0087],
        [-0.0287,  0.0411,  0.0151,  ..., -0.0397, -0.0532,  0.0135],
        [-0.0321,  0.0319,  0.0079,  ..., -0.0292, -0.0717,  0.0152],
        ...,
        [-0.0305,  0.0395,  0.0091,  ..., -0.0309, -0.0690,  0.0115],
        [-0.0386,  0.0443,  0.0003,  ..., -0.0301, -0.0632,  0.0198],
        [-0.0340,  0.0348,  0.0078,  ..., -0.0385, -0.0709,  0.0163]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 25/965 [00:47<28:51,  1.84s/it]

tensor([[-0.0336,  0.0404,  0.0023,  ..., -0.0220, -0.0689,  0.0196],
        [-0.0283,  0.0404,  0.0094,  ..., -0.0247, -0.0707,  0.0119],
        [-0.0385,  0.0345, -0.0115,  ..., -0.0004, -0.0634,  0.0215],
        ...,
        [-0.0393,  0.0474,  0.0035,  ..., -0.0264, -0.0678,  0.0170],
        [-0.0349,  0.0362,  0.0042,  ..., -0.0341, -0.0611,  0.0110],
        [-0.0126,  0.0400, -0.0341,  ..., -0.0242, -0.0389,  0.0047]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 26/965 [00:49<29:39,  1.90s/it]

tensor([[-3.4723e-02,  3.2700e-02, -1.6524e-02,  ...,  9.3708e-05,
         -6.4424e-02,  1.8821e-02],
        [-2.9056e-02,  2.9741e-02,  1.2014e-02,  ..., -3.2099e-02,
         -6.3412e-02,  2.4464e-03],
        [-3.1034e-02,  3.9270e-02,  9.6411e-03,  ..., -1.8426e-02,
         -6.7661e-02,  1.0296e-03],
        ...,
        [-3.8901e-02,  4.0349e-02, -1.9963e-02,  ..., -9.1468e-03,
         -6.9581e-02,  2.0191e-02],
        [-2.1819e-02,  4.3813e-02, -1.2455e-02,  ..., -2.9066e-02,
         -6.2774e-02,  2.4970e-02],
        [-3.3548e-02,  3.1180e-02,  4.8288e-03,  ..., -2.6284e-02,
         -7.3404e-02,  1.1020e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 27/965 [00:51<28:45,  1.84s/it]

tensor([[-0.0327,  0.0517, -0.0068,  ..., -0.0337, -0.0668,  0.0135],
        [-0.0256,  0.0302,  0.0167,  ..., -0.0197, -0.0613, -0.0033],
        [-0.0326,  0.0398,  0.0127,  ..., -0.0337, -0.0673,  0.0128],
        ...,
        [-0.0356,  0.0457,  0.0114,  ..., -0.0254, -0.0759,  0.0127],
        [-0.0292,  0.0343,  0.0150,  ..., -0.0316, -0.0669,  0.0056],
        [-0.0245,  0.0450,  0.0044,  ..., -0.0358, -0.0698,  0.0127]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 28/965 [00:52<28:38,  1.83s/it]

tensor([[-0.0240,  0.0270,  0.0145,  ..., -0.0274, -0.0574, -0.0070],
        [-0.0154,  0.0365,  0.0265,  ..., -0.0352, -0.0594,  0.0036],
        [-0.0273,  0.0357,  0.0054,  ..., -0.0270, -0.0638,  0.0048],
        ...,
        [-0.0242,  0.0254,  0.0144,  ..., -0.0262, -0.0643, -0.0061],
        [-0.0231,  0.0350,  0.0163,  ..., -0.0254, -0.0671,  0.0013],
        [-0.0174,  0.0368, -0.0183,  ..., -0.0124, -0.0590, -0.0036]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 29/965 [00:54<29:05,  1.86s/it]

tensor([[-0.0163,  0.0334,  0.0106,  ..., -0.0240, -0.0608,  0.0024],
        [-0.0195,  0.0344,  0.0059,  ..., -0.0280, -0.0696,  0.0030],
        [-0.0261,  0.0372,  0.0141,  ..., -0.0286, -0.0696,  0.0031],
        ...,
        [-0.0330,  0.0397, -0.0251,  ..., -0.0103, -0.0666,  0.0136],
        [-0.0215,  0.0361,  0.0085,  ..., -0.0232, -0.0673,  0.0023],
        [-0.0208,  0.0401, -0.0141,  ..., -0.0189, -0.0711,  0.0092]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 30/965 [00:56<29:37,  1.90s/it]

tensor([[-0.0296,  0.0391,  0.0080,  ..., -0.0311, -0.0679,  0.0131],
        [-0.0168,  0.0323,  0.0085,  ..., -0.0238, -0.0634,  0.0017],
        [-0.0198,  0.0370, -0.0218,  ..., -0.0145, -0.0720,  0.0038],
        ...,
        [-0.0158,  0.0243,  0.0102,  ..., -0.0228, -0.0592, -0.0015],
        [-0.0014,  0.0402, -0.0196,  ..., -0.0095, -0.0724,  0.0059],
        [-0.0356,  0.0257, -0.0009,  ..., -0.0078, -0.0584,  0.0013]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 31/965 [00:58<29:20,  1.88s/it]

tensor([[-0.0180,  0.0264,  0.0191,  ..., -0.0125, -0.0701,  0.0041],
        [-0.0229,  0.0270,  0.0126,  ..., -0.0176, -0.0651,  0.0082],
        [ 0.0199,  0.0585, -0.0148,  ..., -0.0244, -0.0517,  0.0031],
        ...,
        [-0.0125,  0.0266,  0.0120,  ..., -0.0172, -0.0588, -0.0050],
        [-0.0213,  0.0247,  0.0049,  ..., -0.0126, -0.0603, -0.0042],
        [-0.0163,  0.0401,  0.0018,  ..., -0.0176, -0.0628,  0.0091]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 32/965 [01:00<31:08,  2.00s/it]

tensor([[-1.3697e-02,  4.1779e-02, -6.2100e-03,  ..., -2.9372e-02,
         -5.8817e-02,  5.0763e-03],
        [ 3.3880e-04,  4.1346e-02, -1.5136e-02,  ..., -2.7826e-02,
         -5.7988e-02,  9.1158e-03],
        [-2.6067e-02,  4.2739e-02, -1.3111e-03,  ..., -1.1465e-02,
         -5.9128e-02,  7.3511e-03],
        ...,
        [-2.2224e-02,  3.8790e-02, -1.3736e-02,  ..., -1.3764e-02,
         -6.9259e-02,  7.9091e-03],
        [-1.3342e-02,  3.0642e-02,  5.7031e-03,  ..., -2.6848e-02,
         -6.0528e-02, -7.6380e-05],
        [-7.5566e-03,  3.9640e-02, -6.6422e-03,  ..., -3.6650e-02,
         -5.8678e-02,  1.8865e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


  3%|▎         | 33/965 [01:03<32:01,  2.06s/it]

tensor([[-0.0206,  0.0407, -0.0141,  ..., -0.0134, -0.0648,  0.0113],
        [-0.0121,  0.0287,  0.0118,  ..., -0.0167, -0.0717, -0.0002],
        [-0.0070,  0.0442, -0.0096,  ..., -0.0169, -0.0662,  0.0085],
        ...,
        [-0.0156,  0.0294,  0.0084,  ..., -0.0211, -0.0684,  0.0147],
        [-0.0146,  0.0297,  0.0057,  ..., -0.0174, -0.0682,  0.0082],
        [-0.0191,  0.0337,  0.0046,  ..., -0.0160, -0.0761,  0.0030]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▎         | 34/965 [01:04<30:07,  1.94s/it]

tensor([[-0.0086,  0.0294,  0.0171,  ..., -0.0209, -0.0702,  0.0051],
        [-0.0169,  0.0365,  0.0083,  ..., -0.0076, -0.0694,  0.0118],
        [-0.0116,  0.0296,  0.0123,  ..., -0.0241, -0.0751,  0.0012],
        ...,
        [-0.0160,  0.0292,  0.0025,  ..., -0.0054, -0.0633,  0.0024],
        [-0.0079,  0.0268,  0.0204,  ..., -0.0201, -0.0682, -0.0036],
        [-0.0075,  0.0384,  0.0110,  ..., -0.0270, -0.0642,  0.0071]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▎         | 35/965 [01:06<29:32,  1.91s/it]

tensor([[-0.0063,  0.0375,  0.0124,  ..., -0.0267, -0.0700,  0.0108],
        [-0.0129,  0.0360,  0.0136,  ..., -0.0231, -0.0802,  0.0112],
        [-0.0120,  0.0313,  0.0177,  ..., -0.0176, -0.0748,  0.0084],
        ...,
        [-0.0020,  0.0410,  0.0067,  ..., -0.0089, -0.0698,  0.0194],
        [-0.0200,  0.0315,  0.0001,  ..., -0.0105, -0.0809,  0.0042],
        [-0.0108,  0.0353,  0.0074,  ..., -0.0272, -0.0740,  0.0094]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▎         | 36/965 [01:08<29:19,  1.89s/it]

tensor([[-0.0203,  0.0471,  0.0119,  ..., -0.0279, -0.0711,  0.0071],
        [-0.0066,  0.0318,  0.0089,  ..., -0.0340, -0.0701,  0.0059],
        [-0.0065,  0.0416,  0.0102,  ..., -0.0362, -0.0659,  0.0192],
        ...,
        [-0.0141,  0.0372,  0.0087,  ..., -0.0309, -0.0818,  0.0103],
        [-0.0124,  0.0365, -0.0029,  ..., -0.0161, -0.0810,  0.0135],
        [-0.0102,  0.0334,  0.0074,  ..., -0.0238, -0.0676,  0.0032]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▍         | 37/965 [01:10<28:51,  1.87s/it]

tensor([[-0.0299,  0.0322,  0.0100,  ..., -0.0223, -0.0737,  0.0048],
        [-0.0089,  0.0323,  0.0108,  ..., -0.0038, -0.0840,  0.0147],
        [-0.0078,  0.0396, -0.0022,  ..., -0.0350, -0.0714,  0.0200],
        ...,
        [-0.0142,  0.0351,  0.0029,  ..., -0.0243, -0.0694,  0.0126],
        [-0.0065,  0.0275,  0.0009,  ..., -0.0073, -0.0767,  0.0046],
        [-0.0164,  0.0316, -0.0159,  ..., -0.0164, -0.0569,  0.0166]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▍         | 38/965 [01:12<28:34,  1.85s/it]

tensor([[-0.0182,  0.0385,  0.0085,  ..., -0.0176, -0.0676,  0.0146],
        [ 0.0054,  0.0511, -0.0214,  ..., -0.0219, -0.0671,  0.0159],
        [-0.0038,  0.0296,  0.0036,  ..., -0.0192, -0.0767,  0.0031],
        ...,
        [-0.0218,  0.0364, -0.0017,  ..., -0.0159, -0.0795,  0.0218],
        [-0.0143,  0.0255, -0.0057,  ..., -0.0186, -0.0753,  0.0071],
        [-0.0079,  0.0308,  0.0091,  ..., -0.0092, -0.0723,  0.0059]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▍         | 39/965 [01:13<27:21,  1.77s/it]

tensor([[-0.0092,  0.0324,  0.0022,  ..., -0.0172, -0.0825,  0.0112],
        [ 0.0020,  0.0282,  0.0072,  ..., -0.0079, -0.0793,  0.0061],
        [-0.0093,  0.0341,  0.0038,  ..., -0.0145, -0.0810,  0.0095],
        ...,
        [-0.0150,  0.0271,  0.0012,  ..., -0.0257, -0.0756,  0.0073],
        [-0.0104,  0.0292,  0.0012,  ..., -0.0065, -0.0689,  0.0024],
        [-0.0195,  0.0121,  0.0009,  ..., -0.0052, -0.0773, -0.0041]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▍         | 40/965 [01:15<29:19,  1.90s/it]

tensor([[-0.0016,  0.0392, -0.0006,  ..., -0.0262, -0.0680,  0.0110],
        [-0.0190,  0.0563, -0.0125,  ..., -0.0270, -0.0816,  0.0320],
        [-0.0027,  0.0478, -0.0112,  ..., -0.0110, -0.0697,  0.0200],
        ...,
        [-0.0042,  0.0256, -0.0040,  ..., -0.0108, -0.0802,  0.0054],
        [-0.0151,  0.0336,  0.0032,  ..., -0.0200, -0.0805,  0.0080],
        [ 0.0013,  0.0498, -0.0201,  ..., -0.0149, -0.0686,  0.0125]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▍         | 41/965 [01:17<28:12,  1.83s/it]

tensor([[-0.0052,  0.0364,  0.0020,  ..., -0.0185, -0.0755,  0.0086],
        [-0.0014,  0.0247,  0.0002,  ..., -0.0117, -0.0760,  0.0070],
        [ 0.0095,  0.0305, -0.0226,  ..., -0.0075, -0.0570,  0.0080],
        ...,
        [-0.0030,  0.0333,  0.0053,  ..., -0.0188, -0.0728,  0.0012],
        [-0.0092,  0.0314,  0.0063,  ..., -0.0083, -0.0754, -0.0033],
        [-0.0219,  0.0332, -0.0041,  ..., -0.0041, -0.0715,  0.0064]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▍         | 42/965 [01:19<27:58,  1.82s/it]

tensor([[-0.0064,  0.0415,  0.0006,  ..., -0.0214, -0.0721,  0.0102],
        [-0.0116,  0.0330, -0.0073,  ..., -0.0337, -0.0559, -0.0008],
        [-0.0085,  0.0349, -0.0115,  ..., -0.0116, -0.0681,  0.0037],
        ...,
        [-0.0046,  0.0359,  0.0006,  ..., -0.0160, -0.0724,  0.0093],
        [ 0.0007,  0.0305,  0.0073,  ..., -0.0026, -0.0742, -0.0010],
        [-0.0161,  0.0211,  0.0019,  ..., -0.0176, -0.0791,  0.0096]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  4%|▍         | 43/965 [01:21<28:15,  1.84s/it]

tensor([[-0.0264,  0.0515, -0.0269,  ..., -0.0091, -0.0888,  0.0199],
        [ 0.0015,  0.0275,  0.0019,  ..., -0.0126, -0.0798,  0.0050],
        [-0.0145,  0.0290,  0.0021,  ..., -0.0072, -0.0754, -0.0074],
        ...,
        [-0.0181,  0.0335,  0.0032,  ..., -0.0095, -0.0659, -0.0068],
        [-0.0031,  0.0417, -0.0080,  ..., -0.0170, -0.0678,  0.0059],
        [-0.0177,  0.0399, -0.0106,  ..., -0.0093, -0.0793,  0.0171]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▍         | 44/965 [01:22<26:54,  1.75s/it]

tensor([[-1.0486e-02,  3.2611e-02, -1.0262e-03,  ..., -7.9089e-03,
         -7.0222e-02,  6.0642e-03],
        [-3.5869e-03,  2.9849e-02,  3.4504e-03,  ...,  9.8782e-04,
         -5.7302e-02, -1.7205e-03],
        [-1.0429e-02,  3.7465e-02,  7.4931e-03,  ..., -1.3683e-02,
         -7.1570e-02,  2.8152e-03],
        ...,
        [ 1.1849e-03,  3.3901e-02,  1.6819e-03,  ..., -1.0837e-02,
         -7.2778e-02, -7.1852e-04],
        [-7.8160e-05,  3.1404e-02,  4.2114e-03,  ..., -1.7175e-02,
         -7.0455e-02, -1.9380e-03],
        [-1.0988e-02,  1.7353e-02, -2.9113e-04,  ...,  4.7188e-03,
         -5.3955e-02, -1.5168e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▍         | 45/965 [01:24<27:17,  1.78s/it]

tensor([[-0.0040,  0.0361,  0.0081,  ..., -0.0117, -0.0756,  0.0026],
        [-0.0066,  0.0175,  0.0286,  ..., -0.0154, -0.0587, -0.0179],
        [-0.0052,  0.0408, -0.0155,  ..., -0.0279, -0.0623,  0.0124],
        ...,
        [-0.0056,  0.0329, -0.0137,  ..., -0.0144, -0.0643, -0.0037],
        [-0.0085,  0.0245,  0.0029,  ..., -0.0140, -0.0687, -0.0072],
        [-0.0021,  0.0275,  0.0021,  ..., -0.0109, -0.0711, -0.0061]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▍         | 46/965 [01:26<28:22,  1.85s/it]

tensor([[-9.2849e-05,  2.7755e-02,  7.7451e-03,  ...,  5.9467e-03,
         -7.8856e-02, -5.7164e-04],
        [-6.5940e-03,  3.2457e-02,  9.2021e-04,  ..., -2.0621e-02,
         -5.5182e-02, -7.5521e-03],
        [-1.6034e-02,  3.1894e-02, -6.1544e-03,  ..., -3.4483e-02,
         -6.5439e-02,  8.8321e-03],
        ...,
        [-1.0604e-02,  3.0661e-02, -1.6674e-02,  ..., -8.8892e-04,
         -6.5621e-02, -9.3294e-03],
        [ 4.6274e-03,  3.6011e-02,  6.4379e-03,  ..., -1.0827e-02,
         -6.3244e-02, -1.2343e-03],
        [-9.6769e-03,  2.4786e-02,  1.3676e-02,  ...,  3.4882e-03,
         -7.3727e-02,  3.0863e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▍         | 47/965 [01:28<29:07,  1.90s/it]

tensor([[-1.6865e-02,  2.6880e-02,  1.0486e-02,  ...,  2.2611e-03,
         -7.9188e-02,  5.8291e-04],
        [-1.2787e-02,  4.0524e-02,  7.7988e-05,  ..., -1.2918e-02,
         -7.8979e-02,  1.4834e-02],
        [-4.4229e-03,  4.1045e-02,  2.7789e-03,  ..., -1.1309e-02,
         -8.0795e-02,  8.7917e-03],
        ...,
        [-9.1174e-03,  4.5906e-02, -2.7762e-02,  ..., -1.8633e-02,
         -6.5523e-02,  1.2209e-02],
        [-6.1932e-03,  3.3379e-02,  1.2706e-02,  ..., -1.2154e-02,
         -7.0932e-02, -6.3823e-03],
        [ 3.1302e-04,  3.0679e-02,  1.2905e-02,  ..., -1.7441e-02,
         -6.9675e-02,  3.3092e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▍         | 48/965 [01:30<29:46,  1.95s/it]

tensor([[-0.0027,  0.0166,  0.0006,  ..., -0.0080, -0.0623, -0.0062],
        [-0.0136,  0.0268, -0.0099,  ..., -0.0187, -0.0533,  0.0103],
        [ 0.0127,  0.0223,  0.0183,  ..., -0.0141, -0.0728, -0.0061],
        ...,
        [ 0.0080,  0.0239,  0.0119,  ..., -0.0096, -0.0711, -0.0042],
        [-0.0194,  0.0342,  0.0024,  ..., -0.0184, -0.0709, -0.0003],
        [ 0.0192,  0.0366, -0.0060,  ..., -0.0125, -0.0560,  0.0026]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▌         | 49/965 [01:32<29:11,  1.91s/it]

tensor([[ 0.0053,  0.0293,  0.0019,  ..., -0.0135, -0.0787,  0.0046],
        [ 0.0294,  0.0498, -0.0002,  ..., -0.0291, -0.0617,  0.0150],
        [-0.0050,  0.0280,  0.0141,  ..., -0.0049, -0.0778,  0.0002],
        ...,
        [ 0.0001,  0.0192,  0.0101,  ..., -0.0174, -0.0774, -0.0049],
        [ 0.0050,  0.0402, -0.0059,  ..., -0.0208, -0.0657,  0.0121],
        [ 0.0025,  0.0187,  0.0138,  ..., -0.0005, -0.0628, -0.0077]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▌         | 50/965 [01:34<27:39,  1.81s/it]

tensor([[ 6.3122e-03,  1.8195e-02,  1.5146e-03,  ..., -3.8939e-03,
         -7.3642e-02, -1.2076e-02],
        [ 1.2886e-02,  2.2410e-02,  1.9152e-02,  ..., -6.3577e-03,
         -7.5264e-02, -1.3137e-02],
        [ 1.9733e-02,  2.4707e-02,  1.9374e-02,  ..., -2.0999e-02,
         -6.0579e-02, -1.1869e-02],
        ...,
        [ 7.7322e-05,  2.3316e-02, -1.8288e-03,  ..., -4.9042e-03,
         -6.5486e-02, -2.7214e-03],
        [ 2.1402e-03,  2.7312e-02,  1.6147e-02,  ..., -1.6298e-02,
         -7.4798e-02,  8.8091e-03],
        [ 7.0884e-03,  2.2217e-02,  8.8822e-03,  ..., -1.2343e-02,
         -7.2296e-02, -1.1682e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▌         | 51/965 [01:35<27:26,  1.80s/it]

tensor([[ 0.0084,  0.0197,  0.0104,  ..., -0.0072, -0.0712, -0.0109],
        [ 0.0107,  0.0176,  0.0086,  ..., -0.0095, -0.0728, -0.0074],
        [ 0.0111,  0.0269,  0.0059,  ..., -0.0165, -0.0696,  0.0018],
        ...,
        [ 0.0058,  0.0210,  0.0074,  ..., -0.0075, -0.0731, -0.0061],
        [ 0.0031,  0.0223, -0.0003,  ..., -0.0098, -0.0748, -0.0003],
        [-0.0014,  0.0313, -0.0057,  ..., -0.0015, -0.0773,  0.0021]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▌         | 52/965 [01:37<28:17,  1.86s/it]

tensor([[ 0.0061,  0.0077,  0.0095,  ..., -0.0091, -0.0706, -0.0179],
        [-0.0184,  0.0322, -0.0008,  ..., -0.0161, -0.0716,  0.0053],
        [ 0.0229,  0.0456,  0.0047,  ..., -0.0261, -0.0634,  0.0114],
        ...,
        [ 0.0036,  0.0242, -0.0065,  ..., -0.0086, -0.0702,  0.0077],
        [ 0.0030,  0.0105,  0.0082,  ..., -0.0083, -0.0677, -0.0173],
        [ 0.0046,  0.0328, -0.0105,  ..., -0.0124, -0.0736,  0.0036]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  5%|▌         | 53/965 [01:40<29:48,  1.96s/it]

tensor([[ 0.0008,  0.0355, -0.0081,  ..., -0.0185, -0.0656,  0.0015],
        [ 0.0076,  0.0306,  0.0028,  ..., -0.0063, -0.0619,  0.0004],
        [ 0.0179,  0.0236,  0.0076,  ..., -0.0073, -0.0752, -0.0054],
        ...,
        [ 0.0129,  0.0266, -0.0184,  ...,  0.0029, -0.0575, -0.0062],
        [ 0.0002,  0.0440, -0.0009,  ..., -0.0107, -0.0814,  0.0082],
        [ 0.0020,  0.0219,  0.0024,  ..., -0.0032, -0.0746,  0.0035]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▌         | 54/965 [01:42<30:22,  2.00s/it]

tensor([[ 0.0126,  0.0319, -0.0055,  ..., -0.0060, -0.0493, -0.0059],
        [ 0.0255,  0.0113,  0.0139,  ..., -0.0017, -0.0619, -0.0226],
        [ 0.0268,  0.0551, -0.0173,  ..., -0.0282, -0.0503,  0.0067],
        ...,
        [ 0.0103,  0.0429, -0.0180,  ..., -0.0019, -0.0636,  0.0056],
        [ 0.0282,  0.0145,  0.0121,  ..., -0.0045, -0.0574, -0.0222],
        [ 0.0078,  0.0324,  0.0010,  ...,  0.0060, -0.0820, -0.0012]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▌         | 55/965 [01:44<30:16,  2.00s/it]

tensor([[ 0.0033,  0.0106,  0.0150,  ..., -0.0124, -0.0661, -0.0260],
        [ 0.0140,  0.0161,  0.0032,  ..., -0.0006, -0.0659, -0.0093],
        [ 0.0079,  0.0297, -0.0170,  ..., -0.0132, -0.0583,  0.0066],
        ...,
        [ 0.0089,  0.0145,  0.0049,  ..., -0.0130, -0.0621, -0.0139],
        [ 0.0077,  0.0323, -0.0123,  ..., -0.0048, -0.0731,  0.0027],
        [ 0.0169,  0.0344, -0.0150,  ..., -0.0159, -0.0683,  0.0026]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▌         | 56/965 [01:46<30:34,  2.02s/it]

tensor([[ 0.0139,  0.0294,  0.0074,  ..., -0.0167, -0.0653, -0.0070],
        [ 0.0224,  0.0129, -0.0050,  ..., -0.0012, -0.0543, -0.0196],
        [ 0.0085,  0.0218,  0.0017,  ..., -0.0137, -0.0724,  0.0040],
        ...,
        [ 0.0038,  0.0201, -0.0060,  ..., -0.0027, -0.0629, -0.0028],
        [ 0.0193,  0.0372, -0.0174,  ..., -0.0168, -0.0611,  0.0044],
        [ 0.0162,  0.0215, -0.0045,  ...,  0.0022, -0.0671, -0.0094]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▌         | 57/965 [01:48<31:37,  2.09s/it]

tensor([[ 0.0183,  0.0261,  0.0014,  ..., -0.0069, -0.0640, -0.0075],
        [ 0.0002,  0.0223, -0.0104,  ..., -0.0046, -0.0587,  0.0016],
        [-0.0021,  0.0184, -0.0043,  ..., -0.0041, -0.0572,  0.0100],
        ...,
        [ 0.0041,  0.0356, -0.0021,  ..., -0.0011, -0.0675, -0.0018],
        [ 0.0026,  0.0230,  0.0085,  ..., -0.0027, -0.0676, -0.0122],
        [ 0.0123,  0.0211,  0.0018,  ...,  0.0020, -0.0668, -0.0132]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▌         | 58/965 [01:50<29:25,  1.95s/it]

tensor([[ 0.0111,  0.0192, -0.0029,  ...,  0.0018, -0.0658, -0.0052],
        [ 0.0078,  0.0241, -0.0010,  ..., -0.0068, -0.0696, -0.0002],
        [ 0.0133,  0.0160,  0.0068,  ...,  0.0021, -0.0624, -0.0063],
        ...,
        [ 0.0016,  0.0265, -0.0045,  ...,  0.0071, -0.0719, -0.0066],
        [ 0.0051,  0.0238,  0.0018,  ..., -0.0058, -0.0683, -0.0071],
        [-0.0021,  0.0192, -0.0032,  ..., -0.0205, -0.0800, -0.0094]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▌         | 59/965 [01:52<28:53,  1.91s/it]

tensor([[ 0.0018,  0.0382, -0.0133,  ..., -0.0058, -0.0721,  0.0077],
        [ 0.0027,  0.0277,  0.0105,  ..., -0.0004, -0.0691,  0.0035],
        [ 0.0019,  0.0281, -0.0089,  ..., -0.0066, -0.0724,  0.0059],
        ...,
        [ 0.0123,  0.0156, -0.0148,  ..., -0.0033, -0.0677,  0.0159],
        [ 0.0083,  0.0230,  0.0050,  ...,  0.0016, -0.0664, -0.0065],
        [ 0.0135,  0.0306,  0.0011,  ..., -0.0076, -0.0643,  0.0042]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▌         | 60/965 [01:53<28:18,  1.88s/it]

tensor([[ 0.0116,  0.0235,  0.0040,  ..., -0.0078, -0.0673,  0.0044],
        [-0.0104,  0.0198,  0.0127,  ...,  0.0005, -0.0763, -0.0113],
        [ 0.0167,  0.0264, -0.0039,  ..., -0.0153, -0.0648,  0.0092],
        ...,
        [ 0.0117,  0.0166,  0.0002,  ...,  0.0022, -0.0794,  0.0010],
        [ 0.0175,  0.0053,  0.0029,  ...,  0.0020, -0.0522,  0.0024],
        [-0.0081,  0.0003,  0.0080,  ...,  0.0002, -0.0690, -0.0216]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▋         | 61/965 [01:55<27:59,  1.86s/it]

tensor([[ 0.0041,  0.0144, -0.0069,  ..., -0.0189, -0.0577,  0.0093],
        [-0.0035,  0.0332,  0.0098,  ..., -0.0194, -0.0564, -0.0082],
        [ 0.0007,  0.0278,  0.0040,  ..., -0.0094, -0.0705,  0.0099],
        ...,
        [ 0.0158,  0.0116,  0.0159,  ..., -0.0039, -0.0632, -0.0070],
        [ 0.0044,  0.0233,  0.0024,  ..., -0.0211, -0.0730,  0.0043],
        [ 0.0057,  0.0164,  0.0026,  ...,  0.0168, -0.0752, -0.0134]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  6%|▋         | 62/965 [01:57<28:07,  1.87s/it]

tensor([[ 0.0044,  0.0247,  0.0019,  ..., -0.0052, -0.0764,  0.0052],
        [-0.0128,  0.0426, -0.0054,  ..., -0.0202, -0.0604,  0.0151],
        [ 0.0025,  0.0165, -0.0060,  ...,  0.0102, -0.0786, -0.0056],
        ...,
        [ 0.0104,  0.0258,  0.0056,  ..., -0.0130, -0.0681,  0.0077],
        [ 0.0029,  0.0201,  0.0078,  ..., -0.0197, -0.0642, -0.0062],
        [ 0.0059,  0.0302, -0.0103,  ..., -0.0097, -0.0540,  0.0301]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 63/965 [01:59<26:59,  1.80s/it]

tensor([[ 0.0010,  0.0132, -0.0007,  ..., -0.0073, -0.0591,  0.0012],
        [ 0.0120,  0.0127,  0.0106,  ..., -0.0046, -0.0742, -0.0022],
        [ 0.0237,  0.0168,  0.0100,  ..., -0.0035, -0.0496, -0.0082],
        ...,
        [ 0.0022,  0.0167,  0.0069,  ..., -0.0102, -0.0728,  0.0074],
        [ 0.0078,  0.0048, -0.0030,  ...,  0.0038, -0.0694,  0.0012],
        [ 0.0044,  0.0127,  0.0169,  ..., -0.0073, -0.0723,  0.0032]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 64/965 [02:01<27:46,  1.85s/it]

tensor([[ 7.0718e-03,  3.6041e-02,  4.0267e-03,  ..., -1.8513e-02,
         -6.8433e-02,  1.3433e-02],
        [ 2.3816e-02,  3.3763e-02, -7.7910e-03,  ...,  8.3282e-03,
         -6.0163e-02,  7.3000e-03],
        [ 1.8911e-02,  2.1707e-02,  5.7520e-03,  ..., -1.1074e-02,
         -6.6281e-02,  1.2482e-03],
        ...,
        [ 9.5407e-03,  7.0487e-03,  5.2311e-05,  ...,  1.5419e-03,
         -7.1467e-02, -1.8096e-02],
        [ 4.8691e-03,  1.9516e-02, -4.6028e-03,  ..., -6.2174e-03,
         -7.0836e-02,  5.4803e-04],
        [ 1.1622e-02,  1.3170e-02,  3.9095e-03,  ...,  2.1094e-03,
         -7.7442e-02, -3.1943e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 65/965 [02:02<26:58,  1.80s/it]

tensor([[ 4.1290e-03,  2.9535e-02, -9.8885e-03,  ..., -9.9124e-03,
         -5.4583e-02,  2.4897e-02],
        [ 1.7952e-02,  5.7000e-03, -6.5898e-03,  ...,  6.7552e-03,
         -7.5861e-02, -1.0021e-02],
        [-6.1559e-04,  8.9726e-03,  9.2156e-03,  ..., -4.4486e-03,
         -5.8447e-02,  4.4288e-05],
        ...,
        [ 1.4007e-03,  2.0683e-02,  5.8283e-03,  ...,  6.9186e-03,
         -6.0460e-02, -1.4882e-03],
        [ 4.5847e-03,  2.4287e-02, -1.5866e-03,  ..., -2.6579e-03,
         -7.1186e-02,  6.1020e-03],
        [ 2.3295e-02,  2.2772e-02,  1.5505e-02,  ..., -6.3719e-03,
         -6.2062e-02, -2.5933e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 66/965 [02:04<26:53,  1.80s/it]

tensor([[-0.0077,  0.0103, -0.0055,  ..., -0.0073, -0.0736, -0.0041],
        [-0.0095,  0.0185, -0.0022,  ..., -0.0060, -0.0560,  0.0158],
        [-0.0021,  0.0058, -0.0032,  ..., -0.0071, -0.0706, -0.0070],
        ...,
        [ 0.0134,  0.0103,  0.0152,  ..., -0.0264, -0.0647, -0.0196],
        [ 0.0386,  0.0051,  0.0164,  ...,  0.0164, -0.0584, -0.0114],
        [ 0.0104,  0.0163,  0.0067,  ...,  0.0030, -0.0710, -0.0036]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 67/965 [02:06<27:17,  1.82s/it]

tensor([[ 0.0197,  0.0123, -0.0174,  ...,  0.0019, -0.0638, -0.0043],
        [ 0.0225,  0.0153, -0.0094,  ..., -0.0093, -0.0726, -0.0084],
        [ 0.0229,  0.0122, -0.0056,  ..., -0.0095, -0.0467, -0.0079],
        ...,
        [ 0.0261,  0.0232, -0.0071,  ...,  0.0138, -0.0676,  0.0008],
        [ 0.0265,  0.0088,  0.0059,  ...,  0.0028, -0.0622, -0.0162],
        [ 0.0232,  0.0319,  0.0012,  ..., -0.0109, -0.0709,  0.0005]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 68/965 [02:08<26:36,  1.78s/it]

tensor([[ 0.0226,  0.0155, -0.0028,  ...,  0.0091, -0.0639, -0.0082],
        [ 0.0172,  0.0068, -0.0059,  ...,  0.0184, -0.0634, -0.0151],
        [ 0.0150,  0.0150,  0.0111,  ...,  0.0126, -0.0502, -0.0063],
        ...,
        [ 0.0154,  0.0148, -0.0054,  ...,  0.0127, -0.0592, -0.0012],
        [-0.0134, -0.0139,  0.0173,  ...,  0.0044, -0.0503, -0.0044],
        [ 0.0043,  0.0198, -0.0040,  ...,  0.0074, -0.0707,  0.0057]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 69/965 [02:10<27:08,  1.82s/it]

tensor([[ 0.0078,  0.0164,  0.0056,  ..., -0.0127, -0.0399, -0.0090],
        [ 0.0126,  0.0173, -0.0008,  ..., -0.0065, -0.0594, -0.0108],
        [ 0.0079,  0.0274, -0.0101,  ...,  0.0098, -0.0664,  0.0085],
        ...,
        [ 0.0146,  0.0290,  0.0071,  ..., -0.0021, -0.0605,  0.0079],
        [ 0.0233,  0.0257, -0.0219,  ..., -0.0143, -0.0369,  0.0025],
        [ 0.0080,  0.0304, -0.0096,  ..., -0.0067, -0.0576,  0.0168]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 70/965 [02:11<25:48,  1.73s/it]

tensor([[ 0.0033,  0.0153,  0.0007,  ...,  0.0072, -0.0618, -0.0038],
        [ 0.0248,  0.0143,  0.0080,  ...,  0.0129, -0.0624, -0.0107],
        [ 0.0024,  0.0111,  0.0069,  ...,  0.0097, -0.0540, -0.0066],
        ...,
        [ 0.0220,  0.0222,  0.0148,  ..., -0.0034, -0.0398,  0.0002],
        [ 0.0175,  0.0130,  0.0053,  ...,  0.0067, -0.0687, -0.0061],
        [ 0.0167,  0.0157,  0.0048,  ...,  0.0058, -0.0643, -0.0016]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 71/965 [02:13<26:33,  1.78s/it]

tensor([[-0.0014,  0.0140, -0.0006,  ...,  0.0114, -0.0555,  0.0264],
        [ 0.0189,  0.0164,  0.0105,  ...,  0.0044, -0.0682, -0.0028],
        [ 0.0066,  0.0176,  0.0088,  ..., -0.0146, -0.0618, -0.0085],
        ...,
        [ 0.0080,  0.0022,  0.0017,  ...,  0.0047, -0.0626, -0.0108],
        [ 0.0174,  0.0185,  0.0003,  ...,  0.0068, -0.0611, -0.0056],
        [-0.0071,  0.0332, -0.0089,  ...,  0.0073, -0.0501,  0.0124]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  7%|▋         | 72/965 [02:15<26:44,  1.80s/it]

tensor([[-0.0070,  0.0256, -0.0025,  ...,  0.0047, -0.0499,  0.0174],
        [ 0.0092,  0.0168, -0.0108,  ..., -0.0017, -0.0594,  0.0071],
        [ 0.0082,  0.0148,  0.0026,  ...,  0.0095, -0.0497, -0.0029],
        ...,
        [ 0.0223,  0.0117,  0.0068,  ...,  0.0123, -0.0702,  0.0015],
        [ 0.0036,  0.0227, -0.0080,  ...,  0.0067, -0.0659,  0.0136],
        [ 0.0145,  0.0202,  0.0061,  ...,  0.0010, -0.0684,  0.0013]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 73/965 [02:17<27:47,  1.87s/it]

tensor([[ 0.0054,  0.0085,  0.0105,  ...,  0.0077, -0.0655, -0.0122],
        [-0.0029, -0.0019, -0.0156,  ...,  0.0031, -0.0644, -0.0117],
        [ 0.0012,  0.0324, -0.0071,  ..., -0.0145, -0.0512,  0.0313],
        ...,
        [ 0.0209,  0.0088, -0.0128,  ...,  0.0103, -0.0302, -0.0022],
        [ 0.0103,  0.0088,  0.0042,  ..., -0.0182, -0.0671, -0.0100],
        [ 0.0123,  0.0195,  0.0054,  ..., -0.0012, -0.0360, -0.0022]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 74/965 [02:19<28:40,  1.93s/it]

tensor([[ 0.0166,  0.0224, -0.0114,  ...,  0.0053, -0.0661, -0.0065],
        [ 0.0020, -0.0023,  0.0080,  ...,  0.0014, -0.0743, -0.0191],
        [ 0.0147,  0.0148, -0.0039,  ...,  0.0021, -0.0595,  0.0115],
        ...,
        [ 0.0095,  0.0148, -0.0025,  ..., -0.0079, -0.0599,  0.0028],
        [ 0.0153, -0.0013, -0.0061,  ..., -0.0019, -0.0576, -0.0118],
        [ 0.0011,  0.0208, -0.0095,  ..., -0.0011, -0.0687,  0.0180]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 75/965 [02:21<27:40,  1.87s/it]

tensor([[ 5.2079e-03,  1.2141e-02,  6.0481e-05,  ...,  6.2064e-03,
         -7.2519e-02, -6.2557e-03],
        [ 8.0836e-03,  1.4079e-02, -6.5811e-03,  ...,  5.8210e-03,
         -7.1500e-02, -4.0325e-03],
        [ 7.4222e-03, -5.8291e-03,  4.3251e-03,  ...,  1.1131e-02,
         -6.0605e-02, -3.5888e-04],
        ...,
        [-1.5102e-02,  9.8977e-03, -9.7155e-03,  ..., -8.3722e-03,
         -6.4630e-02, -3.2131e-03],
        [ 7.0619e-03,  1.7596e-02, -1.4513e-02,  ..., -2.5878e-03,
         -6.6787e-02,  5.9209e-03],
        [-6.6611e-03,  9.2225e-03, -6.8179e-03,  ...,  2.5210e-04,
         -6.5865e-02,  7.7894e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 76/965 [02:23<28:46,  1.94s/it]

tensor([[ 1.3639e-02,  2.0237e-02,  2.2637e-04,  ..., -7.1400e-03,
         -6.5459e-02,  2.2262e-03],
        [-7.7049e-04,  1.1195e-03, -2.2154e-03,  ...,  7.4777e-03,
         -6.8818e-02, -1.6620e-02],
        [ 8.3273e-03,  1.1170e-03,  8.0731e-03,  ...,  2.4122e-03,
         -5.6736e-02, -2.5962e-02],
        ...,
        [ 1.3284e-02,  1.3893e-03, -6.5560e-03,  ...,  5.4591e-03,
         -5.0913e-02,  9.8534e-06],
        [ 2.2099e-03,  1.7257e-02, -8.2459e-03,  ...,  3.9810e-03,
         -6.7161e-02,  2.1378e-03],
        [-7.8521e-03,  2.2463e-03,  1.1114e-02,  ...,  1.8370e-02,
         -4.7513e-02,  4.0215e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 77/965 [02:25<28:34,  1.93s/it]

tensor([[ 7.5134e-03, -2.2005e-02,  7.5877e-05,  ..., -1.8421e-03,
         -7.0405e-02, -3.2568e-02],
        [ 2.4067e-02, -1.2542e-02, -2.4004e-02,  ...,  2.7625e-02,
         -5.6678e-02,  7.2816e-03],
        [ 1.3713e-02, -4.5285e-04,  1.0340e-02,  ..., -9.1378e-03,
         -7.5401e-02, -7.5383e-03],
        ...,
        [-8.2448e-03, -1.1910e-02, -8.3293e-03,  ...,  4.8023e-03,
         -5.7505e-02,  6.7382e-03],
        [ 1.9280e-02, -6.1466e-03,  2.7110e-03,  ...,  1.0417e-02,
         -6.0555e-02, -2.5650e-02],
        [ 2.4342e-03, -3.6757e-03,  1.5938e-02,  ..., -3.8590e-03,
         -6.4433e-02, -8.0459e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 78/965 [02:27<28:30,  1.93s/it]

tensor([[ 0.0226,  0.0130, -0.0138,  ..., -0.0021, -0.0572, -0.0045],
        [ 0.0133,  0.0034, -0.0093,  ..., -0.0020, -0.0668, -0.0045],
        [-0.0006, -0.0087,  0.0013,  ...,  0.0297, -0.0558, -0.0002],
        ...,
        [ 0.0113,  0.0056, -0.0022,  ...,  0.0028, -0.0632, -0.0023],
        [-0.0004,  0.0107, -0.0015,  ...,  0.0130, -0.0385, -0.0045],
        [ 0.0126,  0.0082,  0.0073,  ...,  0.0036, -0.0676, -0.0028]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 79/965 [02:28<27:24,  1.86s/it]

tensor([[ 0.0103,  0.0125,  0.0018,  ...,  0.0048, -0.0509, -0.0069],
        [ 0.0140,  0.0088,  0.0041,  ...,  0.0053, -0.0624, -0.0010],
        [ 0.0139, -0.0078,  0.0139,  ..., -0.0037, -0.0497, -0.0151],
        ...,
        [ 0.0005,  0.0187, -0.0175,  ...,  0.0127, -0.0835,  0.0094],
        [ 0.0063,  0.0131,  0.0032,  ...,  0.0038, -0.0593, -0.0034],
        [ 0.0060,  0.0060,  0.0051,  ...,  0.0093, -0.0621, -0.0037]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 80/965 [02:30<27:59,  1.90s/it]

tensor([[ 0.0136, -0.0115,  0.0114,  ...,  0.0188, -0.0438, -0.0195],
        [ 0.0158,  0.0088,  0.0017,  ...,  0.0115, -0.0614, -0.0077],
        [ 0.0168,  0.0084, -0.0118,  ...,  0.0174, -0.0410,  0.0087],
        ...,
        [ 0.0018,  0.0155,  0.0093,  ...,  0.0104, -0.0492, -0.0075],
        [ 0.0319,  0.0102,  0.0239,  ...,  0.0174, -0.0536, -0.0080],
        [ 0.0260,  0.0106,  0.0035,  ..., -0.0038, -0.0457, -0.0099]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 81/965 [02:32<27:49,  1.89s/it]

tensor([[-2.4161e-03,  2.2530e-02, -7.6563e-03,  ...,  1.1039e-02,
         -4.8320e-02,  1.7530e-02],
        [ 1.2565e-02,  1.2533e-02,  4.9715e-03,  ...,  6.8825e-03,
         -5.9754e-02, -1.5965e-03],
        [ 5.6916e-03,  1.4332e-02, -1.1463e-02,  ...,  2.0953e-03,
         -5.3230e-02,  1.1560e-02],
        ...,
        [ 6.8304e-03,  4.3974e-03, -2.6169e-03,  ...,  7.4765e-05,
         -6.4840e-02, -1.1163e-02],
        [ 1.2273e-02,  1.5736e-02, -8.5871e-04,  ...,  3.1709e-03,
         -5.7495e-02,  5.2395e-03],
        [-2.6354e-03,  9.8714e-03,  4.7827e-04,  ...,  5.6727e-03,
         -6.3009e-02,  4.2953e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  8%|▊         | 82/965 [02:34<27:59,  1.90s/it]

tensor([[ 0.0021,  0.0094, -0.0306,  ...,  0.0073, -0.0508,  0.0063],
        [ 0.0108,  0.0315, -0.0014,  ..., -0.0018, -0.0592,  0.0160],
        [ 0.0101, -0.0020,  0.0078,  ...,  0.0165, -0.0631, -0.0163],
        ...,
        [ 0.0116,  0.0281,  0.0019,  ..., -0.0043, -0.0523,  0.0145],
        [ 0.0243,  0.0229,  0.0063,  ..., -0.0070, -0.0655, -0.0008],
        [ 0.0073,  0.0109, -0.0037,  ..., -0.0037, -0.0669, -0.0072]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▊         | 83/965 [02:36<27:17,  1.86s/it]

tensor([[ 0.0186,  0.0161, -0.0052,  ..., -0.0021, -0.0546, -0.0097],
        [ 0.0272, -0.0019,  0.0067,  ...,  0.0180, -0.0521, -0.0186],
        [ 0.0189,  0.0099,  0.0009,  ...,  0.0069, -0.0611, -0.0119],
        ...,
        [ 0.0194,  0.0081,  0.0113,  ..., -0.0071, -0.0629, -0.0073],
        [ 0.0020,  0.0140, -0.0032,  ..., -0.0022, -0.0402, -0.0064],
        [ 0.0060,  0.0057,  0.0039,  ..., -0.0047, -0.0623, -0.0042]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▊         | 84/965 [02:38<27:07,  1.85s/it]

tensor([[ 0.0172,  0.0277,  0.0020,  ..., -0.0037, -0.0728,  0.0158],
        [ 0.0111,  0.0118,  0.0108,  ...,  0.0075, -0.0716,  0.0166],
        [ 0.0205,  0.0215, -0.0039,  ...,  0.0071, -0.0574, -0.0013],
        ...,
        [ 0.0140,  0.0232, -0.0079,  ..., -0.0009, -0.0551, -0.0042],
        [ 0.0356,  0.0127, -0.0051,  ..., -0.0017, -0.0705, -0.0009],
        [ 0.0044,  0.0089, -0.0080,  ...,  0.0075, -0.0623,  0.0071]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▉         | 85/965 [02:39<26:51,  1.83s/it]

tensor([[ 0.0072,  0.0210, -0.0119,  ...,  0.0004, -0.0680, -0.0009],
        [ 0.0204,  0.0085,  0.0106,  ...,  0.0034, -0.0649, -0.0070],
        [ 0.0121,  0.0039, -0.0008,  ...,  0.0093, -0.0525, -0.0071],
        ...,
        [ 0.0169,  0.0136, -0.0139,  ...,  0.0081, -0.0618, -0.0049],
        [ 0.0187,  0.0011, -0.0265,  ...,  0.0246, -0.0518,  0.0067],
        [ 0.0217,  0.0229, -0.0031,  ...,  0.0004, -0.0590, -0.0011]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▉         | 86/965 [02:41<25:51,  1.77s/it]

tensor([[ 0.0157,  0.0169,  0.0074,  ...,  0.0070, -0.0630, -0.0037],
        [ 0.0224,  0.0081,  0.0011,  ...,  0.0025, -0.0584, -0.0094],
        [ 0.0199,  0.0076,  0.0032,  ...,  0.0051, -0.0621, -0.0128],
        ...,
        [ 0.0185,  0.0187,  0.0007,  ..., -0.0065, -0.0600, -0.0052],
        [ 0.0209,  0.0067,  0.0030,  ...,  0.0044, -0.0653, -0.0069],
        [ 0.0121,  0.0233, -0.0093,  ...,  0.0061, -0.0654,  0.0010]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▉         | 87/965 [02:43<26:16,  1.80s/it]

tensor([[ 0.0102,  0.0009,  0.0013,  ...,  0.0047, -0.0562, -0.0064],
        [ 0.0087,  0.0125, -0.0083,  ...,  0.0002, -0.0706,  0.0111],
        [ 0.0091,  0.0091, -0.0049,  ..., -0.0121, -0.0679,  0.0003],
        ...,
        [ 0.0067,  0.0065, -0.0006,  ...,  0.0149, -0.0560,  0.0083],
        [ 0.0184,  0.0096,  0.0072,  ..., -0.0114, -0.0606,  0.0020],
        [ 0.0016,  0.0181, -0.0110,  ...,  0.0145, -0.0612,  0.0126]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▉         | 88/965 [02:45<27:32,  1.88s/it]

tensor([[ 0.0084, -0.0029,  0.0011,  ...,  0.0077, -0.0480,  0.0133],
        [ 0.0094,  0.0096, -0.0050,  ..., -0.0026, -0.0499, -0.0091],
        [ 0.0233,  0.0183,  0.0103,  ..., -0.0066, -0.0512,  0.0058],
        ...,
        [ 0.0184,  0.0031,  0.0020,  ...,  0.0085, -0.0595, -0.0078],
        [-0.0037,  0.0093, -0.0062,  ...,  0.0079, -0.0671,  0.0016],
        [ 0.0101,  0.0051,  0.0105,  ...,  0.0085, -0.0562, -0.0017]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▉         | 89/965 [02:47<27:50,  1.91s/it]

tensor([[-1.7249e-02,  3.3434e-02, -4.9134e-03,  ...,  1.6347e-02,
         -6.2910e-02,  6.4081e-03],
        [-3.4129e-03, -4.2633e-03, -1.0401e-02,  ...,  1.1542e-02,
         -5.4324e-02, -9.6715e-03],
        [-7.7144e-03,  1.9421e-02, -1.3118e-02,  ...,  4.3730e-03,
         -5.5782e-02,  7.4856e-03],
        ...,
        [ 1.4031e-02,  1.4951e-02, -3.1527e-03,  ...,  1.9576e-03,
         -5.5828e-02,  8.9731e-05],
        [ 1.1318e-02,  1.1741e-03,  1.3534e-03,  ...,  4.2223e-03,
         -5.2536e-02, -9.1094e-03],
        [ 1.2443e-02,  1.3724e-02,  3.1433e-03,  ..., -1.7008e-03,
         -5.5929e-02, -2.5088e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▉         | 90/965 [02:49<28:14,  1.94s/it]

tensor([[ 0.0066, -0.0025,  0.0031,  ...,  0.0125, -0.0523, -0.0043],
        [ 0.0075,  0.0169, -0.0050,  ..., -0.0011, -0.0578,  0.0027],
        [ 0.0108,  0.0195,  0.0057,  ...,  0.0018, -0.0541, -0.0017],
        ...,
        [ 0.0055,  0.0161, -0.0009,  ..., -0.0135, -0.0428, -0.0089],
        [ 0.0026,  0.0136,  0.0051,  ...,  0.0119, -0.0438,  0.0050],
        [ 0.0010,  0.0086, -0.0070,  ...,  0.0279, -0.0386, -0.0209]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


  9%|▉         | 91/965 [02:51<28:45,  1.97s/it]

tensor([[ 0.0169, -0.0024,  0.0205,  ...,  0.0148, -0.0354, -0.0142],
        [-0.0041,  0.0039,  0.0034,  ...,  0.0282, -0.0495,  0.0130],
        [ 0.0158,  0.0035,  0.0062,  ...,  0.0037, -0.0550, -0.0064],
        ...,
        [-0.0178, -0.0084, -0.0185,  ...,  0.0202, -0.0133,  0.0162],
        [ 0.0071, -0.0142,  0.0055,  ..., -0.0084, -0.0584, -0.0200],
        [ 0.0205,  0.0064, -0.0150,  ...,  0.0070, -0.0473, -0.0047]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|▉         | 92/965 [02:53<29:39,  2.04s/it]

tensor([[ 0.0089, -0.0081, -0.0080,  ...,  0.0253, -0.0343, -0.0187],
        [-0.0060, -0.0033,  0.0012,  ...,  0.0094, -0.0491,  0.0031],
        [ 0.0208,  0.0078,  0.0100,  ...,  0.0119, -0.0424, -0.0268],
        ...,
        [ 0.0037,  0.0079,  0.0049,  ..., -0.0046, -0.0548,  0.0020],
        [-0.0095,  0.0098,  0.0021,  ..., -0.0152, -0.0403,  0.0155],
        [-0.0014, -0.0077,  0.0101,  ...,  0.0142, -0.0390, -0.0150]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|▉         | 93/965 [02:55<28:41,  1.97s/it]

tensor([[ 0.0089, -0.0123, -0.0007,  ...,  0.0022, -0.0540, -0.0309],
        [ 0.0140,  0.0070,  0.0115,  ...,  0.0095, -0.0482, -0.0068],
        [ 0.0166,  0.0050,  0.0057,  ...,  0.0062, -0.0555, -0.0145],
        ...,
        [ 0.0080,  0.0159, -0.0082,  ..., -0.0041, -0.0466,  0.0040],
        [-0.0002,  0.0123,  0.0091,  ..., -0.0091, -0.0616, -0.0031],
        [ 0.0190,  0.0058,  0.0094,  ...,  0.0063, -0.0351, -0.0059]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|▉         | 94/965 [02:57<27:27,  1.89s/it]

tensor([[ 0.0076,  0.0171, -0.0043,  ...,  0.0023, -0.0616,  0.0157],
        [ 0.0069,  0.0080, -0.0008,  ..., -0.0162, -0.0554, -0.0041],
        [ 0.0110,  0.0118,  0.0032,  ...,  0.0014, -0.0500,  0.0087],
        ...,
        [ 0.0188,  0.0124,  0.0025,  ...,  0.0022, -0.0572, -0.0045],
        [ 0.0091,  0.0211,  0.0047,  ..., -0.0049, -0.0439, -0.0050],
        [ 0.0040,  0.0026,  0.0151,  ...,  0.0074, -0.0558, -0.0147]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|▉         | 95/965 [02:58<26:10,  1.81s/it]

tensor([[ 0.0068,  0.0035,  0.0017,  ...,  0.0038, -0.0518, -0.0032],
        [-0.0019,  0.0076, -0.0149,  ..., -0.0003, -0.0368,  0.0009],
        [ 0.0015, -0.0016, -0.0131,  ...,  0.0077, -0.0507, -0.0038],
        ...,
        [ 0.0182,  0.0077,  0.0031,  ...,  0.0097, -0.0590, -0.0027],
        [ 0.0023, -0.0023, -0.0049,  ...,  0.0111, -0.0486, -0.0219],
        [ 0.0232, -0.0024,  0.0024,  ...,  0.0058, -0.0581, -0.0085]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|▉         | 96/965 [03:00<25:49,  1.78s/it]

tensor([[ 1.3625e-02,  6.7643e-03, -5.4276e-03,  ..., -9.0153e-04,
         -5.6785e-02, -2.8473e-03],
        [-1.6500e-03,  9.2197e-03, -6.4128e-03,  ..., -2.1018e-03,
         -5.4399e-02, -1.7020e-03],
        [ 5.4009e-05, -1.2417e-02,  1.4496e-03,  ...,  1.8525e-02,
         -4.2220e-02,  8.1627e-03],
        ...,
        [ 2.2578e-02, -6.4170e-03,  3.1032e-03,  ...,  4.8934e-04,
         -4.9079e-02,  1.8390e-03],
        [ 1.1780e-02, -4.8866e-03, -4.8327e-03,  ...,  1.0472e-02,
         -4.3066e-02, -1.2197e-02],
        [ 1.1074e-02,  3.5857e-03, -3.8926e-03,  ...,  4.5885e-03,
         -5.4614e-02, -1.2975e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|█         | 97/965 [03:02<25:04,  1.73s/it]

tensor([[ 3.0066e-03,  1.3943e-02,  3.3471e-03,  ..., -2.6628e-02,
         -4.3707e-02, -6.9014e-04],
        [ 5.6273e-03,  4.5669e-03, -7.4831e-05,  ...,  1.2052e-02,
         -5.0613e-02, -6.8507e-03],
        [ 3.8778e-03, -5.3806e-03, -8.9798e-04,  ...,  7.5417e-03,
         -4.6599e-02, -1.2013e-02],
        ...,
        [ 1.2888e-03,  1.4975e-03, -8.6971e-03,  ...,  1.4821e-02,
         -5.5587e-02, -1.1684e-03],
        [-5.4918e-03, -5.4980e-04,  1.9066e-02,  ...,  6.7813e-03,
         -4.3786e-02, -2.0590e-02],
        [ 8.2226e-03,  1.2235e-02,  5.9042e-03,  ...,  1.8699e-03,
         -5.0194e-02,  1.7377e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|█         | 98/965 [03:04<26:13,  1.82s/it]

tensor([[ 0.0021,  0.0094,  0.0017,  ...,  0.0060, -0.0547, -0.0045],
        [-0.0061,  0.0132, -0.0157,  ..., -0.0019, -0.0524,  0.0061],
        [ 0.0161,  0.0225, -0.0048,  ..., -0.0059, -0.0324,  0.0121],
        ...,
        [ 0.0138,  0.0050, -0.0053,  ..., -0.0180, -0.0630,  0.0010],
        [-0.0099,  0.0136, -0.0116,  ..., -0.0146, -0.0486,  0.0079],
        [-0.0042,  0.0154, -0.0017,  ..., -0.0067, -0.0248,  0.0092]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|█         | 99/965 [03:06<27:08,  1.88s/it]

tensor([[-0.0027,  0.0048, -0.0185,  ...,  0.0246, -0.0564, -0.0133],
        [ 0.0128, -0.0011,  0.0148,  ...,  0.0015, -0.0455, -0.0047],
        [-0.0075,  0.0382, -0.0296,  ..., -0.0115, -0.0446,  0.0229],
        ...,
        [ 0.0159, -0.0163,  0.0113,  ...,  0.0048, -0.0311, -0.0119],
        [-0.0029,  0.0001, -0.0179,  ...,  0.0161, -0.0453,  0.0035],
        [-0.0078,  0.0025, -0.0124,  ..., -0.0046, -0.0492,  0.0019]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|█         | 100/965 [03:07<26:09,  1.81s/it]

tensor([[ 0.0788, -0.1167, -0.0760,  ...,  0.1211, -0.0396,  0.0596],
        [ 0.0303,  0.0115,  0.0201,  ...,  0.0110, -0.0266,  0.0059],
        [ 0.0182, -0.0120,  0.0049,  ...,  0.0177, -0.0487, -0.0055],
        ...,
        [ 0.0065, -0.0174, -0.0078,  ..., -0.0024, -0.0352, -0.0104],
        [ 0.0130,  0.0068,  0.0002,  ..., -0.0075, -0.0405,  0.0090],
        [ 0.0131,  0.0002,  0.0074,  ...,  0.0192, -0.0436, -0.0043]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 10%|█         | 101/965 [03:09<25:55,  1.80s/it]

tensor([[ 1.2817e-02,  8.2671e-03, -1.2332e-02,  ...,  2.0543e-02,
         -6.5212e-02,  5.6496e-04],
        [-5.6009e-03,  1.3548e-02, -3.6326e-03,  ...,  4.4288e-03,
         -5.3299e-02,  2.0299e-02],
        [ 2.0343e-02, -3.0169e-03, -1.7729e-03,  ...,  7.1669e-03,
         -5.3659e-02, -1.4035e-03],
        ...,
        [ 4.7361e-03,  4.0175e-03,  6.8197e-05,  ...,  4.1065e-03,
         -5.9152e-02,  6.8126e-03],
        [ 8.4423e-03,  1.3791e-02, -7.8769e-03,  ..., -2.1410e-03,
         -4.4720e-02,  6.5073e-03],
        [ 1.8289e-02,  1.2473e-02, -5.1447e-03,  ..., -8.4601e-03,
         -5.0709e-02,  7.0947e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█         | 102/965 [03:11<25:44,  1.79s/it]

tensor([[ 6.6782e-03, -1.8468e-03, -6.4524e-03,  ..., -4.1732e-03,
         -6.6442e-02, -2.2363e-03],
        [ 3.8315e-03,  1.3286e-02,  1.9044e-05,  ..., -1.2714e-02,
         -5.7630e-02,  9.4972e-03],
        [ 1.8214e-02,  1.3081e-02,  2.7339e-03,  ..., -5.1958e-03,
         -4.1951e-02,  6.9721e-03],
        ...,
        [ 5.9072e-03,  4.0638e-03, -9.2681e-03,  ..., -2.6744e-03,
         -3.7269e-02,  7.9000e-03],
        [ 7.5899e-03,  4.9260e-03,  2.0896e-02,  ...,  2.1351e-02,
         -5.0344e-02, -1.8198e-02],
        [ 1.4052e-02,  1.9281e-02, -1.8951e-02,  ..., -1.7753e-03,
         -4.3667e-02,  1.0767e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█         | 103/965 [03:13<26:50,  1.87s/it]

tensor([[ 1.2706e-02, -1.7978e-03, -6.6939e-03,  ..., -1.6858e-03,
         -6.5250e-02, -7.5402e-04],
        [-2.2556e-03,  1.0622e-02, -1.3643e-03,  ...,  7.1329e-03,
         -4.8776e-02, -8.0570e-03],
        [ 2.1250e-02,  1.5314e-02, -4.8847e-03,  ..., -3.4115e-03,
         -5.0343e-02, -5.1103e-03],
        ...,
        [ 1.5120e-02,  1.3901e-02, -4.3706e-03,  ...,  3.6627e-03,
         -4.3777e-02,  8.5858e-04],
        [-1.9918e-03,  1.2381e-02,  1.4527e-03,  ...,  1.3384e-02,
         -4.6966e-02,  5.7384e-05],
        [-1.5811e-02,  1.9167e-02, -7.3675e-03,  ...,  6.6581e-03,
         -3.6283e-02, -7.7554e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█         | 104/965 [03:15<26:02,  1.81s/it]

tensor([[ 0.0203, -0.0077,  0.0044,  ...,  0.0118, -0.0367, -0.0161],
        [ 0.0098, -0.0103,  0.0044,  ..., -0.0063, -0.0599,  0.0006],
        [ 0.0039,  0.0034, -0.0015,  ..., -0.0071, -0.0450, -0.0135],
        ...,
        [ 0.0055, -0.0003,  0.0014,  ...,  0.0079, -0.0429, -0.0040],
        [ 0.0059,  0.0054, -0.0050,  ...,  0.0068, -0.0556, -0.0002],
        [ 0.0171,  0.0066, -0.0027,  ...,  0.0208, -0.0545, -0.0105]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█         | 105/965 [03:17<26:35,  1.86s/it]

tensor([[ 0.0157,  0.0005,  0.0035,  ...,  0.0047, -0.0464, -0.0056],
        [-0.0028,  0.0138, -0.0072,  ...,  0.0141, -0.0591, -0.0026],
        [-0.0003, -0.0187,  0.0025,  ...,  0.0038, -0.0378, -0.0201],
        ...,
        [ 0.0106,  0.0139, -0.0167,  ...,  0.0043, -0.0289, -0.0110],
        [-0.0370, -0.0180, -0.0168,  ...,  0.0271, -0.0495,  0.0118],
        [ 0.0078,  0.0082, -0.0151,  ...,  0.0067, -0.0512, -0.0020]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█         | 106/965 [03:19<26:41,  1.86s/it]

tensor([[ 1.2916e-02, -5.7987e-03, -1.4955e-02,  ...,  8.1906e-03,
         -4.1420e-02,  3.4642e-03],
        [ 4.2977e-03, -9.3835e-04,  5.9001e-05,  ..., -3.3629e-03,
         -5.4796e-02, -2.6445e-04],
        [ 7.5765e-03, -6.0429e-03, -5.4006e-03,  ..., -9.7771e-03,
         -5.8234e-02, -1.1729e-02],
        ...,
        [ 1.2402e-02,  4.7885e-03, -2.2513e-02,  ...,  1.0123e-02,
         -2.1925e-02,  1.6103e-02],
        [ 8.6552e-03,  7.8502e-04,  1.0765e-02,  ...,  1.5544e-02,
         -3.4853e-02, -7.3348e-03],
        [ 9.9272e-03, -7.6760e-03, -1.1791e-03,  ..., -1.4607e-03,
         -5.4837e-02, -1.8850e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█         | 107/965 [03:20<26:44,  1.87s/it]

tensor([[ 0.0178, -0.0173, -0.0010,  ...,  0.0048, -0.0324, -0.0158],
        [ 0.0109, -0.0091,  0.0086,  ...,  0.0128, -0.0387, -0.0107],
        [-0.0044,  0.0129, -0.0059,  ..., -0.0032, -0.0593,  0.0049],
        ...,
        [ 0.0118, -0.0004,  0.0054,  ...,  0.0044, -0.0260,  0.0040],
        [-0.0055, -0.0013,  0.0011,  ...,  0.0191, -0.0570, -0.0051],
        [ 0.0052, -0.0100, -0.0020,  ...,  0.0182, -0.0525, -0.0112]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█         | 108/965 [03:22<26:43,  1.87s/it]

tensor([[ 0.0163,  0.0092,  0.0117,  ..., -0.0031, -0.0327, -0.0011],
        [ 0.0064,  0.0177, -0.0212,  ..., -0.0076, -0.0526,  0.0157],
        [ 0.0037,  0.0041,  0.0086,  ...,  0.0066, -0.0373,  0.0007],
        ...,
        [ 0.0124, -0.0023,  0.0219,  ...,  0.0116, -0.0367,  0.0044],
        [ 0.0078, -0.0135, -0.0024,  ...,  0.0088, -0.0338, -0.0170],
        [ 0.0074,  0.0093,  0.0192,  ..., -0.0008, -0.0338,  0.0144]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█▏        | 109/965 [03:24<25:43,  1.80s/it]

tensor([[ 0.0189,  0.0017, -0.0088,  ..., -0.0093, -0.0422,  0.0023],
        [-0.0066, -0.0105, -0.0023,  ...,  0.0269, -0.0513, -0.0044],
        [-0.0061,  0.0187, -0.0002,  ..., -0.0186, -0.0524,  0.0064],
        ...,
        [ 0.0103, -0.0053,  0.0155,  ..., -0.0016, -0.0309, -0.0082],
        [ 0.0002, -0.0119,  0.0116,  ...,  0.0057, -0.0454,  0.0032],
        [ 0.0147, -0.0091,  0.0025,  ..., -0.0021, -0.0459,  0.0002]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 11%|█▏        | 110/965 [03:26<25:05,  1.76s/it]

tensor([[-0.0013,  0.0125, -0.0014,  ..., -0.0093, -0.0424,  0.0175],
        [ 0.0091,  0.0009,  0.0053,  ...,  0.0005, -0.0454,  0.0009],
        [ 0.0095,  0.0124,  0.0035,  ..., -0.0015, -0.0447,  0.0137],
        ...,
        [ 0.0097,  0.0114,  0.0015,  ..., -0.0073, -0.0426, -0.0018],
        [ 0.0017, -0.0097,  0.0015,  ..., -0.0099, -0.0570,  0.0041],
        [ 0.0005, -0.0153,  0.0072,  ...,  0.0061, -0.0283, -0.0230]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 111/965 [03:28<26:27,  1.86s/it]

tensor([[ 0.0018, -0.0210,  0.0054,  ...,  0.0034, -0.0245, -0.0020],
        [ 0.0114,  0.0052,  0.0075,  ..., -0.0073, -0.0464, -0.0019],
        [-0.0156, -0.0116,  0.0075,  ...,  0.0106, -0.0294,  0.0051],
        ...,
        [ 0.0042,  0.0061, -0.0018,  ..., -0.0055, -0.0452,  0.0012],
        [-0.0111,  0.0133,  0.0099,  ..., -0.0304, -0.0593,  0.0067],
        [-0.0049,  0.0089,  0.0064,  ...,  0.0081, -0.0427,  0.0143]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 112/965 [03:29<25:04,  1.76s/it]

tensor([[ 1.4789e-02,  4.1831e-03, -9.1335e-03,  ..., -9.7241e-03,
         -4.7116e-02, -8.8183e-05],
        [ 1.7880e-02, -9.9847e-03, -3.3669e-03,  ..., -9.4156e-03,
         -4.5597e-02, -1.6004e-03],
        [-2.1321e-03,  3.0139e-03,  9.4618e-03,  ...,  8.9975e-03,
         -2.9082e-02, -2.1982e-03],
        ...,
        [ 8.8577e-03,  7.4060e-03,  3.1794e-03,  ..., -5.3027e-03,
         -3.2179e-02, -2.1671e-04],
        [-6.3979e-03, -3.7345e-03,  3.7736e-03,  ...,  1.3364e-03,
         -5.2389e-02, -1.7382e-02],
        [-1.6899e-02, -1.6803e-02,  1.2691e-02,  ...,  1.1607e-02,
         -3.9130e-02, -2.1700e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 113/965 [03:31<25:17,  1.78s/it]

tensor([[ 1.9859e-02, -4.4750e-03, -7.1563e-05,  ..., -2.2743e-03,
         -4.3250e-02, -5.5676e-03],
        [-2.4439e-03,  2.5959e-03, -4.2017e-03,  ..., -1.8070e-03,
         -3.4120e-02,  1.2778e-02],
        [ 6.9936e-03,  2.3337e-03,  9.5964e-03,  ...,  1.7373e-04,
         -4.8421e-02, -1.1331e-03],
        ...,
        [-9.6229e-03, -2.6360e-02,  1.5064e-02,  ...,  5.2539e-03,
         -3.7323e-02, -1.1431e-02],
        [ 1.0019e-02,  3.5709e-03, -7.5138e-03,  ...,  6.8233e-03,
         -3.5413e-02, -3.1052e-03],
        [-1.9711e-02,  1.4382e-02, -9.8200e-03,  ..., -3.8873e-03,
         -6.5939e-02,  2.5025e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 114/965 [03:33<26:38,  1.88s/it]

tensor([[ 1.2927e-02, -4.4334e-03, -4.9581e-04,  ...,  4.2351e-03,
         -5.0084e-02, -6.4656e-03],
        [ 9.0260e-03, -2.9984e-03,  3.5559e-03,  ...,  9.6160e-03,
         -4.6573e-02, -4.8563e-03],
        [ 5.4041e-03, -9.1355e-04,  5.1182e-03,  ...,  1.1987e-03,
         -2.8549e-02, -2.7447e-03],
        ...,
        [-1.1463e-02, -5.7817e-03, -2.0529e-03,  ..., -5.7606e-03,
         -5.0734e-02,  1.0057e-02],
        [ 2.2497e-02, -4.3913e-03, -4.8476e-03,  ...,  8.5513e-03,
         -3.6512e-02, -1.5948e-03],
        [ 7.5291e-03,  6.6576e-03, -6.6372e-03,  ...,  5.2679e-03,
         -4.7035e-02,  5.6496e-05]], device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 115/965 [03:35<26:45,  1.89s/it]

tensor([[ 0.0266,  0.0004,  0.0040,  ..., -0.0053, -0.0468,  0.0002],
        [ 0.0134, -0.0073, -0.0032,  ..., -0.0026, -0.0526,  0.0027],
        [-0.0017,  0.0157,  0.0106,  ..., -0.0050, -0.0259,  0.0029],
        ...,
        [ 0.0183,  0.0018,  0.0048,  ...,  0.0192, -0.0487, -0.0153],
        [-0.0003,  0.0059, -0.0047,  ..., -0.0025, -0.0639, -0.0042],
        [ 0.0154, -0.0016,  0.0088,  ...,  0.0192, -0.0311, -0.0065]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 116/965 [03:37<27:53,  1.97s/it]

tensor([[ 0.0130, -0.0032, -0.0054,  ...,  0.0188, -0.0487, -0.0165],
        [-0.0051, -0.0220,  0.0105,  ...,  0.0036, -0.0460, -0.0203],
        [ 0.0269,  0.0146, -0.0014,  ..., -0.0030, -0.0447, -0.0037],
        ...,
        [ 0.0103, -0.0114, -0.0034,  ...,  0.0198, -0.0328, -0.0041],
        [ 0.0072, -0.0139, -0.0052,  ...,  0.0097, -0.0581, -0.0161],
        [ 0.0122, -0.0075, -0.0017,  ...,  0.0118, -0.0300,  0.0026]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 117/965 [03:39<28:10,  1.99s/it]

tensor([[ 0.0165, -0.0064, -0.0005,  ...,  0.0073, -0.0431, -0.0037],
        [ 0.0099, -0.0124,  0.0092,  ..., -0.0013, -0.0457, -0.0183],
        [ 0.0112, -0.0077,  0.0059,  ...,  0.0099, -0.0329, -0.0166],
        ...,
        [ 0.0162, -0.0139,  0.0068,  ...,  0.0169, -0.0389,  0.0006],
        [-0.0028, -0.0037, -0.0191,  ...,  0.0227, -0.0396,  0.0111],
        [ 0.0225,  0.0106, -0.0077,  ...,  0.0106, -0.0514, -0.0038]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 118/965 [03:41<28:42,  2.03s/it]

tensor([[ 0.0129, -0.0134, -0.0218,  ...,  0.0222, -0.0340, -0.0199],
        [ 0.0089,  0.0111,  0.0073,  ...,  0.0038, -0.0356,  0.0014],
        [ 0.0175, -0.0039,  0.0016,  ..., -0.0015, -0.0458, -0.0097],
        ...,
        [-0.0048,  0.0212,  0.0034,  ..., -0.0060, -0.0284,  0.0130],
        [ 0.0097,  0.0079,  0.0020,  ...,  0.0102, -0.0432,  0.0026],
        [ 0.0071, -0.0105,  0.0061,  ...,  0.0149, -0.0464, -0.0148]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 119/965 [03:44<29:26,  2.09s/it]

tensor([[ 0.0144,  0.0139,  0.0043,  ..., -0.0029, -0.0421, -0.0012],
        [ 0.0146,  0.0045,  0.0098,  ...,  0.0035, -0.0411, -0.0098],
        [ 0.0174,  0.0041, -0.0053,  ..., -0.0041, -0.0220, -0.0082],
        ...,
        [ 0.0026, -0.0195, -0.0076,  ...,  0.0076, -0.0372, -0.0129],
        [-0.0048,  0.0088, -0.0018,  ...,  0.0077, -0.0482, -0.0005],
        [-0.0098, -0.0169,  0.0131,  ...,  0.0112, -0.0404, -0.0198]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 12%|█▏        | 120/965 [03:46<29:26,  2.09s/it]

tensor([[ 0.0130,  0.0088, -0.0037,  ...,  0.0134, -0.0478,  0.0117],
        [-0.0085,  0.0082, -0.0128,  ...,  0.0169, -0.0404,  0.0099],
        [ 0.0039,  0.0023,  0.0126,  ...,  0.0013, -0.0461,  0.0025],
        ...,
        [ 0.0173, -0.0037, -0.0035,  ..., -0.0155, -0.0474, -0.0078],
        [-0.0107, -0.0027,  0.0277,  ...,  0.0135, -0.0491, -0.0151],
        [ 0.0207,  0.0003,  0.0081,  ...,  0.0121, -0.0415, -0.0061]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 121/965 [03:47<27:56,  1.99s/it]

tensor([[ 0.0097,  0.0064,  0.0100,  ...,  0.0129, -0.0458, -0.0071],
        [ 0.0032,  0.0027,  0.0083,  ...,  0.0059, -0.0432, -0.0084],
        [ 0.0199, -0.0046,  0.0034,  ..., -0.0009, -0.0397, -0.0066],
        ...,
        [-0.0029,  0.0054, -0.0010,  ..., -0.0082, -0.0530,  0.0161],
        [ 0.0101,  0.0326, -0.0015,  ..., -0.0145, -0.0329,  0.0094],
        [ 0.0083,  0.0029,  0.0085,  ...,  0.0005, -0.0392, -0.0041]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 122/965 [03:50<29:03,  2.07s/it]

tensor([[-0.0043, -0.0106,  0.0042,  ..., -0.0036, -0.0498, -0.0051],
        [-0.0108, -0.0265,  0.0073,  ...,  0.0226, -0.0231, -0.0143],
        [ 0.0117, -0.0067,  0.0083,  ...,  0.0106, -0.0434, -0.0020],
        ...,
        [ 0.0088,  0.0021,  0.0106,  ..., -0.0045, -0.0364, -0.0074],
        [-0.0002, -0.0066, -0.0067,  ...,  0.0134, -0.0451,  0.0009],
        [-0.0008, -0.0008,  0.0101,  ..., -0.0137, -0.0431, -0.0130]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 123/965 [03:52<29:10,  2.08s/it]

tensor([[-0.0030,  0.0319, -0.0032,  ..., -0.0457, -0.0304,  0.0294],
        [ 0.0139,  0.0210, -0.0089,  ..., -0.0167, -0.0450,  0.0146],
        [ 0.0065, -0.0056,  0.0228,  ...,  0.0026, -0.0334,  0.0077],
        ...,
        [ 0.0048, -0.0007, -0.0242,  ..., -0.0041, -0.0503,  0.0042],
        [ 0.0185, -0.0099, -0.0027,  ..., -0.0154, -0.0343, -0.0038],
        [-0.0012,  0.0090, -0.0218,  ..., -0.0035, -0.0333,  0.0110]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 124/965 [03:54<28:19,  2.02s/it]

tensor([[ 0.0066, -0.0063,  0.0003,  ..., -0.0071, -0.0406, -0.0028],
        [ 0.0092, -0.0103, -0.0027,  ...,  0.0043, -0.0255, -0.0061],
        [ 0.0140,  0.0021,  0.0006,  ..., -0.0081, -0.0451,  0.0012],
        ...,
        [-0.0016,  0.0049, -0.0053,  ..., -0.0172, -0.0380,  0.0018],
        [ 0.0021, -0.0127,  0.0054,  ..., -0.0023, -0.0056,  0.0034],
        [ 0.0169, -0.0097, -0.0013,  ...,  0.0013, -0.0493, -0.0041]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 125/965 [03:56<28:25,  2.03s/it]

tensor([[ 0.0187, -0.0113,  0.0046,  ..., -0.0008, -0.0368, -0.0100],
        [ 0.1003, -0.1113,  0.0017,  ...,  0.1744, -0.0325, -0.0027],
        [ 0.0022,  0.0145, -0.0096,  ..., -0.0049, -0.0345,  0.0112],
        ...,
        [-0.0084, -0.0233, -0.0349,  ...,  0.0208, -0.0257, -0.0109],
        [ 0.0211, -0.0065,  0.0013,  ...,  0.0073, -0.0359, -0.0116],
        [ 0.0129, -0.0233, -0.0139,  ...,  0.0130, -0.0257, -0.0156]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 126/965 [03:58<27:37,  1.98s/it]

tensor([[ 0.0131,  0.0086,  0.0068,  ..., -0.0065, -0.0408,  0.0057],
        [ 0.0133,  0.0422, -0.0051,  ..., -0.0257, -0.0369,  0.0209],
        [ 0.0144,  0.0078,  0.0095,  ..., -0.0032, -0.0454,  0.0048],
        ...,
        [ 0.0124, -0.0121,  0.0083,  ...,  0.0160, -0.0286, -0.0114],
        [-0.0144,  0.0041, -0.0232,  ...,  0.0196, -0.0377, -0.0101],
        [ 0.0150, -0.0087, -0.0079,  ..., -0.0047, -0.0366, -0.0053]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 127/965 [04:00<28:19,  2.03s/it]

tensor([[ 0.0038, -0.0193, -0.0012,  ...,  0.0042, -0.0246, -0.0013],
        [ 0.0021,  0.0025, -0.0074,  ..., -0.0061, -0.0358,  0.0151],
        [ 0.0055,  0.0087,  0.0044,  ..., -0.0050, -0.0193, -0.0133],
        ...,
        [ 0.0135, -0.0026, -0.0096,  ...,  0.0027, -0.0217, -0.0004],
        [ 0.0058, -0.0140, -0.0107,  ...,  0.0137, -0.0323,  0.0029],
        [-0.0080, -0.0012,  0.0209,  ...,  0.0102, -0.0393, -0.0088]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 128/965 [04:01<26:19,  1.89s/it]

tensor([[ 0.0083, -0.0026,  0.0040,  ..., -0.0016, -0.0385, -0.0034],
        [-0.0072,  0.0054,  0.0197,  ...,  0.0114, -0.0334, -0.0098],
        [-0.0010, -0.0012,  0.0054,  ...,  0.0063, -0.0301, -0.0067],
        ...,
        [ 0.0024, -0.0014, -0.0015,  ...,  0.0084, -0.0372, -0.0130],
        [ 0.0091, -0.0073,  0.0106,  ...,  0.0011, -0.0316, -0.0011],
        [ 0.0074, -0.0012,  0.0029,  ...,  0.0035, -0.0363, -0.0012]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 129/965 [04:03<27:13,  1.95s/it]

tensor([[-0.0058, -0.0039, -0.0086,  ...,  0.0169, -0.0417, -0.0145],
        [-0.0040,  0.0040,  0.0111,  ..., -0.0028, -0.0423,  0.0092],
        [-0.0048, -0.0050, -0.0058,  ..., -0.0032, -0.0352, -0.0010],
        ...,
        [-0.0017,  0.0008, -0.0063,  ...,  0.0001, -0.0369, -0.0015],
        [ 0.0213,  0.0044,  0.0060,  ..., -0.0119, -0.0438,  0.0024],
        [-0.0118,  0.0056,  0.0115,  ..., -0.0034, -0.0305, -0.0101]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 13%|█▎        | 130/965 [04:05<26:33,  1.91s/it]

tensor([[-0.0054, -0.0045,  0.0104,  ...,  0.0088, -0.0387, -0.0192],
        [ 0.0161, -0.0011,  0.0024,  ..., -0.0018, -0.0281, -0.0070],
        [ 0.0014, -0.0040,  0.0052,  ...,  0.0161, -0.0421, -0.0056],
        ...,
        [-0.0041, -0.0163, -0.0061,  ...,  0.0169, -0.0323, -0.0314],
        [ 0.0074, -0.0003,  0.0128,  ..., -0.0067, -0.0326,  0.0077],
        [-0.0023, -0.0069,  0.0091,  ...,  0.0160, -0.0222, -0.0194]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▎        | 131/965 [04:07<25:25,  1.83s/it]

tensor([[ 0.0040, -0.0122, -0.0035,  ...,  0.0153, -0.0374, -0.0048],
        [ 0.0060,  0.0009, -0.0025,  ...,  0.0042, -0.0384, -0.0027],
        [-0.0022,  0.0117, -0.0009,  ...,  0.0052, -0.0510, -0.0101],
        ...,
        [ 0.0171,  0.0262, -0.0021,  ..., -0.0268, -0.0184,  0.0066],
        [ 0.0260,  0.0056, -0.0023,  ...,  0.0016, -0.0316,  0.0031],
        [-0.0103,  0.0179, -0.0225,  ..., -0.0244, -0.0557,  0.0188]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▎        | 132/965 [04:09<24:48,  1.79s/it]

tensor([[ 1.0453e-02,  7.3355e-04, -8.7213e-03,  ...,  2.4410e-04,
         -3.9836e-02, -9.4801e-05],
        [ 8.5830e-03, -5.2154e-08, -6.3239e-03,  ..., -1.0027e-02,
         -5.6573e-02,  2.2859e-03],
        [ 7.0707e-03,  6.3265e-03, -2.3277e-03,  ...,  4.2495e-04,
         -3.3170e-02,  3.9301e-03],
        ...,
        [ 8.0718e-03, -4.7362e-03, -1.6752e-03,  ...,  1.0097e-03,
         -4.0585e-02, -4.4170e-03],
        [ 7.9798e-03,  8.3647e-03, -1.8600e-02,  ..., -7.0770e-03,
         -3.3346e-02,  7.7084e-03],
        [-1.1122e-03, -1.0179e-02,  6.5857e-03,  ...,  7.6839e-03,
         -3.5175e-02, -1.2475e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▍        | 133/965 [04:11<26:21,  1.90s/it]

tensor([[ 0.0015, -0.0059, -0.0025,  ..., -0.0064, -0.0458, -0.0004],
        [ 0.0065,  0.0033, -0.0143,  ..., -0.0092, -0.0330,  0.0086],
        [ 0.0015, -0.0198,  0.0025,  ..., -0.0104, -0.0413, -0.0069],
        ...,
        [ 0.0133, -0.0135, -0.0089,  ...,  0.0037, -0.0347, -0.0064],
        [ 0.0199,  0.0343,  0.0061,  ..., -0.0188, -0.0495,  0.0067],
        [ 0.0143,  0.0125, -0.0006,  ..., -0.0157, -0.0439,  0.0063]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▍        | 134/965 [04:13<25:54,  1.87s/it]

tensor([[-1.0112e-02, -5.8075e-04,  9.3985e-03,  ..., -3.9799e-03,
         -4.8236e-02, -1.8056e-03],
        [ 1.6171e-02, -5.2289e-03, -8.1471e-03,  ..., -2.4446e-03,
         -2.9271e-02, -1.1210e-02],
        [ 2.4145e-03,  7.8212e-03, -4.5911e-03,  ...,  2.7846e-03,
         -3.8308e-02, -7.1529e-05],
        ...,
        [-1.5181e-02,  7.1014e-03,  9.0641e-04,  ...,  1.4751e-02,
         -4.8590e-02, -1.6056e-03],
        [-7.0089e-03, -1.3030e-03, -7.6715e-03,  ..., -1.1140e-02,
         -3.6706e-02, -7.4393e-04],
        [ 6.0967e-03,  8.0726e-03, -2.0132e-03,  ..., -1.2957e-02,
         -3.1446e-02,  1.3270e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▍        | 135/965 [04:15<26:35,  1.92s/it]

tensor([[-0.0131, -0.0003, -0.0050,  ...,  0.0020, -0.0261,  0.0084],
        [-0.0108, -0.0092, -0.0165,  ..., -0.0193, -0.0238,  0.0034],
        [-0.0021, -0.0159, -0.0083,  ...,  0.0124, -0.0310, -0.0002],
        ...,
        [ 0.0076,  0.0012, -0.0072,  ..., -0.0017, -0.0218,  0.0008],
        [ 0.0003, -0.0015,  0.0180,  ..., -0.0116, -0.0344, -0.0033],
        [ 0.0011,  0.0075,  0.0048,  ...,  0.0012, -0.0362, -0.0007]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▍        | 136/965 [04:16<25:19,  1.83s/it]

tensor([[ 0.0116, -0.0034,  0.0016,  ...,  0.0065, -0.0296, -0.0110],
        [ 0.0002,  0.0102,  0.0086,  ...,  0.0020, -0.0393, -0.0019],
        [ 0.0081,  0.0049,  0.0076,  ..., -0.0007, -0.0172, -0.0013],
        ...,
        [ 0.0003, -0.0075,  0.0072,  ...,  0.0036, -0.0262, -0.0047],
        [-0.0086, -0.0092,  0.0182,  ..., -0.0018, -0.0111, -0.0214],
        [ 0.0008, -0.0087, -0.0046,  ..., -0.0143, -0.0170, -0.0050]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▍        | 137/965 [04:18<26:44,  1.94s/it]

tensor([[ 0.0002,  0.0011,  0.0123,  ..., -0.0006, -0.0215,  0.0137],
        [-0.0044, -0.0142,  0.0150,  ...,  0.0095, -0.0358,  0.0014],
        [-0.0081, -0.0124, -0.0051,  ..., -0.0062, -0.0216, -0.0116],
        ...,
        [ 0.0032,  0.0014,  0.0027,  ...,  0.0046, -0.0347, -0.0071],
        [ 0.0002, -0.0241,  0.0364,  ...,  0.0239, -0.0079, -0.0262],
        [ 0.0088, -0.0113,  0.0111,  ...,  0.0093, -0.0248, -0.0109]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▍        | 138/965 [04:20<26:13,  1.90s/it]

tensor([[ 0.0117, -0.0027,  0.0139,  ..., -0.0059, -0.0237,  0.0086],
        [ 0.0038,  0.0047, -0.0060,  ..., -0.0013, -0.0213,  0.0036],
        [ 0.0073, -0.0030,  0.0146,  ...,  0.0069, -0.0286, -0.0056],
        ...,
        [ 0.0021, -0.0112,  0.0029,  ..., -0.0030, -0.0398, -0.0071],
        [ 0.0032, -0.0030,  0.0145,  ..., -0.0012, -0.0101, -0.0042],
        [ 0.0122,  0.0020,  0.0041,  ..., -0.0027, -0.0221, -0.0089]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 14%|█▍        | 139/965 [04:22<25:21,  1.84s/it]

tensor([[ 0.0065, -0.0071,  0.0053,  ...,  0.0065, -0.0289, -0.0055],
        [ 0.0062, -0.0079,  0.0039,  ...,  0.0090, -0.0254, -0.0011],
        [ 0.0029, -0.0180,  0.0124,  ...,  0.0053, -0.0255, -0.0169],
        ...,
        [ 0.0123,  0.0038,  0.0136,  ...,  0.0081, -0.0299, -0.0086],
        [-0.0022,  0.0033,  0.0056,  ...,  0.0120, -0.0051,  0.0109],
        [-0.0005, -0.0014,  0.0079,  ..., -0.0020, -0.0391,  0.0012]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▍        | 140/965 [04:24<25:23,  1.85s/it]

tensor([[ 0.0004,  0.0032,  0.0039,  ..., -0.0042, -0.0334,  0.0048],
        [ 0.0147,  0.0136, -0.0285,  ..., -0.0056, -0.0333,  0.0070],
        [ 0.0041, -0.0109,  0.0042,  ..., -0.0041, -0.0216, -0.0144],
        ...,
        [-0.0024,  0.0030,  0.0037,  ..., -0.0071, -0.0339,  0.0064],
        [-0.0035,  0.0186, -0.0009,  ..., -0.0023, -0.0426,  0.0119],
        [-0.0059,  0.0120,  0.0105,  ..., -0.0106, -0.0281,  0.0171]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▍        | 141/965 [04:26<25:51,  1.88s/it]

tensor([[ 0.0049,  0.0107, -0.0024,  ...,  0.0079, -0.0428,  0.0099],
        [ 0.0055, -0.0236,  0.0023,  ..., -0.0055, -0.0305,  0.0012],
        [ 0.0073, -0.0048,  0.0073,  ..., -0.0196, -0.0395,  0.0062],
        ...,
        [ 0.0032, -0.0023,  0.0025,  ..., -0.0073, -0.0262,  0.0016],
        [ 0.0087, -0.0027,  0.0080,  ..., -0.0120, -0.0374, -0.0053],
        [ 0.0099, -0.0023, -0.0006,  ...,  0.0019, -0.0376,  0.0090]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▍        | 142/965 [04:27<24:44,  1.80s/it]

tensor([[ 0.0041,  0.0077, -0.0088,  ..., -0.0164, -0.0453, -0.0023],
        [ 0.0078, -0.0115,  0.0028,  ...,  0.0145, -0.0332,  0.0115],
        [-0.0040, -0.0056, -0.0050,  ...,  0.0117, -0.0305, -0.0140],
        ...,
        [-0.0072,  0.0174,  0.0043,  ..., -0.0161, -0.0273,  0.0166],
        [-0.0031,  0.0045,  0.0223,  ..., -0.0041, -0.0350, -0.0170],
        [ 0.0053, -0.0076,  0.0007,  ...,  0.0049, -0.0296, -0.0040]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▍        | 143/965 [04:29<23:44,  1.73s/it]

tensor([[-0.0049,  0.0085,  0.0022,  ..., -0.0027, -0.0338,  0.0102],
        [ 0.0038,  0.0072, -0.0115,  ..., -0.0037, -0.0300,  0.0106],
        [-0.0076, -0.0357,  0.0161,  ...,  0.0033, -0.0175, -0.0262],
        ...,
        [ 0.0060,  0.0118,  0.0030,  ..., -0.0164, -0.0289,  0.0154],
        [ 0.0053,  0.0022, -0.0016,  ..., -0.0081, -0.0159,  0.0024],
        [-0.0075,  0.0043,  0.0063,  ..., -0.0172, -0.0310,  0.0185]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▍        | 144/965 [04:31<23:09,  1.69s/it]

tensor([[-0.0028,  0.0096, -0.0105,  ..., -0.0119, -0.0321,  0.0218],
        [-0.0055, -0.0123, -0.0145,  ...,  0.0129, -0.0274,  0.0054],
        [-0.0086,  0.0113, -0.0179,  ..., -0.0272, -0.0427,  0.0136],
        ...,
        [-0.0169, -0.0126, -0.0030,  ...,  0.0050, -0.0189,  0.0068],
        [ 0.0014,  0.0013,  0.0068,  ...,  0.0064, -0.0416,  0.0033],
        [-0.0038,  0.0030, -0.0069,  ..., -0.0039, -0.0237,  0.0028]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▌        | 145/965 [04:32<23:12,  1.70s/it]

tensor([[ 0.0019, -0.0019, -0.0047,  ...,  0.0043, -0.0272, -0.0105],
        [-0.0045, -0.0028,  0.0020,  ...,  0.0215, -0.0372, -0.0085],
        [ 0.0021, -0.0066, -0.0015,  ..., -0.0058, -0.0323, -0.0077],
        ...,
        [-0.0018,  0.0122, -0.0071,  ...,  0.0057, -0.0234,  0.0124],
        [-0.0022, -0.0115, -0.0147,  ..., -0.0142, -0.0380, -0.0018],
        [-0.0013,  0.0009, -0.0035,  ...,  0.0158, -0.0309, -0.0094]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▌        | 146/965 [04:34<23:08,  1.70s/it]

tensor([[ 0.0006, -0.0027,  0.0027,  ...,  0.0025, -0.0325, -0.0063],
        [ 0.0020, -0.0069, -0.0034,  ...,  0.0011, -0.0351,  0.0057],
        [-0.0058,  0.0108, -0.0097,  ...,  0.0044, -0.0353,  0.0066],
        ...,
        [-0.0136, -0.0003, -0.0055,  ..., -0.0017, -0.0275, -0.0005],
        [-0.0030,  0.0050, -0.0095,  ..., -0.0115, -0.0236,  0.0068],
        [-0.0038, -0.0213, -0.0114,  ...,  0.0005, -0.0437, -0.0086]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▌        | 147/965 [04:36<23:31,  1.73s/it]

tensor([[ 0.0029, -0.0066,  0.0162,  ..., -0.0145, -0.0252, -0.0076],
        [ 0.0073,  0.0029,  0.0047,  ...,  0.0032, -0.0384,  0.0023],
        [ 0.0051,  0.0073, -0.0015,  ...,  0.0101, -0.0314,  0.0020],
        ...,
        [ 0.0022,  0.0019, -0.0012,  ..., -0.0023, -0.0248,  0.0057],
        [ 0.0008, -0.0046, -0.0017,  ...,  0.0076, -0.0260,  0.0041],
        [-0.0002,  0.0056, -0.0058,  ..., -0.0033, -0.0265,  0.0096]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▌        | 148/965 [04:37<23:32,  1.73s/it]

tensor([[-0.0138,  0.0057,  0.0220,  ...,  0.0111, -0.0266, -0.0035],
        [ 0.0091, -0.0055, -0.0097,  ...,  0.0055, -0.0243, -0.0120],
        [ 0.0005, -0.0102,  0.0072,  ...,  0.0033, -0.0240,  0.0028],
        ...,
        [ 0.0052, -0.0242,  0.0214,  ..., -0.0201, -0.0370, -0.0095],
        [ 0.0056, -0.0069,  0.0059,  ...,  0.0065, -0.0253, -0.0005],
        [ 0.0001,  0.0145, -0.0044,  ...,  0.0080, -0.0338,  0.0098]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 15%|█▌        | 149/965 [04:39<22:39,  1.67s/it]

tensor([[-0.0114, -0.0085,  0.0186,  ...,  0.0151, -0.0293, -0.0178],
        [ 0.0012, -0.0084,  0.0140,  ..., -0.0055, -0.0373,  0.0003],
        [-0.0067,  0.0110, -0.0113,  ...,  0.0024, -0.0452,  0.0264],
        ...,
        [ 0.0113,  0.0051,  0.0036,  ...,  0.0037, -0.0269, -0.0044],
        [-0.0082, -0.0153,  0.0118,  ..., -0.0025, -0.0441, -0.0097],
        [ 0.0064,  0.0045,  0.0021,  ..., -0.0077, -0.0256,  0.0070]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▌        | 150/965 [04:41<23:40,  1.74s/it]

tensor([[-0.0050,  0.0005,  0.0037,  ..., -0.0184, -0.0251,  0.0017],
        [ 0.0074, -0.0105,  0.0004,  ...,  0.0051, -0.0271,  0.0008],
        [ 0.0070,  0.0072, -0.0020,  ..., -0.0066, -0.0331,  0.0070],
        ...,
        [-0.0034, -0.0171,  0.0028,  ...,  0.0113, -0.0238, -0.0026],
        [-0.0005, -0.0007,  0.0054,  ..., -0.0022, -0.0229, -0.0001],
        [-0.0003, -0.0003,  0.0048,  ...,  0.0008, -0.0251, -0.0035]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▌        | 151/965 [04:43<25:40,  1.89s/it]

tensor([[-0.0005, -0.0114, -0.0093,  ...,  0.0208, -0.0236,  0.0030],
        [ 0.0052, -0.0023,  0.0178,  ..., -0.0095, -0.0392,  0.0131],
        [ 0.0123, -0.0114,  0.0054,  ...,  0.0318, -0.0201, -0.0234],
        ...,
        [ 0.0095,  0.0160,  0.0101,  ..., -0.0047, -0.0097,  0.0106],
        [ 0.0057, -0.0014,  0.0094,  ...,  0.0159, -0.0253, -0.0092],
        [-0.0175, -0.0116,  0.0182,  ...,  0.0201, -0.0231, -0.0163]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▌        | 152/965 [04:45<26:36,  1.96s/it]

tensor([[-0.0064,  0.0113, -0.0088,  ...,  0.0071, -0.0343,  0.0079],
        [ 0.0066,  0.0108,  0.0014,  ..., -0.0090, -0.0288,  0.0110],
        [-0.0054,  0.0049,  0.0085,  ..., -0.0035, -0.0300,  0.0052],
        ...,
        [-0.0219, -0.0062,  0.0089,  ...,  0.0113, -0.0307, -0.0170],
        [-0.0082, -0.0112,  0.0089,  ...,  0.0021, -0.0234, -0.0117],
        [ 0.0113,  0.0186, -0.0039,  ..., -0.0063, -0.0386,  0.0025]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▌        | 153/965 [04:48<27:55,  2.06s/it]

tensor([[ 0.0108,  0.0061,  0.0067,  ..., -0.0030, -0.0326,  0.0151],
        [ 0.0133,  0.0091,  0.0114,  ..., -0.0035, -0.0275, -0.0015],
        [ 0.0123, -0.0067, -0.0101,  ...,  0.0095, -0.0180,  0.0037],
        ...,
        [ 0.0125, -0.0064, -0.0007,  ...,  0.0081, -0.0116, -0.0049],
        [ 0.0037, -0.0007,  0.0165,  ..., -0.0032, -0.0223,  0.0002],
        [-0.0075,  0.0128,  0.0010,  ...,  0.0138, -0.0187, -0.0058]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▌        | 154/965 [04:50<27:30,  2.03s/it]

tensor([[-0.0008,  0.0143, -0.0015,  ...,  0.0030, -0.0428, -0.0088],
        [-0.0024, -0.0039, -0.0027,  ..., -0.0121, -0.0383, -0.0066],
        [ 0.0022, -0.0069, -0.0028,  ...,  0.0029, -0.0117, -0.0075],
        ...,
        [ 0.0024,  0.0119, -0.0274,  ...,  0.0001, -0.0261, -0.0034],
        [ 0.0006,  0.0044, -0.0093,  ..., -0.0093, -0.0300,  0.0037],
        [-0.0017,  0.0162,  0.0053,  ...,  0.0074, -0.0259,  0.0103]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▌        | 155/965 [04:52<28:15,  2.09s/it]

tensor([[ 1.1963e-02, -2.7438e-03, -2.2157e-02,  ...,  1.0323e-02,
         -3.5537e-02, -4.6316e-03],
        [ 4.5373e-03, -4.8699e-03, -3.9585e-03,  ...,  3.7317e-03,
         -2.4113e-02, -7.6424e-03],
        [-1.3501e-03, -8.3965e-03,  5.0772e-04,  ...,  1.0572e-02,
         -2.3579e-02, -7.5161e-03],
        ...,
        [ 9.2495e-05,  1.7288e-02, -1.4205e-02,  ..., -3.7957e-03,
         -3.1745e-02,  7.5564e-05],
        [-6.5561e-05,  1.2278e-03, -2.2048e-02,  ..., -1.8502e-04,
         -2.4622e-02,  8.8595e-03],
        [-4.3429e-03, -1.3924e-02,  1.1495e-02,  ..., -1.6181e-03,
         -3.7069e-02,  8.2089e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▌        | 156/965 [04:54<27:56,  2.07s/it]

tensor([[ 0.0137, -0.0103, -0.0078,  ...,  0.0121, -0.0237, -0.0075],
        [ 0.0056, -0.0013, -0.0099,  ...,  0.0045, -0.0231, -0.0035],
        [ 0.0031, -0.0067, -0.0008,  ...,  0.0211, -0.0294, -0.0045],
        ...,
        [-0.0022, -0.0095, -0.0063,  ...,  0.0089, -0.0220, -0.0055],
        [ 0.0035, -0.0152, -0.0012,  ...,  0.0007, -0.0220, -0.0187],
        [ 0.0168,  0.0130,  0.0031,  ..., -0.0010, -0.0152,  0.0138]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▋        | 157/965 [04:55<26:27,  1.96s/it]

tensor([[-0.0011, -0.0015, -0.0013,  ...,  0.0081, -0.0269, -0.0115],
        [ 0.0015, -0.0041, -0.0118,  ...,  0.0102, -0.0260,  0.0113],
        [-0.0031, -0.0034, -0.0062,  ...,  0.0067, -0.0321,  0.0078],
        ...,
        [-0.0018,  0.0194,  0.0088,  ..., -0.0204, -0.0342,  0.0251],
        [-0.0046, -0.0038, -0.0060,  ..., -0.0190, -0.0353,  0.0004],
        [-0.0130,  0.0038, -0.0157,  ...,  0.0092, -0.0209, -0.0012]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▋        | 158/965 [04:57<25:55,  1.93s/it]

tensor([[-0.0011,  0.0013,  0.0023,  ..., -0.0069, -0.0385, -0.0036],
        [ 0.0169, -0.0120, -0.0063,  ..., -0.0070, -0.0313, -0.0034],
        [-0.0027, -0.0049, -0.0164,  ...,  0.0050, -0.0236,  0.0084],
        ...,
        [-0.0035,  0.0155, -0.0076,  ..., -0.0022, -0.0395,  0.0138],
        [ 0.0071,  0.0077,  0.0078,  ..., -0.0063, -0.0167,  0.0099],
        [ 0.0208, -0.0015, -0.0035,  ..., -0.0142, -0.0262,  0.0150]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 16%|█▋        | 159/965 [04:59<25:55,  1.93s/it]

tensor([[-0.0171, -0.0108,  0.0103,  ...,  0.0086, -0.0172, -0.0098],
        [ 0.0109,  0.0032,  0.0006,  ..., -0.0070, -0.0304,  0.0002],
        [-0.0006, -0.0079,  0.0094,  ..., -0.0036, -0.0340, -0.0074],
        ...,
        [-0.0005, -0.0141,  0.0092,  ..., -0.0048, -0.0143, -0.0126],
        [ 0.0139, -0.0252, -0.0006,  ...,  0.0042, -0.0288, -0.0120],
        [ 0.0067, -0.0068,  0.0007,  ..., -0.0016, -0.0280,  0.0008]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 160/965 [05:01<25:22,  1.89s/it]

tensor([[-1.5555e-02,  1.7949e-03,  2.0702e-02,  ...,  5.1381e-03,
         -2.5886e-02, -1.1714e-03],
        [-5.8990e-03,  6.1299e-03,  1.9210e-02,  ..., -9.0812e-03,
         -1.2191e-02, -4.5761e-03],
        [ 2.2237e-04,  1.8215e-02,  3.6904e-04,  ..., -1.4579e-02,
         -1.3439e-02,  1.2029e-02],
        ...,
        [ 1.6417e-03,  9.4557e-03, -2.2580e-03,  ...,  7.7848e-03,
         -2.4739e-02,  1.9016e-03],
        [ 1.0124e-02,  9.3561e-03,  7.2419e-03,  ..., -6.9619e-03,
         -1.4536e-02,  1.1119e-02],
        [ 1.1346e-02,  6.4796e-03,  4.9860e-05,  ..., -3.6091e-03,
         -3.4239e-02,  1.3395e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 161/965 [05:03<27:11,  2.03s/it]

tensor([[-0.0035, -0.0064,  0.0123,  ...,  0.0094, -0.0304,  0.0055],
        [-0.0112, -0.0140,  0.0133,  ...,  0.0158, -0.0122, -0.0184],
        [ 0.0016, -0.0017,  0.0092,  ..., -0.0173, -0.0077,  0.0166],
        ...,
        [-0.0005, -0.0050, -0.0065,  ..., -0.0157, -0.0249,  0.0061],
        [-0.0070, -0.0100,  0.0028,  ...,  0.0147, -0.0213,  0.0054],
        [-0.0019, -0.0041,  0.0011,  ...,  0.0098, -0.0214,  0.0119]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 162/965 [05:05<25:39,  1.92s/it]

tensor([[ 0.0072, -0.0018, -0.0019,  ...,  0.0002, -0.0141, -0.0059],
        [-0.0034,  0.0052, -0.0032,  ..., -0.0034, -0.0255,  0.0028],
        [-0.0094,  0.0091,  0.0038,  ...,  0.0037, -0.0268,  0.0057],
        ...,
        [ 0.0024, -0.0005, -0.0071,  ...,  0.0019, -0.0113, -0.0016],
        [-0.0075, -0.0161,  0.0071,  ..., -0.0041, -0.0244, -0.0106],
        [ 0.0056,  0.0174,  0.0187,  ..., -0.0179, -0.0368,  0.0001]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 163/965 [05:07<25:59,  1.94s/it]

tensor([[-3.0706e-03, -1.7792e-02, -9.1128e-04,  ...,  5.6702e-03,
         -1.1883e-02, -1.5164e-02],
        [ 5.2279e-03, -7.5509e-03,  1.2383e-02,  ...,  2.2384e-02,
         -2.5975e-02, -1.4910e-02],
        [-2.4111e-03,  7.7086e-03, -8.2491e-03,  ..., -7.5134e-03,
         -1.1632e-02,  5.8951e-03],
        ...,
        [-5.0339e-03, -8.6062e-03,  6.6272e-03,  ...,  1.0256e-02,
         -1.6163e-02,  7.8222e-03],
        [ 6.1914e-03,  5.1787e-03, -3.4638e-05,  ..., -1.3143e-03,
         -1.8982e-02,  7.8490e-03],
        [-7.6734e-03, -1.9751e-03,  1.4048e-02,  ...,  1.2610e-02,
         -2.5903e-02, -6.4102e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 164/965 [05:09<26:06,  1.96s/it]

tensor([[-0.0090, -0.0076, -0.0074,  ...,  0.0127, -0.0073, -0.0163],
        [-0.0123, -0.0166, -0.0020,  ...,  0.0014, -0.0282,  0.0010],
        [-0.0005,  0.0061, -0.0025,  ...,  0.0058, -0.0301, -0.0040],
        ...,
        [ 0.0034,  0.0069, -0.0054,  ..., -0.0038, -0.0240,  0.0006],
        [ 0.0275,  0.0094, -0.0110,  ...,  0.0065, -0.0320, -0.0025],
        [ 0.0023,  0.0031, -0.0126,  ...,  0.0067, -0.0208,  0.0045]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 165/965 [05:11<25:43,  1.93s/it]

tensor([[ 0.0067, -0.0044, -0.0009,  ...,  0.0096, -0.0253, -0.0094],
        [ 0.0046, -0.0073, -0.0045,  ...,  0.0129, -0.0242, -0.0146],
        [-0.0025,  0.0034, -0.0030,  ..., -0.0014, -0.0262,  0.0038],
        ...,
        [-0.0061,  0.0165, -0.0133,  ..., -0.0032, -0.0224, -0.0087],
        [-0.0015,  0.0030, -0.0080,  ...,  0.0059, -0.0260, -0.0029],
        [-0.0063,  0.0080,  0.0067,  ...,  0.0057, -0.0207,  0.0012]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 166/965 [05:13<25:22,  1.91s/it]

tensor([[-0.0078, -0.0128,  0.0070,  ...,  0.0093, -0.0245,  0.0031],
        [ 0.0080, -0.0013, -0.0034,  ...,  0.0137, -0.0367, -0.0105],
        [-0.0017, -0.0157,  0.0018,  ..., -0.0108, -0.0283, -0.0110],
        ...,
        [-0.0110,  0.0083, -0.0015,  ...,  0.0083, -0.0151,  0.0144],
        [ 0.0042, -0.0122,  0.0002,  ..., -0.0056, -0.0266, -0.0067],
        [ 0.0040, -0.0086,  0.0024,  ...,  0.0053, -0.0231, -0.0085]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 167/965 [05:15<26:21,  1.98s/it]

tensor([[-0.0090,  0.0176, -0.0080,  ...,  0.0100, -0.0410,  0.0193],
        [ 0.0040, -0.0065,  0.0018,  ...,  0.0014, -0.0206, -0.0067],
        [-0.0184, -0.0271,  0.0045,  ...,  0.0146, -0.0184, -0.0015],
        ...,
        [ 0.0111,  0.0040,  0.0058,  ..., -0.0103, -0.0210,  0.0003],
        [-0.0146,  0.0075, -0.0051,  ..., -0.0009, -0.0383,  0.0120],
        [ 0.0050,  0.0093, -0.0167,  ..., -0.0058, -0.0129,  0.0053]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 17%|█▋        | 168/965 [05:17<24:57,  1.88s/it]

tensor([[-0.0080, -0.0100,  0.0036,  ..., -0.0068, -0.0291, -0.0072],
        [-0.0033, -0.0064, -0.0011,  ..., -0.0002, -0.0188, -0.0060],
        [-0.0067, -0.0158,  0.0057,  ...,  0.0029, -0.0252, -0.0116],
        ...,
        [-0.0013, -0.0084, -0.0104,  ...,  0.0133, -0.0163, -0.0020],
        [ 0.0019,  0.0073,  0.0012,  ...,  0.0026, -0.0155,  0.0020],
        [ 0.0037, -0.0094, -0.0130,  ...,  0.0133, -0.0157, -0.0095]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 169/965 [05:19<25:18,  1.91s/it]

tensor([[ 0.0012,  0.0053,  0.0017,  ...,  0.0013, -0.0234,  0.0094],
        [-0.0038, -0.0086,  0.0132,  ..., -0.0084, -0.0309, -0.0049],
        [-0.0138,  0.0050, -0.0023,  ..., -0.0043, -0.0223,  0.0074],
        ...,
        [-0.0203,  0.0088, -0.0047,  ..., -0.0012, -0.0462,  0.0052],
        [-0.0099, -0.0068,  0.0040,  ...,  0.0034, -0.0131, -0.0239],
        [-0.0036, -0.0002, -0.0040,  ..., -0.0052, -0.0285, -0.0071]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 170/965 [05:21<26:42,  2.02s/it]

tensor([[ 7.9681e-03, -1.1464e-02, -2.5213e-03,  ..., -1.6030e-03,
         -1.3088e-02,  1.2384e-03],
        [-2.5661e-03, -7.3280e-03, -1.0942e-02,  ..., -1.9782e-02,
         -3.2553e-02,  1.5423e-02],
        [ 1.0262e-02,  1.3173e-02, -1.0031e-02,  ..., -1.2448e-02,
         -2.6587e-02,  7.2302e-03],
        ...,
        [-3.7204e-05,  6.0498e-03,  9.3919e-03,  ..., -3.6502e-03,
         -1.5389e-02,  2.8584e-03],
        [ 1.6093e-02,  4.8616e-04,  5.7900e-04,  ...,  2.2247e-03,
         -3.1398e-02,  7.7500e-03],
        [-3.4796e-03, -3.6314e-03, -7.6739e-03,  ...,  5.0558e-03,
         -2.2173e-02, -9.3692e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 171/965 [05:23<26:24,  2.00s/it]

tensor([[ 0.0054, -0.0046,  0.0004,  ..., -0.0022, -0.0202, -0.0042],
        [-0.0089, -0.0011,  0.0032,  ..., -0.0022, -0.0173,  0.0133],
        [ 0.0062, -0.0126,  0.0040,  ...,  0.0021,  0.0028, -0.0070],
        ...,
        [-0.0173,  0.0005,  0.0142,  ...,  0.0083, -0.0253, -0.0047],
        [ 0.0129,  0.0055,  0.0041,  ...,  0.0002, -0.0246,  0.0022],
        [ 0.0051,  0.0067, -0.0006,  ..., -0.0101, -0.0208,  0.0095]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 172/965 [05:24<24:36,  1.86s/it]

tensor([[ 0.0009, -0.0116, -0.0086,  ...,  0.0059, -0.0088,  0.0011],
        [-0.0044,  0.0120, -0.0035,  ..., -0.0098, -0.0188,  0.0044],
        [-0.0023,  0.0040, -0.0046,  ...,  0.0051, -0.0181,  0.0049],
        ...,
        [ 0.0071,  0.0125,  0.0075,  ...,  0.0015, -0.0171,  0.0024],
        [ 0.0042, -0.0022, -0.0015,  ...,  0.0007, -0.0172,  0.0025],
        [-0.0068, -0.0010, -0.0137,  ...,  0.0155, -0.0271, -0.0010]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 173/965 [05:26<25:06,  1.90s/it]

tensor([[ 0.0115,  0.0029, -0.0218,  ..., -0.0099, -0.0085,  0.0034],
        [-0.0115,  0.0122,  0.0058,  ...,  0.0037, -0.0153,  0.0182],
        [-0.0173,  0.0013, -0.0082,  ..., -0.0137, -0.0162,  0.0072],
        ...,
        [ 0.0064,  0.0233, -0.0009,  ..., -0.0210, -0.0142,  0.0123],
        [-0.0129, -0.0132,  0.0099,  ...,  0.0092, -0.0136, -0.0192],
        [-0.0025,  0.0136, -0.0068,  ..., -0.0115, -0.0126, -0.0011]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 174/965 [05:28<24:37,  1.87s/it]

tensor([[-0.0078,  0.0182, -0.0035,  ..., -0.0236, -0.0321,  0.0229],
        [-0.0049, -0.0018, -0.0075,  ..., -0.0186, -0.0146, -0.0075],
        [-0.0155,  0.0122, -0.0053,  ..., -0.0053, -0.0088,  0.0068],
        ...,
        [ 0.0031,  0.0191,  0.0077,  ..., -0.0020, -0.0167,  0.0147],
        [-0.0020,  0.0104, -0.0189,  ..., -0.0033, -0.0147,  0.0059],
        [ 0.0030, -0.0042, -0.0064,  ...,  0.0090, -0.0275, -0.0093]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 175/965 [05:30<23:19,  1.77s/it]

tensor([[ 0.0011,  0.0004, -0.0022,  ...,  0.0014, -0.0246, -0.0017],
        [-0.0088, -0.0060, -0.0140,  ...,  0.0034, -0.0169,  0.0054],
        [ 0.0055,  0.0080,  0.0053,  ..., -0.0021, -0.0271,  0.0089],
        ...,
        [-0.0042,  0.0056, -0.0028,  ..., -0.0105, -0.0214,  0.0004],
        [-0.0198,  0.0144, -0.0054,  ...,  0.0008, -0.0347,  0.0177],
        [-0.0043,  0.0051, -0.0225,  ..., -0.0050, -0.0047,  0.0054]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 176/965 [05:32<24:52,  1.89s/it]

tensor([[ 5.9408e-03, -1.0632e-02,  2.1308e-05,  ...,  2.4245e-03,
         -2.3676e-02, -5.2657e-03],
        [-2.9777e-03, -9.1523e-03,  4.0150e-03,  ..., -5.0190e-03,
         -1.8602e-02, -1.0956e-02],
        [ 1.8186e-02,  1.1433e-02, -1.6887e-03,  ...,  2.9634e-03,
         -1.2111e-02,  4.7763e-03],
        ...,
        [-6.6078e-03,  1.0161e-02, -1.0185e-02,  ...,  9.7897e-03,
         -2.9053e-02,  1.9767e-03],
        [ 2.9124e-03, -3.4933e-03, -8.1187e-04,  ...,  1.8415e-04,
         -2.7556e-02, -4.4934e-04],
        [ 5.0198e-03,  3.8710e-02,  7.4114e-03,  ..., -1.1450e-02,
         -1.9949e-02,  2.8214e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 177/965 [05:33<23:24,  1.78s/it]

tensor([[-0.0033, -0.0044,  0.0037,  ...,  0.0099, -0.0179, -0.0136],
        [ 0.0098,  0.0007, -0.0017,  ..., -0.0098, -0.0185, -0.0004],
        [-0.0073, -0.0021,  0.0060,  ...,  0.0018, -0.0154,  0.0015],
        ...,
        [ 0.0031,  0.0055, -0.0082,  ...,  0.0030, -0.0160, -0.0147],
        [ 0.0081, -0.0041, -0.0018,  ...,  0.0005, -0.0239, -0.0060],
        [-0.0093,  0.0036, -0.0110,  ...,  0.0117, -0.0263, -0.0082]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 18%|█▊        | 178/965 [05:35<24:46,  1.89s/it]

tensor([[ 0.0006,  0.0101, -0.0023,  ...,  0.0149, -0.0331,  0.0094],
        [-0.0002, -0.0179,  0.0047,  ...,  0.0174, -0.0163,  0.0010],
        [ 0.0020, -0.0211, -0.0033,  ...,  0.0095, -0.0328, -0.0205],
        ...,
        [-0.0031,  0.0006,  0.0044,  ..., -0.0014, -0.0268,  0.0103],
        [ 0.0008,  0.0209, -0.0112,  ..., -0.0130, -0.0270,  0.0228],
        [ 0.0019, -0.0034, -0.0004,  ...,  0.0064, -0.0188,  0.0043]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▊        | 179/965 [05:37<24:40,  1.88s/it]

tensor([[-0.0136, -0.0025, -0.0141,  ...,  0.0171, -0.0221,  0.0120],
        [-0.0092,  0.0063, -0.0115,  ...,  0.0032, -0.0258,  0.0090],
        [-0.0120,  0.0023,  0.0051,  ..., -0.0056, -0.0200,  0.0145],
        ...,
        [-0.0020,  0.0015,  0.0087,  ...,  0.0003, -0.0159, -0.0135],
        [-0.0013, -0.0081, -0.0036,  ...,  0.0140, -0.0105, -0.0080],
        [ 0.0168, -0.0009, -0.0016,  ..., -0.0143, -0.0196,  0.0034]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▊        | 180/965 [05:39<25:20,  1.94s/it]

tensor([[-0.0071, -0.0007,  0.0009,  ..., -0.0004, -0.0169, -0.0057],
        [ 0.0010, -0.0026, -0.0081,  ..., -0.0032, -0.0090, -0.0068],
        [ 0.0065,  0.0114,  0.0090,  ...,  0.0056, -0.0300,  0.0081],
        ...,
        [ 0.0045, -0.0008,  0.0066,  ..., -0.0022, -0.0148, -0.0044],
        [ 0.0042,  0.0026, -0.0020,  ..., -0.0180, -0.0144,  0.0063],
        [ 0.0010, -0.0040,  0.0025,  ...,  0.0045, -0.0143,  0.0011]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 181/965 [05:41<25:26,  1.95s/it]

tensor([[ 2.2633e-03, -7.6977e-03,  4.8568e-03,  ..., -8.9047e-04,
         -9.6333e-03, -1.1159e-02],
        [-1.8981e-03,  5.8223e-03, -7.9776e-03,  ...,  2.8691e-03,
         -2.6951e-02,  8.0201e-03],
        [-7.8753e-03, -2.1353e-05, -1.2109e-02,  ...,  2.4613e-03,
         -1.2113e-02,  9.4573e-03],
        ...,
        [-3.4065e-04, -1.2464e-02, -3.2823e-03,  ...,  6.6711e-03,
         -1.0729e-02, -4.2951e-03],
        [-2.4634e-03, -1.1474e-02,  1.5225e-03,  ...,  6.7343e-03,
         -1.1090e-02, -2.3032e-03],
        [ 7.8052e-03, -4.9542e-03,  7.7071e-03,  ..., -2.2506e-03,
         -1.4712e-02, -4.6556e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 182/965 [05:43<25:03,  1.92s/it]

tensor([[-0.0041, -0.0072, -0.0072,  ...,  0.0052, -0.0021, -0.0010],
        [-0.0018, -0.0017,  0.0267,  ...,  0.0062, -0.0133, -0.0283],
        [-0.0079, -0.0103, -0.0076,  ...,  0.0202, -0.0211, -0.0075],
        ...,
        [ 0.0058, -0.0076,  0.0062,  ...,  0.0061, -0.0136, -0.0119],
        [ 0.0031, -0.0132,  0.0029,  ..., -0.0042, -0.0071, -0.0170],
        [ 0.0123, -0.0129,  0.0045,  ...,  0.0004, -0.0153, -0.0090]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 183/965 [05:45<24:46,  1.90s/it]

tensor([[-0.0007, -0.0063,  0.0135,  ...,  0.0043, -0.0220, -0.0124],
        [ 0.0033, -0.0017, -0.0054,  ..., -0.0040, -0.0125,  0.0005],
        [-0.0009,  0.0003,  0.0067,  ...,  0.0043, -0.0117,  0.0007],
        ...,
        [-0.0004, -0.0021, -0.0092,  ..., -0.0005, -0.0040, -0.0090],
        [-0.0011, -0.0014, -0.0142,  ..., -0.0036, -0.0153,  0.0028],
        [-0.0062,  0.0012,  0.0003,  ..., -0.0003, -0.0184,  0.0018]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 184/965 [05:47<26:03,  2.00s/it]

tensor([[ 0.0026,  0.0299, -0.0065,  ..., -0.0216, -0.0251,  0.0222],
        [-0.0042,  0.0016,  0.0036,  ...,  0.0060, -0.0215,  0.0134],
        [ 0.0022, -0.0046,  0.0050,  ..., -0.0100, -0.0142, -0.0074],
        ...,
        [-0.0010, -0.0049,  0.0039,  ...,  0.0006, -0.0250, -0.0130],
        [ 0.0099, -0.0003, -0.0055,  ..., -0.0053, -0.0170,  0.0037],
        [-0.0052, -0.0048, -0.0028,  ..., -0.0019, -0.0137, -0.0047]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 185/965 [05:49<26:14,  2.02s/it]

tensor([[ 0.0070,  0.0070,  0.0012,  ..., -0.0075, -0.0075,  0.0012],
        [ 0.0063,  0.0128, -0.0025,  ..., -0.0206, -0.0166, -0.0009],
        [ 0.0101,  0.0002, -0.0050,  ..., -0.0051, -0.0097, -0.0022],
        ...,
        [-0.0105, -0.0033,  0.0019,  ...,  0.0131, -0.0115, -0.0154],
        [ 0.0104, -0.0018,  0.0054,  ..., -0.0149, -0.0194, -0.0057],
        [-0.0084, -0.0268,  0.0195,  ..., -0.0075, -0.0092, -0.0134]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 186/965 [05:51<25:14,  1.94s/it]

tensor([[ 0.0113, -0.0016, -0.0111,  ..., -0.0230, -0.0143, -0.0100],
        [-0.0066,  0.0122,  0.0118,  ..., -0.0097, -0.0136,  0.0180],
        [ 0.0094, -0.0031,  0.0039,  ...,  0.0035, -0.0242, -0.0035],
        ...,
        [-0.0017,  0.0085, -0.0043,  ..., -0.0089, -0.0239,  0.0102],
        [-0.0080, -0.0058,  0.0038,  ...,  0.0059, -0.0192,  0.0041],
        [-0.0202, -0.0097,  0.0018,  ...,  0.0276, -0.0138, -0.0228]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 187/965 [05:53<24:43,  1.91s/it]

tensor([[-6.1587e-03, -9.8598e-04, -7.1851e-03,  ..., -1.5665e-02,
         -3.5512e-02,  5.2806e-03],
        [ 1.7177e-03,  1.6222e-03,  9.9059e-04,  ...,  2.6307e-03,
         -2.1383e-02,  4.3696e-03],
        [ 7.4181e-03,  5.2805e-03,  4.9663e-03,  ...,  2.5477e-03,
         -5.2131e-03,  7.2066e-03],
        ...,
        [-1.7575e-02, -9.9898e-03,  9.4760e-03,  ..., -3.2220e-03,
         -2.3958e-02,  7.3342e-03],
        [ 8.2630e-04,  6.9075e-03,  1.0414e-03,  ..., -4.5192e-03,
         -2.1723e-02,  1.0099e-02],
        [ 3.4264e-03,  9.5572e-03,  2.1429e-03,  ...,  7.5847e-06,
         -2.3204e-02, -5.5456e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


 19%|█▉        | 188/965 [05:55<26:02,  2.01s/it]

tensor([[-0.0190,  0.0058,  0.0090,  ...,  0.0140, -0.0166,  0.0001],
        [ 0.0055,  0.0181, -0.0026,  ...,  0.0026, -0.0289,  0.0091],
        [-0.0028,  0.0029, -0.0073,  ...,  0.0020, -0.0157,  0.0054],
        ...,
        [-0.0294, -0.0117,  0.0041,  ..., -0.0015, -0.0346,  0.0006],
        [-0.0043,  0.0140, -0.0002,  ...,  0.0053, -0.0132,  0.0086],
        [-0.0017,  0.0129, -0.0066,  ..., -0.0008, -0.0235,  0.0104]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|█▉        | 189/965 [05:57<26:12,  2.03s/it]

tensor([[-0.0235,  0.0108, -0.0177,  ...,  0.0098, -0.0122,  0.0127],
        [-0.0019, -0.0026,  0.0034,  ...,  0.0054, -0.0112,  0.0083],
        [-0.0164, -0.0002,  0.0202,  ...,  0.0133, -0.0231, -0.0072],
        ...,
        [-0.0007,  0.0011,  0.0043,  ...,  0.0051, -0.0169,  0.0045],
        [-0.0044,  0.0003, -0.0110,  ...,  0.0112, -0.0279,  0.0093],
        [ 0.0012,  0.0184, -0.0086,  ..., -0.0110, -0.0168,  0.0104]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|█▉        | 190/965 [05:59<25:16,  1.96s/it]

tensor([[ 0.0036,  0.0091, -0.0106,  ...,  0.0019, -0.0277,  0.0106],
        [ 0.0008, -0.0083, -0.0028,  ...,  0.0187, -0.0190, -0.0089],
        [-0.0046,  0.0146, -0.0111,  ...,  0.0210, -0.0158,  0.0023],
        ...,
        [ 0.0050, -0.0012,  0.0013,  ...,  0.0043, -0.0152,  0.0008],
        [-0.0121,  0.0019,  0.0130,  ..., -0.0007, -0.0030, -0.0147],
        [-0.0038,  0.0245,  0.0152,  ...,  0.0047, -0.0275,  0.0049]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|█▉        | 191/965 [06:01<25:10,  1.95s/it]

tensor([[ 0.0077, -0.0134, -0.0047,  ...,  0.0080, -0.0196,  0.0005],
        [ 0.0027,  0.0182, -0.0014,  ..., -0.0091, -0.0032,  0.0139],
        [-0.0090,  0.0220,  0.0219,  ..., -0.0005, -0.0119,  0.0086],
        ...,
        [-0.0072, -0.0026, -0.0056,  ...,  0.0056, -0.0093,  0.0064],
        [ 0.0091, -0.0051,  0.0015,  ...,  0.0016, -0.0167, -0.0057],
        [-0.0001,  0.0004,  0.0063,  ...,  0.0039, -0.0115, -0.0064]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|█▉        | 192/965 [06:03<24:40,  1.92s/it]

tensor([[-0.0067, -0.0167,  0.0008,  ..., -0.0191, -0.0280, -0.0014],
        [-0.0073,  0.0047,  0.0171,  ..., -0.0023,  0.0002,  0.0018],
        [-0.0049, -0.0093,  0.0036,  ...,  0.0086, -0.0144, -0.0098],
        ...,
        [ 0.0061,  0.0119, -0.0033,  ..., -0.0054, -0.0153,  0.0010],
        [ 0.0006,  0.0187, -0.0092,  ...,  0.0063, -0.0288,  0.0097],
        [ 0.0065, -0.0029, -0.0020,  ...,  0.0027, -0.0122, -0.0077]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|██        | 193/965 [06:05<23:49,  1.85s/it]

tensor([[-0.0059,  0.0061, -0.0089,  ...,  0.0038, -0.0157, -0.0026],
        [-0.0151, -0.0030, -0.0076,  ...,  0.0115, -0.0279, -0.0005],
        [-0.0072,  0.0082, -0.0107,  ..., -0.0075, -0.0264,  0.0112],
        ...,
        [ 0.0022, -0.0106, -0.0036,  ...,  0.0125, -0.0165, -0.0102],
        [ 0.0047, -0.0035,  0.0089,  ...,  0.0046, -0.0063, -0.0059],
        [ 0.0018, -0.0053,  0.0123,  ..., -0.0018, -0.0136, -0.0004]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|██        | 194/965 [06:06<23:41,  1.84s/it]

tensor([[-0.0006,  0.0046, -0.0053,  ..., -0.0014, -0.0143,  0.0059],
        [-0.0187,  0.0059, -0.0062,  ...,  0.0111, -0.0176,  0.0169],
        [-0.0096, -0.0066,  0.0066,  ..., -0.0073, -0.0158, -0.0113],
        ...,
        [-0.0084,  0.0111, -0.0073,  ...,  0.0039, -0.0219,  0.0117],
        [ 0.0065, -0.0005, -0.0087,  ..., -0.0065, -0.0139, -0.0006],
        [-0.0020,  0.0050, -0.0016,  ...,  0.0008, -0.0145,  0.0054]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|██        | 195/965 [06:08<23:17,  1.82s/it]

tensor([[-3.2520e-03,  2.0558e-03, -1.0812e-02,  ..., -2.6705e-03,
         -1.4434e-02,  3.0778e-03],
        [-1.1483e-02, -1.8621e-03, -2.1404e-03,  ...,  9.1341e-03,
         -2.2540e-02,  1.2212e-02],
        [-4.1693e-03, -2.4758e-03, -4.2425e-05,  ...,  3.1219e-03,
         -2.0852e-02, -2.6304e-03],
        ...,
        [ 6.5259e-03, -7.8166e-03,  3.1163e-03,  ..., -3.4020e-04,
         -7.9896e-03, -1.5138e-03],
        [ 7.2048e-03, -3.6528e-03,  4.9596e-03,  ...,  5.9345e-03,
         -1.8474e-02, -7.6547e-03],
        [-7.9889e-04, -4.2091e-04, -1.3421e-02,  ..., -1.0636e-02,
         -1.6162e-02,  5.7843e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|██        | 196/965 [06:10<24:45,  1.93s/it]

tensor([[-1.2160e-02,  6.8207e-04, -4.8323e-03,  ...,  3.3707e-03,
         -2.6465e-02,  1.3129e-02],
        [-6.2970e-03, -5.8534e-03, -4.8839e-03,  ...,  1.8815e-02,
         -2.1734e-02, -1.5370e-02],
        [-4.5805e-03,  5.5530e-03, -3.6136e-03,  ..., -1.2243e-02,
         -2.8570e-02,  5.2438e-03],
        ...,
        [-1.8858e-03, -5.3292e-03, -2.6492e-03,  ...,  8.5553e-04,
         -3.2656e-02, -3.6304e-03],
        [-8.4538e-05,  9.1129e-03, -1.2751e-02,  ..., -5.8139e-03,
         -1.9207e-02,  1.4335e-03],
        [-4.7932e-03, -1.7948e-03,  4.6244e-03,  ..., -2.8251e-03,
         -2.0478e-02, -3.0874e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 20%|██        | 197/965 [06:12<24:50,  1.94s/it]

tensor([[ 5.8074e-03, -7.5509e-03, -6.1636e-04,  ..., -1.1585e-03,
         -1.5817e-02, -4.4918e-03],
        [-1.0379e-02,  1.8283e-02, -3.0263e-03,  ..., -2.5958e-02,
         -9.8005e-03,  2.2575e-02],
        [ 5.5292e-03,  1.4473e-02,  3.4747e-04,  ..., -7.8334e-03,
         -2.2316e-02,  3.6406e-03],
        ...,
        [ 7.7926e-03, -9.5263e-03,  5.7414e-03,  ..., -4.1374e-03,
         -1.3182e-02, -2.0058e-03],
        [ 2.1946e-04, -4.4063e-05, -3.6421e-03,  ..., -1.6534e-03,
         -1.2818e-02, -3.7409e-03],
        [ 1.4734e-03,  8.5532e-04, -1.7768e-02,  ..., -1.5331e-02,
         -3.7644e-03, -1.1622e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 198/965 [06:14<24:32,  1.92s/it]

tensor([[-0.0081, -0.0018,  0.0123,  ...,  0.0113, -0.0092, -0.0123],
        [ 0.0033,  0.0013, -0.0048,  ..., -0.0042, -0.0190,  0.0107],
        [ 0.0107, -0.0058,  0.0013,  ..., -0.0080, -0.0067, -0.0019],
        ...,
        [ 0.0008, -0.0017, -0.0013,  ..., -0.0064, -0.0182,  0.0097],
        [-0.0031,  0.0110, -0.0164,  ...,  0.0015, -0.0203,  0.0106],
        [ 0.0086, -0.0024,  0.0016,  ..., -0.0033, -0.0062, -0.0002]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 199/965 [06:16<24:39,  1.93s/it]

tensor([[ 0.0005, -0.0124, -0.0026,  ...,  0.0110, -0.0030, -0.0132],
        [ 0.0124,  0.0100, -0.0001,  ..., -0.0165, -0.0198,  0.0078],
        [-0.0043,  0.0013, -0.0067,  ...,  0.0062, -0.0140,  0.0077],
        ...,
        [-0.0012, -0.0134,  0.0038,  ...,  0.0029, -0.0094, -0.0121],
        [-0.0083,  0.0186, -0.0060,  ...,  0.0017, -0.0319,  0.0016],
        [ 0.0045, -0.0024,  0.0018,  ...,  0.0035, -0.0137, -0.0076]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 200/965 [06:18<24:43,  1.94s/it]

tensor([[-2.5771e-03, -1.0180e-03, -3.9294e-03,  ..., -1.2611e-02,
         -2.7632e-02, -1.0435e-02],
        [-6.2100e-03,  2.3525e-05,  1.0205e-03,  ...,  4.3024e-03,
         -1.7090e-02,  1.0947e-02],
        [ 5.7089e-03, -1.0822e-04, -2.1564e-03,  ...,  1.5120e-03,
         -1.5336e-02,  9.3728e-06],
        ...,
        [ 7.4331e-03,  9.5890e-03, -2.4428e-03,  ..., -1.9889e-03,
         -2.0407e-02,  3.7374e-03],
        [-3.6477e-03, -3.1871e-03, -8.7576e-03,  ...,  5.3055e-03,
         -7.6249e-03, -3.1080e-03],
        [ 3.6148e-03, -1.0855e-05, -3.3446e-03,  ...,  7.5675e-03,
         -7.8418e-03, -2.2167e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 201/965 [06:20<24:17,  1.91s/it]

tensor([[-0.0060,  0.0011,  0.0069,  ...,  0.0056, -0.0159,  0.0010],
        [ 0.0082,  0.0052,  0.0006,  ..., -0.0003, -0.0137,  0.0024],
        [ 0.0084, -0.0041,  0.0061,  ...,  0.0051, -0.0049, -0.0042],
        ...,
        [ 0.0081,  0.0143, -0.0046,  ..., -0.0081, -0.0107, -0.0007],
        [-0.0070,  0.0126, -0.0026,  ..., -0.0217, -0.0367,  0.0042],
        [ 0.0012, -0.0036,  0.0136,  ..., -0.0011, -0.0182, -0.0021]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 202/965 [06:22<26:27,  2.08s/it]

tensor([[ 0.0043, -0.0052, -0.0098,  ...,  0.0084, -0.0112, -0.0032],
        [-0.0062,  0.0054,  0.0049,  ...,  0.0058, -0.0162,  0.0059],
        [ 0.0026,  0.0004,  0.0106,  ..., -0.0271, -0.0192,  0.0069],
        ...,
        [ 0.0071, -0.0037,  0.0045,  ...,  0.0038, -0.0130, -0.0009],
        [ 0.0035, -0.0008,  0.0103,  ...,  0.0015, -0.0152,  0.0042],
        [ 0.0086, -0.0107,  0.0089,  ...,  0.0034, -0.0061, -0.0038]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 203/965 [06:25<26:29,  2.09s/it]

tensor([[-0.0007,  0.0011, -0.0012,  ...,  0.0017, -0.0160,  0.0005],
        [ 0.0095, -0.0009,  0.0074,  ..., -0.0062, -0.0095, -0.0070],
        [-0.0051,  0.0007,  0.0102,  ..., -0.0057, -0.0280,  0.0047],
        ...,
        [-0.0036,  0.0060, -0.0033,  ..., -0.0039, -0.0109,  0.0069],
        [-0.0063, -0.0039,  0.0038,  ..., -0.0034, -0.0101,  0.0070],
        [-0.0015, -0.0089,  0.0092,  ...,  0.0052, -0.0109, -0.0135]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 204/965 [06:27<27:05,  2.14s/it]

tensor([[ 1.7755e-03,  2.4099e-02,  1.4869e-04,  ..., -2.2774e-02,
         -7.3926e-03,  2.4312e-02],
        [-1.7998e-04, -5.5956e-04,  4.7270e-05,  ..., -1.4723e-03,
         -2.0860e-02, -8.1479e-04],
        [-8.7127e-03, -5.0362e-03, -2.5465e-03,  ...,  1.4800e-02,
         -2.2481e-02, -3.3024e-03],
        ...,
        [-1.5856e-02,  5.8918e-03,  1.1011e-03,  ..., -4.9387e-03,
         -1.9349e-02,  1.7147e-02],
        [-1.2462e-02,  2.0069e-03,  8.4337e-03,  ..., -4.9906e-03,
          9.4241e-03,  1.2626e-02],
        [-5.2084e-03,  2.5509e-02, -8.8829e-03,  ..., -7.5359e-03,
         -1.0975e-02,  1.5368e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██        | 205/965 [06:29<27:38,  2.18s/it]

tensor([[-0.0057, -0.0140,  0.0031,  ...,  0.0095, -0.0026, -0.0024],
        [-0.0057, -0.0052,  0.0013,  ..., -0.0070, -0.0080, -0.0035],
        [ 0.0018, -0.0025,  0.0031,  ...,  0.0007, -0.0140, -0.0028],
        ...,
        [ 0.0010, -0.0012,  0.0017,  ..., -0.0088, -0.0154,  0.0038],
        [-0.0041,  0.0029,  0.0055,  ...,  0.0066, -0.0034,  0.0093],
        [-0.0118,  0.0022, -0.0020,  ..., -0.0072, -0.0053,  0.0085]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██▏       | 206/965 [06:32<30:15,  2.39s/it]

tensor([[-0.0072, -0.0032, -0.0037,  ..., -0.0127, -0.0166,  0.0022],
        [ 0.0065,  0.0118,  0.0007,  ..., -0.0063, -0.0266,  0.0133],
        [ 0.0027,  0.0219,  0.0043,  ..., -0.0149, -0.0083,  0.0193],
        ...,
        [-0.0116,  0.0122, -0.0110,  ...,  0.0132, -0.0269,  0.0068],
        [ 0.0049, -0.0038,  0.0016,  ...,  0.0010, -0.0106, -0.0007],
        [ 0.0148,  0.0089,  0.0076,  ..., -0.0153, -0.0175,  0.0007]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 21%|██▏       | 207/965 [06:34<28:57,  2.29s/it]

tensor([[ 8.2556e-03, -2.7837e-03, -7.2078e-03,  ...,  8.2310e-06,
         -8.1099e-03, -6.7150e-04],
        [-6.1644e-03, -6.9014e-03,  6.5537e-03,  ...,  7.6835e-03,
         -1.0519e-02, -1.4292e-02],
        [ 4.9590e-03,  8.0940e-03,  2.7794e-04,  ..., -1.2514e-03,
         -2.1698e-02,  4.8484e-03],
        ...,
        [-2.7523e-03, -5.8083e-03,  2.0382e-03,  ..., -6.5298e-03,
         -1.5228e-02,  1.6455e-03],
        [ 4.1913e-03, -1.3994e-02, -1.1584e-03,  ...,  2.3337e-03,
          8.0929e-03, -1.0029e-02],
        [-6.9020e-03,  4.7435e-03,  7.1152e-03,  ...,  1.5134e-05,
         -1.4448e-02, -3.7358e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 208/965 [06:36<27:03,  2.14s/it]

tensor([[-8.2550e-03, -1.5649e-02, -4.4001e-03,  ..., -2.2802e-03,
         -2.3477e-02, -3.7654e-03],
        [-6.9648e-03, -9.6762e-03, -5.6890e-03,  ..., -8.5345e-03,
         -2.6356e-02,  1.6878e-03],
        [-9.7647e-03,  3.2056e-05, -3.0562e-03,  ..., -8.0578e-03,
         -2.2739e-02,  7.4432e-04],
        ...,
        [-4.8154e-03, -4.0171e-03, -1.3466e-02,  ..., -1.5992e-02,
         -2.7207e-02,  5.2433e-03],
        [-8.3979e-05,  4.3243e-03,  8.7303e-03,  ...,  1.1251e-03,
         -1.6657e-02, -3.7207e-03],
        [ 4.1173e-03,  1.5783e-03,  4.6756e-03,  ..., -1.4248e-03,
         -8.8892e-03, -1.9346e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 209/965 [06:38<26:45,  2.12s/it]

tensor([[-1.0020e-02, -6.2387e-03, -4.2885e-03,  ...,  6.7222e-03,
         -1.8073e-02, -2.0709e-03],
        [-1.8858e-03, -4.9329e-03, -3.1837e-03,  ...,  5.5693e-03,
         -1.1196e-02, -1.0135e-02],
        [ 1.7445e-03,  1.0904e-02,  2.5586e-05,  ..., -1.2197e-02,
         -2.5732e-02,  8.9128e-03],
        ...,
        [-5.5933e-03, -1.1961e-02, -6.0901e-03,  ...,  1.7746e-03,
         -6.5728e-03, -1.3978e-04],
        [-5.3071e-03, -1.4398e-02, -7.9235e-03,  ...,  2.8211e-02,
         -1.1537e-02, -9.7099e-03],
        [ 1.3962e-03, -5.2681e-03,  1.6982e-02,  ..., -1.6389e-02,
         -4.0080e-03, -1.2014e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 210/965 [06:40<25:52,  2.06s/it]

tensor([[ 5.4263e-03, -2.6146e-03, -5.5038e-03,  ...,  7.5266e-04,
         -1.2559e-02,  3.3476e-03],
        [ 7.0010e-03, -9.8096e-03,  1.2326e-03,  ..., -4.9024e-03,
         -1.9456e-02, -8.0133e-03],
        [-9.6665e-03, -1.7432e-02,  6.0125e-03,  ...,  5.2573e-03,
         -1.9739e-02, -2.1072e-03],
        ...,
        [ 1.6849e-03,  3.3752e-03,  1.1433e-02,  ..., -2.5855e-03,
         -1.1861e-02,  4.4207e-03],
        [-5.9712e-03,  5.0206e-05,  4.9316e-03,  ..., -2.0118e-02,
         -1.3854e-02, -2.4119e-03],
        [-9.7990e-03,  6.2994e-03, -6.5679e-03,  ..., -2.0717e-02,
         -2.9812e-02,  1.5202e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 211/965 [06:41<24:31,  1.95s/it]

tensor([[-8.3735e-03,  1.1406e-02, -1.5031e-02,  ...,  2.2469e-03,
         -2.5031e-02,  1.0604e-02],
        [ 5.3495e-03, -2.5843e-03,  5.7145e-03,  ..., -3.2558e-03,
          4.8613e-04,  2.6199e-03],
        [ 9.0825e-03,  1.7141e-02,  8.5831e-04,  ..., -8.4248e-03,
         -1.7847e-02,  1.1571e-02],
        ...,
        [-6.1750e-05, -8.7792e-03,  6.1081e-03,  ...,  3.2376e-03,
         -1.0823e-02,  6.3109e-03],
        [-1.3491e-03, -3.5178e-03,  5.7840e-03,  ..., -4.3156e-03,
         -1.0222e-02,  3.0936e-03],
        [ 7.7721e-04,  3.9191e-03, -1.5025e-02,  ..., -6.9920e-03,
         -1.4537e-02,  1.2218e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 212/965 [06:44<25:09,  2.01s/it]

tensor([[-0.0013,  0.0039, -0.0037,  ...,  0.0064, -0.0135, -0.0062],
        [-0.0127,  0.0040, -0.0014,  ..., -0.0016, -0.0213,  0.0159],
        [-0.0037, -0.0045,  0.0050,  ...,  0.0060, -0.0170,  0.0064],
        ...,
        [-0.0104, -0.0191,  0.0029,  ...,  0.0114, -0.0056, -0.0008],
        [-0.0031, -0.0153, -0.0075,  ...,  0.0220, -0.0112, -0.0091],
        [-0.0171,  0.0037, -0.0049,  ...,  0.0043, -0.0076, -0.0108]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 213/965 [06:45<24:22,  1.94s/it]

tensor([[-1.8179e-05,  1.6326e-02, -3.0707e-04,  ..., -9.2402e-03,
         -7.2114e-03,  8.7100e-03],
        [-3.9552e-03,  4.0342e-03,  3.3550e-03,  ..., -7.1312e-04,
         -1.3283e-02,  4.4874e-03],
        [-5.2894e-03, -1.8347e-03,  2.5760e-03,  ...,  2.8042e-03,
         -1.0897e-02,  8.0065e-03],
        ...,
        [-6.3375e-03,  1.0952e-03,  7.8832e-03,  ...,  4.6216e-03,
         -1.6512e-02, -3.7712e-03],
        [-9.1197e-03, -5.7823e-03,  3.6052e-03,  ...,  5.6515e-03,
         -1.6231e-02, -3.2888e-03],
        [-3.2878e-03,  8.8333e-03, -4.3947e-03,  ...,  7.8883e-03,
         -2.7431e-02,  4.3173e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 214/965 [06:47<23:02,  1.84s/it]

tensor([[-0.0045,  0.0063,  0.0047,  ..., -0.0033, -0.0169, -0.0002],
        [ 0.0096,  0.0212,  0.0041,  ..., -0.0144, -0.0133,  0.0153],
        [ 0.0085,  0.0162,  0.0034,  ..., -0.0080, -0.0068,  0.0112],
        ...,
        [-0.0009,  0.0071,  0.0010,  ..., -0.0089, -0.0318,  0.0023],
        [-0.0123,  0.0082, -0.0036,  ...,  0.0017, -0.0077,  0.0121],
        [-0.0019,  0.0058,  0.0071,  ..., -0.0125, -0.0224,  0.0066]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 215/965 [06:49<23:04,  1.85s/it]

tensor([[-1.2504e-02,  1.0171e-02,  8.2135e-03,  ...,  1.1047e-02,
         -1.3120e-02, -9.5566e-04],
        [-2.3880e-02, -1.4227e-02,  1.0714e-02,  ...,  2.2205e-02,
         -9.5210e-03, -2.6418e-02],
        [ 2.0073e-03,  1.1640e-02,  4.7219e-05,  ..., -1.8921e-03,
         -1.7297e-02,  4.2542e-03],
        ...,
        [ 5.5309e-03,  1.6491e-02,  2.8326e-03,  ..., -1.1766e-02,
         -1.4516e-02,  1.5249e-03],
        [ 1.9848e-03,  8.3122e-03, -5.6711e-03,  ..., -9.6825e-03,
         -3.4416e-03,  6.4302e-04],
        [ 6.9787e-03,  4.7507e-03, -7.2508e-04,  ..., -8.4039e-03,
         -1.2145e-02,  2.8321e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 216/965 [06:51<23:54,  1.91s/it]

tensor([[-0.0084,  0.0001,  0.0058,  ..., -0.0011, -0.0235, -0.0015],
        [-0.0016,  0.0027,  0.0044,  ...,  0.0012, -0.0098, -0.0068],
        [ 0.0101, -0.0093,  0.0124,  ...,  0.0040, -0.0151, -0.0087],
        ...,
        [ 0.0050,  0.0003, -0.0031,  ...,  0.0009, -0.0175,  0.0033],
        [-0.0014,  0.0157, -0.0013,  ...,  0.0007, -0.0146,  0.0129],
        [-0.0027,  0.0136, -0.0064,  ..., -0.0127, -0.0180,  0.0111]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 22%|██▏       | 217/965 [06:53<23:27,  1.88s/it]

tensor([[-9.2979e-03, -4.5880e-03, -7.0633e-03,  ...,  2.6533e-03,
         -6.7739e-03, -1.2505e-02],
        [-7.9789e-03,  7.3278e-03, -3.3729e-04,  ..., -4.4922e-03,
         -2.9504e-03,  6.9656e-03],
        [ 2.3531e-03,  2.1474e-03,  8.9399e-03,  ...,  5.9873e-03,
         -1.7993e-02,  8.7722e-03],
        ...,
        [-7.5943e-03, -6.7948e-03,  1.1132e-02,  ...,  1.3660e-02,
         -9.6572e-03, -1.8199e-02],
        [-1.5726e-02, -2.4567e-03,  1.5453e-02,  ...,  2.3266e-02,
         -2.5060e-02, -1.0924e-02],
        [-4.1859e-03, -9.4730e-05,  3.8712e-03,  ...,  9.4585e-04,
         -1.4061e-02,  7.9164e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 218/965 [06:55<25:04,  2.01s/it]

tensor([[-0.0081, -0.0103,  0.0025,  ..., -0.0049, -0.0127,  0.0041],
        [ 0.0154, -0.0099,  0.0047,  ..., -0.0099, -0.0130, -0.0095],
        [-0.0119, -0.0045,  0.0028,  ...,  0.0003, -0.0182, -0.0054],
        ...,
        [ 0.0130,  0.0053,  0.0047,  ..., -0.0112, -0.0263,  0.0024],
        [ 0.0085, -0.0041, -0.0030,  ..., -0.0040, -0.0086, -0.0036],
        [ 0.0014, -0.0049,  0.0139,  ..., -0.0066, -0.0160, -0.0053]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 219/965 [06:57<24:03,  1.94s/it]

tensor([[-0.0012, -0.0081, -0.0025,  ...,  0.0083, -0.0068,  0.0016],
        [ 0.0055, -0.0029, -0.0011,  ...,  0.0034, -0.0037,  0.0005],
        [ 0.0027,  0.0013, -0.0039,  ...,  0.0010, -0.0083, -0.0005],
        ...,
        [-0.0056, -0.0101,  0.0017,  ..., -0.0116, -0.0039, -0.0064],
        [ 0.0002, -0.0131,  0.0028,  ..., -0.0008, -0.0045, -0.0101],
        [ 0.0130, -0.0046,  0.0038,  ...,  0.0051, -0.0085, -0.0057]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 220/965 [06:58<23:00,  1.85s/it]

tensor([[ 0.0026,  0.0068,  0.0137,  ..., -0.0036,  0.0074,  0.0100],
        [-0.0026, -0.0038,  0.0013,  ..., -0.0020, -0.0005, -0.0032],
        [ 0.0044, -0.0087, -0.0004,  ..., -0.0003, -0.0009, -0.0069],
        ...,
        [ 0.0041,  0.0083, -0.0035,  ...,  0.0117, -0.0097,  0.0053],
        [ 0.0042,  0.0017, -0.0023,  ...,  0.0022, -0.0105,  0.0020],
        [-0.0034, -0.0081,  0.0005,  ..., -0.0083, -0.0214, -0.0038]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 221/965 [07:00<22:22,  1.81s/it]

tensor([[ 0.0029,  0.0059,  0.0029,  ..., -0.0074, -0.0214,  0.0040],
        [-0.0130, -0.0009,  0.0029,  ..., -0.0025, -0.0313,  0.0116],
        [-0.0121,  0.0036, -0.0102,  ...,  0.0031, -0.0199,  0.0072],
        ...,
        [ 0.0033,  0.0103,  0.0012,  ..., -0.0076, -0.0208, -0.0034],
        [-0.0063,  0.0050, -0.0067,  ...,  0.0195, -0.0216,  0.0141],
        [-0.0065, -0.0162,  0.0139,  ...,  0.0034, -0.0186, -0.0203]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 222/965 [07:02<24:11,  1.95s/it]

tensor([[ 0.0033,  0.0013, -0.0048,  ..., -0.0024, -0.0226,  0.0122],
        [-0.0002, -0.0015,  0.0022,  ...,  0.0153, -0.0171,  0.0086],
        [ 0.0046,  0.0067, -0.0154,  ..., -0.0060, -0.0162,  0.0054],
        ...,
        [-0.0104,  0.0078,  0.0101,  ..., -0.0158, -0.0122,  0.0138],
        [-0.0078, -0.0016, -0.0040,  ..., -0.0097,  0.0015, -0.0021],
        [-0.0020, -0.0062, -0.0073,  ...,  0.0059, -0.0182, -0.0074]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 223/965 [07:04<23:57,  1.94s/it]

tensor([[-0.0057,  0.0095, -0.0135,  ..., -0.0113, -0.0188,  0.0215],
        [-0.0119,  0.0023,  0.0040,  ..., -0.0059, -0.0228,  0.0004],
        [-0.0137, -0.0222,  0.0073,  ..., -0.0073, -0.0150, -0.0041],
        ...,
        [-0.0166,  0.0011,  0.0096,  ...,  0.0082, -0.0204, -0.0102],
        [-0.0032,  0.0126, -0.0087,  ..., -0.0158, -0.0076,  0.0091],
        [-0.0063,  0.0285, -0.0024,  ..., -0.0093, -0.0315,  0.0114]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 224/965 [07:06<23:34,  1.91s/it]

tensor([[-1.3999e-02, -6.1226e-03,  6.0372e-03,  ...,  1.6307e-03,
         -8.5963e-03, -9.8918e-03],
        [ 5.6730e-03, -5.2157e-03,  8.4781e-03,  ..., -6.2327e-03,
         -1.4649e-02,  1.0594e-04],
        [-2.7693e-03, -9.3562e-03,  1.7570e-02,  ..., -4.4271e-03,
         -2.3860e-02, -9.0403e-05],
        ...,
        [ 5.8567e-04,  2.9797e-03, -8.3962e-03,  ..., -1.7068e-02,
          5.5989e-03, -4.1261e-03],
        [-6.3830e-03,  1.0984e-02, -6.2285e-03,  ..., -8.2713e-04,
         -2.0372e-02,  8.7169e-03],
        [ 1.0555e-03, -2.8172e-03,  8.7969e-03,  ...,  9.7329e-03,
         -1.5728e-03, -8.4949e-04]], device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 225/965 [07:08<22:31,  1.83s/it]

tensor([[ 4.2815e-03, -4.4512e-03,  4.4027e-03,  ...,  2.8323e-03,
         -1.4335e-02,  2.9565e-03],
        [-1.6420e-03, -2.3876e-03,  3.5681e-04,  ..., -6.9302e-03,
         -1.1832e-02,  8.5606e-03],
        [-8.6926e-03,  1.0241e-02, -1.2556e-02,  ..., -1.5228e-03,
         -2.6152e-02,  1.2421e-02],
        ...,
        [ 5.0202e-05,  1.1532e-02, -3.8146e-03,  ..., -9.9389e-03,
         -1.1539e-02,  3.2961e-03],
        [-2.2632e-03,  4.3837e-03,  4.4292e-03,  ..., -9.8716e-03,
         -3.5261e-02,  3.7955e-03],
        [ 7.8702e-03,  1.2784e-02,  3.1828e-03,  ..., -3.4798e-03,
         -2.1246e-02,  7.7426e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 23%|██▎       | 226/965 [07:10<23:17,  1.89s/it]

tensor([[ 0.0071, -0.0144,  0.0014,  ...,  0.0074,  0.0045, -0.0117],
        [ 0.0072,  0.0034,  0.0038,  ...,  0.0008, -0.0068,  0.0012],
        [ 0.0064, -0.0002, -0.0106,  ..., -0.0060, -0.0038,  0.0009],
        ...,
        [ 0.0048,  0.0009,  0.0032,  ..., -0.0037, -0.0094, -0.0012],
        [-0.0064,  0.0129, -0.0139,  ..., -0.0009, -0.0185,  0.0234],
        [ 0.0050, -0.0029,  0.0012,  ..., -0.0008, -0.0121,  0.0001]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▎       | 227/965 [07:12<23:09,  1.88s/it]

tensor([[-0.0084,  0.0103, -0.0101,  ..., -0.0119, -0.0299,  0.0082],
        [ 0.0025,  0.0083, -0.0005,  ...,  0.0058, -0.0166, -0.0072],
        [ 0.0069,  0.0139, -0.0074,  ..., -0.0027, -0.0215,  0.0071],
        ...,
        [ 0.0106,  0.0142, -0.0231,  ..., -0.0197, -0.0026,  0.0047],
        [-0.0112, -0.0061, -0.0011,  ...,  0.0043, -0.0095,  0.0021],
        [ 0.0045,  0.0110,  0.0101,  ..., -0.0021, -0.0001,  0.0099]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▎       | 228/965 [07:14<22:48,  1.86s/it]

tensor([[-3.6503e-03,  1.7408e-03, -1.7462e-03,  ...,  1.3809e-02,
         -1.7453e-02,  7.8613e-03],
        [ 7.0004e-04, -1.1940e-02, -2.3094e-03,  ..., -1.0986e-02,
         -1.5823e-02, -7.0146e-03],
        [-1.8888e-02, -2.4643e-03,  5.3592e-03,  ...,  1.1126e-02,
         -7.1866e-03, -1.0464e-02],
        ...,
        [-1.2050e-02, -4.8683e-03, -8.4089e-03,  ..., -1.5076e-02,
         -2.5142e-02,  8.8530e-03],
        [-1.5820e-03,  7.8364e-03,  5.4249e-03,  ...,  4.3076e-05,
         -7.2988e-03,  5.0311e-03],
        [-1.0094e-02, -1.4021e-02,  4.3768e-03,  ...,  3.8893e-03,
         -6.9076e-03, -1.7437e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▎       | 229/965 [07:15<21:35,  1.76s/it]

tensor([[-1.1420e-02, -1.5768e-02,  1.4186e-02,  ...,  1.4134e-02,
         -2.0931e-03, -1.8778e-02],
        [-7.3835e-03,  1.5752e-03,  1.0181e-03,  ..., -1.3789e-03,
         -1.6110e-02,  9.2365e-03],
        [-2.0207e-03,  6.2831e-03,  4.7249e-03,  ..., -7.6596e-03,
          7.4380e-03,  1.8420e-03],
        ...,
        [-7.8427e-03,  2.2122e-02, -1.2978e-03,  ..., -7.6105e-03,
         -7.9534e-03,  1.3724e-02],
        [ 1.2848e-03, -1.7011e-03,  1.5685e-05,  ...,  5.9868e-03,
         -1.0476e-02,  2.5642e-03],
        [ 7.2899e-04, -9.3655e-03, -5.5186e-03,  ...,  1.3448e-02,
         -1.3217e-02, -1.2593e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▍       | 230/965 [07:17<20:41,  1.69s/it]

tensor([[-6.8147e-03,  1.0947e-03,  1.1760e-03,  ...,  8.8884e-05,
         -1.3777e-02,  3.8976e-03],
        [-2.9125e-04,  1.4956e-03, -9.5446e-04,  ..., -6.3930e-03,
         -3.0349e-02, -8.3471e-04],
        [-1.9925e-03, -5.7818e-04,  1.0449e-02,  ...,  1.6619e-02,
         -1.3990e-02, -1.2378e-02],
        ...,
        [-1.1009e-02,  8.3990e-03, -7.5115e-03,  ...,  9.1637e-03,
         -2.3588e-02,  7.2946e-03],
        [-5.7124e-03, -6.8898e-04, -4.8781e-04,  ...,  9.1280e-03,
         -1.6884e-02,  5.2656e-03],
        [ 6.6333e-03, -3.4123e-03, -1.0655e-03,  ..., -2.7055e-03,
         -1.0618e-02, -3.7590e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▍       | 231/965 [07:19<21:51,  1.79s/it]

tensor([[-0.0039,  0.0119,  0.0057,  ...,  0.0039, -0.0066, -0.0031],
        [-0.0087,  0.0009,  0.0030,  ...,  0.0092, -0.0142, -0.0005],
        [-0.0130,  0.0197, -0.0089,  ...,  0.0068, -0.0146,  0.0062],
        ...,
        [-0.0119, -0.0015,  0.0076,  ..., -0.0007, -0.0126,  0.0024],
        [-0.0130,  0.0042, -0.0061,  ...,  0.0138, -0.0127,  0.0010],
        [-0.0061,  0.0071,  0.0005,  ..., -0.0004, -0.0137,  0.0102]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▍       | 232/965 [07:21<22:33,  1.85s/it]

tensor([[ 0.0028, -0.0092,  0.0025,  ..., -0.0031,  0.0046, -0.0061],
        [ 0.0079,  0.0137, -0.0015,  ..., -0.0022, -0.0134, -0.0019],
        [-0.0058, -0.0032,  0.0018,  ..., -0.0250, -0.0107, -0.0038],
        ...,
        [ 0.0053, -0.0061, -0.0130,  ...,  0.0087, -0.0136, -0.0008],
        [-0.0044,  0.0019,  0.0105,  ..., -0.0020, -0.0105, -0.0019],
        [ 0.0195,  0.0048,  0.0012,  ..., -0.0059, -0.0117,  0.0046]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▍       | 233/965 [07:22<21:20,  1.75s/it]

tensor([[ 0.0357, -0.0724, -0.0029,  ...,  0.0519, -0.0161, -0.0406],
        [-0.0094,  0.0109, -0.0112,  ..., -0.0048, -0.0169,  0.0001],
        [-0.0054, -0.0034,  0.0007,  ..., -0.0064, -0.0215, -0.0149],
        ...,
        [ 0.0020, -0.0002,  0.0022,  ..., -0.0015, -0.0028, -0.0056],
        [-0.0003, -0.0049,  0.0079,  ...,  0.0005, -0.0060, -0.0152],
        [-0.0078,  0.0126, -0.0018,  ..., -0.0044, -0.0196, -0.0082]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▍       | 234/965 [07:24<22:26,  1.84s/it]

tensor([[-0.0012,  0.0063, -0.0013,  ...,  0.0037, -0.0035,  0.0091],
        [-0.0008,  0.0020, -0.0033,  ..., -0.0018, -0.0051, -0.0008],
        [-0.0100,  0.0046, -0.0069,  ...,  0.0093, -0.0101, -0.0016],
        ...,
        [ 0.0075, -0.0006, -0.0058,  ..., -0.0052, -0.0051,  0.0021],
        [-0.0009, -0.0023,  0.0008,  ..., -0.0028, -0.0091, -0.0100],
        [-0.0074,  0.0058, -0.0019,  ..., -0.0119, -0.0005,  0.0064]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▍       | 235/965 [07:26<22:16,  1.83s/it]

tensor([[-0.0181, -0.0056, -0.0079,  ...,  0.0150, -0.0070, -0.0105],
        [-0.0089,  0.0165, -0.0132,  ..., -0.0012, -0.0141,  0.0183],
        [-0.0117,  0.0073,  0.0056,  ...,  0.0092, -0.0064, -0.0028],
        ...,
        [-0.0055,  0.0098,  0.0014,  ...,  0.0056,  0.0003,  0.0053],
        [-0.0051,  0.0010, -0.0021,  ..., -0.0052, -0.0143,  0.0067],
        [ 0.0002,  0.0040, -0.0070,  ...,  0.0025, -0.0112,  0.0073]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 24%|██▍       | 236/965 [07:28<22:21,  1.84s/it]

tensor([[ 1.9259e-03, -7.3524e-03, -7.6906e-04,  ...,  2.0628e-03,
         -9.1254e-03, -4.0375e-03],
        [ 1.2432e-02,  2.2386e-03, -5.1474e-03,  ..., -1.6253e-04,
         -8.4757e-03,  1.8407e-03],
        [-6.5639e-03, -1.1276e-02, -3.4010e-03,  ..., -1.7623e-04,
         -2.5768e-04, -8.1858e-03],
        ...,
        [ 4.4944e-03,  2.0896e-03, -9.7562e-03,  ..., -3.2634e-06,
         -1.3670e-02,  5.4267e-03],
        [ 3.2718e-03,  5.3426e-03, -1.0795e-02,  ..., -5.6455e-03,
         -5.5427e-03,  6.9870e-03],
        [-1.7862e-03,  6.2944e-03, -7.5890e-03,  ..., -2.8124e-03,
         -1.2285e-02,  1.0077e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▍       | 237/965 [07:30<22:22,  1.84s/it]

tensor([[-0.0070, -0.0187,  0.0040,  ..., -0.0008, -0.0227, -0.0005],
        [ 0.0009,  0.0012,  0.0017,  ...,  0.0141, -0.0020, -0.0020],
        [-0.0069, -0.0063,  0.0020,  ...,  0.0035, -0.0143, -0.0132],
        ...,
        [-0.0071, -0.0065, -0.0020,  ..., -0.0042, -0.0087, -0.0085],
        [-0.0016, -0.0217, -0.0066,  ..., -0.0095, -0.0174, -0.0033],
        [-0.0149,  0.0160, -0.0203,  ...,  0.0009, -0.0169,  0.0222]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▍       | 238/965 [07:32<22:48,  1.88s/it]

tensor([[ 0.0063, -0.0052, -0.0072,  ...,  0.0022, -0.0158, -0.0060],
        [ 0.0123, -0.0086,  0.0032,  ...,  0.0131, -0.0089, -0.0023],
        [ 0.0022, -0.0197, -0.0002,  ..., -0.0047,  0.0025, -0.0209],
        ...,
        [ 0.0017, -0.0100, -0.0108,  ...,  0.0079, -0.0145, -0.0116],
        [ 0.0080,  0.0062, -0.0020,  ..., -0.0148, -0.0148,  0.0023],
        [ 0.0074,  0.0069,  0.0026,  ..., -0.0101, -0.0053, -0.0032]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▍       | 239/965 [07:33<22:09,  1.83s/it]

tensor([[-0.0012,  0.0064,  0.0027,  ..., -0.0079, -0.0258,  0.0130],
        [ 0.0016, -0.0005,  0.0009,  ..., -0.0035, -0.0121,  0.0004],
        [ 0.0050,  0.0113,  0.0059,  ..., -0.0172, -0.0129,  0.0011],
        ...,
        [ 0.0051, -0.0062, -0.0015,  ..., -0.0078, -0.0122, -0.0092],
        [ 0.0110,  0.0133, -0.0044,  ...,  0.0023, -0.0179,  0.0062],
        [ 0.0079,  0.0136,  0.0075,  ..., -0.0053, -0.0104,  0.0058]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▍       | 240/965 [07:35<21:45,  1.80s/it]

tensor([[ 0.0010,  0.0093, -0.0005,  ..., -0.0075, -0.0119, -0.0022],
        [-0.0074,  0.0283, -0.0079,  ..., -0.0149, -0.0312,  0.0194],
        [-0.0052,  0.0014,  0.0037,  ..., -0.0018, -0.0169, -0.0031],
        ...,
        [-0.0115,  0.0115, -0.0074,  ..., -0.0170, -0.0160,  0.0161],
        [ 0.0043, -0.0158,  0.0067,  ..., -0.0106, -0.0077, -0.0141],
        [ 0.0048,  0.0109, -0.0029,  ..., -0.0077, -0.0152,  0.0060]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▍       | 241/965 [07:37<21:49,  1.81s/it]

tensor([[-7.4783e-03, -3.8219e-03,  5.5078e-03,  ..., -1.0850e-03,
         -1.7085e-02,  2.4037e-03],
        [-5.7818e-03,  8.9763e-03,  2.5707e-03,  ...,  3.7367e-03,
         -1.8024e-02, -5.2257e-03],
        [-1.3410e-02, -5.5487e-03,  2.1626e-03,  ..., -1.9918e-02,
         -1.7404e-02, -3.0555e-03],
        ...,
        [-1.7699e-05,  7.2753e-03, -2.5316e-03,  ..., -1.3802e-02,
         -1.2199e-02,  1.0719e-02],
        [-1.1593e-02, -3.1012e-03,  1.2680e-02,  ..., -1.5024e-03,
         -2.0985e-02, -1.0467e-02],
        [ 2.3355e-03, -2.0942e-04,  6.3452e-03,  ..., -3.4654e-03,
         -9.9883e-03,  2.5907e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▌       | 242/965 [07:39<21:56,  1.82s/it]

tensor([[-4.3125e-03,  4.2509e-03, -1.1907e-03,  ..., -3.1856e-03,
         -7.1910e-03, -8.6691e-04],
        [-1.7973e-02,  1.6554e-02, -1.0245e-02,  ..., -2.5641e-02,
         -1.5470e-02,  1.0188e-02],
        [-4.2591e-03,  2.1086e-02, -9.6721e-03,  ..., -5.7237e-05,
         -2.3842e-02,  1.9704e-02],
        ...,
        [-1.6108e-02,  6.5465e-03,  1.5955e-03,  ..., -1.1717e-03,
          3.3609e-03,  6.6409e-03],
        [ 4.6724e-03, -1.9862e-03,  6.3203e-03,  ..., -1.0299e-02,
         -7.9506e-03, -8.4678e-03],
        [-2.4770e-03,  9.4418e-03,  3.3919e-03,  ..., -1.2648e-02,
         -1.3331e-02,  1.5927e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▌       | 243/965 [07:41<21:28,  1.79s/it]

tensor([[ 3.3219e-04,  9.4706e-03,  5.4779e-05,  ...,  5.0228e-03,
         -5.9246e-03, -1.3483e-03],
        [ 7.6909e-03, -1.6010e-03, -1.1160e-03,  ..., -1.8158e-03,
         -1.0082e-02,  2.6658e-04],
        [-6.3074e-03, -5.7568e-03,  3.1231e-03,  ..., -1.9308e-02,
         -1.7537e-02, -3.7903e-03],
        ...,
        [ 3.1486e-04, -4.4696e-04,  4.4825e-03,  ..., -1.3767e-03,
          1.2103e-03, -6.1764e-03],
        [-1.2610e-02, -1.5602e-03,  7.5398e-03,  ..., -8.0292e-03,
         -1.9074e-02,  1.1044e-02],
        [-2.3982e-03,  5.8621e-05,  1.1451e-02,  ..., -4.7645e-04,
         -5.1446e-03, -1.8017e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▌       | 244/965 [07:42<22:03,  1.84s/it]

tensor([[-0.0086, -0.0019,  0.0089,  ...,  0.0002, -0.0060, -0.0064],
        [ 0.0063,  0.0059, -0.0175,  ..., -0.0201, -0.0058,  0.0075],
        [-0.0047, -0.0003,  0.0006,  ..., -0.0025, -0.0010,  0.0002],
        ...,
        [-0.0179,  0.0180, -0.0034,  ..., -0.0003, -0.0269,  0.0089],
        [ 0.0004, -0.0075, -0.0164,  ...,  0.0090, -0.0139, -0.0119],
        [-0.0093, -0.0026, -0.0072,  ...,  0.0017, -0.0150, -0.0052]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▌       | 245/965 [07:44<22:15,  1.86s/it]

tensor([[-2.6277e-03, -1.8732e-03, -9.2303e-03,  ...,  5.8778e-04,
         -4.5527e-03, -4.0189e-04],
        [ 3.6164e-03, -9.4535e-03,  3.8425e-03,  ...,  5.2040e-03,
          5.6030e-03, -6.1069e-03],
        [-9.7193e-06, -3.1950e-03,  1.3210e-03,  ...,  4.8748e-03,
          5.0328e-03, -1.6146e-02],
        ...,
        [ 1.2521e-03,  4.0692e-04,  5.1121e-04,  ...,  5.1123e-03,
         -8.0187e-03, -6.4661e-03],
        [-6.4570e-03,  7.4178e-03, -5.9886e-03,  ...,  6.4093e-04,
         -4.3785e-03,  1.8232e-03],
        [-4.8445e-03,  9.0050e-03, -2.3110e-03,  ..., -5.9555e-03,
         -3.5356e-03,  2.1980e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 25%|██▌       | 246/965 [07:46<22:24,  1.87s/it]

tensor([[ 0.0031, -0.0126,  0.0045,  ...,  0.0025, -0.0003, -0.0192],
        [ 0.0014, -0.0043,  0.0014,  ...,  0.0047, -0.0126, -0.0005],
        [-0.0033, -0.0070,  0.0009,  ...,  0.0038,  0.0035, -0.0148],
        ...,
        [-0.0015, -0.0102,  0.0030,  ...,  0.0077,  0.0031, -0.0039],
        [-0.0129,  0.0041,  0.0094,  ...,  0.0089, -0.0047, -0.0097],
        [-0.0030, -0.0090, -0.0011,  ..., -0.0030, -0.0179, -0.0083]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▌       | 247/965 [07:48<22:04,  1.85s/it]

tensor([[ 0.0102,  0.0128,  0.0075,  ..., -0.0058, -0.0166,  0.0044],
        [-0.0002,  0.0150, -0.0042,  ..., -0.0014, -0.0133, -0.0031],
        [-0.0193, -0.0044, -0.0118,  ...,  0.0160, -0.0194,  0.0040],
        ...,
        [-0.0075, -0.0162,  0.0096,  ..., -0.0032, -0.0075, -0.0133],
        [ 0.0035,  0.0304, -0.0082,  ..., -0.0146,  0.0008,  0.0060],
        [-0.0051, -0.0090,  0.0030,  ...,  0.0131, -0.0123, -0.0211]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▌       | 248/965 [07:50<22:07,  1.85s/it]

tensor([[ 6.7238e-03, -5.1472e-03,  1.0266e-02,  ...,  3.6082e-03,
         -1.3817e-02, -1.7326e-03],
        [-1.4213e-03,  8.5063e-03,  1.0427e-02,  ..., -8.5557e-04,
         -1.3559e-02,  1.0776e-02],
        [-8.2331e-03,  1.0681e-03, -5.6523e-03,  ..., -1.5913e-02,
         -1.6950e-02,  5.6278e-03],
        ...,
        [ 1.1637e-02,  8.5570e-03,  3.6918e-06,  ...,  9.9916e-04,
         -1.5718e-02,  1.5540e-03],
        [ 2.8807e-02, -6.4620e-02,  7.1197e-03,  ...,  8.2567e-02,
         -1.7261e-02, -4.3826e-03],
        [ 1.0732e-02, -9.0987e-03,  4.7706e-03,  ...,  1.7082e-03,
         -2.1957e-03, -8.6834e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▌       | 249/965 [07:52<22:03,  1.85s/it]

tensor([[ 0.0045,  0.0150, -0.0060,  ..., -0.0142, -0.0066,  0.0144],
        [-0.0076,  0.0013, -0.0153,  ..., -0.0059, -0.0350, -0.0008],
        [ 0.0068,  0.0173,  0.0068,  ..., -0.0118, -0.0132, -0.0021],
        ...,
        [-0.0158, -0.0070, -0.0021,  ..., -0.0113, -0.0170,  0.0132],
        [ 0.0024,  0.0067, -0.0021,  ..., -0.0071, -0.0138,  0.0079],
        [-0.0062, -0.0050, -0.0159,  ...,  0.0148, -0.0065, -0.0065]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▌       | 250/965 [07:54<21:40,  1.82s/it]

tensor([[-3.8730e-03,  8.2393e-03, -1.0390e-02,  ..., -1.1996e-03,
         -8.8623e-03,  5.5639e-03],
        [-4.2479e-05, -3.4712e-03, -7.0635e-03,  ...,  3.4587e-03,
         -1.1050e-02, -3.0060e-03],
        [ 4.6777e-03,  6.9577e-03, -1.2918e-02,  ..., -8.7455e-03,
         -1.1234e-02,  1.0450e-03],
        ...,
        [-3.8305e-03,  1.9991e-02, -1.3889e-03,  ..., -6.8746e-03,
         -2.0478e-02,  6.6009e-03],
        [ 9.5920e-03,  1.8544e-02,  2.4328e-03,  ..., -1.0497e-02,
          4.8403e-03,  1.1859e-02],
        [-2.1861e-03, -1.1300e-03, -8.8220e-03,  ..., -1.5811e-02,
         -8.8304e-03,  2.4438e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▌       | 251/965 [07:55<21:09,  1.78s/it]

tensor([[-4.0017e-03,  5.7198e-03, -9.0542e-03,  ..., -5.1722e-03,
         -1.1861e-02,  3.0302e-03],
        [-1.4762e-03,  8.0903e-03, -9.7927e-03,  ..., -3.0736e-03,
         -1.2252e-02,  4.0028e-03],
        [-3.7728e-03,  7.9932e-03, -5.5392e-03,  ..., -5.7174e-03,
         -1.2774e-02,  8.9382e-03],
        ...,
        [ 6.5499e-03, -9.7461e-05, -5.0647e-04,  ..., -2.9426e-03,
         -3.3668e-03,  2.9170e-03],
        [-3.1880e-03,  2.5090e-02, -2.3755e-02,  ..., -6.5701e-03,
         -7.9919e-03, -4.6509e-04],
        [ 2.8558e-04,  1.8125e-02, -6.3811e-03,  ..., -8.4324e-03,
         -1.3015e-02, -1.8928e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▌       | 252/965 [07:57<21:55,  1.84s/it]

tensor([[ 0.0003,  0.0103, -0.0157,  ...,  0.0031, -0.0143,  0.0057],
        [ 0.0012, -0.0062, -0.0035,  ..., -0.0038, -0.0067, -0.0055],
        [ 0.0069, -0.0006,  0.0032,  ..., -0.0004, -0.0149, -0.0139],
        ...,
        [ 0.0025,  0.0027, -0.0079,  ..., -0.0004, -0.0131,  0.0039],
        [-0.0029,  0.0237, -0.0040,  ..., -0.0159, -0.0079,  0.0238],
        [-0.0032,  0.0177,  0.0049,  ..., -0.0027, -0.0228,  0.0009]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▌       | 253/965 [07:59<22:16,  1.88s/it]

tensor([[ 7.3852e-04, -2.8999e-03, -4.2006e-03,  ...,  1.1218e-03,
         -9.9644e-03, -4.6148e-03],
        [-6.8812e-03,  1.0629e-02, -1.1540e-02,  ..., -4.5191e-04,
         -1.3988e-02,  1.2435e-02],
        [-7.1374e-03,  8.3415e-03, -1.7456e-02,  ...,  1.3900e-03,
         -1.7447e-02,  1.5282e-02],
        ...,
        [ 5.0621e-03, -1.3705e-03, -1.4842e-02,  ...,  5.1482e-03,
         -7.6533e-03, -4.7713e-03],
        [-8.7138e-05, -1.5423e-02, -5.4832e-03,  ...,  1.1555e-02,
          2.5158e-03, -1.3812e-02],
        [-7.6780e-04, -6.5349e-03, -6.4112e-03,  ...,  9.2814e-03,
         -2.3833e-03, -2.6594e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▋       | 254/965 [08:01<21:40,  1.83s/it]

tensor([[-7.3496e-04, -9.2094e-03,  1.2857e-02,  ..., -5.4874e-03,
         -7.7448e-03, -3.0050e-03],
        [ 1.2532e-02,  7.4919e-03, -2.3761e-03,  ..., -1.4546e-02,
          2.0841e-03,  8.5371e-03],
        [-5.1492e-03, -8.6942e-03, -3.8760e-03,  ...,  1.0740e-03,
         -5.7097e-03,  3.2829e-05],
        ...,
        [ 1.0938e-02, -2.0733e-03, -4.6950e-03,  ..., -2.7534e-03,
         -2.5214e-03,  2.7820e-03],
        [-9.8135e-03, -5.4093e-03,  2.7994e-03,  ..., -1.4937e-02,
         -1.3167e-02, -2.6483e-03],
        [ 1.0845e-03, -1.8690e-04, -2.3479e-03,  ..., -1.4126e-02,
         -8.7533e-05, -3.6120e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 26%|██▋       | 255/965 [08:03<21:48,  1.84s/it]

tensor([[ 0.0047, -0.0034,  0.0063,  ..., -0.0052, -0.0204,  0.0020],
        [-0.0027,  0.0085,  0.0096,  ..., -0.0147, -0.0071,  0.0111],
        [-0.0020,  0.0094,  0.0016,  ..., -0.0070, -0.0276, -0.0073],
        ...,
        [ 0.0057, -0.0046,  0.0007,  ...,  0.0002, -0.0058, -0.0007],
        [ 0.0098,  0.0031, -0.0112,  ...,  0.0036, -0.0212, -0.0017],
        [-0.0044,  0.0042,  0.0117,  ..., -0.0143, -0.0040,  0.0111]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 256/965 [08:05<22:10,  1.88s/it]

tensor([[-8.9226e-03, -3.5068e-03, -5.0561e-03,  ...,  9.8062e-03,
         -1.3925e-02, -9.2907e-03],
        [ 6.1471e-03,  9.7948e-03,  8.7041e-03,  ..., -3.3827e-03,
         -1.5281e-02,  6.3230e-03],
        [-3.0369e-03, -1.0811e-02, -4.7551e-03,  ...,  1.4701e-02,
         -4.9396e-03, -1.3605e-02],
        ...,
        [-4.7995e-03,  2.4359e-03, -3.6873e-05,  ..., -1.2251e-02,
         -1.6089e-03, -1.6682e-03],
        [-1.0440e-03, -1.1391e-02, -4.9648e-04,  ...,  1.3614e-02,
         -3.4205e-03,  1.5527e-03],
        [-1.7271e-02,  8.6034e-03, -6.7426e-03,  ..., -5.8540e-03,
         -1.7412e-02,  1.4191e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 257/965 [08:07<22:13,  1.88s/it]

tensor([[ 0.0132,  0.0053, -0.0056,  ..., -0.0166, -0.0024,  0.0059],
        [ 0.0029,  0.0064, -0.0121,  ..., -0.0030, -0.0024,  0.0119],
        [ 0.0054,  0.0104, -0.0086,  ..., -0.0126, -0.0011,  0.0027],
        ...,
        [ 0.0051, -0.0019, -0.0063,  ..., -0.0013, -0.0036,  0.0102],
        [ 0.0087, -0.0021, -0.0058,  ..., -0.0062,  0.0087, -0.0028],
        [ 0.0043, -0.0098,  0.0003,  ..., -0.0013, -0.0008, -0.0008]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 258/965 [08:09<23:17,  1.98s/it]

tensor([[ 0.0018,  0.0146, -0.0159,  ..., -0.0255, -0.0312,  0.0144],
        [ 0.0063,  0.0283, -0.0068,  ..., -0.0123,  0.0023,  0.0250],
        [ 0.0133,  0.0070, -0.0141,  ..., -0.0123, -0.0084, -0.0060],
        ...,
        [ 0.0030,  0.0012, -0.0021,  ..., -0.0037, -0.0101,  0.0031],
        [-0.0269, -0.0031, -0.0037,  ...,  0.0061, -0.0163, -0.0066],
        [-0.0002, -0.0029, -0.0061,  ...,  0.0056, -0.0036, -0.0035]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 259/965 [08:11<23:22,  1.99s/it]

tensor([[-1.4188e-03, -2.0232e-03,  4.0616e-04,  ...,  7.7145e-05,
         -6.9314e-03, -1.2220e-02],
        [-1.4156e-03,  1.7719e-02,  1.6600e-02,  ...,  7.0196e-03,
         -4.6737e-03, -4.8177e-03],
        [-5.2216e-03, -8.2702e-03,  1.1450e-02,  ...,  1.0494e-03,
         -1.1893e-02, -1.8939e-02],
        ...,
        [ 7.6434e-03,  2.1503e-02, -1.0355e-03,  ..., -1.6860e-02,
         -1.1996e-02,  1.5280e-02],
        [-2.7213e-02,  1.8834e-02,  1.1640e-02,  ..., -1.3294e-02,
         -2.4149e-02,  8.6042e-03],
        [-1.6032e-04,  5.0138e-03, -4.6249e-03,  ..., -2.3378e-03,
         -4.2174e-04, -7.0962e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 260/965 [08:13<23:26,  2.00s/it]

tensor([[-6.2066e-03,  1.0692e-02, -1.3072e-02,  ..., -2.1233e-02,
         -7.0213e-03, -2.4711e-03],
        [ 1.1444e-02,  5.2190e-03, -8.2417e-04,  ..., -8.0321e-03,
         -1.4768e-02,  2.5458e-03],
        [ 4.3306e-05,  4.4085e-03, -4.9690e-03,  ..., -2.5040e-03,
         -1.1646e-02, -4.7942e-03],
        ...,
        [-7.4847e-03,  5.1832e-03, -8.0527e-03,  ...,  1.6388e-02,
         -6.6272e-03, -8.0824e-05],
        [ 7.1126e-03,  1.2535e-02, -1.4843e-02,  ..., -1.6201e-03,
         -1.9997e-02, -6.7913e-03],
        [ 6.8121e-03,  1.4161e-02, -6.0485e-03,  ..., -1.5581e-02,
          3.1047e-03,  8.0107e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 261/965 [08:15<23:02,  1.96s/it]

tensor([[ 0.0118,  0.0113,  0.0031,  ...,  0.0069, -0.0125, -0.0117],
        [ 0.0113,  0.0167, -0.0055,  ..., -0.0005, -0.0029,  0.0085],
        [-0.0048, -0.0205, -0.0048,  ...,  0.0062, -0.0071, -0.0198],
        ...,
        [-0.0118, -0.0214,  0.0028,  ..., -0.0152, -0.0180, -0.0198],
        [ 0.0041,  0.0078,  0.0014,  ..., -0.0082, -0.0001,  0.0055],
        [-0.0027, -0.0037, -0.0020,  ..., -0.0037, -0.0135, -0.0099]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 262/965 [08:16<22:15,  1.90s/it]

tensor([[ 2.1599e-03, -5.6732e-03,  4.7628e-05,  ...,  6.0945e-03,
         -2.7533e-03, -9.6458e-03],
        [ 8.5007e-03,  1.3366e-02, -3.2884e-03,  ..., -3.0348e-03,
         -5.5551e-03,  6.8703e-04],
        [-1.6849e-02, -5.2826e-03, -1.9691e-03,  ...,  1.2108e-02,
         -1.1509e-02, -1.5490e-02],
        ...,
        [-1.0362e-02,  1.3392e-02, -2.4085e-03,  ..., -1.2221e-02,
         -3.3940e-03, -3.4855e-03],
        [-9.7749e-03, -4.8694e-03,  1.3039e-02,  ...,  8.3452e-04,
         -9.0830e-04, -9.0304e-03],
        [-4.2903e-03,  3.5158e-03,  2.3865e-03,  ..., -1.0911e-03,
         -9.3799e-03, -2.7030e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 263/965 [08:19<23:16,  1.99s/it]

tensor([[-0.0016, -0.0050,  0.0067,  ...,  0.0135, -0.0074, -0.0197],
        [ 0.0053, -0.0110,  0.0124,  ...,  0.0009,  0.0042, -0.0057],
        [-0.0147, -0.0206, -0.0076,  ...,  0.0097,  0.0050, -0.0066],
        ...,
        [-0.0029,  0.0005,  0.0040,  ..., -0.0134, -0.0095,  0.0082],
        [ 0.0058, -0.0018,  0.0088,  ..., -0.0014, -0.0087, -0.0021],
        [-0.0064,  0.0064, -0.0030,  ..., -0.0022, -0.0149, -0.0014]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 264/965 [08:20<22:26,  1.92s/it]

tensor([[-2.2466e-03,  1.9599e-03,  3.3952e-03,  ..., -1.1081e-03,
         -7.7232e-03,  4.8816e-03],
        [-6.8525e-03,  2.4636e-03, -8.0217e-03,  ...,  1.3852e-02,
         -1.2144e-02, -2.0366e-03],
        [ 6.8862e-05,  1.8159e-02,  1.1032e-03,  ...,  5.0053e-04,
         -1.3439e-02,  9.5312e-03],
        ...,
        [-4.2797e-03, -4.5876e-04,  4.2978e-03,  ...,  3.6752e-03,
         -9.9281e-03, -3.3542e-03],
        [-7.0624e-03,  6.0385e-03, -6.5519e-03,  ...,  4.1499e-03,
         -1.2663e-02,  9.5454e-03],
        [-2.2430e-02,  3.7134e-03,  2.2415e-02,  ..., -8.8259e-03,
         -2.2433e-02,  2.1571e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 27%|██▋       | 265/965 [08:23<23:39,  2.03s/it]

tensor([[-0.0081, -0.0017,  0.0111,  ...,  0.0004, -0.0164,  0.0051],
        [-0.0041, -0.0021,  0.0031,  ..., -0.0025, -0.0033, -0.0015],
        [ 0.0021,  0.0048,  0.0076,  ..., -0.0030, -0.0049,  0.0066],
        ...,
        [-0.0001,  0.0048,  0.0043,  ..., -0.0124, -0.0153,  0.0073],
        [ 0.0073,  0.0045,  0.0091,  ..., -0.0080, -0.0060,  0.0043],
        [-0.0025,  0.0007,  0.0023,  ..., -0.0028,  0.0014,  0.0203]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 266/965 [08:25<23:11,  1.99s/it]

tensor([[ 0.0108, -0.0022,  0.0024,  ..., -0.0088, -0.0145,  0.0022],
        [-0.0033, -0.0002,  0.0079,  ...,  0.0060, -0.0100, -0.0060],
        [ 0.0051,  0.0046, -0.0032,  ..., -0.0057, -0.0090,  0.0097],
        ...,
        [ 0.0177, -0.0019, -0.0033,  ...,  0.0004, -0.0081, -0.0007],
        [ 0.0059,  0.0068,  0.0089,  ..., -0.0084, -0.0065,  0.0132],
        [ 0.0031, -0.0006,  0.0093,  ..., -0.0048,  0.0035, -0.0018]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 267/965 [08:27<23:14,  2.00s/it]

tensor([[ 1.1100e-03, -2.0474e-03,  6.4408e-03,  ...,  1.9302e-03,
         -7.1054e-03,  1.2095e-02],
        [-6.9448e-03, -3.9628e-03, -8.5536e-03,  ...,  1.2686e-02,
         -9.1689e-03,  3.0405e-03],
        [-1.9457e-03,  9.1529e-03, -3.4924e-03,  ..., -4.6447e-03,
         -2.3619e-03,  1.6554e-02],
        ...,
        [-1.3188e-02, -4.2345e-05, -3.4797e-03,  ..., -6.1146e-03,
         -2.0858e-02,  6.1223e-03],
        [ 4.6562e-05,  6.6548e-03, -6.8457e-03,  ..., -5.0055e-03,
         -4.6296e-03,  1.2950e-02],
        [-2.4249e-03,  1.0137e-02, -4.3067e-03,  ..., -1.2647e-02,
         -1.4963e-02,  2.0156e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 268/965 [08:29<23:57,  2.06s/it]

tensor([[-4.6006e-03, -4.4150e-03,  9.1642e-03,  ..., -8.9341e-03,
         -2.8839e-03, -4.1580e-03],
        [-6.8349e-04, -1.1305e-02, -2.4400e-03,  ...,  1.5422e-02,
         -7.9535e-03, -8.0219e-05],
        [ 1.2836e-03,  7.0943e-03,  2.8409e-03,  ..., -1.5188e-04,
         -4.3648e-03,  7.0831e-03],
        ...,
        [ 1.0169e-02, -4.0049e-03,  1.3969e-03,  ..., -6.5022e-03,
         -1.8550e-03,  1.7058e-03],
        [ 9.0684e-03,  1.0663e-02, -2.0318e-03,  ..., -1.1438e-02,
         -1.3135e-02,  6.4748e-03],
        [ 1.6128e-03,  1.0755e-03, -2.5960e-04,  ...,  1.7376e-03,
         -7.8304e-03,  5.9127e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 269/965 [08:31<23:29,  2.03s/it]

tensor([[ 6.9071e-04, -5.6114e-05, -2.4669e-03,  ..., -1.7651e-03,
         -1.0573e-02,  3.7962e-03],
        [-2.0612e-03,  1.4485e-03, -2.6765e-03,  ..., -8.6449e-03,
         -8.3406e-03,  8.1481e-03],
        [-7.2848e-03, -4.7382e-04, -2.2761e-03,  ..., -1.4477e-02,
         -2.3063e-02,  9.5582e-03],
        ...,
        [ 1.6490e-03, -1.6441e-03,  5.9759e-03,  ..., -4.6407e-03,
         -1.6702e-02, -2.5590e-04],
        [-8.9865e-03, -1.3262e-02, -5.2233e-03,  ..., -4.5766e-03,
         -2.6031e-02, -4.4397e-03],
        [ 1.4110e-03,  4.1405e-03, -9.7578e-04,  ...,  9.6230e-03,
         -1.4289e-02,  5.0519e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 270/965 [08:33<22:49,  1.97s/it]

tensor([[-0.0069,  0.0054,  0.0178,  ..., -0.0105, -0.0072, -0.0083],
        [-0.0078,  0.0055,  0.0067,  ...,  0.0039, -0.0170, -0.0015],
        [ 0.0026, -0.0014, -0.0027,  ..., -0.0018, -0.0101, -0.0022],
        ...,
        [ 0.0025,  0.0018, -0.0045,  ..., -0.0027, -0.0102,  0.0011],
        [-0.0043, -0.0039,  0.0008,  ...,  0.0032, -0.0059,  0.0034],
        [ 0.0021, -0.0014, -0.0156,  ..., -0.0167, -0.0319, -0.0019]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 271/965 [08:34<22:28,  1.94s/it]

tensor([[-5.4500e-03,  1.7402e-02, -9.6530e-03,  ..., -8.1285e-03,
         -1.4477e-02,  6.7768e-03],
        [-5.5810e-03, -5.6671e-03,  2.1712e-03,  ...,  1.9887e-04,
         -1.4439e-02,  3.3637e-03],
        [ 4.7841e-03, -5.3654e-03, -8.4689e-03,  ...,  2.7394e-03,
         -6.2989e-03, -1.0473e-02],
        ...,
        [ 1.4472e-03, -5.7390e-03, -1.0393e-02,  ...,  5.5472e-03,
         -1.0339e-02, -1.0502e-02],
        [ 8.6139e-03,  2.6015e-03, -7.3414e-04,  ..., -1.5994e-03,
         -1.4465e-02, -3.4247e-03],
        [-8.6954e-03,  1.5123e-03, -8.7505e-03,  ..., -7.7782e-03,
         -2.2001e-02, -1.6659e-05]], device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 272/965 [08:36<21:47,  1.89s/it]

tensor([[-0.0039,  0.0099, -0.0063,  ..., -0.0029,  0.0034,  0.0063],
        [ 0.0005,  0.0035, -0.0128,  ..., -0.0098,  0.0054, -0.0002],
        [-0.0120,  0.0104,  0.0028,  ..., -0.0135, -0.0080,  0.0052],
        ...,
        [ 0.0004,  0.0067, -0.0121,  ...,  0.0044, -0.0185,  0.0024],
        [ 0.0055, -0.0038,  0.0140,  ..., -0.0061,  0.0083,  0.0024],
        [-0.0007, -0.0003, -0.0014,  ...,  0.0062, -0.0049,  0.0033]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 273/965 [08:38<21:02,  1.82s/it]

tensor([[ 0.0045, -0.0040, -0.0007,  ...,  0.0003, -0.0064, -0.0039],
        [ 0.0040,  0.0118, -0.0016,  ..., -0.0103,  0.0012,  0.0036],
        [-0.0022, -0.0007,  0.0028,  ...,  0.0090, -0.0139, -0.0079],
        ...,
        [-0.0026,  0.0072,  0.0115,  ..., -0.0050,  0.0027,  0.0081],
        [-0.0023, -0.0080,  0.0081,  ..., -0.0086, -0.0252, -0.0061],
        [ 0.0023,  0.0027, -0.0007,  ..., -0.0040, -0.0058, -0.0050]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 274/965 [08:40<20:31,  1.78s/it]

tensor([[-0.0048,  0.0005,  0.0027,  ..., -0.0082, -0.0081, -0.0049],
        [ 0.0064, -0.0031,  0.0020,  ...,  0.0008, -0.0084, -0.0005],
        [-0.0021, -0.0014,  0.0030,  ..., -0.0095, -0.0035, -0.0018],
        ...,
        [-0.0073,  0.0007,  0.0007,  ..., -0.0019, -0.0094,  0.0051],
        [ 0.0036,  0.0032,  0.0102,  ..., -0.0047,  0.0021,  0.0073],
        [-0.0119,  0.0144, -0.0121,  ...,  0.0062, -0.0248,  0.0173]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 28%|██▊       | 275/965 [08:42<20:59,  1.83s/it]

tensor([[-0.0110,  0.0038,  0.0031,  ...,  0.0123, -0.0126, -0.0003],
        [-0.0040, -0.0022, -0.0006,  ..., -0.0067, -0.0062, -0.0019],
        [ 0.0075,  0.0106, -0.0049,  ..., -0.0155, -0.0035,  0.0033],
        ...,
        [-0.0037, -0.0042,  0.0048,  ...,  0.0003, -0.0054,  0.0046],
        [ 0.0031,  0.0030, -0.0027,  ...,  0.0070, -0.0089,  0.0043],
        [ 0.0041,  0.0022, -0.0003,  ..., -0.0079, -0.0250,  0.0032]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▊       | 276/965 [08:43<20:57,  1.83s/it]

tensor([[-0.0011,  0.0072, -0.0118,  ..., -0.0065, -0.0099,  0.0081],
        [-0.0056,  0.0249, -0.0121,  ..., -0.0036, -0.0239,  0.0195],
        [ 0.0037, -0.0044,  0.0018,  ..., -0.0012, -0.0134,  0.0059],
        ...,
        [ 0.0038, -0.0096,  0.0063,  ..., -0.0065, -0.0020, -0.0012],
        [-0.0005,  0.0032, -0.0055,  ..., -0.0041, -0.0055,  0.0076],
        [ 0.0042, -0.0027,  0.0018,  ...,  0.0005, -0.0064, -0.0012]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▊       | 277/965 [08:45<21:19,  1.86s/it]

tensor([[-7.7758e-03,  1.4468e-02, -1.6946e-02,  ...,  9.6047e-03,
         -1.5599e-02,  1.9038e-03],
        [ 9.4375e-03,  1.7819e-02, -9.1519e-03,  ..., -4.8392e-06,
         -2.1958e-02,  1.1068e-02],
        [-1.2599e-04, -1.8134e-02,  2.2430e-02,  ..., -6.1835e-03,
         -2.7617e-03, -1.1909e-02],
        ...,
        [ 8.6510e-03,  5.7573e-03, -2.9730e-03,  ..., -9.7031e-03,
         -9.2426e-03,  7.2180e-03],
        [ 6.0294e-05, -7.1488e-03,  2.4817e-03,  ..., -5.4916e-03,
         -6.8382e-03, -6.1372e-04],
        [ 2.3333e-03,  5.9216e-03, -1.6168e-03,  ..., -2.3810e-03,
         -1.0474e-02,  2.3652e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▉       | 278/965 [08:47<20:41,  1.81s/it]

tensor([[ 0.0098,  0.0006, -0.0021,  ..., -0.0118, -0.0038, -0.0016],
        [ 0.0013,  0.0060, -0.0091,  ..., -0.0105, -0.0028,  0.0073],
        [ 0.0055,  0.0015, -0.0037,  ..., -0.0016, -0.0105, -0.0091],
        ...,
        [-0.0003,  0.0053, -0.0013,  ..., -0.0088, -0.0154,  0.0097],
        [-0.0030, -0.0041, -0.0012,  ..., -0.0062, -0.0070, -0.0053],
        [-0.0003,  0.0071,  0.0006,  ..., -0.0068, -0.0105,  0.0058]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▉       | 279/965 [08:49<21:46,  1.91s/it]

tensor([[-0.0033,  0.0128, -0.0107,  ..., -0.0051, -0.0202,  0.0087],
        [-0.0124,  0.0087,  0.0023,  ..., -0.0010, -0.0289, -0.0050],
        [ 0.0004,  0.0093, -0.0029,  ...,  0.0014, -0.0159,  0.0021],
        ...,
        [-0.0080,  0.0112, -0.0026,  ...,  0.0053, -0.0160, -0.0049],
        [ 0.0074,  0.0050,  0.0018,  ..., -0.0029,  0.0025, -0.0076],
        [ 0.0031,  0.0049, -0.0030,  ...,  0.0028, -0.0124,  0.0075]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▉       | 280/965 [08:51<21:17,  1.87s/it]

tensor([[ 0.0083, -0.0007,  0.0014,  ...,  0.0084, -0.0084, -0.0023],
        [ 0.0051,  0.0059,  0.0087,  ..., -0.0120, -0.0008, -0.0024],
        [ 0.0082,  0.0062,  0.0037,  ...,  0.0050, -0.0053, -0.0072],
        ...,
        [ 0.0062,  0.0125, -0.0027,  ..., -0.0100,  0.0011,  0.0078],
        [ 0.0010,  0.0054,  0.0119,  ..., -0.0021, -0.0134, -0.0085],
        [ 0.0100,  0.0091, -0.0008,  ..., -0.0023, -0.0113, -0.0005]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▉       | 281/965 [08:53<20:54,  1.83s/it]

tensor([[ 0.0108, -0.0048,  0.0029,  ..., -0.0005,  0.0062, -0.0011],
        [ 0.0002,  0.0029, -0.0155,  ..., -0.0024, -0.0019, -0.0012],
        [ 0.0056,  0.0011,  0.0012,  ..., -0.0062, -0.0035,  0.0037],
        ...,
        [-0.0047, -0.0117,  0.0154,  ...,  0.0026, -0.0167, -0.0149],
        [ 0.0053, -0.0053,  0.0013,  ...,  0.0004, -0.0068, -0.0072],
        [-0.0026, -0.0035, -0.0061,  ..., -0.0169, -0.0193,  0.0034]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▉       | 282/965 [08:54<20:45,  1.82s/it]

tensor([[-4.3378e-03, -6.1035e-03,  6.2873e-03,  ..., -3.4654e-03,
         -4.5423e-03, -1.9036e-03],
        [ 4.5393e-03, -3.3541e-03, -4.6026e-03,  ..., -2.8437e-03,
         -1.0141e-02, -2.2433e-03],
        [-2.5304e-03,  8.0690e-03, -7.4538e-03,  ...,  4.0360e-03,
         -1.7549e-02,  6.8657e-03],
        ...,
        [-1.6265e-04, -7.9043e-03,  8.9321e-04,  ..., -1.4972e-02,
         -5.3257e-03, -5.4082e-03],
        [ 1.1534e-02,  8.9242e-03,  3.5117e-03,  ..., -2.9071e-03,
         -4.9466e-03, -5.5800e-03],
        [-3.0367e-03,  3.5256e-03, -3.0773e-03,  ..., -1.3295e-02,
          3.9198e-05,  3.3105e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▉       | 283/965 [08:57<22:08,  1.95s/it]

tensor([[-0.0027, -0.0002, -0.0015,  ..., -0.0060, -0.0130,  0.0029],
        [-0.0018,  0.0016, -0.0041,  ...,  0.0043, -0.0118,  0.0060],
        [-0.0002, -0.0048, -0.0043,  ...,  0.0038, -0.0107,  0.0041],
        ...,
        [ 0.0013, -0.0004,  0.0045,  ...,  0.0047, -0.0047,  0.0009],
        [ 0.0092,  0.0126,  0.0012,  ...,  0.0018, -0.0046,  0.0059],
        [ 0.0037, -0.0065,  0.0015,  ...,  0.0097, -0.0062, -0.0046]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 29%|██▉       | 284/965 [08:59<22:35,  1.99s/it]

tensor([[-0.0050,  0.0136, -0.0154,  ...,  0.0081, -0.0154,  0.0084],
        [-0.0040, -0.0103, -0.0016,  ...,  0.0038,  0.0014, -0.0037],
        [-0.0129, -0.0020, -0.0131,  ...,  0.0016, -0.0036,  0.0108],
        ...,
        [ 0.0061, -0.0031,  0.0019,  ..., -0.0024, -0.0046, -0.0044],
        [ 0.0019, -0.0033,  0.0014,  ...,  0.0010, -0.0083, -0.0041],
        [-0.0049,  0.0031,  0.0117,  ..., -0.0043, -0.0193,  0.0028]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|██▉       | 285/965 [09:01<21:49,  1.93s/it]

tensor([[-0.0003, -0.0163,  0.0008,  ...,  0.0093,  0.0031, -0.0068],
        [ 0.0031,  0.0004, -0.0144,  ...,  0.0030, -0.0029,  0.0005],
        [ 0.0057,  0.0033,  0.0062,  ...,  0.0029, -0.0127,  0.0003],
        ...,
        [-0.0101, -0.0117,  0.0141,  ...,  0.0052, -0.0056, -0.0132],
        [-0.0052, -0.0157, -0.0010,  ...,  0.0224, -0.0109, -0.0202],
        [-0.0053, -0.0142, -0.0045,  ...,  0.0035, -0.0230, -0.0057]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|██▉       | 286/965 [09:03<22:00,  1.95s/it]

tensor([[-0.0049,  0.0061, -0.0075,  ..., -0.0085, -0.0114,  0.0039],
        [-0.0052, -0.0113, -0.0068,  ...,  0.0132, -0.0012, -0.0108],
        [-0.0031, -0.0031,  0.0064,  ..., -0.0027, -0.0011,  0.0084],
        ...,
        [-0.0027,  0.0206,  0.0076,  ..., -0.0053, -0.0193,  0.0155],
        [ 0.0085, -0.0034,  0.0015,  ..., -0.0014, -0.0115, -0.0036],
        [-0.0146,  0.0038, -0.0053,  ...,  0.0039, -0.0103,  0.0166]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|██▉       | 287/965 [09:04<22:00,  1.95s/it]

tensor([[-0.0005, -0.0085,  0.0042,  ..., -0.0084, -0.0063, -0.0090],
        [-0.0030,  0.0025,  0.0009,  ..., -0.0030, -0.0100,  0.0035],
        [ 0.0058, -0.0041,  0.0072,  ...,  0.0003, -0.0095, -0.0022],
        ...,
        [ 0.0043, -0.0018,  0.0001,  ..., -0.0047, -0.0081, -0.0025],
        [-0.0134,  0.0119,  0.0029,  ..., -0.0041, -0.0167,  0.0043],
        [-0.0032,  0.0037, -0.0090,  ..., -0.0176, -0.0126,  0.0077]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|██▉       | 288/965 [09:06<21:25,  1.90s/it]

tensor([[-0.0104,  0.0147, -0.0177,  ..., -0.0143, -0.0072,  0.0217],
        [ 0.0018,  0.0085, -0.0036,  ..., -0.0142, -0.0044,  0.0080],
        [ 0.0024, -0.0044,  0.0005,  ..., -0.0112, -0.0096, -0.0036],
        ...,
        [ 0.0058, -0.0002,  0.0039,  ..., -0.0026, -0.0080,  0.0001],
        [-0.0006, -0.0034, -0.0033,  ...,  0.0029,  0.0001, -0.0006],
        [-0.0002,  0.0025,  0.0022,  ..., -0.0079, -0.0063,  0.0080]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|██▉       | 289/965 [09:08<21:56,  1.95s/it]

tensor([[ 0.0075,  0.0042,  0.0054,  ...,  0.0017, -0.0108, -0.0012],
        [-0.0013, -0.0064, -0.0060,  ..., -0.0199, -0.0181, -0.0012],
        [-0.0041,  0.0056,  0.0007,  ..., -0.0026, -0.0078,  0.0057],
        ...,
        [ 0.0201,  0.0016,  0.0067,  ...,  0.0068, -0.0103, -0.0087],
        [-0.0061,  0.0100, -0.0020,  ..., -0.0198, -0.0101,  0.0140],
        [-0.0091,  0.0173, -0.0185,  ..., -0.0056, -0.0062,  0.0147]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|███       | 290/965 [09:10<21:20,  1.90s/it]

tensor([[-4.2059e-05,  7.8210e-03, -6.7517e-03,  ..., -2.2316e-02,
         -2.2378e-02,  1.0680e-02],
        [ 7.8477e-03,  1.5342e-03, -1.6175e-03,  ..., -7.8347e-03,
         -6.1071e-03,  7.6012e-03],
        [-3.4839e-03,  8.8403e-03, -3.6853e-03,  ..., -2.2176e-02,
         -9.2625e-03,  4.4971e-03],
        ...,
        [ 5.1179e-03,  5.6431e-03, -2.6654e-03,  ..., -1.0793e-02,
         -8.4375e-03,  4.2974e-03],
        [ 7.2641e-03, -2.4230e-03,  8.0143e-04,  ...,  2.0791e-03,
         -1.2561e-03, -2.9397e-03],
        [ 8.0399e-03, -1.7431e-03, -1.6437e-03,  ..., -4.6838e-03,
         -5.5403e-03, -3.7081e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|███       | 291/965 [09:12<20:41,  1.84s/it]

tensor([[-0.0022,  0.0029,  0.0012,  ...,  0.0021, -0.0022,  0.0064],
        [-0.0003,  0.0125, -0.0038,  ..., -0.0070,  0.0019,  0.0115],
        [-0.0051,  0.0086, -0.0036,  ..., -0.0061, -0.0133,  0.0049],
        ...,
        [-0.0139,  0.0062, -0.0145,  ...,  0.0134, -0.0186,  0.0021],
        [-0.0049,  0.0156, -0.0137,  ..., -0.0024, -0.0085,  0.0137],
        [ 0.0052, -0.0035, -0.0011,  ..., -0.0029,  0.0018, -0.0042]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|███       | 292/965 [09:14<20:26,  1.82s/it]

tensor([[ 1.2430e-03, -1.2038e-02, -5.7845e-03,  ..., -1.8425e-03,
         -3.5610e-03, -1.1144e-02],
        [-6.1438e-03, -6.2355e-03, -7.0193e-03,  ...,  6.0268e-03,
         -1.0017e-02,  1.4931e-03],
        [-1.8083e-03,  5.5085e-03, -1.1161e-02,  ..., -4.2220e-03,
         -2.0820e-03,  7.8627e-03],
        ...,
        [-8.0893e-03, -8.6876e-03,  5.7091e-05,  ..., -4.1514e-03,
         -9.2194e-03, -1.7745e-03],
        [-3.2704e-03, -7.2924e-03, -7.4335e-03,  ...,  1.2231e-02,
         -5.6700e-03, -5.1852e-03],
        [-2.6996e-03, -7.3116e-04, -5.7797e-03,  ...,  6.1048e-03,
         -2.8314e-03, -3.8405e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|███       | 293/965 [09:16<21:16,  1.90s/it]

tensor([[-0.0178, -0.0094, -0.0123,  ..., -0.0077, -0.0237, -0.0074],
        [-0.0028,  0.0109,  0.0049,  ..., -0.0047, -0.0037,  0.0076],
        [-0.0015, -0.0042, -0.0010,  ...,  0.0082, -0.0034,  0.0005],
        ...,
        [ 0.0056, -0.0088, -0.0131,  ..., -0.0055, -0.0049, -0.0137],
        [-0.0120, -0.0094,  0.0121,  ...,  0.0087, -0.0184, -0.0147],
        [ 0.0039, -0.0067,  0.0041,  ...,  0.0067,  0.0024, -0.0130]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 30%|███       | 294/965 [09:18<21:03,  1.88s/it]

tensor([[-0.0108, -0.0006,  0.0047,  ..., -0.0068, -0.0064,  0.0061],
        [ 0.0041,  0.0022,  0.0075,  ...,  0.0026, -0.0132,  0.0102],
        [-0.0017,  0.0097,  0.0004,  ..., -0.0091, -0.0265,  0.0059],
        ...,
        [ 0.0008,  0.0025, -0.0015,  ...,  0.0074, -0.0010,  0.0085],
        [-0.0181, -0.0225,  0.0133,  ...,  0.0252, -0.0088, -0.0055],
        [-0.0065,  0.0185, -0.0045,  ..., -0.0086, -0.0326, -0.0017]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███       | 295/965 [09:19<20:23,  1.83s/it]

tensor([[-0.0031, -0.0113,  0.0103,  ...,  0.0090, -0.0073, -0.0222],
        [-0.0014, -0.0099,  0.0064,  ...,  0.0121, -0.0084, -0.0143],
        [ 0.0072, -0.0060,  0.0010,  ...,  0.0056, -0.0087, -0.0040],
        ...,
        [ 0.0035, -0.0009,  0.0037,  ..., -0.0111, -0.0184,  0.0020],
        [ 0.0108,  0.0097, -0.0053,  ..., -0.0108, -0.0110,  0.0030],
        [-0.0024,  0.0103, -0.0018,  ..., -0.0196, -0.0242,  0.0057]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███       | 296/965 [09:21<20:29,  1.84s/it]

tensor([[-0.0025, -0.0124,  0.0019,  ...,  0.0098, -0.0061, -0.0041],
        [ 0.0024, -0.0109,  0.0051,  ..., -0.0021, -0.0114, -0.0083],
        [ 0.0118,  0.0167, -0.0094,  ..., -0.0036, -0.0199, -0.0063],
        ...,
        [ 0.0004, -0.0064, -0.0014,  ...,  0.0055, -0.0112,  0.0019],
        [ 0.0007, -0.0073,  0.0104,  ...,  0.0129, -0.0058, -0.0079],
        [ 0.0006,  0.0040,  0.0003,  ..., -0.0203, -0.0146,  0.0014]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███       | 297/965 [09:23<21:16,  1.91s/it]

tensor([[ 0.0044,  0.0063,  0.0110,  ..., -0.0148, -0.0100, -0.0038],
        [ 0.0092,  0.0094,  0.0023,  ..., -0.0067, -0.0173,  0.0054],
        [ 0.0020, -0.0022, -0.0027,  ..., -0.0118, -0.0179,  0.0072],
        ...,
        [ 0.0039,  0.0176, -0.0038,  ..., -0.0137, -0.0188,  0.0154],
        [-0.0043,  0.0069, -0.0023,  ..., -0.0053, -0.0104,  0.0092],
        [ 0.0047, -0.0136,  0.0064,  ..., -0.0036,  0.0042, -0.0103]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███       | 298/965 [09:25<21:49,  1.96s/it]

tensor([[-0.0075,  0.0075,  0.0089,  ..., -0.0106, -0.0172,  0.0036],
        [ 0.0056,  0.0123, -0.0026,  ..., -0.0064, -0.0112,  0.0129],
        [-0.0061, -0.0028,  0.0029,  ..., -0.0031, -0.0052, -0.0072],
        ...,
        [ 0.0033,  0.0015, -0.0069,  ...,  0.0023, -0.0161, -0.0020],
        [-0.0027,  0.0161, -0.0068,  ..., -0.0084, -0.0184,  0.0164],
        [-0.0032,  0.0055,  0.0023,  ...,  0.0013, -0.0117,  0.0028]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███       | 299/965 [09:27<21:31,  1.94s/it]

tensor([[-0.0008,  0.0105, -0.0001,  ..., -0.0056, -0.0016,  0.0127],
        [-0.0023,  0.0107, -0.0010,  ..., -0.0112, -0.0124,  0.0023],
        [-0.0053, -0.0120,  0.0094,  ..., -0.0052, -0.0191, -0.0013],
        ...,
        [-0.0122,  0.0084,  0.0010,  ..., -0.0134, -0.0179, -0.0047],
        [-0.0167, -0.0059,  0.0080,  ..., -0.0019, -0.0255, -0.0088],
        [ 0.0059, -0.0027,  0.0090,  ..., -0.0117, -0.0055, -0.0026]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███       | 300/965 [09:29<20:43,  1.87s/it]

tensor([[-0.0083,  0.0098, -0.0042,  ..., -0.0038, -0.0126,  0.0035],
        [ 0.0078, -0.0252, -0.0041,  ...,  0.0366, -0.0142, -0.0157],
        [-0.0024,  0.0114,  0.0028,  ..., -0.0081,  0.0042,  0.0100],
        ...,
        [-0.0066,  0.0092,  0.0128,  ...,  0.0041, -0.0028, -0.0093],
        [-0.0045,  0.0004,  0.0035,  ...,  0.0072, -0.0131, -0.0074],
        [-0.0112,  0.0084, -0.0040,  ..., -0.0155, -0.0336,  0.0123]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███       | 301/965 [09:31<20:05,  1.82s/it]

tensor([[ 0.0002, -0.0027,  0.0051,  ...,  0.0039,  0.0049,  0.0005],
        [ 0.0112, -0.0068,  0.0083,  ...,  0.0134, -0.0057, -0.0082],
        [-0.0060, -0.0004,  0.0116,  ..., -0.0007, -0.0075, -0.0115],
        ...,
        [ 0.0023, -0.0028,  0.0081,  ...,  0.0028,  0.0061,  0.0009],
        [-0.0032, -0.0100,  0.0022,  ..., -0.0009, -0.0028, -0.0132],
        [-0.0103,  0.0003,  0.0138,  ...,  0.0121, -0.0014, -0.0134]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███▏      | 302/965 [09:32<19:34,  1.77s/it]

tensor([[ 2.7499e-03, -9.0802e-03, -5.2201e-03,  ...,  8.3118e-03,
         -6.7790e-03, -7.8449e-03],
        [-1.2494e-02, -1.1029e-02, -1.3059e-02,  ..., -2.0198e-02,
         -1.7082e-02, -3.0238e-03],
        [-5.3904e-03,  5.7614e-03,  3.7147e-03,  ..., -1.1060e-02,
         -6.6932e-03,  9.0763e-03],
        ...,
        [-2.3555e-05, -1.6293e-03,  1.5543e-03,  ...,  3.4875e-03,
          5.7063e-04, -1.6131e-03],
        [-5.8023e-04,  2.5559e-03,  2.5231e-03,  ...,  3.3416e-03,
          2.5894e-04,  2.6194e-03],
        [-4.0956e-03,  1.6712e-02, -8.2298e-03,  ..., -1.2822e-02,
          6.0874e-03,  1.3900e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 31%|███▏      | 303/965 [09:34<19:57,  1.81s/it]

tensor([[-2.1405e-03,  2.6318e-03, -1.5822e-03,  ..., -2.4708e-03,
         -6.3447e-03,  1.0315e-02],
        [-1.3669e-02, -7.0590e-04, -1.3423e-03,  ...,  7.4039e-03,
         -6.9810e-03,  7.0296e-04],
        [-2.6289e-03,  1.9462e-02, -1.5361e-02,  ...,  1.0278e-02,
         -1.8221e-02,  3.8227e-04],
        ...,
        [ 3.8208e-03,  4.3505e-03, -1.2252e-02,  ...,  3.1926e-03,
         -5.6642e-03, -4.7698e-04],
        [ 7.4986e-03,  7.8294e-03,  8.9253e-03,  ..., -2.9307e-03,
          3.2137e-03,  7.7362e-03],
        [ 7.5372e-03,  3.2566e-05,  1.0349e-02,  ..., -4.0079e-04,
         -9.6565e-03, -5.0316e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 304/965 [09:36<20:07,  1.83s/it]

tensor([[ 0.0045,  0.0039, -0.0012,  ..., -0.0075, -0.0071,  0.0025],
        [ 0.0029,  0.0094, -0.0052,  ..., -0.0056, -0.0066,  0.0112],
        [-0.0072,  0.0085,  0.0003,  ..., -0.0084, -0.0080,  0.0077],
        ...,
        [ 0.0095,  0.0060,  0.0116,  ...,  0.0008, -0.0011,  0.0015],
        [ 0.0018,  0.0044, -0.0021,  ..., -0.0026,  0.0075,  0.0127],
        [-0.0050,  0.0009,  0.0076,  ...,  0.0065, -0.0062, -0.0027]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 305/965 [09:38<19:18,  1.76s/it]

tensor([[-0.0103,  0.0067, -0.0011,  ..., -0.0056, -0.0221,  0.0113],
        [-0.0058,  0.0056,  0.0010,  ...,  0.0015,  0.0032,  0.0104],
        [ 0.0060,  0.0218, -0.0022,  ..., -0.0122, -0.0159,  0.0105],
        ...,
        [ 0.0125,  0.0125,  0.0017,  ...,  0.0047, -0.0161,  0.0046],
        [-0.0125, -0.0038, -0.0105,  ...,  0.0051, -0.0016,  0.0026],
        [-0.0009, -0.0052,  0.0048,  ...,  0.0019, -0.0045, -0.0104]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 306/965 [09:39<19:25,  1.77s/it]

tensor([[-8.7892e-03,  6.0954e-03,  1.3374e-03,  ..., -5.0662e-03,
         -6.9210e-03,  8.7097e-03],
        [ 6.2308e-03, -9.8883e-03,  1.2414e-02,  ...,  6.0090e-03,
         -1.1370e-02, -1.1563e-02],
        [-1.2751e-03, -7.4046e-03,  6.7962e-03,  ..., -6.8599e-03,
         -1.5851e-02, -5.9837e-03],
        ...,
        [ 3.6123e-03,  2.8478e-03,  5.7793e-03,  ..., -2.1519e-03,
         -2.3834e-05,  2.5138e-03],
        [-3.9630e-03, -2.5470e-03, -6.8562e-03,  ...,  2.3033e-03,
         -8.7387e-03, -2.0809e-04],
        [-4.4245e-03,  1.6919e-02, -5.5366e-03,  ..., -1.2378e-02,
          1.1405e-04,  1.4459e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 307/965 [09:41<20:04,  1.83s/it]

tensor([[-0.0052, -0.0183,  0.0155,  ..., -0.0007, -0.0076, -0.0078],
        [ 0.0008,  0.0132,  0.0026,  ...,  0.0015, -0.0089, -0.0062],
        [-0.0001, -0.0053, -0.0028,  ...,  0.0001,  0.0015,  0.0022],
        ...,
        [ 0.0039, -0.0065, -0.0009,  ...,  0.0097, -0.0067,  0.0013],
        [-0.0072,  0.0005, -0.0024,  ...,  0.0093, -0.0069,  0.0028],
        [-0.0012, -0.0047, -0.0017,  ...,  0.0072, -0.0043,  0.0048]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 308/965 [09:43<20:13,  1.85s/it]

tensor([[-0.0008,  0.0146, -0.0075,  ..., -0.0110, -0.0036,  0.0112],
        [ 0.0058,  0.0028, -0.0017,  ...,  0.0040, -0.0121,  0.0047],
        [ 0.0025, -0.0035,  0.0070,  ...,  0.0016, -0.0032,  0.0003],
        ...,
        [ 0.0088,  0.0180, -0.0012,  ..., -0.0141, -0.0221,  0.0113],
        [ 0.0044, -0.0008, -0.0002,  ...,  0.0040, -0.0095, -0.0016],
        [-0.0175,  0.0040,  0.0059,  ...,  0.0070, -0.0116,  0.0016]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 309/965 [09:45<19:04,  1.75s/it]

tensor([[-0.0013,  0.0038, -0.0014,  ..., -0.0116, -0.0049, -0.0012],
        [ 0.0030, -0.0061, -0.0001,  ...,  0.0032, -0.0034, -0.0037],
        [-0.0061,  0.0127, -0.0027,  ..., -0.0019, -0.0157,  0.0015],
        ...,
        [ 0.0053,  0.0123,  0.0081,  ..., -0.0037,  0.0034,  0.0076],
        [ 0.0028, -0.0054,  0.0060,  ...,  0.0014, -0.0022, -0.0021],
        [ 0.0015,  0.0164,  0.0025,  ..., -0.0099, -0.0031,  0.0199]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 310/965 [09:46<18:51,  1.73s/it]

tensor([[ 0.0002,  0.0018,  0.0027,  ..., -0.0032, -0.0094,  0.0026],
        [-0.0040,  0.0121, -0.0055,  ..., -0.0142, -0.0330,  0.0051],
        [ 0.0012,  0.0137,  0.0011,  ..., -0.0005, -0.0112, -0.0013],
        ...,
        [ 0.0062,  0.0081, -0.0092,  ..., -0.0039, -0.0079,  0.0058],
        [ 0.0092,  0.0092,  0.0022,  ..., -0.0107, -0.0041,  0.0069],
        [-0.0001,  0.0039,  0.0003,  ..., -0.0005, -0.0072, -0.0004]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 311/965 [09:48<19:49,  1.82s/it]

tensor([[ 1.1957e-03, -3.5723e-04,  1.2151e-02,  ..., -6.6329e-03,
         -9.3231e-03, -3.3717e-03],
        [-7.1133e-03,  1.1756e-02,  4.1918e-03,  ..., -2.7881e-03,
         -9.1396e-03,  8.9204e-03],
        [-8.3878e-03,  6.8396e-03, -1.1816e-02,  ...,  1.1564e-03,
         -1.4483e-02,  7.7236e-03],
        ...,
        [ 1.7813e-03,  5.3965e-05,  5.2366e-03,  ..., -4.7697e-03,
          2.3047e-03, -2.9228e-03],
        [-1.8594e-04,  1.1207e-02,  1.1987e-02,  ..., -5.8665e-03,
         -1.6095e-02,  1.5967e-02],
        [-1.2619e-02, -1.7331e-02,  9.5493e-03,  ...,  1.1855e-02,
         -1.3115e-02, -1.5736e-02]], device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 312/965 [09:51<21:23,  1.97s/it]

tensor([[-0.0058, -0.0075, -0.0141,  ...,  0.0006, -0.0053, -0.0120],
        [ 0.0061,  0.0170, -0.0027,  ..., -0.0054, -0.0311,  0.0106],
        [ 0.0068,  0.0055, -0.0015,  ...,  0.0120, -0.0102, -0.0002],
        ...,
        [-0.0061,  0.0081,  0.0046,  ..., -0.0005, -0.0145,  0.0134],
        [ 0.0036, -0.0047,  0.0168,  ..., -0.0116, -0.0138, -0.0041],
        [-0.0078,  0.0096, -0.0025,  ..., -0.0024, -0.0061,  0.0077]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 32%|███▏      | 313/965 [09:52<20:29,  1.89s/it]

tensor([[-1.4163e-02,  3.1830e-03,  7.7315e-03,  ...,  8.8212e-03,
         -1.4636e-02, -9.9212e-03],
        [ 2.4236e-03,  6.8269e-03,  1.1707e-02,  ...,  4.3041e-03,
         -1.1635e-02, -6.7888e-04],
        [-3.0856e-03, -9.2036e-03, -1.1585e-02,  ...,  9.9925e-03,
         -5.2715e-03, -3.2513e-03],
        ...,
        [ 5.3074e-04,  2.8153e-03, -4.9467e-03,  ..., -9.4466e-05,
         -8.3338e-03,  4.2550e-03],
        [ 5.6590e-03,  2.2259e-03, -2.6602e-03,  ..., -6.5053e-03,
          1.1431e-02,  1.6052e-03],
        [-1.2240e-02, -9.4126e-04, -5.0187e-03,  ..., -5.1824e-03,
         -1.0445e-02,  9.6369e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 314/965 [09:54<19:37,  1.81s/it]

tensor([[-0.0062,  0.0153, -0.0043,  ...,  0.0001, -0.0043,  0.0120],
        [-0.0013,  0.0040, -0.0117,  ...,  0.0073,  0.0005,  0.0087],
        [ 0.0006,  0.0028,  0.0116,  ..., -0.0004,  0.0065, -0.0060],
        ...,
        [ 0.0011, -0.0035, -0.0014,  ..., -0.0003,  0.0011, -0.0004],
        [-0.0013, -0.0110,  0.0109,  ..., -0.0034,  0.0007, -0.0110],
        [-0.0017,  0.0198, -0.0029,  ..., -0.0094, -0.0045,  0.0120]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 315/965 [09:56<19:03,  1.76s/it]

tensor([[-0.0016, -0.0016,  0.0071,  ...,  0.0022, -0.0067, -0.0019],
        [-0.0037, -0.0076, -0.0036,  ...,  0.0074, -0.0063, -0.0053],
        [-0.0143,  0.0083,  0.0005,  ..., -0.0058, -0.0170,  0.0144],
        ...,
        [-0.0029, -0.0072,  0.0071,  ...,  0.0094, -0.0049, -0.0133],
        [-0.0026, -0.0069, -0.0010,  ..., -0.0044, -0.0135, -0.0026],
        [-0.0043,  0.0023,  0.0028,  ...,  0.0040, -0.0049,  0.0042]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 316/965 [09:58<19:14,  1.78s/it]

tensor([[-0.0006, -0.0027,  0.0025,  ...,  0.0056, -0.0015,  0.0046],
        [ 0.0012, -0.0055,  0.0021,  ...,  0.0042,  0.0038,  0.0006],
        [-0.0169,  0.0246,  0.0018,  ..., -0.0040, -0.0145,  0.0022],
        ...,
        [ 0.0038, -0.0017, -0.0063,  ...,  0.0081, -0.0096, -0.0080],
        [-0.0004,  0.0087, -0.0041,  ..., -0.0006,  0.0002,  0.0031],
        [ 0.0050, -0.0002,  0.0018,  ..., -0.0035, -0.0016, -0.0010]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 317/965 [09:59<18:36,  1.72s/it]

tensor([[-1.1780e-04,  1.9665e-03,  3.2363e-03,  ..., -2.3337e-03,
          2.2858e-03,  4.0733e-03],
        [-3.1498e-03,  9.7273e-04, -1.7122e-03,  ...,  1.0805e-02,
         -5.0444e-03, -1.9461e-03],
        [-1.4035e-04,  4.5763e-03,  9.8099e-04,  ..., -5.9613e-04,
         -2.8256e-03, -3.9514e-03],
        ...,
        [ 2.4425e-03,  1.0769e-02,  2.5411e-03,  ..., -5.6310e-03,
          1.3695e-04,  7.5794e-03],
        [ 6.9905e-05,  3.6546e-03, -5.4676e-03,  ...,  3.7439e-07,
         -2.8538e-04,  4.3005e-03],
        [ 3.1508e-04,  1.9398e-03, -4.5278e-03,  ..., -6.4929e-03,
          7.1518e-03, -2.7150e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 318/965 [10:01<19:35,  1.82s/it]

tensor([[ 0.0054, -0.0027,  0.0056,  ...,  0.0049, -0.0105, -0.0022],
        [-0.0024,  0.0027,  0.0057,  ..., -0.0013, -0.0112,  0.0014],
        [ 0.0043,  0.0003,  0.0012,  ...,  0.0025, -0.0060,  0.0018],
        ...,
        [-0.0070,  0.0042, -0.0010,  ..., -0.0064, -0.0133,  0.0068],
        [ 0.0012,  0.0033,  0.0026,  ...,  0.0048, -0.0108, -0.0032],
        [ 0.0033,  0.0126, -0.0065,  ..., -0.0064, -0.0116,  0.0044]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 319/965 [10:03<20:24,  1.90s/it]

tensor([[-0.0062, -0.0038, -0.0082,  ...,  0.0016, -0.0035, -0.0012],
        [ 0.0002, -0.0072,  0.0118,  ...,  0.0020, -0.0036, -0.0061],
        [-0.0053,  0.0107, -0.0053,  ..., -0.0041, -0.0145,  0.0125],
        ...,
        [-0.0142,  0.0131, -0.0012,  ...,  0.0009, -0.0085,  0.0115],
        [-0.0104,  0.0116, -0.0145,  ..., -0.0053, -0.0142,  0.0185],
        [ 0.0046,  0.0098,  0.0030,  ..., -0.0098, -0.0071,  0.0051]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 320/965 [10:05<19:14,  1.79s/it]

tensor([[-0.0060,  0.0152, -0.0017,  ..., -0.0062, -0.0021,  0.0133],
        [-0.0020, -0.0055,  0.0094,  ..., -0.0117, -0.0092, -0.0011],
        [-0.0074,  0.0010,  0.0067,  ...,  0.0004, -0.0087, -0.0105],
        ...,
        [-0.0031,  0.0168, -0.0089,  ..., -0.0178,  0.0070,  0.0106],
        [-0.0003,  0.0066, -0.0027,  ..., -0.0038, -0.0193, -0.0039],
        [-0.0096,  0.0169,  0.0037,  ..., -0.0039, -0.0225,  0.0038]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 321/965 [10:07<19:15,  1.79s/it]

tensor([[ 0.0037, -0.0039, -0.0056,  ...,  0.0018, -0.0133, -0.0041],
        [-0.0135,  0.0154, -0.0174,  ..., -0.0071, -0.0103,  0.0197],
        [-0.0069, -0.0090, -0.0047,  ...,  0.0111, -0.0015,  0.0017],
        ...,
        [ 0.0023,  0.0037,  0.0065,  ..., -0.0059,  0.0010,  0.0077],
        [ 0.0010,  0.0026, -0.0033,  ..., -0.0016, -0.0125,  0.0030],
        [ 0.0019,  0.0052, -0.0065,  ..., -0.0012, -0.0063,  0.0045]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 322/965 [10:08<18:40,  1.74s/it]

tensor([[ 0.0035, -0.0072,  0.0059,  ..., -0.0043, -0.0095, -0.0020],
        [-0.0050, -0.0125,  0.0099,  ...,  0.0113, -0.0044, -0.0099],
        [-0.0147, -0.0168,  0.0058,  ..., -0.0034, -0.0158, -0.0175],
        ...,
        [ 0.0012,  0.0094,  0.0010,  ..., -0.0114,  0.0040,  0.0087],
        [-0.0008, -0.0047,  0.0067,  ..., -0.0005, -0.0087,  0.0010],
        [ 0.0032,  0.0032, -0.0073,  ..., -0.0103, -0.0062,  0.0053]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 33%|███▎      | 323/965 [10:10<20:12,  1.89s/it]

tensor([[-0.0009, -0.0138, -0.0147,  ...,  0.0098,  0.0047, -0.0030],
        [ 0.0036, -0.0093, -0.0058,  ...,  0.0166, -0.0115, -0.0117],
        [ 0.0083, -0.0033, -0.0094,  ...,  0.0074,  0.0019,  0.0059],
        ...,
        [-0.0031,  0.0015, -0.0104,  ..., -0.0035, -0.0033,  0.0095],
        [ 0.0057,  0.0192, -0.0093,  ..., -0.0061,  0.0015,  0.0118],
        [-0.0078,  0.0020,  0.0036,  ..., -0.0009, -0.0040, -0.0064]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 34%|███▎      | 324/965 [10:12<20:06,  1.88s/it]

tensor([[ 0.0030,  0.0032,  0.0016,  ..., -0.0137, -0.0057,  0.0033],
        [-0.0096, -0.0141, -0.0121,  ...,  0.0015, -0.0197, -0.0058],
        [ 0.0009, -0.0009, -0.0023,  ...,  0.0060, -0.0057, -0.0076],
        ...,
        [ 0.0004, -0.0042, -0.0088,  ..., -0.0071, -0.0183,  0.0087],
        [-0.0014,  0.0014, -0.0098,  ..., -0.0022, -0.0115,  0.0025],
        [ 0.0068,  0.0162, -0.0053,  ..., -0.0061,  0.0023,  0.0091]],
       device='cuda:0', grad_fn=<AddmmBackward0>)


 34%|███▎      | 325/965 [10:14<20:24,  1.91s/it]

tensor([[ 1.4242e-05,  9.8087e-03,  6.1818e-04,  ..., -1.7927e-03,
         -6.1715e-03,  4.5619e-03],
        [-2.8941e-03,  1.6620e-03,  7.9679e-03,  ...,  2.9539e-03,
         -1.5807e-02,  9.4477e-04],
        [ 2.9147e-03,  5.2847e-04, -5.8097e-03,  ...,  7.9174e-04,
         -6.3703e-03,  5.7978e-03],
        ...,
        [-1.4370e-03,  1.2474e-02,  5.3783e-03,  ..., -1.0676e-02,
         -5.2105e-03,  4.9269e-03],
        [-7.5708e-03,  7.0105e-03, -4.5037e-03,  ..., -7.5956e-03,
         -1.1309e-02,  3.5172e-03],
        [-1.7659e-03, -3.7510e-03, -1.7791e-03,  ...,  5.5896e-03,
         -7.5896e-03, -1.3715e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 34%|███▍      | 326/965 [10:16<20:30,  1.93s/it]

tensor([[-1.1753e-02,  4.3964e-03,  3.7866e-03,  ...,  1.2612e-02,
         -1.8956e-02,  1.1124e-02],
        [-6.9810e-03,  1.0927e-02, -1.8885e-03,  ..., -8.3905e-03,
         -1.2991e-02,  9.4365e-03],
        [-1.7881e-07,  2.6573e-02, -1.1231e-02,  ..., -2.5710e-02,
         -1.3888e-02,  1.5882e-02],
        ...,
        [-5.6013e-03,  1.0157e-02,  1.0789e-02,  ..., -2.0180e-02,
         -6.5469e-03,  8.2262e-03],
        [-2.4656e-03,  1.3417e-02,  2.7459e-03,  ..., -9.4254e-04,
         -1.2591e-03,  7.4222e-03],
        [-4.4705e-03,  4.5868e-03,  3.3178e-03,  ..., -5.3383e-03,
          7.5683e-05, -4.5320e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 34%|███▍      | 327/965 [10:18<19:27,  1.83s/it]

tensor([[-2.0344e-02, -8.3226e-03, -3.2223e-03,  ..., -7.1712e-03,
         -7.3127e-03,  3.2354e-03],
        [ 4.9225e-03, -7.0192e-03, -4.2753e-03,  ...,  1.7471e-03,
         -6.5385e-03, -1.0228e-02],
        [ 3.0407e-03,  1.1856e-03,  5.9602e-03,  ..., -1.5773e-03,
         -6.4218e-03, -8.1700e-04],
        ...,
        [ 6.4235e-04,  1.0368e-02, -6.1669e-04,  ..., -5.4458e-03,
         -3.1524e-03,  6.7588e-03],
        [-1.5460e-02,  8.9118e-03,  5.9598e-04,  ...,  6.3831e-05,
         -1.9226e-02,  1.1731e-02],
        [-3.9331e-03,  2.4422e-03, -4.2927e-03,  ..., -3.9557e-03,
          2.1856e-03, -3.4254e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 34%|███▍      | 328/965 [10:20<20:15,  1.91s/it]

tensor([[-9.1923e-03, -5.3860e-03, -5.1086e-04,  ..., -1.4663e-02,
         -9.8473e-03, -4.5229e-03],
        [-2.0145e-03, -1.8967e-03,  4.2104e-03,  ...,  3.8008e-03,
         -3.4464e-03,  5.0436e-03],
        [-1.4956e-02,  1.4554e-03, -3.7369e-03,  ...,  1.6953e-02,
         -4.8899e-03, -6.1975e-03],
        ...,
        [-3.3464e-03,  5.0933e-03, -3.8880e-03,  ..., -5.2161e-03,
         -5.8530e-03,  9.3206e-03],
        [-1.5511e-02,  1.0186e-02, -2.8088e-05,  ..., -1.1971e-02,
         -6.7979e-03,  1.1683e-02],
        [ 2.0151e-03,  1.1042e-02,  6.8493e-03,  ..., -2.1827e-03,
         -4.2502e-03,  1.0438e-03]], device='cuda:0', grad_fn=<AddmmBackward0>)


 34%|███▍      | 328/965 [10:21<20:06,  1.89s/it]


KeyboardInterrupt: 