In [1]:
import matplotlib.pylab as plt
%matplotlib inline
import numpy as np
import torch
import tqdm
from sklearn.model_selection import StratifiedShuffleSplit
import json

In [4]:
#!rm *txt
#!rm *jsonl


In [2]:
EPOCH_NUM = 1000
TEST_NUM = 2000
TRAIN_NUM = 16 # training dataset size
POLY_NUM = 3
D = 4 # number of domains
HEAD_NUM = 8 #D * 8
RUN_NUM = 20
GRAD_CLIP = 1500.0 # for unstable gradients pruning

def rmse(y, y_hat):
    return torch.sqrt(((y-y_hat)**2).mean())
crit2 = rmse # eval criterion

In [3]:
def make_angles():
    if D == 2:
        return [0, 180] # hard-coding angles for 2D cases (to be opposite)
    step = 360/D
    return [i*step for i in range(D)]
make_angles()

[0.0, 90.0, 180.0, 270.0]

In [4]:
# data generation
from scipy.ndimage import rotate
def gen(N1, N2, sigma=.1, seed=42, angle = 0.0):
    """
    N1: train size
    N2: test size
    """
    angle = np.pi/2 * angle/90
    N = N1+N2
    rs = np.random.RandomState(seed)
    x = rs.randn(N, 2) 
    y = x[:,0]*0.25 + x[:,1]*0.75 # y = X1 * 0.25 + x2 * 0.75, other  X components are irrelevant 
    
    if angle: # making rotation if angle != 0
        M = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
        x[:,:2] = (M@x[:,:2].T).T
    # shuffling
    elems = list(range(len(x)))
    rs.shuffle(elems)
    trainval, test = elems[:N1], elems[N1:]
    xtrainval, ytrainval = x[trainval], y[trainval]
    xtest,ytest = x[test], y[test]
    elems = list(range(len(xtrainval)))
    rs.shuffle(elems)
    # actually this code is a bit messy, it was inherited from classification task
    train, val = elems[:N1//2], elems[N1//2:]
    
    xtrain, ytrain = xtrainval[train], ytrainval[train]
    xval, yval = xtrainval[val], ytrainval[val]
    return (xtrain, ytrain), (xval, yval), (xtest, ytest)
xy, _,_ = gen(4, 2, angle=0)


In [5]:
def make_inference(x, rots, models, gammas, use_softmax=True, only_angle=False, c_head = False, t=1.0):
    
    if use_softmax:
        g0 = torch.distributions.RelaxedOneHotCategorical(t, logits=gammas[0]).rsample((x.shape[0],))
        x = torch.sum(torch.cat([(r(x)*g0[:, i].view(-1, 1)).unsqueeze(2) for i, r in enumerate(rots)], 2), 2)
    
    else:
        if len(gammas[0].shape) == 1:
            g0 = (gammas[0] == gammas[0].max()) * 1
            x = torch.sum(torch.cat([(r(x)*g0[i]).unsqueeze(2) for i, r in enumerate(rots)], 2), 2)
        else: # sample from rl
            g0 = gammas[0]
            x = torch.sum(torch.cat([(r(x)*g0[:, i].view(-1, 1)).unsqueeze(2) for i, r in enumerate(rots)], 2), 2)
    
    
    if only_angle:
        return x 
    x = poly_generate(x)
    if c_head:
        g1 = torch.zeros(gammas[1].shape)
        g1[0] += 0
        
    if use_softmax:
        g1 = torch.distributions.RelaxedOneHotCategorical(t, logits=gammas[1]).rsample((x.shape[0],))
        x = torch.sum(torch.cat([(m(x)*g1[:, i].view(-1, 1)).unsqueeze(2) for i, m in enumerate(models)], 2), 2)
       
    else:
        if len(gammas[0].shape) == 1:
            g1 = (gammas[1] == gammas[1].max()) * 1
            x = torch.sum(torch.cat([(m(x)*g1[i]).unsqueeze(2) for i, m in enumerate(models)], 2), 2)
        else: # sample from rl
            g1 = gammas[1]
            x = torch.sum(torch.cat([(m(x)*g1[:, i].view(-1, 1)).unsqueeze(2) for i, m in enumerate(models)], 2), 2)
    
    return x



In [21]:
xy[0][0]

array([-0.46341769, -0.46572975])

In [6]:
def poly_generate(x):
    result = [x]
    for poly in range(POLY_NUM):
        deg = poly+2 
        new_x = x ** deg#/(deg)
        result.append(new_x)
    result =  torch.hstack(result)
    return result
poly_generate(torch.tensor(xy[0])).shape

torch.Size([2, 8])

In [7]:
# one-model for strictly one domain, no NAS
accs = []
for k in range(RUN_NUM):
    torch.manual_seed(42+k)
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = gen(TRAIN_NUM, TEST_NUM, seed=42+k)
    x_train = np.concatenate([x_train, x_val]) # we don't use train/validation step
    y_train = np.concatenate([y_train, y_val])
    
    lin_model = torch.nn.Linear(2*(POLY_NUM+1),1)
    model = lambda x: lin_model(poly_generate(x))
    opt = torch.optim.SGD(lin_model.parameters(), lr=1e-3)
    crit = torch.nn.MSELoss()
    tq = tqdm.tqdm_notebook(range(EPOCH_NUM))
    
    for e in tq:
        opt.zero_grad()
        out = model(torch.tensor(x_train).float())[:,0]
        loss = crit(out, torch.tensor(y_train).float())
        loss.backward()
        torch.nn.utils.clip_grad_value_(lin_model.parameters(), GRAD_CLIP)
        opt.step()
        tq.set_description(str(loss.item()))
    acc = crit2(model(torch.tensor(x_test).float())[:,0], torch.tensor(y_test).float()).item()
    accs.append(acc)
    
print (accs, np.mean(accs)) 

with open('one_model.txt', 'a') as out:
    out.write(f'D={D}, HEAD_NUM={HEAD_NUM}, RUN_NUM={RUN_NUM}, CRIT={np.mean(accs)}+-{np.std(accs)}.   MIN={np.min(accs)}, MAX={np.max(accs)}\n')
with open('one_model.jsonl', 'a') as out:
    out.write(json.dumps({'D': D, 'HEAD_NUM':HEAD_NUM, 'RUN_NUM': RUN_NUM, 'accs': accs})+'\n')
    

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM))


  0%|          | 0/1000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [35]:
# shared-model:
# one-model for all the domains, without any rotation, without NAS
accs = []
for k in range(RUN_NUM):
    torch.manual_seed(42+k)
    x_train, y_train, x_test, y_test = [],[],[],[]
    for angle in make_angles():
        (_x_train, _y_train), (x_val, y_val), (_x_test, _y_test) = gen(TRAIN_NUM,
                                                                       TEST_NUM, seed=42+k+int(angle),
                                                                       angle=angle)
        
            
        
        _x_train = np.concatenate([_x_train, x_val]) # we don't use train/validation step
        _y_train = np.concatenate([_y_train, y_val])
        x_train.extend(_x_train)
        y_train.extend(_y_train)
        x_test.extend(_x_test)
        y_test.extend(_y_test)
    
    lin_model = torch.nn.Linear(2*(1+POLY_NUM),1) 
    opt = torch.optim.SGD(lin_model.parameters(), lr=1e-3)
    model = lambda x: lin_model(poly_generate(x))
    crit = torch.nn.MSELoss()
    tq = tqdm.tqdm_notebook(range(EPOCH_NUM  * D)) # in darts we make D iterations per epoch. So here we multiply EPOCH_NUM  * D for compensation
    
    for e in tq:
        opt.zero_grad()
        out = model(torch.tensor(x_train).float())[:,0]
        loss = crit(out, torch.tensor(y_train).float())
        loss.backward()
        torch.nn.utils.clip_grad_value_(lin_model.parameters(), GRAD_CLIP)
        opt.step()
        tq.set_description(str(loss.item()))
    acc = crit2(model(torch.tensor(x_test).float())[:,0], torch.tensor(y_test).float()).item()
    accs.append(acc)
    print (acc)
print (accs, np.mean(accs)) 

with open('shared_model.txt', 'a') as out:
    out.write(f'D={D}, HEAD_NUM={HEAD_NUM}, RUN_NUM={RUN_NUM},  CRIT={np.mean(accs)}+-{np.std(accs)}. MIN={np.min(accs)}, MAX={np.max(accs)}\n')
with open('shared_model.jsonl', 'a') as out:
    out.write(json.dumps({'D': D, 'HEAD_NUM':HEAD_NUM, 'RUN_NUM': RUN_NUM, 'accs': accs})+'\n')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM  * D)) # in darts we make D iterations per epoch. So here we multiply EPOCH_NUM  * D for compensation


  0%|          | 0/4000 [00:00<?, ?it/s]

1.3885122537612915


  0%|          | 0/4000 [00:00<?, ?it/s]

1.4878994226455688


  0%|          | 0/4000 [00:00<?, ?it/s]

0.8789204359054565


  0%|          | 0/4000 [00:00<?, ?it/s]

1.004591941833496


  0%|          | 0/4000 [00:00<?, ?it/s]

0.8532770276069641
[1.3885122537612915, 1.4878994226455688, 0.8789204359054565, 1.004591941833496, 0.8532770276069641] 1.1226402163505553


In [None]:
lin_model.weight

In [9]:
# perfect-case:
# one model for all the data, but the data is perfectly rotated
accs = []
for k in range(RUN_NUM):
    torch.manual_seed(42+k)
    x_train, y_train, x_test, y_test = [],[],[],[]
    for angle in make_angles():
        (_x_train, _y_train), (x_val, y_val), (_x_test, _y_test) = gen(TRAIN_NUM, 2000, seed=42+k+int(angle),
                                                                       angle=0.0)
        
        _x_train = np.concatenate([_x_train, x_val]) # we don't use train/validation step
        _y_train = np.concatenate([_y_train, y_val])
        x_train.extend(_x_train)
        y_train.extend(_y_train)
        x_test.extend(_x_test)
        y_test.extend(_y_test)
    
    
    lin_model = torch.nn.Linear(2*(POLY_NUM+1),1) 
    opt = torch.optim.SGD(lin_model.parameters(), lr=1e-3)
    model = lambda x: lin_model(poly_generate(x))
    crit = torch.nn.MSELoss()
    tq = tqdm.tqdm_notebook(range(EPOCH_NUM * D))
    
    for e in tq:
        opt.zero_grad()
        out = model(torch.tensor(x_train).float())[:,0]
        loss = crit(out, torch.tensor(y_train).float())
        loss.backward()
        torch.nn.utils.clip_grad_value_(lin_model.parameters(), GRAD_CLIP)
        opt.step()
        tq.set_description(str(loss.item()))
    acc = crit2(model(torch.tensor(x_test).float())[:,0], torch.tensor(y_test).float()).item()
    accs.append(acc)
    print (acc)
print(np.mean(accs)) 
with open('perfect_model.txt', 'a') as out:
    out.write(f'D={D}, HEAD_NUM={HEAD_NUM}, RUN_NUM={RUN_NUM}, CRIT={np.mean(accs)}+-{np.std(accs)}.   MIN={np.min(accs)}, MAX={np.max(accs)}\n')
with open('perfect_model.jsonl', 'a') as out:
    out.write(json.dumps({'D': D, 'HEAD_NUM':HEAD_NUM, 'RUN_NUM': RUN_NUM, 'accs': accs})+'\n')

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM * D))


  0%|          | 0/4000 [00:00<?, ?it/s]

  out = model(torch.tensor(x_train).float())[:,0]


0.0724780336022377


  0%|          | 0/4000 [00:00<?, ?it/s]

0.35640406608581543


  0%|          | 0/4000 [00:00<?, ?it/s]

0.11975093930959702


  0%|          | 0/4000 [00:00<?, ?it/s]

0.16009056568145752


  0%|          | 0/4000 [00:00<?, ?it/s]

0.0829872339963913


  0%|          | 0/4000 [00:00<?, ?it/s]

0.14346402883529663


  0%|          | 0/4000 [00:00<?, ?it/s]

0.16480834782123566


  0%|          | 0/4000 [00:00<?, ?it/s]

0.2231200933456421


  0%|          | 0/4000 [00:00<?, ?it/s]

0.576605498790741


  0%|          | 0/4000 [00:00<?, ?it/s]

0.17726732790470123


  0%|          | 0/4000 [00:00<?, ?it/s]

0.2745930552482605


  0%|          | 0/4000 [00:00<?, ?it/s]

0.17707431316375732


  0%|          | 0/4000 [00:00<?, ?it/s]

0.15575726330280304


  0%|          | 0/4000 [00:00<?, ?it/s]

0.17612233757972717


  0%|          | 0/4000 [00:00<?, ?it/s]

0.6122799515724182


  0%|          | 0/4000 [00:00<?, ?it/s]

0.10054340958595276


  0%|          | 0/4000 [00:00<?, ?it/s]

0.34368160367012024


  0%|          | 0/4000 [00:00<?, ?it/s]

0.1256198137998581


  0%|          | 0/4000 [00:00<?, ?it/s]

0.3906998634338379


  0%|          | 0/4000 [00:00<?, ?it/s]

0.5258259773254395
0.2479586862027645


In [8]:
# rotation module for torch. Angles are in degrees
class Rot(torch.nn.Module):
    def __init__(self, angle = None):
        super().__init__()
        if angle is None:
            angle = torch.randn(1)*90
        else:
            angle = torch.tensor(angle).float()
        self.angle = torch.nn.Parameter(angle)
    
    def forward(self, x):
        angle = self.angle * np.pi/2 / 90
        c = torch.cos(angle)
        s = torch.sin(angle)
        M = torch.vstack([torch.hstack([c, -s]), torch.hstack([s, c])])
        result = x * 1.0
        result[:,:2] = (M@x[:,:2].T).T
        return result


In [9]:
nn = torch.nn
# simple triplet loss: for each objects takes a random triplet (with triplet conditions)
class MdTripletLoss(nn.Module):
    def __init__(self, m =0.0, p=2, subset_size = 9999, sample_num = 1):
        super(MdTripletLoss, self).__init__()
        self.triplet_loss = nn.TripletMarginLoss(margin=m, p=p)
        self.rs = np.random.RandomState(21)
        self.subset_size = subset_size
        self.sample_num = sample_num
    
    def forward(self, h1: torch.Tensor, h2: torch.Tensor, labels1: torch.LongTensor, labels2: torch.LongTensor):
        """
        :param: h1: hidden representations of size (bs, *), anchors
        :param: h1: hidden representations of size (bs, *), positives and negatives candidates
        """
        FEATURE_TRIPLET_DIM = 2
        h1 = h1[:, :FEATURE_TRIPLET_DIM]
        h2 = h2[:, :FEATURE_TRIPLET_DIM]
        
        bs = h1.size(0)
        if bs > self.subset_size:
            elems = list(range(bs))
            self.rs.shuffle(elems)
            h1 = h1[elems[:self.subset_size]]
            labels1 = labels1[elems[:self.subset_size]]
            self.rs.shuffle(elems)
            h2 = h2[elems[:self.subset_size]]
            labels2 = labels2[elems[:self.subset_size]]
            bs = self.subset_size
            
        
        h1 = h1.view(bs, -1)
        h2 = h2.view(bs, -1)
        
        loss = 0.0
        anch =  []
        pos = []
        neg = []
        ids1 = list(range(len(h1)))
        
        ids2 = list(range(len(h2)))
        rs = self.rs
        for _ in range(self.sample_num):
           
            #ids2 = ids2[::-1]
            rs.shuffle(ids1)
            rs.shuffle(ids2)
            h1 = h1[ids1]
            h2 = h2[ids2]
            labels1 = labels1[ids1]
            labels2 = labels2[ids2]

            for i in range(len(h1)):
                found = False 
                for j in range(len(h2)):

                    if found:
                        break
                    for k in range(j+1, len(h2)):
                        if   abs(labels1[i] - labels2[j]) > abs(labels1[i] - labels2[k]): #\
                        #and torch.linalg.norm(h1[i] - h2[j]) < torch.linalg.norm(h1[i] - h2[k]):
                            anch.append(h1[i])
                            pos.append(h2[k])
                            neg.append(h2[j])

                            found = True
                            break
        
        if len(anch) >0:
            a = torch.vstack(anch)
            p = torch.vstack(pos)
            n = torch.vstack(neg)
            loss = self.triplet_loss(a, p, n)
            return loss
        else:
            return 0.0

In [32]:
#model = ArchModel()
#len(list((model.models[0].parameters()))[0][0])


9

In [10]:
# our architecture
# consists of HEAD_NUM rotations and HEAD_NUN linear models

class ArchModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        
        self.gammas = [torch.nn.Parameter(torch.rand(2, HEAD_NUM)) for _ in range(D)]
        self.rots = torch.nn.ModuleList([Rot() for _ in range(HEAD_NUM)])
        self.models = torch.nn.ModuleList([torch.nn.Linear(2*(POLY_NUM+1), 1) for _ in range(HEAD_NUM)])
    def forward(self, x, d, gammas = None, only_angle = False, use_softmax = True, t=1.0):
        if gammas is None:
            gammas = self.gammas[d]
        return make_inference(x, self.rots, self.models, self.gammas[d],
                              use_softmax=use_softmax, only_angle=only_angle, t=t)
   

def multi_js_divergence(alphas) -> torch.tensor:
    js = 0.0
    for j in range(2): # rotation and head
        alpha_full = torch.stack([a[j] for a in alphas], dim=0)
        alpha_full = torch.softmax(alpha_full, dim=1)
        
        centroid = torch.mean(alpha_full, 0)
        
        d_centroid = torch.distributions.Categorical(probs=centroid)
        
        for alpha in alphas:
            d = torch.distributions.Categorical(probs=torch.softmax(alpha[j], 0))
            js += 1.0/len(alphas) * \
                torch.distributions.kl.kl_divergence(d, d_centroid)
    return js

# param number calculation   
def calc_params(gammas):
    linear_model_param_num = 2* ( POLY_NUM + 1) + 1
    cnt = 0
    rots = set()
    for i in range(D):
        rots.add(gammas[i][0].argmax().item())
    
    linears = set()
    for i in range(D):
        linears.add(gammas[i][1].argmax().item())
    return len(rots) + len(linears)*(linear_model_param_num)

#gammas = [torch.randn(2, 5) for _ in range(3)]
#multi_js_divergence(gammas)

In [11]:
def dartslike(fname, triplet_coef_search = 0.0, triplet_coef_train = 0.0, js_coef = 0.0, unroll: bool = True,
              lr=1e-3, lr0=0.1, lr2=.01):
    h = []
    accs = []
    param_num = []

    
    tl = MdTripletLoss(p=2.0, subset_size=9999, sample_num=1) # oleg
    
    for k in range(RUN_NUM):
        torch.manual_seed(42+k)
        x_train, y_train, x_val, y_val, x_test, y_test = [],[],[],[],[],[]
        for angle in make_angles():
            (_x_train, _y_train), (_x_val, _y_val), (_x_test, _y_test) = gen(TRAIN_NUM, TEST_NUM, seed=42+k+int(angle),
                                                                             angle=angle)
            x_train.append(_x_train)
            y_train.append(_y_train)
            x_val.append(_x_val)
            y_val.append(_y_val)
            x_test.append(_x_test)
            y_test.append(_y_test)
        model = ArchModel()
        
    
        rs = np.random.RandomState(42+k)
        
        opt = torch.optim.SGD([{'params': model.rots.parameters(), 'lr': lr0},
                              {'params': model.models.parameters(), 'lr': lr}],
                              lr=lr)
                              
                              
        opt2 = torch.optim.SGD(model.gammas, lr=lr2)

        crit = torch.nn.MSELoss()
        tq = tqdm.tqdm_notebook(range(EPOCH_NUM )) # taking half of epoch num for NAS
        losses = []
        losses2 = []
        for e in tq:
            t = max(0.2, 1.0 - e/EPOCH_NUM)
            
            for d in range(D):
            
                opt2.zero_grad()
                d2 = rs.choice([i for i in range(D) if i != d])
            
                # unrolling step: done manually without approximation
                if unroll:
                    opt.zero_grad()
                    x = torch.tensor(x_train[d]).float()
                    y = torch.tensor(y_train[d]).float()

                    x2 = torch.tensor(x_train[d2]).float()
                    y2 = torch.tensor(y_train[d2]).float()

                    out_angle1 = model(x, d, only_angle=True, t=t)
                    out_angle2 = model(x2, d2, only_angle=True, t=t)

                    out = model(x, d, t=t)[:,0]
                    loss = crit(out, y)
                    if triplet_coef_search:
                        tloss = tl(out_angle1, out_angle2, y, y2) * triplet_coef_search
                        loss += tloss
                    grads = torch.autograd.grad(loss, model.parameters())
                    grads = [torch.clamp(g, -GRAD_CLIP, GRAD_CLIP) for g in grads]
                        
                    with torch.no_grad():
                        for p,g in zip(model.parameters(), grads):
                            p = p + lr * g
                            
                x = torch.tensor(x_val[d]).float()
                y = torch.tensor(y_val[d]).float()

                x2 = torch.tensor(x_val[d2]).float()
                y2 = torch.tensor(y_val[d2]).float()

                out_angle1 = model(x, d, only_angle=True, t=t)
                out_angle2 = model(x2, d2, only_angle=True, t=t)
                

                out = model(x, d, t=t)[:,0]
                loss2 = crit(out, y) 
                if triplet_coef_search:
                        tloss = tl(out_angle1, out_angle2, y, y2) * triplet_coef_search
                        loss2 += tloss
                if js_coef:
                    loss2 += multi_js_divergence(model.gammas).mean() * js_coef
                loss2.backward()
                torch.nn.utils.clip_grad_value_(model.gammas, GRAD_CLIP)
                opt2.step()
                
                if unroll:
                    with torch.no_grad():
                        for i,g in zip(model.parameters(), grads):
                            p = p - lr * g
                            
                            
                opt.zero_grad()
                x = torch.tensor(x_train[d]).float()
                y = torch.tensor(y_train[d]).float()

                x2 = torch.tensor(x_train[d2]).float()
                y2 = torch.tensor(y_train[d2]).float()

                out_angle1 = model(x, d, only_angle=True, t=t)
                out_angle2 = model(x2, d2, only_angle=True, t=t)
                
                out = model(x, d)[:,0]
                loss = crit(out, y)
                if triplet_coef_search:
                    tloss = tl(out_angle1, out_angle2, y, y2) * triplet_coef_search
                    loss += tloss
                loss.backward()
                torch.nn.utils.clip_grad_value_(model.parameters(), GRAD_CLIP)
                opt.step()
                losses.append(loss.item())
                losses2.append(loss2.item())
            
            tq.set_description(str(np.mean(losses))+';'+str(np.mean(losses2)))

        tq = tqdm.tqdm_notebook(range(EPOCH_NUM))
        
        # search step. Now we concatenate train and validation
        x_train = np.concatenate([x_train, x_val], axis=1)
        y_train =  np.concatenate([y_train, y_val], axis=1)
        
        
        for g in model.gammas:
            print (g.argmax(1))
        
        for g in model.gammas:
            print (g)
        
        opt = torch.optim.SGD([{'params': model.rots.parameters(), 'lr': lr0},
                              {'params': model.models.parameters(), 'lr': lr}],
                              lr=lr)
        
        for e in tq:
            
            for d in range(D):
                loss = 0.0
                opt.zero_grad()

                d2 = rs.choice([i for i in range(D) if i != d])

                x = torch.tensor(x_train[d]).float()
                y = torch.tensor(y_train[d]).float()
                out = model(x, d, use_softmax=False)[:,0]

               
                loss = crit(out, y) 
                if triplet_coef_train:
                    x2 = torch.tensor(x_train[d2]).float()
                    y2 = torch.tensor(y_train[d2]).float()

                    out_angle1 = model(x, d, only_angle=True, use_softmax=False) # note, using one-hot here
                    out_angle2 = model(x2, d2, only_angle=True, use_softmax=False)

                    tloss = tl(out_angle1, out_angle2, y, y2)* triplet_coef_train
                    loss += tloss
                
                
                loss.backward()
                torch.nn.utils.clip_grad_value_(model.parameters(), GRAD_CLIP)
                opt.step()
            angles= [str(r.angle.item()) for r in model.rots]
            #print (angles)
            tq.set_description(' '.join(angles))
        total = 0
        for d in range(D):

            out = model(torch.tensor(x_test[d]).float(), d, use_softmax=False)
            total += crit2(out[:,0], torch.tensor(y_test[d]).float()).item()
            print ('acc ', d, crit2(out[:,0], torch.tensor(y_test[d]).float()).item())
        acc = total/D
        accs.append(acc)
        param_num.append(calc_params(model.gammas))
        print (acc, param_num[-1])
        
    with open(fname+'.txt', 'a') as out:
        out.write(f'D={D}, HEAD_NUM={HEAD_NUM}, RUN_NUM={RUN_NUM}, CRIT={np.mean(accs)}+-{np.std(accs)}.  MIN={np.min(accs)}, MAX={np.max(accs)}')
        out.write(f' PARAMS={np.mean(param_num)}+-{np.std(param_num)}. MIN={np.min(param_num)}, MAX={np.max(param_num)}\n')
    
    with open(fname+'.jsonl', 'a') as out:
        out.write(json.dumps({'D': D, 'HEAD_NUM': HEAD_NUM, 'RUN_NUM': RUN_NUM, 'accs': accs, 'params': param_num})+'\n')
    print (model.gammas)
    for r in model.rots:
        print (r.angle)
    return (np.mean(accs), np.mean(param_num))


In [12]:
# helper for make inference for a model

#for D in [2,3,4]:
#    for HEAD_NUM in [D, 8]:
for D in [4]:
    for HEAD_NUM in [8]:

        dartslike('triplets_gs', 1000.0, 1000.0)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM )) # taking half of epoch num for NAS


  0%|          | 0/1000 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM))


  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 0])
tensor([0, 6])
tensor([3, 6])
tensor([1, 0])
Parameter containing:
tensor([[-0.0225, -3.1178, -1.4113, -0.3959,  0.9707,  1.3132,  9.2108, -1.3661],
        [ 0.9071,  0.2940,  0.6892,  0.6719,  0.6382,  0.7046,  0.7425,  0.5624]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0739e+01, -1.6952e+00,  6.3089e-03, -2.4048e+00,  5.2030e-01,
          1.9893e+00, -1.7392e+00, -3.2227e+00],
        [ 1.3185e-01,  2.5738e-01,  1.3150e-01,  6.5252e-02,  6.4609e-01,
         -9.4677e-02,  1.1486e+00,  2.2714e-01]], requires_grad=True)
Parameter containing:
tensor([[-8.1441e-01, -3.5143e-01,  4.1263e-01,  1.0004e+01, -2.1638e+00,
         -1.4262e+00,  1.2386e-01, -7.9007e-01],
        [ 8.3684e-01,  8.5815e-01,  1.0491e+00,  7.3535e-01,  1.0527e+00,
          4.4803e-03,  1.0565e+00, -4.8895e-01]], requires_grad=True)
Parameter containing:
tensor([[ 1.4904, 11.1621, -2.8820, -2.3336, -0.7658, -1.1921, -0.9870, -0.8530],
        [ 1.9639,  0.6081,  1.2531, -0.0895,

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 7])
tensor([7, 7])
tensor([2, 1])
tensor([3, 7])
Parameter containing:
tensor([[-0.1023,  1.6528, -2.6882, -0.7597, -0.6959,  9.2927, -1.2390, -2.6519],
        [ 1.0110,  0.5195,  0.9386, -0.3878,  0.1175,  0.7735,  0.6606,  1.1090]],
       requires_grad=True)
Parameter containing:
tensor([[-1.7094,  1.0117, -2.9540, -1.3571, -2.0054, -0.6332, -2.1315, 13.5249],
        [-0.3122,  0.9393,  0.7192,  1.5869,  0.2697, -0.7294, -0.4165,  1.9422]],
       requires_grad=True)
Parameter containing:
tensor([[-2.7364, -0.9561, 11.7981, -0.8606, -0.0224, -0.2963, -2.1074, -0.1596],
        [ 0.0463,  1.1838,  0.5872,  0.1593,  0.6796,  0.9760,  0.4476, -0.0387]],
       requires_grad=True)
Parameter containing:
tensor([[-0.9935, -0.1735, -1.4061,  9.3223, -0.2071, -0.3590, -1.0706, -0.7979],
        [ 0.4061,  0.4962,  0.1698,  0.0401,  0.0494,  0.5820, -0.0820,  1.3155]],
       requires_grad=True)
acc  0 0.5890401005744934
acc  1 0.619239330291748
acc  2 1.1555438041687012
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 4])
tensor([5, 5])
tensor([3, 1])
tensor([2, 7])
Parameter containing:
tensor([[ 2.5898,  9.6746, -1.6482, -2.0517,  0.3385, -1.3963, -0.6868, -2.5502],
        [ 0.9572,  0.7244,  0.9318,  0.5839,  1.2681,  0.3911,  0.7413, -0.0671]],
       requires_grad=True)
Parameter containing:
tensor([[-1.7052, -1.6386, -2.1446, -1.6124,  0.1239,  9.9974, -0.8818,  3.0914],
        [ 0.2233,  0.6474,  0.0215,  0.5323,  0.8184,  0.8688,  0.2773,  0.4035]],
       requires_grad=True)
Parameter containing:
tensor([[-1.5838, -1.2656, -1.7131, 11.1090,  0.3003, -1.0421, -0.5125, -1.4939],
        [ 0.1847,  1.2850, -0.1450,  0.5264,  0.7860,  0.3591,  0.1225, -0.0454]],
       requires_grad=True)
Parameter containing:
tensor([[-0.7682, -0.6174, 10.5216, -1.9044,  1.5230, -1.1692, -2.7379, -1.2112],
        [ 0.3683,  0.9265,  0.3396,  0.6162,  0.1227,  0.4799,  0.4733,  0.9635]],
       requires_grad=True)
acc  0 1.3073983192443848
acc  1 0.9512713551521301
acc  2 0.7430656552314758
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 0])
tensor([2, 1])
tensor([0, 7])
tensor([4, 4])
Parameter containing:
tensor([[-0.6973, -1.4660, -2.3971, -1.0507,  0.0798, -2.1230, 11.6338,  0.1999],
        [ 0.8082,  0.2417,  0.0789,  0.1788,  0.5058,  0.1985,  0.7991,  0.2677]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2253, -0.5645,  9.1727,  1.9240, -2.7035, -0.7988, -1.1181, -1.0502],
        [ 0.6067,  0.7934,  0.2532,  0.3652,  0.5784,  0.7506,  0.3419,  0.4971]],
       requires_grad=True)
Parameter containing:
tensor([[11.3288, -1.2635, -0.8306, -1.1643, -1.7379, -0.0321, -2.6926,  0.2187],
        [ 0.5202,  0.1529,  0.6735,  0.1151,  0.1970,  0.0364,  0.5514,  0.6744]],
       requires_grad=True)
Parameter containing:
tensor([[-1.2927, -1.6086, -0.3481, -0.9758, 10.5704, -2.7894,  0.4705, -0.2901],
        [ 0.2679,  0.7218,  0.4609,  0.7679,  0.9364,  0.6002,  0.3528,  0.2798]],
       requires_grad=True)
acc  0 0.6184577941894531
acc  1 0.831360399723053
acc  2 0.968535304069519
acc  3 0

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 4])
tensor([5, 5])
tensor([6, 2])
tensor([2, 1])
Parameter containing:
tensor([[-1.3309,  0.9636, -1.5366, -2.3746, -1.1496, -0.5332, -1.4597, 10.9436],
        [-0.0144,  0.4355,  0.6479,  0.0788,  1.1260,  0.5670,  1.0423,  0.7003]],
       requires_grad=True)
Parameter containing:
tensor([[-0.7022, -1.9274, -1.0049,  2.3657,  1.6262, 10.0410, -1.2816, -4.8446],
        [ 2.4822, -0.5060, -1.0232, -1.1750, -0.3060,  2.6559,  2.0081,  1.5861]],
       requires_grad=True)
Parameter containing:
tensor([[-0.3888, -1.5476, -1.1338, -1.7993, -0.3081, -1.9875, 13.3955, -2.5555],
        [-0.3413,  0.0821,  2.7723, -1.2322, -0.4237,  1.1142,  0.5471,  0.6137]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1598, -1.0593, 11.5544, -1.5081, -3.6930,  0.1578,  1.5533, -3.5319],
        [ 0.0659,  1.1700,  0.5617,  0.7474,  0.3219,  0.4600,  0.3725,  0.9543]],
       requires_grad=True)
acc  0 0.872482419013977
acc  1 0.4767111837863922
acc  2 1.005407452583313
acc  3 0

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 0])
tensor([6, 7])
tensor([4, 6])
tensor([3, 5])
Parameter containing:
tensor([[-1.1283,  0.1483, -1.3795,  2.1932, -1.2243, -3.8157, 10.2556, -1.0244],
        [ 4.3740, -0.3257, -0.6998, -1.5442,  1.2234, -1.0433,  0.5666,  0.7456]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2037, -0.5816, -2.2042, -4.7106, -0.9959,  1.5861,  9.1454,  0.4471],
        [-0.5725, -0.7263,  0.1539,  0.8603, -0.3339,  0.5731,  1.6840,  2.0267]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8745, -1.9395, -0.0782, -1.0358,  9.5624,  1.5304, -2.5721, -3.0255],
        [ 0.4519,  0.2725,  0.5887, -0.1207,  0.3508,  0.2964,  0.6101,  0.5825]],
       requires_grad=True)
Parameter containing:
tensor([[-1.7005, -1.3692, -5.6214, 11.5437,  0.5217,  0.1575,  0.5633, -1.1789],
        [-0.1833,  0.4060,  0.0542,  0.6691,  0.7795,  0.9525,  0.4981,  0.3457]],
       requires_grad=True)
acc  0 1.0404953956604004
acc  1 0.9049862027168274
acc  2 1.1027122735977173
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 5])
tensor([3, 3])
tensor([4, 4])
tensor([5, 4])
Parameter containing:
tensor([[ 9.7549, -0.4341,  0.2737, -1.0205, -0.9533,  0.1001, -0.8637, -2.9689],
        [ 0.7790,  0.1467,  0.1374,  0.6906,  0.6473,  0.9718,  0.5680, -0.0998]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.8015e-01, -1.1297e+00,  2.6136e+00,  1.1594e+01,  4.8116e-03,
         -4.2885e+00, -9.7657e-01, -3.9754e+00],
        [ 1.0395e+00, -5.2471e-01,  6.6595e-01,  1.4076e+00,  1.3318e+00,
          1.1934e+00,  4.8679e-01, -3.9791e-01]], requires_grad=True)
Parameter containing:
tensor([[-2.1654, -1.8075, -3.0319, -1.2576,  9.4463, -2.3169,  5.1957,  0.6365],
        [ 0.8250,  0.0868,  0.6822,  0.8938,  1.2570,  1.0584,  0.6006, -0.0347]],
       requires_grad=True)
Parameter containing:
tensor([[-0.7009,  2.9681, -3.6870, -1.1291, -2.5035,  9.8638, -0.5831,  0.5303],
        [ 0.8258, -0.1883,  0.7391,  0.8102,  1.0790,  0.6966,  1.0003, -0.3994]],
       requires_grad=True)
acc  0 0.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 0])
tensor([5, 7])
tensor([0, 5])
tensor([6, 0])
Parameter containing:
tensor([[-0.1714,  1.5012, -0.7981, -2.5765, -1.4369, -2.5569, -2.3193, 11.2008],
        [ 1.1767,  0.5173,  0.1699,  0.2476, -0.1160,  0.3317,  0.8286,  0.9637]],
       requires_grad=True)
Parameter containing:
tensor([[-2.1503, -2.0543,  1.3905,  2.6364, -2.0867, 11.2130, -0.7189, -3.7469],
        [ 0.5923, -0.1959, -0.5295, -0.4790,  0.6825,  0.0469,  1.7936,  1.7940]],
       requires_grad=True)
Parameter containing:
tensor([[11.5576, -2.1275, -4.1108, -0.1125, -1.4703, -0.5303,  1.0209, -0.3521],
        [ 1.9150, -2.5171,  1.2694, -2.3601, -1.5769,  6.6896, -0.1860,  0.1687]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2997,  0.8652, -1.0934, -1.3003, -0.6107, -2.9901,  9.4734, -0.5545],
        [ 1.6337, -0.4230,  1.0706, -0.8369, -0.1714,  1.1664,  0.5430,  0.2370]],
       requires_grad=True)
acc  0 0.28632068634033203
acc  1 1.5111130475997925
acc  2 0.4836585819721222
acc  

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 1])
tensor([2, 5])
tensor([7, 4])
tensor([3, 6])
Parameter containing:
tensor([[-2.8766,  1.2440, -1.1438, -1.1520,  0.9257, -0.7890, 10.2777, -3.8759],
        [ 0.4891,  1.0723,  0.0841,  0.4649,  0.6275,  1.0657,  0.5318,  0.3735]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.0232,  0.3770, 15.5917, -2.6625, -2.5693, -5.7677, -2.5443, -1.1544],
        [-1.4880,  0.4088, -1.2518,  0.4908,  1.0283,  5.1454, -0.1670, -1.2739]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.3694, -3.2803, -1.9750,  0.0781,  0.1592, -1.5416, -1.1836, 10.4852],
        [ 0.6615,  0.0855,  0.2617,  0.2553,  0.8408, -0.0235,  0.3034,  0.5459]],
       requires_grad=True)
Parameter containing:
tensor([[-1.6219, -1.1378, -1.6291, 10.2722, -0.1778, -0.6681, -0.7194,  0.2732],
        [-0.3493,  0.4239,  0.3106,  0.3509,  0.3124,  0.5204,  1.0037,  0.6163]],
       requires_grad=True)
acc  0 1.1806377172470093
acc  1 0.746360182762146
acc  2 0.8336579203605652
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([6, 2])
tensor([2, 6])
tensor([2, 7])
Parameter containing:
tensor([[11.0255, -2.3229, -3.2946,  0.1216,  0.0543, -0.7140, -1.0380,  1.0216],
        [-0.1150,  0.0548,  1.4360,  0.2613,  0.2660,  1.0402, -0.0683,  0.6537]],
       requires_grad=True)
Parameter containing:
tensor([[-3.6606, -0.4803,  0.7676, -2.3398, -0.0717,  1.3641, 12.0155, -2.6149],
        [-0.4099,  0.0519,  3.2065, -0.2388,  0.2605,  0.2080, -0.2957,  0.3928]],
       requires_grad=True)
Parameter containing:
tensor([[-1.5770e+00, -1.0464e+00,  1.1809e+01, -1.6391e+00, -4.3442e+00,
         -1.5074e-01, -7.0520e-01,  1.6307e+00],
        [ 1.2795e-01,  7.4449e-03,  4.1535e-01,  4.5204e-01,  2.5004e-01,
          4.5849e-01,  1.4836e+00,  3.0227e-01]], requires_grad=True)
Parameter containing:
tensor([[-2.3698e-01, -7.5783e-01,  1.1604e+01, -2.4783e+00, -9.6058e-01,
         -2.2886e+00,  5.1689e-01, -1.3263e+00],
        [ 5.7698e-01,  5.9310e-01,  9.5168e-01,  9.4219e-01,  4.8886e-01,
    

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 2])
tensor([4, 2])
tensor([4, 1])
tensor([3, 6])
Parameter containing:
tensor([[-4.3068e-01, -6.8891e-01, -1.9403e-01, -4.9567e-01, -7.9092e-01,
         -1.4191e+00,  1.8419e-03,  9.5413e+00],
        [ 1.6084e-01,  7.0374e-01,  9.7059e-01,  3.3439e-01,  8.8398e-01,
          7.5115e-01,  5.0672e-01,  4.2276e-01]], requires_grad=True)
Parameter containing:
tensor([[-1.6075e+00, -2.4470e+00, -8.4525e-01, -9.4309e-01,  9.9952e+00,
          1.6590e+00, -1.4099e+00,  1.1806e+00],
        [ 3.3570e-01,  3.1722e-01,  1.0784e+00,  9.2152e-01,  9.3404e-01,
          2.1389e-01,  3.7332e-01, -4.4325e-03]], requires_grad=True)
Parameter containing:
tensor([[-1.2618, -0.3292, -1.8424, -2.5689, 12.8126,  1.1159, -2.2573, -2.2871],
        [-0.6790,  1.5509, -0.1850, -0.2375,  0.5769,  0.9751,  0.9386,  0.5448]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0093e+00, -9.3160e-01, -1.1404e+00,  1.3059e+01, -3.6643e+00,
         -4.9920e-01, -2.0313e+00, -1.9553e+00],
   

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 7])
tensor([5, 7])
tensor([1, 6])
Parameter containing:
tensor([[-2.9391e+00, -1.4739e+00,  1.4060e+01, -8.0158e-01, -4.9977e+00,
         -3.6311e-01,  5.0562e-02,  8.0766e-01],
        [ 1.0220e+00,  1.2910e-02,  3.7962e-01,  6.5083e-01,  9.7626e-01,
         -3.7869e-02,  5.6153e-01,  9.4796e-01]], requires_grad=True)
Parameter containing:
tensor([[ 0.4422, -2.4852, 11.5938, -0.4249, -1.8968, -1.5447, -0.6799, -1.8129],
        [ 0.1946,  0.2688,  0.9503,  0.7661,  0.0346,  0.4182,  0.6197,  1.1114]],
       requires_grad=True)
Parameter containing:
tensor([[-0.9540, -0.7733, -3.0993,  5.7746, -1.0227,  6.1223,  0.8954, -2.7669],
        [ 0.4659,  0.9062, -0.1725, -0.0396, -0.2453,  0.0789,  0.8018,  1.7395]],
       requires_grad=True)
Parameter containing:
tensor([[-2.0314, 11.5021, -1.5562,  0.5351, -0.1553,  0.9975, -1.6917, -2.5870],
        [ 0.6157,  0.2943,  0.1728,  0.2217,  0.1174,  0.4398,  0.7553,  0.2386]],
       requires_grad=True)
acc  0 0.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 6])
tensor([6, 4])
tensor([6, 6])
tensor([7, 4])
Parameter containing:
tensor([[ 1.0314e+00,  1.1678e+01, -1.3279e+00, -2.6056e+00, -1.9805e+00,
         -2.2734e+00,  6.3619e-01,  1.1552e-02],
        [ 2.0837e-01,  5.3893e-01, -3.3982e-02,  2.3668e-01,  2.0952e-01,
          6.4635e-01,  7.8772e-01,  6.9415e-01]], requires_grad=True)
Parameter containing:
tensor([[-2.7657, -1.9640, -3.0259, -0.2972,  0.8366, -1.1505, 13.5590, -0.5094],
        [ 0.6435,  0.6859,  0.1326,  0.2128,  1.1638,  0.0163,  0.0153,  1.0320]],
       requires_grad=True)
Parameter containing:
tensor([[-2.5487, -1.6933, -1.3307, -0.6770,  1.6925, -0.1253,  9.7785, -1.7471],
        [ 0.8533,  0.7327,  0.1114,  0.4698,  0.7993,  0.1952,  1.0930,  0.7969]],
       requires_grad=True)
Parameter containing:
tensor([[-1.7706, -1.4111,  0.1752, -2.7811, -0.2835,  1.1108, -1.6901, 10.9985],
        [ 0.0486,  0.8930,  0.4431,  0.6365,  1.2365, -0.2682,  0.5187,  0.2855]],
       requires_grad=True)
acc  0 1.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 4])
tensor([0, 7])
tensor([6, 2])
tensor([3, 1])
Parameter containing:
tensor([[ 9.8356, -1.0866,  1.0867, -0.7187, -0.2611, -2.5062, -3.4387,  2.3783],
        [ 1.0310,  0.3254,  0.2552, -0.3267,  1.2653,  0.6416,  0.0183,  1.0442]],
       requires_grad=True)
Parameter containing:
tensor([[10.5822, -4.6555, -2.7299, -1.1991, -0.7049,  3.0731, -0.7294,  0.2585],
        [ 0.3833,  0.3993,  0.2879, -0.2149,  0.8521,  0.1482, -0.0743,  1.0909]],
       requires_grad=True)
Parameter containing:
tensor([[-1.1421e+00,  2.4111e+00, -1.3609e+00, -2.6519e-01, -1.1615e+00,
         -2.8223e+00,  1.0177e+01, -1.6040e+00],
        [-2.4512e+00,  1.3911e+00,  5.1218e+00,  2.9598e-01, -9.8574e-01,
          8.3742e-01, -4.9107e-03, -5.6196e-01]], requires_grad=True)
Parameter containing:
tensor([[ 0.8590, -1.6836,  0.5877,  8.0743,  0.5972, -2.5787, -1.5706,  0.0802],
        [ 0.3463,  1.4201,  0.6097, -0.2948,  0.5719,  0.2080,  0.1202,  1.1229]],
       requires_grad=True)
acc  0 0.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([7, 4])
tensor([2, 0])
tensor([1, 2])
Parameter containing:
tensor([[ 8.5658, -0.2185,  0.0443, -0.5463,  1.2170,  0.8943, -7.1031,  1.9044],
        [ 1.0188,  0.5415,  0.3606,  0.6013,  0.2442,  0.2847,  0.5902,  0.3735]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8404, -3.6189, -1.7628, -1.2920, -1.4623, -0.1336, -1.1618, 11.3490],
        [ 0.0498,  0.1787,  1.0051,  0.5156,  1.1197,  0.3070,  0.9350,  0.4440]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.3996, -2.4564, 11.2944, -0.8275,  0.2397, -1.5417,  0.0834, -3.6334],
        [ 1.4372,  0.3148,  0.6342,  0.2632, -0.0433,  0.7722, -0.1242, -0.0666]],
       requires_grad=True)
Parameter containing:
tensor([[-2.8279, 13.5847,  0.3526,  0.0888, -0.3094,  0.1896,  1.5927, -7.9324],
        [-0.2235, -0.1430,  3.6195, -0.0522, -0.5507, -0.8128,  1.8955,  0.8874]],
       requires_grad=True)
acc  0 0.5211672782897949
acc  1 1.252135992050171
acc  2 0.5324611067771912
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 7])
tensor([0, 7])
tensor([0, 6])
tensor([4, 7])
Parameter containing:
tensor([[-1.2811,  0.9341, 10.1998, -2.8725, -0.9224, -1.9447,  1.8896, -2.5286],
        [ 1.3368, -1.8461, -2.1516,  0.2351,  1.7168,  0.1305, -0.3036,  5.3859]],
       requires_grad=True)
Parameter containing:
tensor([[10.8244, -2.0987, -2.7830, -2.5812, -1.7153, -0.7075, -0.5873,  2.3848],
        [ 0.1383, -1.1572, -0.4450, -0.3993,  0.6164, -0.3136, -0.1341,  5.4922]],
       requires_grad=True)
Parameter containing:
tensor([[ 8.9353, -0.6266, -0.9063, -0.9963, -1.3584, -0.0801, -0.7867, -0.5384],
        [ 0.6728,  0.5850,  0.2659,  0.5142,  0.0192,  0.6147,  0.9999,  0.8518]],
       requires_grad=True)
Parameter containing:
tensor([[-3.0171,  1.6226,  0.2190, -0.9646,  9.7429,  0.6812, -1.7144, -1.3098],
        [ 0.7372,  0.4385,  0.5937,  0.0457, -0.2109, -0.3884, -0.5160,  4.6941]],
       requires_grad=True)
acc  0 0.26110634207725525
acc  1 0.3048781156539917
acc  2 0.7574127316474915
acc  

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 5])
tensor([6, 0])
tensor([4, 6])
tensor([6, 5])
Parameter containing:
tensor([[ 1.0363e+01, -4.4917e+00,  5.3406e-01, -8.0294e-01,  1.5492e-03,
         -1.2235e+00,  1.8024e-03, -1.0546e+00],
        [-2.7324e-01,  1.2774e+00,  5.9968e-02,  4.7239e-01, -7.2063e-01,
          1.4410e+00,  7.8142e-01,  1.3847e+00]], requires_grad=True)
Parameter containing:
tensor([[-0.7836, -8.2090,  1.3767, -0.7575, -3.3012,  1.9453, 14.3987, -0.1640],
        [ 4.9624,  0.3118,  0.9470, -0.3406, -0.6593,  0.0236, -0.4994, -0.7782]],
       requires_grad=True)
Parameter containing:
tensor([[-2.4029, -0.7523,  0.1527, -1.8186, 10.0194, -0.7184, -1.0981,  0.6503],
        [ 0.1572,  0.9233,  0.1421, -0.5050, -0.5198,  1.0535,  1.0551,  0.2750]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1738, -2.6556, -0.6807, -2.9619, -1.6304, -0.9080, 13.3342, -0.0900],
        [ 0.0798,  1.2091,  1.6623, -2.6161, -2.3112,  6.5340,  0.2260, -1.1371]],
       requires_grad=True)
acc  0 0.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 3])
tensor([3, 4])
tensor([0, 4])
tensor([7, 7])
Parameter containing:
tensor([[-2.7496, -1.4489, -5.8061, -0.2310,  1.0837, -1.1674, 15.0009,  0.4387],
        [ 0.8222,  0.5763,  0.2033,  1.1654,  0.6999,  0.1127,  0.4177,  0.5335]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4018, -2.6207, -1.8532,  9.8605,  0.2380, -0.2560, -0.7890, -1.2985],
        [ 0.1840, -0.2126,  0.4624, -0.1818,  1.5526,  0.4313,  0.4215,  0.1953]],
       requires_grad=True)
Parameter containing:
tensor([[11.1885, -0.0188,  0.0260, -0.4095, -2.7190, -1.7721, -2.1279, -0.7410],
        [ 1.0063, -0.1453,  0.4679,  0.2563,  1.1528,  0.1273,  1.0905,  0.2457]],
       requires_grad=True)
Parameter containing:
tensor([[-6.3925e-01,  3.1089e-01,  1.4502e+00, -2.2537e+00, -2.1873e+00,
         -1.3419e+00, -1.8551e+00,  1.0220e+01],
        [ 4.9237e-01,  2.1559e-01,  5.4607e-01,  6.5478e-01, -3.8914e-01,
          3.9232e-01,  9.0378e-04,  8.2544e-01]], requires_grad=True)
acc  0 0.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 1])
tensor([4, 7])
tensor([1, 0])
tensor([0, 1])
Parameter containing:
tensor([[ 0.8196, -1.5378, -2.0365,  0.0433, -1.4232, -1.5676, 11.4848, -1.5119],
        [ 0.5362,  1.6306,  1.0948,  0.1645,  0.2557, -0.3713,  1.3761, -0.7070]],
       requires_grad=True)
Parameter containing:
tensor([[-1.2612,  0.7653, -1.7499,  1.1703, 11.5981,  0.6716, -3.3347, -3.2835],
        [ 0.4290,  0.7219,  0.1211, -0.0492,  0.6288,  0.0428,  0.1842,  0.7867]],
       requires_grad=True)
Parameter containing:
tensor([[-2.0845, 10.0749, -0.7338, -0.9913, -1.6352, -0.2901, -0.7865,  0.6639],
        [ 1.4981,  1.2533,  0.3771,  0.8232,  0.8809,  0.0705,  1.3597, -0.2719]],
       requires_grad=True)
Parameter containing:
tensor([[11.6024, -1.9473, -0.7191, -3.4519, -1.4556, -2.3969,  0.0226,  2.0182],
        [ 0.6921,  0.8319,  0.8190, -0.1846,  0.2523,  0.1244,  0.3609, -0.0652]],
       requires_grad=True)
acc  0 0.6464376449584961
acc  1 1.7932721376419067
acc  2 0.5077188014984131
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([1, 6])
tensor([5, 5])
tensor([0, 1])
Parameter containing:
tensor([[10.6168, -0.5528, -0.6010, -1.0621, -1.2049, -1.7602,  0.4434, -0.7555],
        [ 0.0508,  0.5931,  1.0343,  0.8964,  0.5063,  0.6903, -0.0740,  0.1210]],
       requires_grad=True)
Parameter containing:
tensor([[-2.2080, 13.1822, -1.6983, -0.7355, -1.7869,  0.2776, -1.8081, -0.9574],
        [ 0.5310,  0.5460,  0.1802, -0.3401,  0.8060,  0.4814,  2.3477,  0.5110]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1738, -0.5638, -0.4170, -2.0826, -1.5136, 11.8069, -4.6158,  2.3615],
        [ 0.1926,  0.7038,  0.1911,  0.0407,  0.4767,  1.0140,  0.0431,  0.6654]],
       requires_grad=True)
Parameter containing:
tensor([[ 9.0580, -3.7388,  0.8577, -0.5009, -1.9497,  0.2266, -1.3708,  0.3624],
        [ 0.6263,  0.8620,  0.7798,  0.3756,  0.6927,  0.0956,  0.7488,  0.5942]],
       requires_grad=True)
acc  0 0.7519489526748657
acc  1 0.4081135094165802
acc  2 0.5345773696899414
acc  3

In [12]:
# no reg
for D in [2,3,4]:
    for HEAD_NUM in [D, 8]:
        dartslike('darts', 0.0, 0.0)


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM )) # taking half of epoch num for NAS


  0%|          | 0/1000 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM))


  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 1])
Parameter containing:
tensor([[1.0506, 0.7466],
        [1.2429, 0.0993]], requires_grad=True)
Parameter containing:
tensor([[ 1.2469, -0.2555],
        [-0.1186,  1.1688]], requires_grad=True)
acc  0 1.224833607673645
acc  1 1.0574719905853271
1.141152799129486 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 1])
Parameter containing:
tensor([[0.3264, 0.3240],
        [1.2226, 0.0446]], requires_grad=True)
Parameter containing:
tensor([[ 0.4209, -0.1870],
        [-0.6924,  1.3493]], requires_grad=True)
acc  0 0.910137414932251
acc  1 1.6114027500152588
1.2607700824737549 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([0, 1])
Parameter containing:
tensor([[0.2109, 1.2393],
        [0.5838, 0.3783]], requires_grad=True)
Parameter containing:
tensor([[ 1.1991,  0.1586],
        [-1.1880,  1.6874]], requires_grad=True)
acc  0 0.3214581310749054
acc  1 0.6801073551177979
0.5007827430963516 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[0.5219, 0.6264],
        [0.9170, 0.6653]], requires_grad=True)
Parameter containing:
tensor([[-0.0490,  0.6550],
        [ 1.9880, -1.1453]], requires_grad=True)
acc  0 1.4725072383880615
acc  1 1.7453312873840332
1.6089192628860474 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([0, 0])
Parameter containing:
tensor([[ 0.1019,  0.6192],
        [-0.1051,  0.7822]], requires_grad=True)
Parameter containing:
tensor([[ 2.6601, -1.0661],
        [ 2.1679, -1.6374]], requires_grad=True)
acc  0 1.4470078945159912
acc  1 0.8840746879577637
1.1655412912368774 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 1])
Parameter containing:
tensor([[ 1.4194, -1.3165],
        [ 3.8012, -2.4579]], requires_grad=True)
Parameter containing:
tensor([[-0.3983,  1.7527],
        [ 0.2168,  1.0072]], requires_grad=True)
acc  0 0.938224196434021
acc  1 1.17597496509552
1.0570995807647705 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([1, 0])
Parameter containing:
tensor([[ 0.3934,  0.2640],
        [-0.4605,  0.9199]], requires_grad=True)
Parameter containing:
tensor([[ 0.2838,  1.2475],
        [ 1.9533, -0.7131]], requires_grad=True)
acc  0 0.4101826548576355
acc  1 0.81427401304245
0.6122283339500427 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 1])
Parameter containing:
tensor([[0.0707, 0.2529],
        [1.1414, 0.0332]], requires_grad=True)
Parameter containing:
tensor([[-2.3969,  3.2579],
        [-2.6405,  3.1242]], requires_grad=True)
acc  0 0.9339236617088318
acc  1 0.3799470067024231
0.6569353342056274 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 1])
Parameter containing:
tensor([[ 0.8930, -0.2063],
        [ 1.5189, -1.0893]], requires_grad=True)
Parameter containing:
tensor([[ 1.4683, -0.9228],
        [-0.5754,  1.5235]], requires_grad=True)
acc  0 1.0325909852981567
acc  1 0.8631703853607178
0.9478806853294373 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[-0.6242,  2.3518],
        [-0.6336,  1.4355]], requires_grad=True)
Parameter containing:
tensor([[ 0.3246,  0.8982],
        [ 2.5131, -1.4118]], requires_grad=True)
acc  0 1.0765717029571533
acc  1 0.6356830596923828
0.8561273813247681 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 0])
Parameter containing:
tensor([[ 1.2449,  0.2552],
        [-0.0084,  1.1354]], requires_grad=True)
Parameter containing:
tensor([[ 1.1192,  0.5333],
        [ 2.0747, -0.8304]], requires_grad=True)
acc  0 0.6999037861824036
acc  1 1.1524709463119507
0.9261873662471771 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 0])
Parameter containing:
tensor([[ 0.8290,  0.6779],
        [-1.1287,  2.4310]], requires_grad=True)
Parameter containing:
tensor([[ 1.3513, -0.2028],
        [ 0.3767,  0.0088]], requires_grad=True)
acc  0 0.6901290416717529
acc  1 1.0414007902145386
0.8657649159431458 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[0.5667, 0.6471],
        [0.2807, 1.3529]], requires_grad=True)
Parameter containing:
tensor([[ 0.3298,  0.8246],
        [ 1.1904, -0.0223]], requires_grad=True)
acc  0 1.8261343240737915
acc  1 0.654711902141571
1.2404231131076813 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([0, 1])
Parameter containing:
tensor([[0.4821, 1.2914],
        [1.2516, 0.0598]], requires_grad=True)
Parameter containing:
tensor([[ 1.2602,  0.0845],
        [-1.7654,  2.6253]], requires_grad=True)
acc  0 0.8132089376449585
acc  1 0.655907154083252
0.7345580458641052 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([1, 0])
Parameter containing:
tensor([[ 1.8151, -0.3013],
        [ 0.4224,  0.8766]], requires_grad=True)
Parameter containing:
tensor([[-0.6759,  1.8716],
        [ 0.4688,  0.2805]], requires_grad=True)
acc  0 0.8009930849075317
acc  1 0.6335859894752502
0.717289537191391 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 1])
Parameter containing:
tensor([[-2.6024,  3.6209],
        [ 3.5897, -3.1747]], requires_grad=True)
Parameter containing:
tensor([[ 0.1918,  0.4628],
        [-0.5797,  1.9657]], requires_grad=True)
acc  0 0.5676606297492981
acc  1 0.528969407081604
0.548315018415451 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([0, 1])
Parameter containing:
tensor([[-0.1098,  0.9327],
        [-0.7138,  1.4115]], requires_grad=True)
Parameter containing:
tensor([[ 1.2580,  0.1335],
        [-0.1342,  0.5499]], requires_grad=True)
acc  0 2.2512900829315186
acc  1 1.6684573888778687
1.9598737359046936 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 1])
Parameter containing:
tensor([[ 1.0594,  0.4755],
        [ 0.9292, -0.3346]], requires_grad=True)
Parameter containing:
tensor([[ 0.1320,  1.4637],
        [-0.7340,  2.1292]], requires_grad=True)
acc  0 0.5186915397644043
acc  1 1.7910584211349487
1.1548749804496765 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[-0.5444,  0.8787],
        [-1.2791,  2.6363]], requires_grad=True)
Parameter containing:
tensor([[ 0.0054,  1.4272],
        [-3.5453,  4.6920]], requires_grad=True)
acc  0 1.4132064580917358
acc  1 1.7988104820251465
1.6060084700584412 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[0.2094, 0.6372],
        [0.0452, 1.0909]], requires_grad=True)
Parameter containing:
tensor([[0.3324, 1.1006],
        [1.0585, 0.6495]], requires_grad=True)
acc  0 1.1164650917053223
acc  1 1.3665415048599243
1.2415032982826233 19
[Parameter containing:
tensor([[0.2094, 0.6372],
        [0.0452, 1.0909]], requires_grad=True), Parameter containing:
tensor([[0.3324, 1.1006],
        [1.0585, 0.6495]], requires_grad=True)]
Parameter containing:
tensor([34.1728], requires_grad=True)
Parameter containing:
tensor([60.4540], requires_grad=True)


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([3, 3])
Parameter containing:
tensor([[0.9982, 0.7727, 0.4641, 1.1442, 0.4592, 0.4492, 0.2292, 0.6644],
        [1.2683, 0.2828, 0.5890, 0.5484, 1.2443, 0.4131, 0.7105, 0.1534]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8067,  0.4449,  0.3552,  0.9134,  0.0627,  0.4766,  0.6793,  0.4542],
        [ 1.3844,  1.4376, -0.7971,  1.7137,  0.7814, -0.3076, -0.6385, -1.0607]],
       requires_grad=True)
acc  0 1.448608160018921
acc  1 0.3471243977546692
0.897866278886795 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([7, 4])
Parameter containing:
tensor([[ 0.5135,  0.3333,  0.7434,  0.4729,  0.1162,  0.0751,  0.6768, -0.1228],
        [ 0.4799,  0.5740,  1.0115,  0.1170,  0.7575,  0.7390,  0.6299,  0.4331]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0751,  0.3638,  1.0612,  0.1081, -0.2157,  0.3324,  0.2330,  1.7883],
        [ 0.5127,  0.7810,  0.7851,  0.3574,  0.8937,  0.2488,  0.5572, -0.1368]],
       requires_grad=True)
acc  0 0.43714553117752075
acc  1 1.270887851715088
0.8540166914463043 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 1])
tensor([4, 7])
Parameter containing:
tensor([[ 0.5171,  0.2707,  0.8426,  0.0693,  1.4373,  0.5898,  0.0139,  0.5287],
        [ 0.0941,  1.6599,  1.0036,  1.1162,  1.5330,  0.2135,  0.1418, -0.2313]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7552, -0.0564,  0.9711,  0.4401,  1.3008,  0.8114,  0.7580,  0.2499],
        [ 0.4305,  0.2237,  0.5418,  0.6243,  0.2304,  0.4454, -0.5472,  1.8437]],
       requires_grad=True)
acc  0 0.7885763049125671
acc  1 1.3971202373504639
1.0928482711315155 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 7])
tensor([1, 7])
Parameter containing:
tensor([[ 0.2183,  1.4206,  0.3187,  0.9008,  0.1354,  0.7921,  0.3545,  0.0387],
        [ 0.8012,  0.4279, -0.0993,  0.4559,  0.3478,  0.0132,  0.1534,  0.9788]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6131,  1.0476,  0.6824,  0.8321,  0.1710,  0.2126,  0.4341,  0.6435],
        [-0.0509,  0.8851,  0.1568,  0.9169,  0.7159, -0.0371, -0.0610,  1.6607]],
       requires_grad=True)
acc  0 1.3870577812194824
acc  1 0.9898136854171753
1.1884357333183289 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 4])
tensor([1, 6])
Parameter containing:
tensor([[ 0.7081, -0.1369,  0.5821,  0.1653,  0.9425,  0.6899,  0.0596,  0.5121],
        [ 0.1998,  0.7797,  0.5338,  0.1802,  1.1385,  0.3117,  1.0861,  0.3538]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1312,  2.3184, -0.1371, -0.1577, -0.2347,  1.3447,  0.5867,  0.4208],
        [ 1.7027,  0.6491, -0.3378,  0.7159, -0.3840, -0.3060,  4.3196, -0.6376]],
       requires_grad=True)
acc  0 0.8830745220184326
acc  1 0.8036795258522034
0.843377023935318 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 0])
tensor([7, 2])
Parameter containing:
tensor([[ 0.5363, -0.4484, -0.1762, -0.9295, -0.4888, -0.2572,  2.0002,  3.7882],
        [ 4.6693,  1.8622, -0.5174, -1.6905,  0.0560, -1.0650,  0.6326, -0.6504]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6844, -0.0834,  0.2905,  0.3507,  0.2439,  0.6828, -0.1108,  0.8321],
        [ 0.6163,  1.1135,  1.2691, -1.1311,  0.1239,  0.0075,  0.8344,  0.8317]],
       requires_grad=True)
acc  0 0.390470027923584
acc  1 0.6096987128257751
0.5000843703746796 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 0])
tensor([1, 3])
Parameter containing:
tensor([[-0.1225, -0.0562,  0.4195,  0.3159,  1.2151,  0.8469,  0.5216,  0.7479],
        [ 1.0270,  0.4869,  0.2165,  0.1100,  0.0284,  0.6419,  0.8994,  0.4308]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4700,  1.9496, -0.0112,  0.4269,  0.0468, -0.3202,  0.1435,  0.4174],
        [ 0.5510,  0.2944,  0.1972,  1.3032,  1.2767,  1.2225,  0.1932,  0.1643]],
       requires_grad=True)
acc  0 0.3328164219856262
acc  1 1.0235564708709717
0.678186446428299 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([6, 6])
Parameter containing:
tensor([[-0.0912,  0.3825,  0.2687,  1.0923,  0.1915,  0.8375,  0.2197, -0.0580],
        [ 1.0321,  0.8549,  0.0407,  0.3414,  0.0108,  0.1517,  0.7890,  0.8990]],
       requires_grad=True)
Parameter containing:
tensor([[-1.0303,  0.1906,  1.0091, -1.4829, -0.0282,  2.1204,  4.2830, -0.5790],
        [ 0.5870, -0.4990, -1.9684,  0.4423, -1.6810,  2.4758,  5.5206, -1.1723]],
       requires_grad=True)
acc  0 0.2536277770996094
acc  1 0.8967685699462891
0.5751981735229492 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([5, 6])
Parameter containing:
tensor([[ 0.8142,  0.2053,  0.1731,  0.0277,  0.4122,  0.0600,  0.4075,  0.5102],
        [ 1.0107,  0.7564,  0.0485,  0.6375,  0.8933,  0.7360, -0.1003,  0.7266]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2319, -0.2725,  0.3072,  0.3916,  0.4511,  1.8604,  0.4287, -0.1046],
        [-0.3977,  1.1612,  0.1481,  0.4906, -0.9704,  1.2765,  1.5619, -0.3775]],
       requires_grad=True)
acc  0 1.4556933641433716
acc  1 0.6853159070014954
1.0705046355724335 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([2, 0])
Parameter containing:
tensor([[ 0.8284,  0.4835,  0.9564,  1.8446,  0.1279,  0.2968,  0.4810, -0.1650],
        [ 2.0299,  0.3472, -1.1806, -0.6631,  0.5096,  0.9483, -0.1698,  1.7074]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1432,  0.1103,  1.6012,  1.2800,  0.2015,  1.1982,  0.4473,  0.2847],
        [ 0.8950,  0.8298,  0.4542, -0.0135, -0.3017,  0.8613,  0.5500, -0.0996]],
       requires_grad=True)
acc  0 0.9900749921798706
acc  1 1.4120930433273315
1.201084017753601 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 2])
tensor([4, 0])
Parameter containing:
tensor([[ 0.4472,  1.0284,  0.5529,  0.4000,  1.0425,  0.9337,  0.1285,  0.9905],
        [-0.0945,  1.1443,  1.3216,  0.5467,  0.9030,  0.3219,  0.1994,  0.3917]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3988,  0.7586,  0.1616,  0.5252,  1.5742,  1.2997, -0.0294,  0.8933],
        [ 1.9594, -0.3884,  0.9348, -0.4244,  0.1630,  1.3054,  0.6320, -0.0122]],
       requires_grad=True)
acc  0 1.3989133834838867
acc  1 1.7813761234283447
1.5901447534561157 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 3])
tensor([5, 2])
Parameter containing:
tensor([[ 0.5060,  0.8216,  0.8251,  0.8668,  0.6507,  0.4070,  0.1289,  0.1371],
        [ 0.2676,  0.0729,  0.5914,  1.5584,  0.3541, -0.1153,  0.6090,  1.1751]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6507, -0.0290, -0.0362,  0.2137,  0.3047,  1.2123,  0.1082,  0.7671],
        [-0.2071,  0.9071,  1.5571,  0.0132,  0.2561,  0.6083,  0.4583,  0.7706]],
       requires_grad=True)
acc  0 0.6518075466156006
acc  1 0.9312534928321838
0.7915305197238922 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 5])
tensor([7, 7])
Parameter containing:
tensor([[ 0.5500,  0.5877,  1.2144,  0.6038,  0.2075,  0.7212,  0.6063,  0.6790],
        [-0.0642,  0.7213,  0.0914,  0.0889,  0.1879,  0.9458,  0.7278,  0.5890]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9346,  0.4201,  0.6381, -0.0288,  0.2108,  0.8869,  0.6792,  0.9420],
        [ 0.2009,  0.7970,  0.7099, -0.1030,  0.8925,  0.5205, -0.0518,  0.9361]],
       requires_grad=True)
acc  0 0.51042640209198
acc  1 0.5445628762245178
0.5274946391582489 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([7, 7])
Parameter containing:
tensor([[ 1.2940,  1.3705,  0.3210,  0.7749,  0.2977,  0.6731, -0.0452,  0.6034],
        [ 2.0869,  0.1751, -0.3018, -0.0790,  1.2147,  0.3731,  0.2716,  0.5137]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6094, -0.3593,  0.8144, -0.0232,  0.3632,  0.6282,  0.6263,  1.2362],
        [-0.0235, -0.5592,  0.5384,  0.0547,  0.0221, -0.7647, -0.0744,  3.6793]],
       requires_grad=True)
acc  0 1.108759880065918
acc  1 0.6654680967330933
0.8871139883995056 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 3])
tensor([3, 2])
Parameter containing:
tensor([[ 1.0964,  0.6024,  0.2734,  1.0793,  0.5704,  0.5579,  0.3331,  0.2451],
        [ 0.0883,  0.8646,  0.4383,  1.2791,  0.6304, -0.0645,  0.4225,  0.3562]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7916,  0.4187,  0.1841,  0.9259,  0.0832,  0.3682, -0.1473,  0.1336],
        [-0.4306, -0.1757,  1.4314, -0.0167,  1.1307,  1.0079,  1.2326,  0.3751]],
       requires_grad=True)
acc  0 1.616738200187683
acc  1 1.2053345441818237
1.4110363721847534 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([2, 0])
Parameter containing:
tensor([[ 4.5092, -0.6837,  1.1598, -1.0166, -0.4567, -0.5878,  1.0404, -0.4904],
        [ 1.2425,  4.1350, -0.3778, -0.7348,  1.8575,  0.0564, -1.3922, -0.2829]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5453,  0.0308,  0.7750,  0.5474,  0.4181,  0.1588, -0.0217,  0.2825],
        [ 1.1113, -0.1722, -0.0223,  0.5971,  0.6898,  0.4464,  0.6138,  0.5336]],
       requires_grad=True)
acc  0 0.4819974899291992
acc  1 0.6727848649024963
0.5773911774158478 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 5])
Parameter containing:
tensor([[ 2.0407,  0.5480,  0.8385, -0.4326,  1.2600, -0.4616,  0.3323, -0.7977],
        [ 1.8738,  0.1077,  1.0390, -0.1955, -0.1611,  0.8206, -0.1047,  1.0431]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.2534,  0.2694,  0.0481,  0.5683,  0.9172,  0.6146,  0.5939,  0.2404],
        [ 0.4823,  0.8716,  0.5388, -0.6723,  0.2463,  2.7397,  0.1949, -0.4342]],
       requires_grad=True)
acc  0 1.1107027530670166
acc  1 1.0765899419784546
1.0936463475227356 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([6, 5])
Parameter containing:
tensor([[0.6277, 0.8959, 0.3641, 0.3400, 0.7310, 0.6445, 0.7739, 0.7432],
        [0.4287, 0.6201, 0.4149, 1.2548, 0.5026, 0.3938, 0.3252, 0.5910]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9893,  0.5527, -0.5725,  0.4354,  0.3184,  0.7961,  1.3003, -0.1371],
        [ 0.4706, -0.7326,  0.3537, -0.0943,  0.2336,  1.9700,  0.6135,  0.0382]],
       requires_grad=True)
acc  0 0.9922395944595337
acc  1 1.2818471193313599
1.1370433568954468 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([7, 7])
Parameter containing:
tensor([[-0.0989,  0.8887,  1.5229,  0.3781,  0.8639,  0.8688,  0.0038, -0.1565],
        [ 0.2386,  0.1656, -0.1276,  1.4797, -0.0733,  0.1518,  1.1718,  0.9731]],
       requires_grad=True)
Parameter containing:
tensor([[-1.1359, -0.2621, -0.0470,  2.2652, -0.8085,  0.8406,  0.6672,  3.0567],
        [ 0.1632, -1.1987, -1.2113,  0.3699, -0.4650, -0.1164,  2.3920,  2.9316]],
       requires_grad=True)
acc  0 1.4843803644180298
acc  1 1.2185566425323486
1.3514685034751892 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 1])
tensor([3, 2])
Parameter containing:
tensor([[-0.2431,  0.8574,  0.1237,  0.7251,  0.6813,  1.4342,  0.5443,  1.0008],
        [ 0.3041,  1.3843,  1.3602,  0.2106,  0.2510,  0.0121,  0.1662,  0.1295]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3928,  1.2911, -0.0390,  1.6090, -0.2074,  0.8578, -0.1406,  0.5019],
        [ 1.2132,  0.5548,  1.8175,  0.2011,  0.8640, -0.2759,  0.3733,  0.3149]],
       requires_grad=True)
acc  0 0.9353120923042297
acc  1 0.9031091332435608
0.9192106127738953 20
[Parameter containing:
tensor([[-0.2431,  0.8574,  0.1237,  0.7251,  0.6813,  1.4342,  0.5443,  1.0008],
        [ 0.3041,  1.3843,  1.3602,  0.2106,  0.2510,  0.0121,  0.1662,  0.1295]],
       requires_grad=True), Parameter containing:
tensor([[ 0.3928,  1.2911, -0.0390,  1.6090, -0.2074,  0.8578, -0.1406,  0.5019],
        [ 1.2132,  0.5548,  1.8175,  0.2011,  0.8640, -0.2759,  0.3733,  0.3149]],
       requires_grad=True)]
Parameter containing:
tensor([116.679

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([2, 1])
tensor([0, 1])
Parameter containing:
tensor([[0.5017, 0.5169, 1.1616],
        [0.4545, 0.9783, 0.5179]], requires_grad=True)
Parameter containing:
tensor([[0.1278, 0.5885, 1.2747],
        [0.1764, 0.8342, 0.6508]], requires_grad=True)
Parameter containing:
tensor([[1.1700, 0.7836, 0.2246],
        [0.3482, 0.8732, 0.6673]], requires_grad=True)
acc  0 0.7171001434326172
acc  1 0.8471923470497131
acc  2 0.608246922492981
0.7241798043251038 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([1, 2])
tensor([1, 1])
Parameter containing:
tensor([[ 0.7957, -0.1327,  0.9085],
        [ 0.2511, -0.5029,  0.8319]], requires_grad=True)
Parameter containing:
tensor([[-1.4443,  4.0488, -1.2000],
        [-1.3469, -1.1319,  4.1771]], requires_grad=True)
Parameter containing:
tensor([[-0.2623,  1.7207,  0.1317],
        [-0.6358,  2.2719, -0.2266]], requires_grad=True)
acc  0 0.6686499118804932
acc  1 0.8957465887069702
acc  2 0.7853823900222778
0.7832596302032471 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([0, 2])
Parameter containing:
tensor([[ 0.2038,  1.0297,  1.0445],
        [ 1.4300,  0.4333, -0.3712]], requires_grad=True)
Parameter containing:
tensor([[0.2207, 0.1993, 1.0631],
        [0.9965, 0.9082, 0.7162]], requires_grad=True)
Parameter containing:
tensor([[ 2.0852,  0.4700, -0.9642],
        [-0.4061, -0.7785,  2.7667]], requires_grad=True)
acc  0 0.7490819692611694
acc  1 1.1833902597427368
acc  2 0.4712948799133301
0.8012557029724121 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([0, 0])
tensor([1, 0])
Parameter containing:
tensor([[ 0.0740,  1.2251,  0.5326],
        [ 0.8796, -0.0463,  0.6714]], requires_grad=True)
Parameter containing:
tensor([[ 1.4690, -1.1081,  1.2661],
        [ 1.1252, -0.8832,  0.0485]], requires_grad=True)
Parameter containing:
tensor([[ 0.0959,  0.8843,  0.4898],
        [ 1.5391, -0.2607,  0.5918]], requires_grad=True)
acc  0 1.135402798652649
acc  1 0.6121475100517273
acc  2 0.5691223740577698
0.772224227587382 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([2, 2])
tensor([1, 1])
Parameter containing:
tensor([[ 0.4987,  0.3367,  0.4031],
        [ 1.4000, -0.1893,  0.5430]], requires_grad=True)
Parameter containing:
tensor([[ 0.6925, -0.9605,  0.9462],
        [-0.9436,  0.0712,  2.1819]], requires_grad=True)
Parameter containing:
tensor([[ 0.3001,  1.9073,  0.0391],
        [-0.2781,  2.4529,  0.1773]], requires_grad=True)
acc  0 1.1591747999191284
acc  1 1.8748688697814941
acc  2 0.9621943831443787
1.3320793509483337 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 2])
tensor([1, 0])
Parameter containing:
tensor([[-0.4947, -0.2235,  1.2888],
        [ 0.1212, -1.2576,  3.3666]], requires_grad=True)
Parameter containing:
tensor([[-0.0068, -0.4496,  1.8272],
        [ 0.4925, -1.2383,  2.7674]], requires_grad=True)
Parameter containing:
tensor([[ 0.1693,  0.7651, -0.2672],
        [ 1.5571,  0.5643, -0.8815]], requires_grad=True)
acc  0 1.0264949798583984
acc  1 0.5190938711166382
acc  2 0.9189780950546265
0.8215223153432211 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([2, 0])
tensor([1, 2])
Parameter containing:
tensor([[ 0.5813,  0.2571,  0.0617],
        [-0.1540,  1.2136,  0.6883]], requires_grad=True)
Parameter containing:
tensor([[ 0.4350,  0.5760,  0.9900],
        [ 1.4961, -0.3863,  0.0281]], requires_grad=True)
Parameter containing:
tensor([[0.3362, 0.7045, 0.6578],
        [0.3221, 0.4547, 0.5161]], requires_grad=True)
acc  0 0.8771125078201294
acc  1 0.5099698901176453
acc  2 0.4309404194355011
0.6060076057910919 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[0.1829, 0.1148, 0.2738],
        [0.7598, 0.3872, 0.6407]], requires_grad=True)
Parameter containing:
tensor([[ 0.1562,  0.2896,  0.9568],
        [ 0.8291, -0.0245,  0.2347]], requires_grad=True)
Parameter containing:
tensor([[0.0285, 0.3607, 0.8760],
        [0.8612, 0.0915, 0.4512]], requires_grad=True)
acc  0 0.655053436756134
acc  1 1.0648201704025269
acc  2 1.2068630456924438
0.9755788842837015 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[ 1.0299,  0.0761, -0.0301],
        [ 0.1365, -0.3906,  0.8399]], requires_grad=True)
Parameter containing:
tensor([[-0.1409,  0.9886,  0.6931],
        [ 0.4190,  0.8156,  0.2565]], requires_grad=True)
Parameter containing:
tensor([[0.7102, 0.7247, 0.6672],
        [1.0121, 0.0502, 0.4085]], requires_grad=True)
acc  0 1.6107456684112549
acc  1 0.9910134077072144
acc  2 0.5491377711296082
1.0502989490826924 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([2, 2])
tensor([1, 2])
Parameter containing:
tensor([[ 1.4457,  0.5294,  0.1988],
        [-1.5851,  1.1286,  2.0349]], requires_grad=True)
Parameter containing:
tensor([[ 0.7221, -0.0958,  1.2334],
        [ 0.3457,  0.1297,  0.4013]], requires_grad=True)
Parameter containing:
tensor([[-1.1642,  3.1656, -0.8118],
        [-0.5140,  0.8081,  0.8114]], requires_grad=True)
acc  0 1.519741177558899
acc  1 1.061795711517334
acc  2 0.9413827061653137
1.1743065317471821 12


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([1, 0])
tensor([2, 1])
Parameter containing:
tensor([[ 1.6064, -0.1726,  0.7524],
        [-0.2440,  0.2057,  2.1317]], requires_grad=True)
Parameter containing:
tensor([[-0.4802,  2.1344, -0.2492],
        [ 1.2908,  0.1205,  0.1309]], requires_grad=True)
Parameter containing:
tensor([[ 1.2381, -0.1821,  1.6639],
        [-0.0655,  2.0039, -0.3054]], requires_grad=True)
acc  0 1.1489789485931396
acc  1 0.40739408135414124
acc  2 0.5142348408699036
0.6902026236057281 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([1, 2])
tensor([2, 1])
Parameter containing:
tensor([[0.3164, 0.8460, 1.0029],
        [0.9193, 0.6850, 0.1882]], requires_grad=True)
Parameter containing:
tensor([[-0.3309,  3.3753, -1.6809],
        [ 0.0233, -1.5375,  3.0155]], requires_grad=True)
Parameter containing:
tensor([[0.6928, 0.1562, 0.8003],
        [0.3019, 0.5919, 0.2835]], requires_grad=True)
acc  0 0.8007076978683472
acc  1 0.39973849058151245
acc  2 1.08837890625
0.7629416982332865 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([0, 0])
tensor([1, 1])
Parameter containing:
tensor([[0.4006, 0.7832, 1.0192],
        [0.6826, 0.4181, 0.6979]], requires_grad=True)
Parameter containing:
tensor([[ 0.9362, -0.1053,  0.5402],
        [ 0.3664,  0.1873,  0.2175]], requires_grad=True)
Parameter containing:
tensor([[0.3462, 0.8844, 0.5396],
        [0.0487, 1.3742, 0.5245]], requires_grad=True)
acc  0 0.6563237309455872
acc  1 0.4625089466571808
acc  2 1.3943015336990356
0.8377114037672678 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([2, 2])
tensor([0, 1])
Parameter containing:
tensor([[ 2.0665,  1.0386, -0.5877],
        [ 0.8378, -0.3041,  1.3784]], requires_grad=True)
Parameter containing:
tensor([[-0.1487,  0.5431,  1.4331],
        [ 0.1491, -0.0252,  0.2239]], requires_grad=True)
Parameter containing:
tensor([[ 1.4611,  1.1573, -0.4799],
        [-0.8347,  2.3151,  0.3693]], requires_grad=True)
acc  0 0.36928680539131165
acc  1 0.4216200113296509
acc  2 1.6915874481201172
0.8274980882803599 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([0, 2])
tensor([1, 1])
Parameter containing:
tensor([[ 0.3130, -0.0853,  1.6397],
        [ 0.4769,  0.9543,  0.7100]], requires_grad=True)
Parameter containing:
tensor([[ 0.6405,  0.6019,  0.2453],
        [-0.2505,  0.6995,  1.0754]], requires_grad=True)
Parameter containing:
tensor([[-0.0252,  1.0292,  0.1225],
        [-1.1039,  2.3458,  0.4874]], requires_grad=True)
acc  0 1.443132758140564
acc  1 0.9669588804244995
acc  2 1.8360216617584229
1.4153711001078289 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([2, 0])
tensor([1, 0])
Parameter containing:
tensor([[-0.5409, -1.7241,  3.2855],
        [-0.5557,  3.9021, -2.2790]], requires_grad=True)
Parameter containing:
tensor([[ 0.4896,  0.1039,  1.6772],
        [ 1.3030, -0.4280,  0.9612]], requires_grad=True)
Parameter containing:
tensor([[ 0.6792,  0.6933, -0.0969],
        [ 0.9497, -0.1030,  0.3768]], requires_grad=True)
acc  0 0.5534238815307617
acc  1 0.9533824324607849
acc  2 0.39902248978614807
0.6352762679258982 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([2, 1])
tensor([1, 0])
Parameter containing:
tensor([[ 1.3878,  1.0275, -1.0837],
        [ 0.7560,  0.7673,  0.0572]], requires_grad=True)
Parameter containing:
tensor([[-0.1346, -2.1380,  3.0592],
        [-0.6413,  3.1745, -0.8637]], requires_grad=True)
Parameter containing:
tensor([[-1.3915e-03,  2.4347e+00, -5.6900e-01],
        [ 1.1810e+00,  9.9534e-01, -5.6266e-01]], requires_grad=True)
acc  0 0.3760678172111511
acc  1 0.37591177225112915
acc  2 1.43478262424469
0.7289207379023234 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([1, 2])
tensor([2, 2])
Parameter containing:
tensor([[ 0.8372,  0.3708,  0.7418],
        [-0.0602,  1.2650,  0.5706]], requires_grad=True)
Parameter containing:
tensor([[-0.2413,  2.1409,  0.3304],
        [ 0.9708, -0.1730,  1.1902]], requires_grad=True)
Parameter containing:
tensor([[-0.2951,  0.6674,  0.7714],
        [-0.4103, -0.2186,  2.1347]], requires_grad=True)
acc  0 0.8466609716415405
acc  1 0.9245405197143555
acc  2 0.9701194167137146
0.9137736360232035 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 1])
tensor([2, 0])
Parameter containing:
tensor([[0.4480, 0.1648, 0.5796],
        [0.4094, 0.6535, 0.8689]], requires_grad=True)
Parameter containing:
tensor([[-0.7598, -1.9739,  4.0248],
        [-0.2990,  2.2773,  0.3541]], requires_grad=True)
Parameter containing:
tensor([[-0.9168, -0.6822,  2.7801],
        [ 1.6098, -0.7761,  0.1527]], requires_grad=True)
acc  0 2.0968713760375977
acc  1 0.9580008387565613
acc  2 1.4991519451141357
1.518008053302765 28


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([0, 2])
tensor([1, 1])
Parameter containing:
tensor([[-1.1920,  1.7041,  0.7601],
        [-0.2380,  0.1830,  2.1985]], requires_grad=True)
Parameter containing:
tensor([[ 1.6396, -0.9462,  1.0358],
        [-1.3462, -1.0735,  4.9798]], requires_grad=True)
Parameter containing:
tensor([[-0.8621,  1.1252,  0.9047],
        [-0.0493,  2.1001, -0.2818]], requires_grad=True)
acc  0 1.740183711051941
acc  1 1.508091926574707
acc  2 1.2972289323806763
1.5151681900024414 20
[Parameter containing:
tensor([[-1.1920,  1.7041,  0.7601],
        [-0.2380,  0.1830,  2.1985]], requires_grad=True), Parameter containing:
tensor([[ 1.6396, -0.9462,  1.0358],
        [-1.3462, -1.0735,  4.9798]], requires_grad=True), Parameter containing:
tensor([[-0.8621,  1.1252,  0.9047],
        [-0.0493,  2.1001, -0.2818]], requires_grad=True)]
Parameter containing:
tensor([13.8206], requires_grad=True)
Parameter containing:
tensor([62.9900], requires_grad=True)
Parameter containing:
tensor([-

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 3])
tensor([7, 6])
tensor([3, 2])
Parameter containing:
tensor([[ 0.8117,  0.8870,  0.3736,  1.0432,  0.3880,  0.5788,  0.2497,  0.8491],
        [ 0.8976, -0.0584,  0.6461,  1.2454,  0.5690,  0.8508,  0.3777,  0.6816]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6400,  0.2134,  0.4834,  0.5496, -0.0490,  0.6762,  0.0283,  1.6512],
        [ 0.2608,  0.4993,  0.3147, -0.0393,  0.3132,  0.2596,  0.8297,  0.0751]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8668,  0.5977,  0.3199,  0.8700,  0.5975,  0.8513,  0.5687,  0.3228],
        [ 1.2190,  0.3443,  2.0459,  0.2816,  0.4002, -0.3457,  0.7068,  0.4521]],
       requires_grad=True)
acc  0 1.5257072448730469
acc  1 0.5426579117774963
acc  2 0.7975063920021057
0.9552905162175497 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 7])
tensor([6, 4])
tensor([0, 0])
Parameter containing:
tensor([[-0.0082,  0.4391,  0.5778,  0.5674,  0.4549,  0.0444,  0.5905,  0.1427],
        [ 0.2931,  0.3432,  0.4996,  0.1355,  1.0743,  0.3549,  0.7326,  1.3086]],
       requires_grad=True)
Parameter containing:
tensor([[ 4.1881e-01, -1.5884e+00,  1.2465e+00, -4.1208e-02, -2.8722e-03,
         -3.0594e-01,  3.2090e+00,  8.1019e-01],
        [-4.0841e-01,  4.1244e-01, -2.5129e-01,  1.2085e-01,  4.0111e+00,
         -6.6809e-02, -3.3555e-01,  5.1684e-01]], requires_grad=True)
Parameter containing:
tensor([[ 3.2122,  0.1660,  0.6850, -0.4449, -0.5512,  0.0181,  1.3924,  0.1817],
        [ 3.6609, -0.4274, -0.1716,  0.7267,  0.6875, -0.2539,  0.0754, -0.2566]],
       requires_grad=True)
acc  0 0.6356501579284668
acc  1 0.7648564577102661
acc  2 1.274659276008606
0.8917219638824463 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([4, 7])
tensor([4, 4])
Parameter containing:
tensor([[ 1.9446,  0.3258,  0.7327, -0.3163,  0.2785,  0.4541,  0.5339,  0.3162],
        [ 2.5195, -0.1353,  0.6408,  0.3894,  0.8112,  0.1680,  1.0583,  0.0790]],
       requires_grad=True)
Parameter containing:
tensor([[0.5313, 0.7240, 0.7538, 0.3003, 0.9349, 0.6927, 0.8654, 0.4275],
        [0.4318, 0.3457, 0.0441, 0.7274, 0.7805, 0.3539, 0.1682, 0.9411]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4455,  1.3846, -0.7924,  0.8811,  1.8459,  0.3605,  0.2631, -0.5902],
        [ 0.5789, -0.4220,  1.6474, -1.7144,  2.4867, -1.0566,  0.0511,  1.5024]],
       requires_grad=True)
acc  0 0.7295717000961304
acc  1 1.7536693811416626
acc  2 0.8979586362838745
1.1270665725072224 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 6])
tensor([3, 5])
tensor([0, 2])
Parameter containing:
tensor([[0.2074, 0.9914, 0.7604, 0.7245, 0.0404, 0.5715, 0.8205, 0.0632],
        [0.6744, 0.2442, 0.0570, 0.2315, 0.2009, 0.3555, 0.7151, 0.6001]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1256,  0.4873,  0.0885,  1.4010,  0.3257,  0.0974,  0.1245,  0.9862],
        [-0.1453,  1.1039,  0.0325,  0.1728,  0.2580,  1.4703,  0.2228,  1.0714]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8631,  0.6259,  0.0124,  0.1581,  0.6044,  0.4418,  0.3184,  0.8024],
        [-0.0469,  0.3721,  1.2180,  0.5564, -0.0443,  0.1558, -0.0131,  0.7231]],
       requires_grad=True)
acc  0 1.6771315336227417
acc  1 0.6291226744651794
acc  2 1.141255497932434
1.1491699020067851 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 4])
tensor([0, 4])
tensor([3, 4])
Parameter containing:
tensor([[ 0.6811,  0.0770,  0.5811,  0.0516,  0.7749,  0.7216,  0.1136,  0.5218],
        [ 0.0758,  0.4095,  0.9618, -0.0569,  0.9637,  0.4103,  0.8898,  0.9295]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1900e+00,  3.7236e-01,  1.8760e-01,  9.0420e-01, -1.3694e-01,
          1.0118e+00,  8.5911e-01, -1.1587e-01],
        [ 3.8785e-02, -1.1536e-01,  8.4102e-01,  6.3674e-01,  2.1639e+00,
          1.1253e-04,  7.9215e-01,  1.3647e+00]], requires_grad=True)
Parameter containing:
tensor([[ 0.4419,  0.3766,  0.0973,  1.2971,  0.1600,  0.3573,  0.8869,  0.0576],
        [ 0.0209,  0.2375,  0.8144,  0.8641,  0.9176,  0.1016, -0.1901,  0.3664]],
       requires_grad=True)
acc  0 0.4875856935977936
acc  1 0.6953269243240356
acc  2 0.40764594078063965
0.5301861862341563 12


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 1])
tensor([4, 7])
tensor([2, 7])
Parameter containing:
tensor([[-0.1526,  0.0964,  0.3078,  0.4311,  2.7923, -0.9319,  1.0589,  0.4228],
        [ 0.4713,  2.8234, -1.4352, -0.6366, -0.5698,  0.6721,  0.7056,  1.2659]],
       requires_grad=True)
Parameter containing:
tensor([[-0.3433,  0.2037,  0.2063, -0.1715,  2.6565,  0.0602, -0.2832,  0.5614],
        [ 0.8272,  1.3106, -0.5597,  0.0700, -0.7742, -0.0659,  0.7484,  2.1092]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0124,  0.6771,  1.3752,  0.9879,  0.5298, -0.0881, -0.4539, -0.7242],
        [-0.0468,  0.9428, -0.5790,  0.0724,  0.3431, -0.2923,  0.9678,  1.6241]],
       requires_grad=True)
acc  0 0.8190269470214844
acc  1 0.931402862071991
acc  2 0.7337354421615601
0.8280550837516785 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 5])
tensor([3, 3])
tensor([5, 4])
Parameter containing:
tensor([[ 0.4061,  0.2385,  0.3453,  0.2950,  0.9796,  0.6126,  0.2316,  0.7795],
        [ 1.2293,  0.3098, -0.3563, -0.1193,  0.5103,  1.5540,  1.1823, -0.4693]],
       requires_grad=True)
Parameter containing:
tensor([[0.3540, 0.7528, 0.7260, 0.7694, 0.6796, 0.2970, 0.2580, 0.2860],
        [0.8321, 0.6589, 0.5529, 1.2304, 0.0910, 0.3926, 0.6746, 0.7699]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0625, -0.0230,  0.6350,  0.4521,  0.7054,  1.2914,  0.6691,  0.9066],
        [ 1.0663,  0.2680,  0.2391,  0.7250,  1.1658,  1.0259,  0.5502,  0.3289]],
       requires_grad=True)
acc  0 0.5290886163711548
acc  1 1.2693604230880737
acc  2 1.2266311645507812
1.0083600680033367 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([7, 7])
tensor([6, 0])
Parameter containing:
tensor([[-0.0474,  0.3878,  0.1827,  1.1555,  0.0907,  0.6593,  0.1683,  0.2460],
        [ 1.0609,  0.7076,  0.1403,  0.2328, -0.0288,  0.3413,  0.7600,  0.9054]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0106,  0.7597, -0.0025,  0.6724,  0.7105,  0.5736,  0.6377,  1.1207],
        [ 0.2248, -0.1858,  0.3418,  0.9358,  0.3306,  0.2753,  0.7614,  1.0210]],
       requires_grad=True)
Parameter containing:
tensor([[0.2327, 0.8677, 0.0799, 0.4485, 0.3301, 0.1899, 0.9441, 0.7821],
        [0.8578, 0.3702, 0.1301, 0.2847, 0.4678, 0.4703, 0.2218, 0.5999]],
       requires_grad=True)
acc  0 0.8418760895729065
acc  1 0.5153151154518127
acc  2 1.314274549484253
0.8904885848363241 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 5])
tensor([3, 3])
tensor([7, 0])
Parameter containing:
tensor([[ 8.6881e-01,  3.1752e-01,  2.4565e-01,  3.3987e-01,  6.8170e-01,
         -1.7149e-01,  1.3663e-03,  3.2658e-01],
        [ 4.7965e-01,  7.9996e-01,  5.5353e-01,  7.6237e-01, -2.0320e-01,
          1.8968e+00,  4.6899e-01, -4.9315e-02]], requires_grad=True)
Parameter containing:
tensor([[ 0.2592,  0.1208,  0.1151,  1.3134,  0.4221,  0.7126,  0.4150, -0.0645],
        [ 0.7892,  0.1801,  0.6463,  1.4963,  0.0019,  0.2070,  0.2896, -0.7179]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3623,  0.2825,  0.3181,  0.6839,  0.5786,  0.8448,  0.1623,  0.8786],
        [ 1.4071,  0.2912, -0.0382,  0.3168, -0.0683,  0.0412,  0.4650,  0.5158]],
       requires_grad=True)
acc  0 0.8338019847869873
acc  1 0.5493414402008057
acc  2 0.8649848103523254
0.7493760784467062 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 1])
tensor([5, 2])
tensor([1, 6])
Parameter containing:
tensor([[ 0.1235,  1.3938,  0.6578, -0.1182,  1.9715, -0.4740,  0.4808,  0.8182],
        [-0.7167,  2.2997,  0.4505, -0.3621, -0.6432,  1.4449,  0.2700,  0.7857]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0381,  0.3682,  0.8349,  0.6810,  0.5045,  0.9776,  0.9651,  0.6106],
        [ 0.5159,  0.6288,  1.0645, -0.1483, -0.0354,  0.4817,  0.2743,  0.3938]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8685,  4.3094,  0.7746, -1.6000, -0.0190,  0.3371, -1.6838,  0.9906],
        [-0.6269,  0.5334,  1.4954, -1.4931, -0.6460, -0.1527,  4.3052,  0.0819]],
       requires_grad=True)
acc  0 1.7903478145599365
acc  1 0.5563455820083618
acc  2 0.6937487721443176
1.0134807229042053 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 6])
tensor([3, 3])
tensor([2, 5])
Parameter containing:
tensor([[ 0.3128,  0.7728,  0.5768,  0.3472,  1.4801,  0.5261,  0.2628,  1.2454],
        [-0.3847,  0.1860,  0.1816,  0.5366,  0.9068,  0.9964,  1.3150,  0.9965]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8360,  0.2014,  0.3566,  1.9295,  1.6447,  0.1958, -0.5025,  0.9206],
        [-1.0217, -0.5328, -0.6624,  3.4873,  1.4299,  0.2378,  1.2839, -0.0524]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2256,  0.5666,  0.7960,  0.3215,  0.2540,  0.1750,  0.4162,  0.6267],
        [ 0.2441,  0.6463,  0.1172,  0.9269,  0.6490,  1.1143,  0.3295, -0.5423]],
       requires_grad=True)
acc  0 0.6194773316383362
acc  1 0.8515594005584717
acc  2 0.5045164823532104
0.6585177381833395 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([5, 2])
tensor([7, 7])
Parameter containing:
tensor([[ 0.7277,  0.9308,  0.7415,  0.7196,  0.2602,  0.6394,  0.1025,  0.2217],
        [ 1.3259,  0.4949,  0.1491,  0.4892,  1.3043, -0.2591,  1.0966, -0.0877]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6233,  0.2743,  0.0198,  0.0804,  0.4103,  0.6584,  0.6324,  0.4925],
        [ 0.2930,  0.1277,  1.1557,  1.1139, -0.2531,  0.6359,  0.3949,  0.8956]],
       requires_grad=True)
Parameter containing:
tensor([[0.7698, 0.8051, 0.8751, 0.4493, 0.2587, 0.0900, 0.0118, 0.9162],
        [0.2103, 0.9474, 0.0070, 0.2531, 0.1719, 0.3958, 0.5383, 1.0111]],
       requires_grad=True)
acc  0 0.4393768310546875
acc  1 0.6249621510505676
acc  2 1.1917898654937744
0.7520429491996765 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 6])
tensor([5, 7])
tensor([2, 1])
Parameter containing:
tensor([[ 0.9607,  1.0730,  0.8198,  0.5483,  0.2512,  0.5675,  0.7774,  0.1719],
        [ 0.2969,  0.4829, -0.2440,  0.3114,  0.1567,  0.8472,  0.9475,  0.4892]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7118,  0.6247,  0.5536, -0.0314,  0.2160,  0.9191,  0.8693,  0.8199],
        [ 0.2296,  1.0873,  0.5142, -0.1523,  0.6132,  0.2814,  0.1695,  1.1593]],
       requires_grad=True)
Parameter containing:
tensor([[-0.4408,  0.4825,  0.9678,  0.6403,  0.5721,  0.1661,  0.4823,  0.4786],
        [ 0.7237,  1.1359,  0.0443,  0.5502,  0.6465,  0.2510,  0.8222,  0.8779]],
       requires_grad=True)
acc  0 1.5723655223846436
acc  1 0.8737825155258179
acc  2 1.2849736213684082
1.2437072197596233 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 4])
tensor([0, 7])
tensor([7, 5])
Parameter containing:
tensor([[-0.0952, -0.4685,  1.6459,  1.4845,  1.9241,  0.8633,  0.0960, -0.1608],
        [ 1.2184, -0.0673, -0.7404,  0.2642,  1.6528,  0.4868,  0.1920,  1.2477]],
       requires_grad=True)
Parameter containing:
tensor([[0.8766, 0.3275, 0.3334, 0.2685, 0.1207, 0.7718, 0.6811, 0.5154],
        [0.0638, 0.3537, 0.4507, 0.2329, 0.1733, 0.4618, 0.1215, 1.0148]],
       requires_grad=True)
Parameter containing:
tensor([[-0.5724,  1.0716, -0.1106, -0.1372,  0.4189,  1.3667, -0.5668,  2.7620],
        [ 0.2312, -0.7301, -0.0439, -0.4314,  0.1663,  1.6712,  1.3927,  1.3865]],
       requires_grad=True)
acc  0 0.8258962035179138
acc  1 1.520595669746399
acc  2 0.7197946906089783
1.022095521291097 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 2])
tensor([7, 6])
Parameter containing:
tensor([[0.9671, 0.5334, 0.4499, 0.7707, 0.4680, 0.5524, 0.3490, 0.6673],
        [0.7495, 0.8492, 0.1515, 0.6855, 0.4294, 0.1390, 0.3217, 0.6891]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0773,  0.3624,  0.4394, -0.0070,  0.1066,  0.8022, -0.1492,  0.1263],
        [ 0.8825,  0.4327,  1.2828,  0.7262,  0.5432,  0.0976,  0.0427,  0.5470]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8727,  0.0204,  1.0247,  0.5033,  0.4770,  0.7101, -0.2762,  1.2260],
        [ 0.3450,  0.9181,  0.0088,  0.2625,  0.0193, -0.7614,  1.8573,  0.5380]],
       requires_grad=True)
acc  0 0.9497724175453186
acc  1 0.8300985097885132
acc  2 1.0342328548431396
0.9380345940589905 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 2])
tensor([2, 6])
tensor([2, 0])
Parameter containing:
tensor([[-0.3395,  1.1376, -1.0562,  0.3873, -0.5772, -0.5876,  0.6144,  3.8953],
        [ 0.3128, -1.3735,  5.5500,  0.7401, -1.0245, -0.1745, -0.7777,  1.2510]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7454,  0.0152,  0.8289,  0.4167,  0.3430,  0.1770, -0.0146,  0.2245],
        [ 0.5336,  0.8400, -0.4459,  0.0939,  0.8520,  0.2250,  1.1944,  0.5048]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8206,  0.4890,  0.8254,  0.4432,  0.3448,  0.2428,  0.2017,  0.2751],
        [ 1.2847,  0.5019,  0.0614,  0.9678, -0.1638,  0.6770,  0.6070,  0.5876]],
       requires_grad=True)
acc  0 0.27681785821914673
acc  1 0.9898361563682556
acc  2 0.652723491191864
0.6397925019264221 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 4])
tensor([1, 4])
tensor([2, 4])
Parameter containing:
tensor([[ 1.4593, -0.0937,  1.2477, -0.6400,  0.9353, -0.3156, -0.0738,  0.8085],
        [ 0.2462,  0.3285,  0.3976,  0.7839,  2.0605, -0.1866,  0.7386,  0.0543]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9465,  3.5718,  0.7757,  2.1091, -0.8214, -1.7908, -0.1744, -0.1111],
        [-0.8172, -0.7663,  0.1057,  0.0709,  6.5139, -0.8800, -0.0647, -0.1951]],
       requires_grad=True)
Parameter containing:
tensor([[-1.8949e-01, -2.7142e-01,  4.0510e+00, -8.6598e-01,  5.0044e-01,
         -9.6733e-02,  4.6815e-03,  8.9964e-01],
        [-5.6597e-01, -6.2356e-01, -8.8928e-02, -4.1176e-01,  4.8545e+00,
         -6.8827e-01,  4.6726e-01, -3.6178e-01]], requires_grad=True)
acc  0 0.5739841461181641
acc  1 0.3864124119281769
acc  2 0.7040956020355225
0.5548307200272878 12


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([7, 5])
tensor([0, 3])
Parameter containing:
tensor([[0.4623, 1.2610, 0.4584, 0.3004, 0.5820, 0.4700, 0.5544, 1.0318],
        [0.7905, 0.8677, 0.1945, 0.9884, 0.5894, 0.2171, 0.4401, 0.4434]],
       requires_grad=True)
Parameter containing:
tensor([[0.2638, 0.7464, 0.0884, 0.3824, 0.7378, 0.2473, 0.1155, 1.1012],
        [0.1249, 0.0049, 0.1033, 0.2472, 0.9227, 1.0133, 0.2318, 0.2048]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1470,  0.4620,  0.6478,  0.0160, -0.1602, -0.3560,  0.7428,  0.9269],
        [-0.0053,  0.1544,  0.9676,  1.1983,  1.0270,  0.1714,  1.0052, -0.3173]],
       requires_grad=True)
acc  0 0.5775920748710632
acc  1 0.4003385603427887
acc  2 0.5431490540504456
0.5070265630880991 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([6, 7])
tensor([7, 6])
Parameter containing:
tensor([[-0.2259, -0.4688,  2.4828,  0.5970,  0.5540,  0.6229,  0.2625,  0.4464],
        [-0.3682,  0.1115,  1.3217,  1.3373,  0.5808, -0.7204,  1.1699,  0.5471]],
       requires_grad=True)
Parameter containing:
tensor([[-0.4500, -0.4266,  0.8788, -0.1382, -0.9108,  1.8952,  3.2315,  0.4961],
        [-0.2438,  0.5036, -0.5650,  0.7232,  0.9716, -0.9723, -0.7780,  3.2262]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6807,  0.7321,  0.0165, -0.0353, -0.2346,  0.2284,  0.3958,  2.4338],
        [-0.3259,  0.5997,  1.1494,  0.6263,  2.1071, -0.8567,  2.5030,  0.1880]],
       requires_grad=True)
acc  0 1.689465880393982
acc  1 1.1725825071334839
acc  2 1.2037862539291382
1.355278213818868 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 3])
tensor([0, 2])
tensor([7, 1])
Parameter containing:
tensor([[-0.1354,  0.9683,  0.5197,  1.1754,  0.4007,  1.0345,  0.2122,  0.9483],
        [ 0.1013,  0.1946,  1.3398,  1.4627,  0.9977, -0.0824, -0.1443, -0.0512]],
       requires_grad=True)
Parameter containing:
tensor([[ 3.2190, -0.4891, -0.9719,  0.0386,  0.8860,  2.1288,  0.1855, -0.7313],
        [ 0.7834, -0.9157,  3.2214,  2.0640,  1.4139, -0.7095, -0.6084, -0.1860]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2600,  0.4993,  0.9361,  0.5437,  0.4537,  1.0338, -0.1221,  1.1973],
        [ 0.4680,  1.1996,  0.7568,  0.9094,  0.3361, -0.5015, -0.4244,  0.5835]],
       requires_grad=True)
acc  0 0.7159323692321777
acc  1 0.6698549389839172
acc  2 0.7013881206512451
0.69572514295578 30
[Parameter containing:
tensor([[-0.1354,  0.9683,  0.5197,  1.1754,  0.4007,  1.0345,  0.2122,  0.9483],
        [ 0.1013,  0.1946,  1.3398,  1.4627,  0.9977, -0.0824, -0.1443, -0.0512]],
       requires_grad=True),

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([0, 1])
tensor([0, 1])
tensor([2, 0])
Parameter containing:
tensor([[0.8128, 0.6236, 0.5025, 1.2005],
        [0.6584, 0.6436, 0.3434, 0.3962]], requires_grad=True)
Parameter containing:
tensor([[ 2.5238e+00,  3.0704e-01,  2.8243e-02, -2.5699e-01],
        [ 5.2913e-01,  1.7102e+00,  3.7079e-01, -2.4637e-03]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4974,  1.3738, -0.1952, -0.3226],
        [ 1.2858,  1.9518, -0.2330, -1.1650]], requires_grad=True)
Parameter containing:
tensor([[-1.3023, -0.2574,  1.4654,  1.0273],
        [ 3.7420,  0.1320,  0.6020, -2.8959]], requires_grad=True)
acc  0 0.6373521685600281
acc  1 0.8728909492492676
acc  2 1.2086840867996216
acc  3 0.5230048298835754
0.8104830086231232 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([0, 0])
tensor([2, 3])
tensor([1, 0])
Parameter containing:
tensor([[ 0.5666,  0.2389, -0.4224,  1.5346],
        [ 0.5434,  0.4972, -0.2121,  0.0623]], requires_grad=True)
Parameter containing:
tensor([[ 2.3990, -0.4962,  0.9562, -0.4131],
        [ 1.9123,  0.0641,  0.7703, -0.4508]], requires_grad=True)
Parameter containing:
tensor([[ 0.0626,  0.0573,  2.0056, -0.4260],
        [ 0.2833,  0.3603,  0.6887,  0.7144]], requires_grad=True)
Parameter containing:
tensor([[-0.1801,  1.2743,  0.4962,  0.3553],
        [ 0.9224,  0.4439,  0.3982,  0.2890]], requires_grad=True)
acc  0 0.5570303201675415
acc  1 0.6238662004470825
acc  2 1.8140989542007446
acc  3 0.547915518283844
0.8857277482748032 22


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([2, 2])
tensor([1, 0])
tensor([0, 0])
Parameter containing:
tensor([[ 0.9497, -0.0098,  0.9140,  0.5585],
        [ 0.3185,  1.1230,  0.0316,  0.3841]], requires_grad=True)
Parameter containing:
tensor([[ 0.6428,  0.5925,  1.5226,  0.8467],
        [ 0.4635, -0.0995,  1.1432,  0.4192]], requires_grad=True)
Parameter containing:
tensor([[0.1621, 1.5060, 0.6107, 0.0442],
        [1.2089, 0.2259, 0.9901, 0.4820]], requires_grad=True)
Parameter containing:
tensor([[ 0.9624,  0.0311, -0.1753,  0.8085],
        [ 1.2386, -0.0593,  0.0645,  0.9221]], requires_grad=True)
acc  0 0.530247688293457
acc  1 0.5298098921775818
acc  2 0.3763706386089325
acc  3 0.3337119519710541
0.44253504276275635 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 2])
tensor([0, 2])
tensor([2, 3])
tensor([1, 1])
Parameter containing:
tensor([[ 0.2298,  0.8622,  0.7275,  0.9110],
        [-0.2102,  0.4514,  1.3632, -0.1557]], requires_grad=True)
Parameter containing:
tensor([[ 1.2678, -0.4004, -0.2053,  0.4127],
        [ 0.5439, -0.0094,  1.4341,  0.0354]], requires_grad=True)
Parameter containing:
tensor([[ 0.4687,  0.1184,  1.7190,  0.6257],
        [-0.0359,  0.4376, -0.4224,  1.7251]], requires_grad=True)
Parameter containing:
tensor([[ 0.0721,  1.8573,  0.3853, -0.1509],
        [ 0.0830,  0.9154,  0.2307,  0.7935]], requires_grad=True)
acc  0 1.237204909324646
acc  1 1.08966863155365
acc  2 1.5804728269577026
acc  3 0.6249144673347473
1.1330652087926865 31


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([3, 1])
tensor([1, 1])
tensor([2, 1])
Parameter containing:
tensor([[ 1.0084,  0.2382,  0.2409, -0.0893],
        [ 0.2658,  1.3303,  0.0154,  0.5132]], requires_grad=True)
Parameter containing:
tensor([[-0.1718,  1.0132, -0.6727,  1.2885],
        [-1.2081,  4.8444, -0.0814, -0.4286]], requires_grad=True)
Parameter containing:
tensor([[ 0.3793,  1.3227,  0.5241,  0.4659],
        [-1.0147,  4.1419, -0.7035, -0.8434]], requires_grad=True)
Parameter containing:
tensor([[ 0.5983,  0.4621,  0.8194,  0.0945],
        [-0.3208,  4.3571,  0.4489, -0.7374]], requires_grad=True)
acc  0 1.2105367183685303
acc  1 1.3387789726257324
acc  2 1.140375018119812
acc  3 0.8852341175079346
1.1437312066555023 13


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 2])
tensor([0, 1])
tensor([2, 3])
Parameter containing:
tensor([[ 1.8062, -0.4074,  0.3437, -0.2962],
        [-0.3639,  0.1090,  4.7558, -1.9225]], requires_grad=True)
Parameter containing:
tensor([[ 3.1796, -0.9222, -0.0821, -0.0068],
        [-0.7282,  0.0834,  3.4452, -1.6723]], requires_grad=True)
Parameter containing:
tensor([[ 1.0785, -0.0152,  0.1570,  0.1066],
        [-0.1265,  1.6238, -0.0234,  0.0893]], requires_grad=True)
Parameter containing:
tensor([[ 0.6000,  0.4053,  1.2996, -0.1548],
        [-0.3992, -0.1557,  1.0300,  1.0402]], requires_grad=True)
acc  0 0.8259835839271545
acc  1 0.7970559000968933
acc  2 0.5324422121047974
acc  3 0.9492574334144592
0.7761847823858261 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([1, 1])
tensor([1, 2])
tensor([3, 3])
Parameter containing:
tensor([[-0.3960, -0.2147,  0.9592,  0.7683],
        [ 0.9971,  0.9568,  0.3483,  0.4693]], requires_grad=True)
Parameter containing:
tensor([[ 0.7033,  1.1934,  0.1434, -0.1414],
        [ 0.3775,  0.8428, -0.0079,  0.7298]], requires_grad=True)
Parameter containing:
tensor([[0.7704, 1.3250, 0.1064, 0.0186],
        [0.1166, 0.3843, 1.2704, 0.1310]], requires_grad=True)
Parameter containing:
tensor([[ 0.7639, -0.0491,  0.0730,  1.7783],
        [ 0.7350,  0.8005,  0.0705,  1.0304]], requires_grad=True)
acc  0 0.44712164998054504
acc  1 0.41018688678741455
acc  2 0.8858555555343628
acc  3 1.0295875072479248
0.6931878998875618 39


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 1])
tensor([0, 3])
tensor([2, 1])
tensor([0, 2])
Parameter containing:
tensor([[-0.1623,  0.2463,  0.3547,  1.0594],
        [ 0.2748,  0.7340, -0.1876,  0.5235]], requires_grad=True)
Parameter containing:
tensor([[ 2.9196,  0.0285, -0.5020, -0.4880],
        [ 0.3211,  0.6546,  0.5107,  0.6750]], requires_grad=True)
Parameter containing:
tensor([[-1.7553,  0.0478,  4.1104, -1.0525],
        [-0.5731,  4.0395,  1.2413, -1.5753]], requires_grad=True)
Parameter containing:
tensor([[ 1.0167, -0.1415,  0.3994,  0.4400],
        [-0.2039,  0.1813,  2.1860, -0.1731]], requires_grad=True)
acc  0 0.8438774943351746
acc  1 1.396499752998352
acc  2 1.185654878616333
acc  3 0.507111668586731
0.9832859486341476 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([2, 0])
tensor([0, 0])
tensor([3, 0])
Parameter containing:
tensor([[ 0.1043, -0.2422,  1.4859, -0.2316],
        [-0.3149,  0.4803,  0.6180,  0.7102]], requires_grad=True)
Parameter containing:
tensor([[ 0.1473,  0.7531,  1.2902, -0.1068],
        [ 1.7926,  0.3219,  0.0930,  0.4174]], requires_grad=True)
Parameter containing:
tensor([[ 1.2167,  0.7300, -0.2710,  0.0966],
        [ 1.1264,  0.9089, -0.3571, -0.1569]], requires_grad=True)
Parameter containing:
tensor([[ 0.3299,  0.3302, -0.4399,  1.1295],
        [ 1.0742,  0.1763,  0.1800,  0.1124]], requires_grad=True)
acc  0 1.6723464727401733
acc  1 0.2246980369091034
acc  2 0.27833667397499084
acc  3 0.22852103412151337
0.6009755544364452 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([3, 3])
tensor([3, 0])
tensor([2, 1])
Parameter containing:
tensor([[ 1.4298,  0.9417,  0.2379, -0.0799],
        [ 0.1725,  0.8270,  1.1969,  0.1275]], requires_grad=True)
Parameter containing:
tensor([[ 0.4086,  0.2490,  0.0335,  0.9442],
        [-0.0759, -0.3667,  0.3961,  1.9401]], requires_grad=True)
Parameter containing:
tensor([[-0.3234, -0.1547,  0.9853,  1.2908],
        [ 1.2557,  0.4320,  0.5739,  0.9203]], requires_grad=True)
Parameter containing:
tensor([[0.4851, 0.4797, 1.1444, 0.2095],
        [0.0522, 0.4634, 0.2240, 0.1171]], requires_grad=True)
acc  0 0.38824355602264404
acc  1 0.3632640540599823
acc  2 0.931204617023468
acc  3 1.020917534828186
0.6759074404835701 39


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([1, 1])
tensor([1, 0])
tensor([0, 0])
Parameter containing:
tensor([[ 0.5879,  0.8474,  0.7507,  0.4411],
        [ 0.5911, -0.0789,  0.8593,  1.5252]], requires_grad=True)
Parameter containing:
tensor([[0.0178, 0.8660, 0.6789, 0.1404],
        [0.9358, 1.3231, 0.6506, 0.1217]], requires_grad=True)
Parameter containing:
tensor([[-0.0607,  2.8270, -0.4073,  0.0214],
        [ 2.0577,  0.2296,  0.4986,  0.4158]], requires_grad=True)
Parameter containing:
tensor([[ 1.6516,  0.2092,  0.5512,  0.3528],
        [ 1.1550,  0.4602,  0.2789, -0.4895]], requires_grad=True)
acc  0 1.2202602624893188
acc  1 0.8718273043632507
acc  2 0.8179916739463806
acc  3 0.8620111346244812
0.9430225938558578 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 0])
tensor([1, 2])
tensor([2, 2])
Parameter containing:
tensor([[ 0.9481,  0.7596,  0.6089,  0.4926],
        [ 1.5904,  0.3638, -0.4427,  0.0226]], requires_grad=True)
Parameter containing:
tensor([[ 1.0558,  0.5081,  0.1308,  0.7846],
        [ 1.4748, -0.0540,  0.4311,  0.1821]], requires_grad=True)
Parameter containing:
tensor([[ 0.4904,  0.5146,  0.0107,  0.0979],
        [-0.2117,  0.6236,  1.3732,  0.2928]], requires_grad=True)
Parameter containing:
tensor([[ 0.6046,  0.0763,  0.9982,  0.6208],
        [-0.1719,  0.0600,  1.3735,  0.8021]], requires_grad=True)
acc  0 0.46605002880096436
acc  1 0.9572658538818359
acc  2 0.7650967240333557
acc  3 0.7037919759750366
0.7230511456727982 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([1, 2])
tensor([0, 1])
tensor([0, 0])
Parameter containing:
tensor([[0.5204, 0.6961, 0.9292, 0.7016],
        [0.2499, 0.8388, 0.7008, 0.5330]], requires_grad=True)
Parameter containing:
tensor([[ 0.0995,  0.7004, -0.0885,  0.2628],
        [-0.1092,  0.5784,  1.1900,  0.6543]], requires_grad=True)
Parameter containing:
tensor([[ 0.8076,  0.5968,  0.6335, -0.0396],
        [ 0.2654,  1.4212,  0.5388,  0.4592]], requires_grad=True)
Parameter containing:
tensor([[ 0.9234,  0.4168,  0.3134,  0.0170],
        [ 1.1431,  0.2831, -0.1506,  0.9559]], requires_grad=True)
acc  0 0.6339955925941467
acc  1 1.3787602186203003
acc  2 0.6093627214431763
acc  3 0.5938672423362732
0.8039964437484741 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 3])
tensor([0, 1])
tensor([2, 1])
tensor([1, 0])
Parameter containing:
tensor([[ 1.6266,  0.3605,  0.2783,  0.8196],
        [ 0.1538, -0.2069, -0.3160,  2.5736]], requires_grad=True)
Parameter containing:
tensor([[ 0.9628,  0.0399,  0.3334, -0.0207],
        [ 0.2230,  1.0301,  0.7158,  0.9699]], requires_grad=True)
Parameter containing:
tensor([[ 0.2322, -0.1654,  1.7132, -0.0246],
        [-0.0957,  2.9685, -0.5534, -0.1799]], requires_grad=True)
Parameter containing:
tensor([[ 0.4056,  0.4553, -0.1272,  0.3129],
        [ 1.2737,  0.1587, -0.3500,  0.7434]], requires_grad=True)
acc  0 0.7163370251655579
acc  1 0.22035378217697144
acc  2 0.2579082250595093
acc  3 0.29605019092559814
0.3726623058319092 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 0])
tensor([1, 0])
tensor([3, 1])
Parameter containing:
tensor([[ 1.1784,  0.5723, -0.0254,  1.0875],
        [ 0.6970,  0.5475,  0.0525,  0.6481]], requires_grad=True)
Parameter containing:
tensor([[ 0.0268,  1.3653,  0.7104,  0.1601],
        [ 1.3325, -0.6618, -0.1755,  1.2569]], requires_grad=True)
Parameter containing:
tensor([[ 0.2296,  1.6335, -0.5446,  0.3753],
        [ 1.3598,  0.0577, -0.0459, -0.3074]], requires_grad=True)
Parameter containing:
tensor([[ 0.9908, -0.1365, -0.1012,  1.5350],
        [ 0.4073,  1.3617,  0.8629, -0.3652]], requires_grad=True)
acc  0 1.40559720993042
acc  1 1.4853575229644775
acc  2 1.6096551418304443
acc  3 0.6566019058227539
1.289302945137024 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([2, 0])
tensor([2, 3])
tensor([2, 0])
Parameter containing:
tensor([[ 5.5347, -1.2898, -0.9060, -1.9054],
        [ 2.6043, -1.2177, -1.5755,  2.2295]], requires_grad=True)
Parameter containing:
tensor([[ 1.5692, -2.3116,  5.1775, -1.7142],
        [ 1.3512,  1.2271, -1.9984,  1.2030]], requires_grad=True)
Parameter containing:
tensor([[ 0.3951, -0.0172,  0.9855,  0.5965],
        [ 0.1888,  0.0130, -0.1629,  0.7375]], requires_grad=True)
Parameter containing:
tensor([[ 0.2218, -0.6148,  2.7187, -0.9433],
        [ 4.0883, -0.9649, -0.4018, -0.3062]], requires_grad=True)
acc  0 0.7881141304969788
acc  1 1.5178476572036743
acc  2 0.6131466627120972
acc  3 0.7921972274780273
0.9278264194726944 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([3, 0])
tensor([3, 0])
tensor([0, 0])
Parameter containing:
tensor([[ 1.4013,  0.2342,  0.5147, -0.6297],
        [ 2.8521, -0.2218, -0.6494, -0.1738]], requires_grad=True)
Parameter containing:
tensor([[-0.2635,  0.5894, -0.1726,  1.8872],
        [ 4.0717, -1.0697,  0.1738, -0.7933]], requires_grad=True)
Parameter containing:
tensor([[ 0.2189,  0.6867, -0.1180,  0.9543],
        [ 1.6932, -0.5061,  1.6066, -0.0302]], requires_grad=True)
Parameter containing:
tensor([[ 4.0585, -1.1694, -0.6368, -0.5470],
        [ 5.1457, -2.5688,  1.5092, -1.8240]], requires_grad=True)
acc  0 0.8345025777816772
acc  1 0.7202758193016052
acc  2 0.8542535305023193
acc  3 0.9594131708145142
0.842111274600029 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 3])
tensor([1, 0])
tensor([1, 0])
tensor([0, 3])
Parameter containing:
tensor([[1.4840, 0.1816, 0.1489, 0.3150],
        [0.7043, 0.5015, 0.7012, 1.0838]], requires_grad=True)
Parameter containing:
tensor([[-0.0118,  1.1660,  0.9694,  0.6993],
        [ 1.0809, -0.0185,  0.7668, -0.1210]], requires_grad=True)
Parameter containing:
tensor([[ 0.4012,  1.4002, -0.4199,  0.3079],
        [ 1.2636,  0.1221,  0.0087,  0.5991]], requires_grad=True)
Parameter containing:
tensor([[ 2.0601, -0.3641, -0.8793, -0.2459],
        [-0.1026, -0.7829,  1.3478,  1.8195]], requires_grad=True)
acc  0 1.5996297597885132
acc  1 1.6717302799224854
acc  2 1.1346113681793213
acc  3 1.5909868478775024
1.4992395639419556 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 2])
tensor([3, 0])
tensor([3, 2])
Parameter containing:
tensor([[-0.4352,  0.6246,  1.9453, -0.4431],
        [ 0.8487,  0.3953,  0.6500,  0.6852]], requires_grad=True)
Parameter containing:
tensor([[ 0.3002,  0.4298,  0.9392,  0.8077],
        [-0.0725, -0.9740,  2.5233,  0.0260]], requires_grad=True)
Parameter containing:
tensor([[ 0.9572, -0.8500, -0.6880,  2.7482],
        [ 1.4509, -0.2192,  0.6677,  0.5094]], requires_grad=True)
Parameter containing:
tensor([[ 0.7484, -0.3497, -1.0870,  1.9454],
        [ 0.3685, -0.5012,  1.1109,  0.6300]], requires_grad=True)
acc  0 0.637046217918396
acc  1 0.47649502754211426
acc  2 0.40489116311073303
acc  3 0.39863935112953186
0.4792679399251938 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([2, 1])
tensor([1, 3])
tensor([2, 3])
Parameter containing:
tensor([[0.2021, 0.6680, 0.6612, 0.4514],
        [0.6466, 0.5355, 1.1832, 0.7757]], requires_grad=True)
Parameter containing:
tensor([[ 0.5898, -0.1004,  2.2448, -0.1528],
        [-0.9625,  1.8272, -0.2145,  0.5867]], requires_grad=True)
Parameter containing:
tensor([[ 0.8128,  0.9873,  0.1610,  0.9142],
        [-0.4178, -0.1924,  0.7268,  1.2738]], requires_grad=True)
Parameter containing:
tensor([[ 0.6491,  0.4201,  1.1967,  0.4456],
        [ 0.4187, -0.0479,  0.9186,  1.0621]], requires_grad=True)
acc  0 1.2098852396011353
acc  1 0.7150702476501465
acc  2 1.224556803703308
acc  3 1.476275086402893
1.1564468443393707 29
[Parameter containing:
tensor([[0.2021, 0.6680, 0.6612, 0.4514],
        [0.6466, 0.5355, 1.1832, 0.7757]], requires_grad=True), Parameter containing:
tensor([[ 0.5898, -0.1004,  2.2448, -0.1528],
        [-0.9625,  1.8272, -0.2145,  0.5867]], requires_grad=True), Parameter containin

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 5])
tensor([7, 6])
tensor([5, 2])
tensor([5, 1])
Parameter containing:
tensor([[0.5991, 1.0357, 0.5463, 1.1145, 0.1493, 0.4545, 0.1746, 1.1068],
        [0.8387, 0.3112, 0.6484, 0.5190, 0.5829, 0.8727, 0.6715, 0.7653]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9301,  0.9931,  0.2761,  0.4687,  0.2047,  0.2538, -0.2813,  1.3478],
        [-0.1396, -0.7585,  0.4135,  0.1645,  0.7264,  0.3766,  1.2550,  0.4752]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8320, -0.0328,  0.4889, -0.0623,  1.0097,  2.0611,  1.0307, -0.3326],
        [ 0.6021,  0.7278,  1.4042, -0.6848,  0.8795,  0.2572,  0.9832,  0.9349]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0099, -0.0773, -0.3477, -0.9481, -0.6236,  3.9522,  1.0813,  0.6119],
        [ 1.0188,  4.2157,  0.4656, -1.4206,  0.1249, -0.5591,  0.0272, -0.0149]],
       requires_grad=True)
acc  0 2.1305956840515137
acc  1 1.0098828077316284
acc  2 1.0629421472549438
acc  3 1.3288681507110

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([5, 2])
tensor([2, 2])
tensor([3, 7])
Parameter containing:
tensor([[0.3691, 0.4161, 0.7468, 0.1157, 0.0262, 0.3004, 0.5268, 0.3073],
        [0.8713, 0.4306, 1.2609, 0.0849, 0.3647, 0.5084, 0.3594, 0.8617]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9963,  0.0859,  0.7274,  0.4486, -0.1284,  1.9389, -0.1038, -0.2187],
        [-0.4816,  0.1211,  3.2907,  0.3584,  0.3340,  0.5164,  1.0807, -1.2206]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5052,  0.5729,  1.5096,  0.2995,  0.6835,  0.5381,  0.4777,  0.0729],
        [ 0.4683,  0.4486,  1.2110,  0.5242,  0.6720,  0.6495,  0.2725, -0.2051]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0573,  0.6374,  0.7325,  0.9502,  0.1755,  0.8049,  0.6067,  0.3500],
        [ 0.5473,  0.2721,  0.2906,  0.2008,  0.1097,  0.4793, -0.1423,  1.2195]],
       requires_grad=True)
acc  0 1.356783390045166
acc  1 1.3390470743179321
acc  2 1.5578585863113403
acc  3 0.96078938245773

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 4])
tensor([4, 7])
tensor([5, 1])
tensor([7, 7])
Parameter containing:
tensor([[ 0.8028,  0.7643,  0.9926,  0.3312,  0.3877,  0.3841,  0.4830,  0.1237],
        [ 0.9163,  1.0545,  0.8834,  0.8407,  1.1783,  0.2327,  0.4580, -0.0329]],
       requires_grad=True)
Parameter containing:
tensor([[0.7039, 0.8875, 0.9154, 0.1270, 1.0760, 0.3858, 0.8867, 0.2478],
        [0.3646, 0.4038, 0.1532, 0.7768, 0.5678, 0.3414, 0.2199, 0.9652]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5031,  0.4215,  0.5332, -0.0530,  0.3310,  1.0320,  0.4538,  0.5765],
        [-0.4938,  1.1772,  0.4743,  0.0188,  0.1523,  0.2155,  0.7757,  0.7535]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0569,  0.5234,  0.0795,  0.9500,  0.7211,  0.2614,  0.0418,  1.0021],
        [-0.0213,  0.5052,  0.5422,  0.7493,  0.1866,  0.5232,  0.3353,  1.4693]],
       requires_grad=True)
acc  0 1.3523359298706055
acc  1 0.7278510928153992
acc  2 0.6932588815689087
acc  3 0.6029517650604

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 0])
tensor([7, 0])
tensor([0, 2])
tensor([4, 4])
Parameter containing:
tensor([[ 0.0839,  0.9409,  0.3862,  0.8542,  0.2218,  0.4845,  1.2888, -0.0811],
        [ 1.0676,  0.1437, -0.0818,  0.2228,  0.5710,  0.1882,  0.8840,  0.0833]],
       requires_grad=True)
Parameter containing:
tensor([[0.6814, 0.5035, 0.7109, 0.8439, 0.2495, 0.1910, 0.5113, 0.9449],
        [0.8615, 0.7964, 0.6106, 0.5197, 0.3330, 0.6402, 0.3512, 0.0740]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.2523,  0.6453,  0.4735, -0.0449,  0.4038,  0.4512,  0.0550,  0.5903],
        [ 0.4606,  0.4344,  1.2531, -0.5047,  0.0057,  0.0665,  0.4557,  0.7498]],
       requires_grad=True)
Parameter containing:
tensor([[0.0403, 0.5736, 0.0904, 0.2081, 1.2243, 0.6709, 0.4321, 0.4966],
        [0.2709, 0.3367, 0.5067, 0.7772, 1.0399, 0.8056, 0.3805, 0.2701]],
       requires_grad=True)
acc  0 1.3395946025848389
acc  1 1.2865930795669556
acc  2 0.9288084506988525
acc  3 0.9628718495368958
1.1294669955

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 4])
tensor([3, 7])
tensor([6, 4])
tensor([6, 4])
Parameter containing:
tensor([[0.5273, 0.0496, 0.6412, 0.0691, 0.7181, 0.7474, 0.1605, 0.6095],
        [0.0349, 0.3646, 0.6426, 0.1860, 1.3511, 0.6323, 0.9035, 0.4685]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2656,  1.1403,  1.3351,  2.1027, -0.7693, -0.6131, -0.3546,  1.1657],
        [ 1.8028, -0.0248,  1.1457, -1.7185, -0.7244,  0.6076, -0.7094,  5.3430]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0522,  0.3566, -0.1583,  1.6161,  0.6432, -0.5573,  1.7650, -0.0428],
        [-0.2600, -0.8599,  1.2499, -1.1422,  3.2511,  1.2016, -0.5289,  0.2208]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0709, -0.6883,  0.6241,  0.4942,  0.4891,  0.4968,  1.4079,  0.8801],
        [ 1.0961, -0.5511,  0.3785, -1.0738,  4.4493,  0.3683, -0.7191,  0.7056]],
       requires_grad=True)
acc  0 0.7098265290260315
acc  1 1.1579149961471558
acc  2 0.7744148373603821
acc  3 1.0208065509796

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([3, 6])
tensor([2, 7])
tensor([5, 6])
Parameter containing:
tensor([[ 1.2887,  0.5457,  2.3079, -0.6587, -0.2983,  1.7714, -0.4175, -0.5145],
        [-0.8010, -1.0887, -1.4525,  5.1715, -0.9998,  1.2625,  0.6719,  0.5328]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3855,  0.1920, -0.6632,  2.7201,  0.7436, -0.4463,  0.5728, -0.6145],
        [-0.6238, -1.4635,  1.5128,  1.7038, -0.7043, -0.7161,  3.8568,  0.0998]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7712,  0.1215,  1.0201, -0.0196,  0.4362,  0.5683, -0.0927,  0.5112],
        [ 0.5630,  0.1085,  0.4098, -0.0767,  0.4016,  0.2871,  0.4426,  0.8962]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1240,  0.1759,  0.8728,  0.2281,  0.9334,  0.9440, -0.0181, -0.0958],
        [ 0.0175, -0.6260,  0.3091,  1.1292,  1.1175, -0.6326,  1.9121,  0.2949]],
       requires_grad=True)
acc  0 0.34122899174690247
acc  1 1.327599287033081
acc  2 0.6929108500480652
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 5])
tensor([4, 0])
tensor([4, 0])
tensor([1, 6])
Parameter containing:
tensor([[ 0.6861,  0.2639,  0.4877,  0.1539,  0.4397,  0.8764,  0.5259,  0.4547],
        [ 1.1758,  0.6215, -0.3915,  0.9179,  0.4241,  1.1883, -0.1589,  0.0638]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3238,  0.4231,  0.4441,  0.8270,  0.9304, -0.1311,  0.4920,  0.8133],
        [ 1.0998,  0.2050,  0.2043,  0.9322,  0.8455,  0.9737,  0.6538,  0.2881]],
       requires_grad=True)
Parameter containing:
tensor([[0.0219, 0.0558, 0.0917, 0.5781, 1.1609, 0.7663, 0.9084, 1.1160],
        [1.2127, 0.0035, 0.6013, 1.1158, 0.8541, 0.7176, 0.5492, 0.3148]],
       requires_grad=True)
Parameter containing:
tensor([[0.6129, 1.0602, 0.6959, 0.5747, 0.4801, 0.8037, 0.0642, 0.4667],
        [0.8931, 0.4096, 0.2118, 0.4067, 0.7970, 0.6124, 1.1864, 0.0463]],
       requires_grad=True)
acc  0 0.6732083559036255
acc  1 0.5348407030105591
acc  2 0.9657360315322876
acc  3 0.7984089255332947
0.7430485039

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([2, 6])
tensor([6, 5])
tensor([1, 5])
Parameter containing:
tensor([[ 0.0810,  0.2869,  0.1941,  0.9293,  0.1899,  0.6038,  0.2798,  0.2782],
        [ 1.1873,  0.5280,  0.0410,  0.3656, -0.0047,  0.2326,  0.6114,  1.1582]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0511,  0.3165,  0.9417,  0.5653,  0.6351,  0.6409,  0.6816,  0.7527],
        [ 0.8735,  0.5505,  0.1734, -0.0352, -0.8639,  0.5877,  1.7650,  0.6538]],
       requires_grad=True)
Parameter containing:
tensor([[-1.6223,  0.9778,  0.1719, -0.2158,  0.4153, -0.0061,  4.1966, -0.0424],
        [ 1.7374, -0.5183, -0.2335, -1.5862, -0.3850,  5.5138,  0.2704, -1.3961]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6323,  0.9245,  0.3157,  0.8943, -0.2186,  0.2839,  0.8290, -0.1713],
        [ 1.0127, -0.2411,  0.8247, -0.0749,  0.3544,  1.1609,  0.2465, -0.0639]],
       requires_grad=True)
acc  0 0.39146196842193604
acc  1 1.0406301021575928
acc  2 0.831200897693634
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 3])
tensor([3, 7])
tensor([5, 7])
Parameter containing:
tensor([[ 1.2627,  0.0245,  0.4373, -0.1483,  0.1679, -0.0251,  0.2601,  0.6308],
        [ 0.8063,  1.0915,  0.0631,  0.4726,  0.5180,  0.9623,  0.4325,  0.3626]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.8734, -0.8455, -0.1554,  0.5251,  0.8606,  0.5399, -0.6509,  1.1465],
        [-0.0115, -1.3008, -0.7578,  2.2250,  1.0174,  0.9593,  0.7285,  0.0326]],
       requires_grad=True)
Parameter containing:
tensor([[-1.8014e-01,  1.1205e-01,  8.0747e-02,  1.2151e+00,  9.3750e-01,
          1.2002e+00,  1.5628e-01,  5.8948e-01],
        [ 9.6928e-01, -3.2941e-01,  5.3133e-02,  4.2771e-01,  4.2560e-01,
          4.6407e-04,  3.3750e-01,  1.0463e+00]], requires_grad=True)
Parameter containing:
tensor([[ 0.1069,  0.8209,  0.5205,  0.6430,  0.7939,  0.9642,  0.5358,  0.2061],
        [-0.0190,  0.6353,  0.0874,  0.0150,  0.2060,  0.4796,  0.6828,  1.1019]],
       requires_grad=True)
acc  0 1.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 7])
tensor([5, 2])
tensor([0, 3])
tensor([7, 7])
Parameter containing:
tensor([[ 0.2100,  0.1836,  2.4215,  0.0933,  0.4535,  0.0198,  1.3696,  0.1021],
        [-0.4381,  0.3132,  0.4946, -0.0894,  0.8845,  1.0346, -0.3348,  1.6642]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7035,  0.2833,  0.4733,  1.0320,  0.8379,  1.2230, -0.2883,  0.7153],
        [-0.6201, -0.0549,  3.5308,  0.2791,  0.2415,  0.1453, -0.2805, -0.0659]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.5346, -0.0355,  0.2916,  1.3915,  0.0954,  0.2126, -0.2197,  0.7068],
        [ 0.0309,  0.2114,  0.8225,  1.3598,  0.4865, -0.2999,  0.7572,  0.1287]],
       requires_grad=True)
Parameter containing:
tensor([[0.0661, 0.7491, 0.5592, 0.0015, 0.6021, 0.7511, 0.5659, 0.7770],
        [0.7272, 0.6324, 0.7417, 0.8275, 0.5162, 0.0871, 0.2628, 1.0553]],
       requires_grad=True)
acc  0 1.3361589908599854
acc  1 0.4064284861087799
acc  2 0.9734498262405396
acc  3 1.4142416715621

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 4])
tensor([6, 2])
tensor([6, 1])
tensor([1, 4])
Parameter containing:
tensor([[0.8580, 0.9206, 0.4897, 0.4962, 0.9017, 0.6963, 0.3344, 0.8269],
        [0.0553, 0.6003, 0.7448, 0.1871, 1.0604, 0.9077, 0.9097, 0.2689]],
       requires_grad=True)
Parameter containing:
tensor([[0.8391, 0.3177, 0.5758, 0.5446, 0.9349, 0.6177, 0.9828, 0.7695],
        [0.5814, 0.3863, 1.0438, 0.8477, 0.8275, 0.0764, 0.3229, 0.0836]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2327, -0.4229,  0.8007, -0.4819,  0.8083,  0.3346,  1.6770,  0.8986],
        [-0.5453,  1.2548,  0.2279,  0.8667, -0.0719,  0.7542,  0.6661,  0.3324]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3993,  1.7997, -0.2388,  0.0928,  1.2875,  0.3739,  0.1521, -0.0199],
        [-1.3384,  0.9728,  0.4933,  1.0636,  1.1402,  0.6117,  0.5337,  0.6096]],
       requires_grad=True)
acc  0 2.2466623783111572
acc  1 1.77135169506073
acc  2 1.1275832653045654
acc  3 2.067800283432007
1.8033494055271

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([5, 7])
tensor([1, 7])
tensor([1, 5])
Parameter containing:
tensor([[ 0.8047,  1.0385,  0.6500,  0.4039,  0.0921,  0.6533,  0.1443,  0.5564],
        [ 1.0633,  0.9068, -0.1667,  0.4499,  0.5962,  0.3654,  0.7051,  0.5932]],
       requires_grad=True)
Parameter containing:
tensor([[0.5478, 0.2578, 0.0655, 0.2404, 0.3230, 0.7195, 0.6436, 0.3939],
        [0.0977, 0.4684, 0.9973, 0.7939, 0.0539, 0.3606, 0.5751, 1.0167]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3762,  1.2155,  0.9190,  0.8421,  0.5532,  0.1389, -0.1492,  0.2805],
        [ 0.1811,  0.5262, -0.4002, -0.7612, -0.2783,  0.2537,  1.6193,  2.3942]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2676,  1.4316,  0.9252,  0.5690,  1.0165, -0.2175,  0.2965,  0.7243],
        [ 0.6273,  0.1068,  0.0224, -0.2073, -0.1216,  1.2029,  0.1660,  1.0592]],
       requires_grad=True)
acc  0 0.7885494232177734
acc  1 1.0695421695709229
acc  2 0.9305670857429504
acc  3 0.2397086471319

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 6])
tensor([5, 7])
tensor([1, 6])
tensor([4, 2])
Parameter containing:
tensor([[0.4563, 0.5923, 1.3837, 0.5028, 0.2063, 0.8274, 0.6467, 0.5544],
        [0.2023, 0.4727, 0.1195, 0.3257, 0.2501, 0.5462, 0.7141, 0.6572]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9414,  0.4138,  0.6195, -0.0282,  0.1474,  1.0250,  0.6468,  0.9172],
        [ 0.6382,  0.5315,  0.1305,  0.1893,  0.8979,  0.1849,  0.0097,  1.3200]],
       requires_grad=True)
Parameter containing:
tensor([[0.2960, 0.8623, 0.6365, 0.5273, 0.4076, 0.1048, 0.2310, 0.2833],
        [0.7641, 0.8134, 0.0289, 0.5114, 0.8094, 0.0055, 1.0998, 1.0192]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4269,  0.5669,  0.2795,  1.0108,  1.2103,  0.3449,  0.3913,  0.1176],
        [-0.2433,  0.9227,  1.0390,  0.7462,  0.9842,  0.3257,  0.1570, -0.1377]],
       requires_grad=True)
acc  0 0.3562493920326233
acc  1 0.9273929595947266
acc  2 0.4374839663505554
acc  3 0.9296382665634155
0.6626911461

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 0])
tensor([5, 7])
tensor([7, 3])
tensor([1, 1])
Parameter containing:
tensor([[ 1.3556,  0.2114,  1.5177,  1.1082,  1.5559, -0.1840, -0.5336,  0.2582],
        [ 1.4241,  0.2421, -0.2168, -0.7572,  1.3166,  0.4726,  0.6721,  1.1009]],
       requires_grad=True)
Parameter containing:
tensor([[0.6027, 0.2450, 0.2504, 0.2127, 0.0436, 1.3184, 0.5701, 0.6521],
        [0.1868, 0.2772, 0.2266, 0.2370, 0.4608, 0.5876, 0.1561, 0.7405]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2321, -0.4328,  0.0103,  0.9342,  0.6629,  1.0450,  0.1768,  1.6037],
        [ 0.5551, -0.4034, -0.1532,  2.9380,  0.7805, -0.4935, -0.0629,  0.4820]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8273,  0.9721,  0.9495,  0.6411,  0.5523, -0.0315,  0.1231,  0.3316],
        [ 0.6004,  1.0886,  0.4876,  0.0696,  0.2957,  0.1446,  0.3549,  1.0629]],
       requires_grad=True)
acc  0 0.8942652344703674
acc  1 1.337422490119934
acc  2 0.7198253870010376
acc  3 0.58403670787811

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 6])
tensor([0, 2])
tensor([2, 5])
tensor([4, 6])
Parameter containing:
tensor([[ 2.2019,  0.1120, -0.0951,  1.0955, -0.0063,  0.3513,  0.3373,  0.7613],
        [ 0.8981, -0.1264,  0.3366,  0.7984,  0.3376,  0.2634,  1.1949,  0.3123]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0191,  0.2301,  0.0973,  0.7324,  0.3044,  0.0319, -0.2573,  0.6002],
        [ 0.7933,  0.3564,  1.3646,  0.2083,  0.6300,  0.3217,  1.2450, -0.3646]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6245,  0.0763,  1.1500,  0.5462,  1.0161,  0.2151,  0.1567,  0.7731],
        [ 0.0888,  0.2718,  0.6740,  0.8580,  0.2830,  0.9388,  0.1378, -0.0647]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4373,  1.1346,  0.8422, -1.1053,  2.8092,  0.8592,  0.2348, -0.4731],
        [-0.1103,  0.4317,  1.5163, -0.0090, -0.2126, -0.0096,  3.3646, -0.3509]],
       requires_grad=True)
acc  0 0.9529158473014832
acc  1 0.9145887494087219
acc  2 1.255550503730774
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([2, 0])
tensor([2, 3])
tensor([6, 7])
Parameter containing:
tensor([[-1.0295,  2.7196,  0.6525,  0.5899, -0.2552,  0.8208, -0.6152,  0.5913],
        [ 6.5489, -1.6252, -0.0675, -0.2862, -0.3331, -0.1297, -1.4357,  1.8322]],
       requires_grad=True)
Parameter containing:
tensor([[-0.6323, -0.4557,  5.2662, -0.3947, -1.4202,  0.0536,  0.0923,  0.2271],
        [ 6.7822, -1.5628, -0.7380, -0.4081,  0.0580, -0.2927, -1.1746,  1.1337]],
       requires_grad=True)
Parameter containing:
tensor([[0.6302, 0.6251, 0.7992, 0.5442, 0.4649, 0.1639, 0.1433, 0.2716],
        [0.5804, 0.4097, 0.4175, 1.2014, 0.0288, 0.5231, 1.0943, 0.2684]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.0071,  0.3782,  0.6437,  0.2089,  1.0907, -0.7500,  2.3633, -0.6821],
        [ 1.4746, -0.6506,  0.3131,  0.1674, -0.3885,  0.0666,  0.0105,  4.4008]],
       requires_grad=True)
acc  0 0.7552705407142639
acc  1 0.4498365819454193
acc  2 0.45829370617866516
acc  3 0.892503619194

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 6])
tensor([3, 5])
tensor([3, 5])
tensor([4, 5])
Parameter containing:
tensor([[ 0.2343,  0.7919,  0.2796,  0.7131,  0.9496,  0.5044, -0.0714, -0.0739],
        [ 1.2050, -0.2827,  0.2221,  0.2491,  0.2352,  0.6685,  1.7977,  0.3280]],
       requires_grad=True)
Parameter containing:
tensor([[ 8.1178e-01, -2.4045e-03,  3.2757e-02,  3.0972e+00,  3.7754e-01,
         -2.9604e-02, -1.4513e-01,  3.6326e-01],
        [ 9.6665e-01, -7.6786e-01, -3.2100e-01, -9.1920e-01,  4.8779e-01,
          3.8817e+00,  2.7660e-01,  3.6253e-01]], requires_grad=True)
Parameter containing:
tensor([[ 0.0950,  0.4959,  0.6872,  1.6575,  0.6587,  0.1526,  0.0915,  0.1936],
        [ 0.1351,  0.7208, -0.0376,  0.1506, -0.1468,  1.0341,  0.5214,  0.2039]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0408,  1.7506, -0.1001, -1.8291,  3.4121, -0.6837,  1.6126,  0.1120],
        [ 0.2178,  0.0777,  1.1277, -0.9984, -1.1056,  6.0505, -1.0531, -0.6698]],
       requires_grad=True)
acc  0 0.

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 3])
tensor([7, 7])
tensor([3, 6])
tensor([3, 1])
Parameter containing:
tensor([[0.4240, 0.8136, 0.2750, 0.1600, 1.2331, 0.7763, 0.5934, 0.8450],
        [0.8461, 0.9785, 0.1377, 1.1523, 0.4747, 0.1008, 0.1886, 0.6523]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6962,  0.7074, -0.0389,  0.3060,  0.4063,  0.3491,  0.0019,  1.2549],
        [ 0.1424, -0.5705,  0.2477, -0.0716,  0.1825,  1.0102,  0.8230,  1.0891]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1720,  0.4988,  0.1600,  0.8206,  0.7217,  0.3942,  0.3659,  0.2931],
        [ 0.2831,  0.3543,  0.8266,  0.6400,  0.8702, -0.7421,  1.4889,  0.4802]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2116,  0.1916, -0.1123,  1.4819,  1.4389, -0.1534,  1.4348, -0.3661],
        [ 1.3549,  1.6405, -0.6020, -0.2147, -0.6134,  0.6579, -0.1150,  0.6301]],
       requires_grad=True)
acc  0 1.2807435989379883
acc  1 1.034312129020691
acc  2 0.6385258436203003
acc  3 0.69498252868652

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 3])
tensor([3, 0])
tensor([7, 2])
tensor([2, 5])
Parameter containing:
tensor([[ 0.1067,  0.2885,  0.7378,  0.2928,  1.0190,  0.5048,  0.6174,  0.7038],
        [ 0.2220,  0.3623,  0.9824,  1.1735, -0.3524,  0.2470,  0.7927,  0.5522]],
       requires_grad=True)
Parameter containing:
tensor([[0.8528, 0.0893, 0.1744, 1.0110, 0.2851, 0.6588, 0.8447, 0.6600],
        [0.7796, 0.3680, 0.1076, 0.0693, 0.4302, 0.2653, 0.2448, 0.6005]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0316,  0.6974,  0.2515,  0.2455, -0.8037,  1.6938, -0.3531,  2.4543],
        [ 0.0691, -0.1780,  1.8285,  0.9487,  0.3434,  0.7392,  0.8259,  1.4141]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2518,  0.1528,  2.4803, -0.2280, -0.1724,  1.3135,  0.1731,  0.2050],
        [-0.3929, -1.2180,  1.1780,  0.5724,  1.1811,  1.5087,  0.5006, -0.4990]],
       requires_grad=True)
acc  0 1.793028712272644
acc  1 0.5049993991851807
acc  2 1.1479054689407349
acc  3 0.46377471089363

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 2])
tensor([1, 7])
tensor([2, 5])
tensor([7, 6])
Parameter containing:
tensor([[0.1139, 0.8137, 0.4118, 0.6686, 0.5169, 0.8629, 0.9500, 0.7859],
        [0.0606, 0.5180, 1.3921, 0.7091, 0.3683, 0.3627, 0.1658, 0.2416]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5091,  2.0115,  0.0315,  0.7188, -0.0819,  0.3521,  0.6578,  0.0666],
        [ 0.3096,  0.7953,  0.7739,  0.4762,  0.9086,  0.0178,  0.2515,  1.5301]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9125,  0.6101,  1.1920,  0.0837,  0.0559,  0.7823,  0.1593,  1.0060],
        [ 0.4031,  0.3755,  0.1208,  0.0274,  0.5836,  1.1496, -0.1402,  0.8077]],
       requires_grad=True)
Parameter containing:
tensor([[0.4559, 0.4497, 0.4417, 0.2019, 0.2386, 0.1871, 0.1759, 0.7937],
        [0.8111, 0.7355, 0.5442, 0.1494, 0.6445, 0.2975, 0.9739, 0.6189]],
       requires_grad=True)
acc  0 1.9382646083831787
acc  1 0.7755471467971802
acc  2 0.43928107619285583
acc  3 0.900898277759552
1.0134977772

In [13]:
for D in [2,3,4]:
    for HEAD_NUM in [D, 8]:
        dartslike('js', js_coef=100.0)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM )) # taking half of epoch num for NAS


  0%|          | 0/1000 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM))


  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 0])
Parameter containing:
tensor([[1.3774, 0.4199],
        [0.6922, 0.6499]], requires_grad=True)
Parameter containing:
tensor([[0.9782, 0.0131],
        [0.5450, 0.5052]], requires_grad=True)
acc  0 0.807583212852478
acc  1 1.200205683708191
1.0038944482803345 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[0.4215, 0.2290],
        [0.4389, 0.8283]], requires_grad=True)
Parameter containing:
tensor([[0.2019, 0.0320],
        [0.1326, 0.5243]], requires_grad=True)
acc  0 0.7397341132164001
acc  1 1.358829140663147
1.0492816269397736 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 0.8407,  0.6096],
        [-0.3321,  1.2942]], requires_grad=True)
Parameter containing:
tensor([[ 0.8063,  0.5514],
        [-0.5673,  1.0667]], requires_grad=True)
acc  0 1.5781514644622803
acc  1 1.1657382249832153
1.3719448447227478 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[0.4327, 0.7155],
        [1.4897, 0.0925]], requires_grad=True)
Parameter containing:
tensor([[ 0.1674,  0.4385],
        [ 1.1267, -0.2839]], requires_grad=True)
acc  0 1.475835919380188
acc  1 1.748286485671997
1.6120612025260925 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 0])
Parameter containing:
tensor([[ 1.7824, -1.0613],
        [ 0.9108, -0.2338]], requires_grad=True)
Parameter containing:
tensor([[ 2.2343, -0.6402],
        [ 0.8422, -0.3116]], requires_grad=True)
acc  0 1.7659430503845215
acc  1 1.5661429166793823
1.666042983531952 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[-1.4396,  1.5426],
        [ 4.0504, -2.7070]], requires_grad=True)
Parameter containing:
tensor([[-0.8266,  2.1811],
        [ 3.4725, -2.2485]], requires_grad=True)
acc  0 1.0063658952713013
acc  1 1.3777636289596558
1.1920647621154785 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[ 0.1224,  0.5349],
        [ 1.0581, -0.5987]], requires_grad=True)
Parameter containing:
tensor([[ 0.5599,  0.9714],
        [ 1.4533, -0.2131]], requires_grad=True)
acc  0 1.122082233428955
acc  1 1.0767631530761719
1.0994226932525635 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[-2.1652,  2.4888],
        [-1.9415,  3.1160]], requires_grad=True)
Parameter containing:
tensor([[-1.9198,  2.7808],
        [-2.3225,  2.8061]], requires_grad=True)
acc  0 0.9870089888572693
acc  1 1.110817313194275
1.048913151025772 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[ 0.0509,  0.6358],
        [ 0.6614, -0.2317]], requires_grad=True)
Parameter containing:
tensor([[-0.0218,  0.5673],
        [ 0.9191,  0.0290]], requires_grad=True)
acc  0 1.2983996868133545
acc  1 0.8057369589805603
1.0520683228969574 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[-0.3551,  2.0827],
        [ 1.6476, -0.8457]], requires_grad=True)
Parameter containing:
tensor([[-0.6098,  1.8326],
        [ 1.7968, -0.6956]], requires_grad=True)
acc  0 1.2171478271484375
acc  1 0.9829883575439453
1.1000680923461914 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 0])
Parameter containing:
tensor([[ 0.9583,  0.5417],
        [ 1.3484, -0.2213]], requires_grad=True)
Parameter containing:
tensor([[ 1.0351,  0.6174],
        [ 1.4211, -0.1769]], requires_grad=True)
acc  0 1.0529048442840576
acc  1 0.9384473562240601
0.9956761002540588 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 0.8907,  0.6161],
        [-0.4333,  1.7356]], requires_grad=True)
Parameter containing:
tensor([[ 0.7235,  0.4250],
        [-0.8893,  1.2748]], requires_grad=True)
acc  0 0.7813819646835327
acc  1 1.0783748626708984
0.9298784136772156 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[0.4619, 0.7518],
        [0.8121, 0.8215]], requires_grad=True)
Parameter containing:
tensor([[0.4322, 0.7221],
        [0.5815, 0.5867]], requires_grad=True)
acc  0 1.8813661336898804
acc  1 1.593772053718567
1.7375690937042236 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 1.1183,  0.6552],
        [-0.7059,  2.0172]], requires_grad=True)
Parameter containing:
tensor([[ 0.9723,  0.3723],
        [-0.9342,  1.7941]], requires_grad=True)
acc  0 2.058253526687622
acc  1 1.672442078590393
1.8653478026390076 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[0.7150, 0.7988],
        [0.4911, 0.8079]], requires_grad=True)
Parameter containing:
tensor([[0.5544, 0.6413],
        [0.2170, 0.5324]], requires_grad=True)
acc  0 1.381619930267334
acc  1 2.25333571434021
1.817477822303772 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[-2.0615,  3.0800],
        [ 3.0005, -2.5855]], requires_grad=True)
Parameter containing:
tensor([[-2.0804,  2.7351],
        [ 3.4481, -2.0621]], requires_grad=True)
acc  0 0.966356635093689
acc  1 1.0062401294708252
0.9862983822822571 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 0.6169,  0.2060],
        [-0.2461,  0.9438]], requires_grad=True)
Parameter containing:
tensor([[ 0.9092,  0.4824],
        [-0.3836,  0.7992]], requires_grad=True)
acc  0 1.0487393140792847
acc  1 0.9780340790748596
1.0133866965770721 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[-0.1141,  1.6490],
        [-0.0267,  0.6213]], requires_grad=True)
Parameter containing:
tensor([[-0.0884,  1.6841],
        [ 0.3773,  1.0179]], requires_grad=True)
acc  0 1.6995909214019775
acc  1 1.7360061407089233
1.7177985310554504 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[-0.9938,  1.3281],
        [-2.1053,  3.4625]], requires_grad=True)
Parameter containing:
tensor([[-0.4435,  1.8761],
        [-2.2959,  3.4425]], requires_grad=True)
acc  0 1.3988178968429565
acc  1 1.7674188613891602
1.5831183791160583 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[0.1211, 0.7255],
        [0.3943, 0.7418]], requires_grad=True)
Parameter containing:
tensor([[0.4137, 1.0193],
        [0.6800, 1.0281]], requires_grad=True)
acc  0 1.4929018020629883
acc  1 0.8642711639404297
1.178586483001709 10
[Parameter containing:
tensor([[0.1211, 0.7255],
        [0.3943, 0.7418]], requires_grad=True), Parameter containing:
tensor([[0.4137, 1.0193],
        [0.6800, 1.0281]], requires_grad=True)]
Parameter containing:
tensor([34.1526], requires_grad=True)
Parameter containing:
tensor([60.4791], requires_grad=True)


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([3, 0])
Parameter containing:
tensor([[ 0.9248,  0.5697,  0.5280,  1.1534,  0.2614,  0.5806,  0.6059,  0.5572],
        [ 1.8106,  0.9458, -0.0264,  1.0699,  1.5313,  0.3057, -0.0808, -0.3463]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7995,  0.4462,  0.4053,  1.0338,  0.1292,  0.4568,  0.4953,  0.4270],
        [ 1.5175,  0.6142, -0.3777,  0.7451,  1.2139, -0.0570, -0.4054, -0.7374]],
       requires_grad=True)
acc  0 1.4078452587127686
acc  1 1.146497368812561
1.2771713137626648 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 4])
tensor([2, 4])
Parameter containing:
tensor([[ 0.2566,  0.3154,  0.9644,  0.2434, -0.0894,  0.2076,  0.3696,  0.5409],
        [ 0.5540,  0.8813,  0.8511,  0.2783,  0.8820,  0.5438,  0.6475,  0.1038]],
       requires_grad=True)
Parameter containing:
tensor([[0.3680, 0.4305, 1.0919, 0.3398, 0.0218, 0.3426, 0.4909, 0.6606],
        [0.4698, 0.7797, 0.7726, 0.1767, 0.7814, 0.4524, 0.5254, 0.0413]],
       requires_grad=True)
acc  0 0.9254566431045532
acc  1 1.4205979108810425
1.1730272769927979 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 2])
tensor([4, 2])
Parameter containing:
tensor([[ 0.4290, -0.0817,  1.0708,  0.0759,  1.3562,  0.6699,  0.4490,  0.3004],
        [ 0.5688,  1.0452,  1.2762,  1.0228,  1.0930,  0.3861, -0.0642,  0.2030]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5460,  0.0332,  1.1878,  0.1846,  1.4577,  0.7880,  0.6158,  0.4168],
        [ 0.3381,  0.8292,  0.9544,  0.7780,  0.8682,  0.1653, -0.2566,  0.1161]],
       requires_grad=True)
acc  0 1.4694401025772095
acc  1 1.3532847166061401
1.4113624095916748 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 7])
tensor([1, 7])
Parameter containing:
tensor([[ 0.4561,  1.1417,  0.4568,  0.8746,  0.1765,  0.4735,  0.3291,  0.2709],
        [ 0.3412,  0.6496, -0.0607,  0.5559,  0.4461, -0.0730,  0.0170,  1.2028]],
       requires_grad=True)
Parameter containing:
tensor([[0.5091, 1.2002, 0.5122, 0.9217, 0.2630, 0.5307, 0.3856, 0.3138],
        [0.4840, 0.7949, 0.0717, 0.6981, 0.5933, 0.0384, 0.1598, 1.3462]],
       requires_grad=True)
acc  0 1.3871103525161743
acc  1 0.9920561909675598
1.189583271741867 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 6])
tensor([1, 6])
Parameter containing:
tensor([[ 1.1666,  1.3912, -0.0313, -0.4284, -0.0796,  0.6847,  0.3371,  0.4823],
        [ 0.8852,  0.5709, -0.3417,  0.4563, -0.2668, -0.0968,  4.0555, -0.6790]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.2477,  1.4681,  0.0249, -0.2992,  0.0233,  0.7893,  0.4406,  0.5775],
        [ 1.1188,  0.7640, -0.2118,  0.6357, -0.0919, -0.0130,  4.2368, -0.7165]],
       requires_grad=True)
acc  0 1.047339916229248
acc  1 1.3839788436889648
1.2156593799591064 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 6])
tensor([6, 6])
Parameter containing:
tensor([[ 0.4905, -0.7667,  0.0094, -1.5052,  0.1524, -0.1258,  2.9455,  2.8246],
        [ 2.1357,  1.8134, -0.6236, -1.4198,  0.4560, -0.8631,  2.8641, -1.0661]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1813, -0.9957, -0.1994, -0.7493, -0.1568, -0.4492,  2.6864,  2.5728],
        [ 2.1478,  1.8091, -0.6834, -1.4920,  0.4448, -0.8032,  2.8713, -0.6290]],
       requires_grad=True)
acc  0 1.02996826171875
acc  1 0.8994452953338623
0.9647067785263062 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 5])
tensor([4, 5])
Parameter containing:
tensor([[0.5759, 0.4628, 0.3309, 0.4848, 0.7300, 0.2861, 0.4206, 0.5971],
        [0.6327, 0.3654, 0.2818, 0.6774, 0.5064, 0.8610, 0.3328, 0.1834]],
       requires_grad=True)
Parameter containing:
tensor([[0.6131, 0.4980, 0.3523, 0.5107, 0.7599, 0.3127, 0.4508, 0.6250],
        [0.7989, 0.5389, 0.4520, 0.8590, 0.6805, 1.0311, 0.4933, 0.3487]],
       requires_grad=True)
acc  0 1.5913254022598267
acc  1 1.392852544784546
1.4920889735221863 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 6])
tensor([6, 6])
Parameter containing:
tensor([[-0.5015,  0.3668,  0.6504, -1.6107,  0.1709,  0.9177,  3.6300, -0.7807],
        [ 0.1849, -1.0331, -1.6077,  2.4962, -1.3640,  2.8157,  3.1786, -0.5511]],
       requires_grad=True)
Parameter containing:
tensor([[-0.8661,  0.2662,  0.9892, -1.3422,  0.4073,  1.4054,  4.1798, -0.5568],
        [ 0.2166, -1.0280, -1.9080,  2.4787, -1.4320,  2.8263,  3.1938, -0.6425]],
       requires_grad=True)
acc  0 0.893079400062561
acc  1 0.9940889477729797
0.9435841739177704 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 5])
tensor([5, 5])
Parameter containing:
tensor([[ 0.3574, -0.1503,  0.7243,  0.1150,  0.3505,  0.7335,  0.3833,  0.0964],
        [ 0.2015,  1.2259,  0.2599,  0.8444, -0.3724,  1.4498,  0.8994,  0.2001]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4426, -0.0844,  0.8109,  0.2057,  0.4221,  0.8277,  0.4737,  0.1955],
        [-0.0925,  1.0251,  0.0515,  0.6283, -0.6229,  1.2531,  0.6818, -0.0318]],
       requires_grad=True)
acc  0 1.5209529399871826
acc  1 0.8262919783592224
1.1736224591732025 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([3, 0])
Parameter containing:
tensor([[ 0.3582,  0.3567,  0.9845,  1.1340,  0.5590,  0.6358,  0.6070,  0.2183],
        [ 1.9741,  0.9054, -0.6899, -0.4921,  0.0711,  0.9052,  0.1751,  0.6800]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3735,  0.3696,  0.9931,  1.1495,  0.5773,  0.6457,  0.6326,  0.2387],
        [ 1.9085,  0.8435, -0.6108, -0.5488,  0.0052,  0.8504,  0.1143,  0.6129]],
       requires_grad=True)
acc  0 2.680891275405884
acc  1 2.256077527999878
2.468484401702881 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 2])
tensor([5, 2])
Parameter containing:
tensor([[ 0.3575,  0.7658,  0.1276,  0.3204,  1.0927,  1.9902, -0.1656,  1.0352],
        [ 0.7542,  0.1398,  1.2630, -0.0464,  0.5234,  0.9233,  0.8478,  0.3290]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3551,  0.7731,  0.1357,  0.3258,  1.1062,  1.9930, -0.1592,  1.0523],
        [ 0.7034,  0.0561,  1.1760, -0.1310,  0.4530,  0.8711,  0.7892,  0.2519]],
       requires_grad=True)
acc  0 1.2448151111602783
acc  1 1.3884217739105225
1.3166184425354004 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 2])
tensor([5, 2])
Parameter containing:
tensor([[0.7299, 0.4846, 0.4300, 0.5794, 0.5955, 0.7987, 0.2602, 0.4650],
        [0.0648, 0.2480, 1.1431, 0.6609, 0.4408, 0.2281, 0.6458, 1.0819]],
       requires_grad=True)
Parameter containing:
tensor([[0.5937, 0.3167, 0.2829, 0.4365, 0.4523, 0.6770, 0.1051, 0.3275],
        [0.0439, 0.2715, 1.1339, 0.6110, 0.4179, 0.2046, 0.6275, 1.0534]],
       requires_grad=True)
acc  0 0.8013477921485901
acc  1 1.1626965999603271
0.9820221960544586 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([2, 1])
Parameter containing:
tensor([[ 0.7314,  0.5430,  0.9638,  0.3163,  0.2448,  0.8576,  0.6576,  0.8555],
        [ 0.0052,  0.7566,  0.3641, -0.0382,  0.4772,  0.7482,  0.2582,  0.7164]],
       requires_grad=True)
Parameter containing:
tensor([[0.6774, 0.4768, 0.9013, 0.2517, 0.1898, 0.7980, 0.5940, 0.7939],
        [0.0781, 0.8309, 0.4448, 0.0380, 0.5517, 0.8258, 0.3366, 0.7962]],
       requires_grad=True)
acc  0 1.2320919036865234
acc  1 0.9097179770469666
1.070904940366745 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 7])
tensor([4, 7])
Parameter containing:
tensor([[ 0.8205,  0.0540,  0.8302,  0.3501,  1.0147,  0.6188,  0.5581,  1.0431],
        [ 1.4443,  0.2170, -0.0178,  0.0916,  0.3111, -0.8047,  1.0137,  1.9991]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6366, -0.1396,  0.6408,  0.1605,  0.8855,  0.4982,  0.3600,  0.8531],
        [ 1.2592,  0.0609, -0.1408, -0.0616,  0.1419, -1.1028,  0.8644,  1.8514]],
       requires_grad=True)
acc  0 0.19238586723804474
acc  1 0.16694702208042145
0.1796664446592331 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 2])
tensor([3, 2])
Parameter containing:
tensor([[ 1.0420,  0.6545,  0.3110,  1.1300,  0.4811,  0.5687,  0.2823,  0.2883],
        [-0.1957,  0.2281,  1.0349,  0.3944,  0.9689,  0.3832,  0.7668,  0.4343]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7927,  0.4041,  0.0498,  0.8846,  0.2327,  0.3167,  0.0427,  0.0347],
        [-0.1391,  0.2940,  1.0956,  0.4631,  1.0357,  0.4589,  0.8563,  0.4903]],
       requires_grad=True)
acc  0 1.4398164749145508
acc  1 2.117105722427368
1.7784610986709595 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 3.4986, -0.6542,  2.3373, -1.0131, -0.7636,  0.4416,  0.3972, -0.7697],
        [ 0.6052,  5.7187, -0.3180, -0.3466,  1.4591,  0.0503, -2.2914, -0.3734]],
       requires_grad=True)
Parameter containing:
tensor([[ 3.3482, -0.7104,  2.2383, -1.1012, -0.8423,  0.3329,  0.3240, -0.8533],
        [ 0.3180,  5.4611, -0.5539, -0.5893,  1.1335, -0.1480, -1.6399, -0.1838]],
       requires_grad=True)
acc  0 0.9764965176582336
acc  1 1.234257459640503
1.1053769886493683 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 5])
tensor([0, 5])
Parameter containing:
tensor([[ 1.2598,  0.3695,  0.2489, -0.1905,  0.9924,  0.0596,  0.6537, -0.0656],
        [ 1.0584,  0.6259,  0.8372, -0.3983,  0.0219,  1.7859,  0.1651,  0.3269]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4089,  0.5227,  0.3960, -0.0317,  1.1384,  0.2278,  0.7898,  0.0536],
        [ 1.0027,  0.5660,  0.7771, -0.4737, -0.0307,  1.7581,  0.1009,  0.2668]],
       requires_grad=True)
acc  0 1.0969772338867188
acc  1 1.0449726581573486
1.0709749460220337 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 5])
tensor([6, 5])
Parameter containing:
tensor([[ 0.9991,  0.7512, -0.0408,  0.5719,  0.7227,  0.8300,  1.2338,  0.0525],
        [ 0.8577, -0.1774,  0.6572,  0.5221,  0.4672,  1.0426,  0.7208,  0.4408]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8264,  0.5845, -0.2190,  0.4029,  0.4977,  0.6570,  1.0699, -0.1366],
        [ 0.6474, -0.3645,  0.4588,  0.2939,  0.2774,  0.8357,  0.4736,  0.2303]],
       requires_grad=True)
acc  0 1.586712121963501
acc  1 1.419938564300537
1.503325343132019 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 6])
tensor([3, 6])
Parameter containing:
tensor([[-0.2823, -0.2150,  1.1510,  1.9780,  0.0042,  0.5525,  0.2211,  0.8613],
        [-0.1736, -0.5423, -1.0941,  0.9935, -0.4783,  0.1674,  3.4146,  1.6926]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2304, -0.2059,  1.1911,  1.9924,  0.0047,  0.6041,  0.2509,  0.9693],
        [-0.2823, -0.6808, -1.2640,  0.8554, -0.6058,  0.0097,  3.2371,  1.5961]],
       requires_grad=True)
acc  0 1.0481905937194824
acc  1 1.017356038093567
1.0327733159065247 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 2])
tensor([5, 2])
Parameter containing:
tensor([[ 0.3256,  1.0624,  0.1518,  1.0675,  0.1726,  1.2567,  0.2995,  0.7877],
        [ 0.6875,  0.7552,  1.7875,  0.0223,  0.3856, -0.2189,  0.2688,  0.1303]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2333,  0.9456,  0.0644,  0.9416,  0.0627,  1.1628,  0.1679,  0.6874],
        [ 0.8440,  0.9139,  1.9564,  0.1680,  0.5427, -0.0490,  0.4130,  0.2740]],
       requires_grad=True)
acc  0 1.6261450052261353
acc  1 1.0853201150894165
1.3557325601577759 10
[Parameter containing:
tensor([[ 0.3256,  1.0624,  0.1518,  1.0675,  0.1726,  1.2567,  0.2995,  0.7877],
        [ 0.6875,  0.7552,  1.7875,  0.0223,  0.3856, -0.2189,  0.2688,  0.1303]],
       requires_grad=True), Parameter containing:
tensor([[ 0.2333,  0.9456,  0.0644,  0.9416,  0.0627,  1.1628,  0.1679,  0.6874],
        [ 0.8440,  0.9139,  1.9564,  0.1680,  0.5427, -0.0490,  0.4130,  0.2740]],
       requires_grad=True)]
Parameter containing:
tensor([116.678

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([2, 1])
tensor([2, 1])
Parameter containing:
tensor([[0.5979, 0.7046, 0.8777],
        [0.4090, 0.9906, 0.5510]], requires_grad=True)
Parameter containing:
tensor([[0.5335, 0.6443, 0.8132],
        [0.3130, 0.8934, 0.4550]], requires_grad=True)
Parameter containing:
tensor([[0.5950, 0.7106, 0.8725],
        [0.3907, 0.9664, 0.5317]], requires_grad=True)
acc  0 1.3849458694458008
acc  1 1.4592771530151367
acc  2 1.207685947418213
1.35063632329305 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
Parameter containing:
tensor([[-0.6305,  2.0457,  0.1563],
        [ 0.5244, -1.5001,  1.5558]], requires_grad=True)
Parameter containing:
tensor([[-0.6264,  2.0619, -0.0310],
        [ 1.0340, -1.1612,  1.8255]], requires_grad=True)
Parameter containing:
tensor([[-0.6374,  2.0619,  0.1656],
        [ 0.7954, -1.2186,  1.8327]], requires_grad=True)
acc  0 1.2031519412994385
acc  1 1.5571820735931396
acc  2 1.5462380647659302
1.4355240265528362 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
Parameter containing:
tensor([[ 1.0978,  0.8316,  0.3486],
        [ 0.4197, -0.4846,  1.5569]], requires_grad=True)
Parameter containing:
tensor([[ 0.8322,  0.5642,  0.0865],
        [ 0.7979, -0.1134,  1.9364]], requires_grad=True)
Parameter containing:
tensor([[ 0.8699,  0.5951,  0.1259],
        [ 0.4522, -0.4561,  1.5860]], requires_grad=True)
acc  0 1.6055883169174194
acc  1 2.1524815559387207
acc  2 2.0920934677124023
1.950054446856181 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[ 0.5142,  0.0128,  1.3047],
        [ 1.2570, -0.3779,  0.6257]], requires_grad=True)
Parameter containing:
tensor([[ 0.4521, -0.0670,  1.2418],
        [ 0.8533, -0.7831,  0.2203]], requires_grad=True)
Parameter containing:
tensor([[ 0.3715, -0.0838,  1.1823],
        [ 1.3882, -0.2556,  0.7376]], requires_grad=True)
acc  0 1.3661874532699585
acc  1 0.8902943134307861
acc  2 1.003743052482605
1.08674160639445 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[ 0.3780,  0.6017,  0.2588],
        [-0.2226,  1.3333,  0.6430]], requires_grad=True)
Parameter containing:
tensor([[ 0.1974,  0.4062,  0.0747],
        [-0.3788,  1.1880,  0.5003]], requires_grad=True)
Parameter containing:
tensor([[ 0.7112,  0.9449,  0.5903],
        [-0.0392,  1.5475,  0.8438]], requires_grad=True)
acc  0 1.7028684616088867
acc  1 1.6759334802627563
acc  2 1.847232460975647
1.7420114676157634 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
Parameter containing:
tensor([[-1.1285,  2.2783, -0.5792],
        [-1.0397, -0.7068,  3.9766]], requires_grad=True)
Parameter containing:
tensor([[-0.8528,  2.5088, -0.2850],
        [-1.2763, -0.7248,  4.0227]], requires_grad=True)
Parameter containing:
tensor([[-1.0830,  2.2762, -0.5261],
        [-1.5173, -0.9860,  3.7433]], requires_grad=True)
acc  0 1.0558340549468994
acc  1 1.133865237236023
acc  2 0.6908763647079468
0.9601918856302897 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[0.2057, 0.2958, 0.3986],
        [0.6260, 0.5591, 0.5628]], requires_grad=True)
Parameter containing:
tensor([[0.5713, 0.6620, 0.7676],
        [0.4231, 0.3560, 0.3589]], requires_grad=True)
Parameter containing:
tensor([[0.4720, 0.5614, 0.6651],
        [0.4738, 0.4088, 0.4104]], requires_grad=True)
acc  0 1.0142258405685425
acc  1 1.0013022422790527
acc  2 1.1779961585998535
1.064508080482483 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[-0.0214,  0.0956,  0.4972],
        [ 0.9322,  0.3063,  0.5493]], requires_grad=True)
Parameter containing:
tensor([[0.2549, 0.3735, 0.7741],
        [0.6860, 0.0529, 0.3004]], requires_grad=True)
Parameter containing:
tensor([[0.2099, 0.3293, 0.7259],
        [0.8066, 0.1759, 0.4214]], requires_grad=True)
acc  0 0.6552903056144714
acc  1 1.062591791152954
acc  2 1.2120329141616821
0.9766383369763693 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[0.1649, 0.6002, 0.3109],
        [0.3554, 0.0812, 0.1494]], requires_grad=True)
Parameter containing:
tensor([[0.2854, 0.7558, 0.4996],
        [0.6547, 0.3893, 0.4471]], requires_grad=True)
Parameter containing:
tensor([[0.5087, 0.9398, 0.6537],
        [0.6525, 0.3755, 0.4428]], requires_grad=True)
acc  0 1.1592671871185303
acc  1 1.1455594301223755
acc  2 1.0612517595291138
1.1220261255900066 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
Parameter containing:
tensor([[ 0.3680,  1.2975,  0.5085],
        [-0.5181, -0.0056,  2.1020]], requires_grad=True)
Parameter containing:
tensor([[ 0.2631,  1.2019,  0.3947],
        [-0.7479, -0.2424,  1.8670]], requires_grad=True)
Parameter containing:
tensor([[-0.0296,  1.0466,  0.1725],
        [-0.6738, -0.1633,  1.9426]], requires_grad=True)
acc  0 1.2969533205032349
acc  1 1.1298152208328247
acc  2 0.7620863318443298
1.0629516243934631 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[ 0.1354,  1.7220,  0.3289],
        [ 1.2974, -0.1557,  0.9517]], requires_grad=True)
Parameter containing:
tensor([[-0.1263,  1.4814,  0.0500],
        [ 1.1172, -0.3341,  0.7592]], requires_grad=True)
Parameter containing:
tensor([[ 0.3013,  1.9087,  0.5099],
        [ 1.1422, -0.3049,  0.7957]], requires_grad=True)
acc  0 1.3070069551467896
acc  1 1.3433541059494019
acc  2 1.073365569114685
1.2412422100702922 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[ 1.2326,  1.5711, -0.6385],
        [ 0.2662,  1.6870, -0.1608]], requires_grad=True)
Parameter containing:
tensor([[ 0.9455,  1.2787, -0.8607],
        [ 0.1517,  1.6059, -0.2562]], requires_grad=True)
Parameter containing:
tensor([[ 1.0509,  1.3945, -0.7961],
        [ 0.0689,  1.4799, -0.3714]], requires_grad=True)
acc  0 1.1026768684387207
acc  1 0.6990684270858765
acc  2 1.2841286659240723
1.0286246538162231 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([2, 1])
tensor([2, 1])
Parameter containing:
tensor([[0.7163, 0.5752, 0.9115],
        [0.4454, 0.8751, 0.4781]], requires_grad=True)
Parameter containing:
tensor([[0.4396, 0.2965, 0.6350],
        [0.1033, 0.5357, 0.1323]], requires_grad=True)
Parameter containing:
tensor([[0.5727, 0.4317, 0.7659],
        [0.4931, 0.9283, 0.5259]], requires_grad=True)
acc  0 1.485708236694336
acc  1 0.7999261617660522
acc  2 1.2035191059112549
1.163051168123881 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
Parameter containing:
tensor([[ 1.4735,  1.1083, -0.0644],
        [ 0.3986,  0.6567,  0.8568]], requires_grad=True)
Parameter containing:
tensor([[ 1.2320,  0.8769, -0.2814],
        [-0.1245,  0.1400,  0.3323]], requires_grad=True)
Parameter containing:
tensor([[ 1.3574,  1.0015, -0.2204],
        [ 0.3733,  0.6418,  0.8346]], requires_grad=True)
acc  0 0.8996307849884033
acc  1 1.444575309753418
acc  2 1.226897120475769
1.1903677384058635 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([2, 1])
tensor([2, 1])
Parameter containing:
tensor([[0.4417, 0.5570, 0.8687],
        [0.1639, 1.3153, 0.6620]], requires_grad=True)
Parameter containing:
tensor([[ 0.3139,  0.4302,  0.7436],
        [-0.0424,  1.1133,  0.4535]], requires_grad=True)
Parameter containing:
tensor([[ 0.1819,  0.3120,  0.6326],
        [-0.0036,  1.2037,  0.5292]], requires_grad=True)
acc  0 0.9929911494255066
acc  1 1.1549625396728516
acc  2 1.159103274345398
1.1023523211479187 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([2, 1])
tensor([2, 1])
Parameter containing:
tensor([[ 0.0121, -0.9329,  1.9414],
        [ 0.2959,  3.1021, -2.3304]], requires_grad=True)
Parameter containing:
tensor([[ 0.4141, -0.3901,  2.2467],
        [ 0.5285,  3.1222, -1.8145]], requires_grad=True)
Parameter containing:
tensor([[ 0.0822, -0.7221,  1.9154],
        [ 0.3233,  2.9204, -2.0202]], requires_grad=True)
acc  0 0.8079759478569031
acc  1 1.1399898529052734
acc  2 1.1030648946762085
1.017010231812795 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 2])
tensor([2, 2])
Parameter containing:
tensor([[ 0.1030,  0.3292,  0.8994],
        [ 0.6481, -0.8456,  1.7779]], requires_grad=True)
Parameter containing:
tensor([[-0.0972,  0.1474,  0.7363],
        [ 0.7390, -0.9166,  1.8472]], requires_grad=True)
Parameter containing:
tensor([[ 0.2981,  0.4821,  1.0841],
        [ 0.6396, -0.8723,  1.8464]], requires_grad=True)
acc  0 0.7801899909973145
acc  1 0.7738405466079712
acc  2 0.901419997215271
0.8184835116068522 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
Parameter containing:
tensor([[ 0.4792,  1.0143,  0.4562],
        [-0.0119,  0.6596,  1.1277]], requires_grad=True)
Parameter containing:
tensor([[0.5664, 1.1162, 0.5474],
        [0.0621, 0.7282, 1.1978]], requires_grad=True)
Parameter containing:
tensor([[ 0.2141,  0.7332,  0.1964],
        [-0.0987,  0.5671,  1.0374]], requires_grad=True)
acc  0 1.2197290658950806
acc  1 0.8155718445777893
acc  2 1.1673511266708374
1.0675506790479024 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 2])
tensor([2, 2])
Parameter containing:
tensor([[-0.1086, -0.2641,  1.5651],
        [ 0.7701, -0.2994,  1.4610]], requires_grad=True)
Parameter containing:
tensor([[-0.0766, -0.2389,  1.6066],
        [ 0.8996, -0.1645,  1.5972]], requires_grad=True)
Parameter containing:
tensor([[-0.1253, -0.2652,  1.5716],
        [ 0.4608, -0.6586,  1.1842]], requires_grad=True)
acc  0 1.3006887435913086
acc  1 1.0396902561187744
acc  2 0.7281156182289124
1.0228315393129985 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 2])
tensor([2, 2])
Parameter containing:
tensor([[-0.1078, -0.4433,  1.8232],
        [-1.1069, -0.9509,  4.2013]], requires_grad=True)
Parameter containing:
tensor([[ 0.0667, -0.2907,  1.9531],
        [-0.8982, -1.2534,  4.7117]], requires_grad=True)
Parameter containing:
tensor([[-0.1750, -0.4550,  1.7978],
        [-1.2370, -1.0698,  4.0759]], requires_grad=True)
acc  0 1.5060064792633057
acc  1 1.282974123954773
acc  2 1.0644400119781494
1.2844735383987427 10
[Parameter containing:
tensor([[-0.1078, -0.4433,  1.8232],
        [-1.1069, -0.9509,  4.2013]], requires_grad=True), Parameter containing:
tensor([[ 0.0667, -0.2907,  1.9531],
        [-0.8982, -1.2534,  4.7117]], requires_grad=True), Parameter containing:
tensor([[-0.1750, -0.4550,  1.7978],
        [-1.2370, -1.0698,  4.0759]], requires_grad=True)]
Parameter containing:
tensor([14.1254], requires_grad=True)
Parameter containing:
tensor([62.6617], requires_grad=True)
Parameter containing:
tensor([

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 2])
tensor([7, 2])
tensor([7, 2])
Parameter containing:
tensor([[0.8242, 0.5862, 0.4503, 0.9129, 0.3470, 0.7780, 0.3061, 0.9763],
        [0.9763, 0.3451, 1.0092, 0.5952, 0.6171, 0.3302, 0.7636, 0.5731]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7010,  0.4627,  0.3299,  0.7856,  0.2232,  0.6565,  0.1830,  0.8510],
        [ 0.6413,  0.0126,  0.6734,  0.2516,  0.2779, -0.0092,  0.4354,  0.2300]],
       requires_grad=True)
Parameter containing:
tensor([[0.8011, 0.5616, 0.4273, 0.8871, 0.3242, 0.7567, 0.2838, 0.9530],
        [0.9654, 0.3323, 1.0023, 0.5766, 0.6013, 0.3117, 0.7536, 0.5610]],
       requires_grad=True)
acc  0 0.9468544721603394
acc  1 1.0394651889801025
acc  2 0.8458674550056458
0.9440623720486959 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 4])
tensor([6, 4])
tensor([6, 4])
Parameter containing:
tensor([[ 0.0757, -0.9792,  0.5446,  0.1044, -0.0211, -0.4844,  3.2844,  0.2841],
        [-0.0252, -0.1357, -0.1781,  0.2205,  3.8302, -0.0875,  0.4272,  0.6904]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1440, -1.0105,  0.6894,  0.2598,  0.0848, -0.3166,  3.4530,  0.4422],
        [-0.1659, -0.2131, -0.3286,  0.1623,  3.7589, -0.1930,  0.3461,  0.6323]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3152, -0.7837,  0.7867,  0.2995,  0.2076, -0.2379,  3.5451,  0.5269],
        [-0.0959, -0.2173, -0.2427,  0.1434,  3.7385, -0.2023,  0.3467,  0.5706]],
       requires_grad=True)
acc  0 1.293015956878662
acc  1 1.3974380493164062
acc  2 1.3685274124145508
1.3529938062032063 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 0])
tensor([4, 0])
tensor([4, 0])
Parameter containing:
tensor([[ 0.8131,  1.1189, -0.3943,  0.1461,  1.5958,  0.7496,  0.1204,  0.1199],
        [ 1.4510, -0.2001,  0.7378,  0.1020,  1.2645, -0.0426,  0.8679,  1.3504]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9397,  1.2434, -0.2916,  0.3125,  1.7208,  0.8778,  0.2131,  0.2142],
        [ 1.2274, -0.4150,  0.5186, -0.1239,  1.0443, -0.2438,  0.6536,  1.1314]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7526,  1.0966, -0.4921,  0.2139,  1.6011,  0.7300, -0.0328, -0.0711],
        [ 1.1977, -0.5045,  0.4556, -0.2743,  0.9501, -0.5186,  0.6338,  1.1338]],
       requires_grad=True)
acc  0 1.5314480066299438
acc  1 1.633302927017212
acc  2 1.352307677268982
1.5056862036387126 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 7])
tensor([1, 7])
tensor([1, 7])
Parameter containing:
tensor([[0.7135, 0.7318, 0.4068, 0.6956, 0.3772, 0.3041, 0.3666, 0.5836],
        [0.1042, 0.5148, 0.2789, 0.3454, 0.1181, 0.5935, 0.3185, 0.8053]],
       requires_grad=True)
Parameter containing:
tensor([[0.7706, 0.8199, 0.4565, 0.7508, 0.4368, 0.3583, 0.3995, 0.6439],
        [0.2384, 0.6541, 0.4163, 0.4837, 0.2612, 0.7347, 0.4567, 0.9413]],
       requires_grad=True)
Parameter containing:
tensor([[0.6709, 0.6839, 0.3610, 0.6620, 0.3256, 0.2610, 0.3198, 0.5424],
        [0.0801, 0.4981, 0.2603, 0.3269, 0.1017, 0.5724, 0.2961, 0.7854]],
       requires_grad=True)
acc  0 1.2923012971878052
acc  1 0.7545924782752991
acc  2 0.8959580659866333
0.9809506138165792 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 2])
tensor([3, 4])
tensor([3, 4])
Parameter containing:
tensor([[0.6668, 0.4394, 0.1538, 0.7779, 0.3813, 0.5721, 0.4708, 0.0607],
        [0.1519, 0.1994, 1.1318, 0.6389, 1.1193, 0.3503, 0.2890, 0.7028]],
       requires_grad=True)
Parameter containing:
tensor([[0.7589, 0.5428, 0.2485, 0.8599, 0.4699, 0.6672, 0.5700, 0.1551],
        [0.2970, 0.3301, 1.2715, 0.7857, 1.2769, 0.4863, 0.4282, 0.8463]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6877,  0.4786,  0.1714,  0.8097,  0.3824,  0.5916,  0.5066,  0.0467],
        [-0.0285,  0.0017,  0.9450,  0.4690,  0.9543,  0.1629,  0.1038,  0.5242]],
       requires_grad=True)
acc  0 1.3239158391952515
acc  1 1.1566804647445679
acc  2 1.5060590505599976
1.3288851181666057 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 7])
tensor([4, 7])
tensor([4, 7])
Parameter containing:
tensor([[ 1.0234e-03,  4.2302e-01,  7.8797e-01,  4.1827e-01,  2.4946e+00,
         -4.6211e-01,  1.5486e-01,  2.0717e-01],
        [ 1.1631e+00,  1.4219e+00, -1.2406e+00, -5.6713e-01, -5.9879e-01,
          1.3515e-01,  1.2435e+00,  1.7395e+00]], requires_grad=True)
Parameter containing:
tensor([[-0.1467,  0.2951,  0.6300,  0.2473,  2.3519, -0.5573, -0.0035,  0.0731],
        [ 1.1840,  1.4755, -1.0931, -0.4896, -0.5678,  0.1492,  1.2712,  1.7359]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0878,  0.3230,  0.6970,  0.3163,  2.3765, -0.4919,  0.0727,  0.1103],
        [ 1.0878,  1.3199, -1.1754, -0.5484, -0.6078,  0.1013,  1.2054,  1.6494]],
       requires_grad=True)
acc  0 0.88565593957901
acc  1 1.0526036024093628
acc  2 1.0396920442581177
0.9926505287488302 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 0])
tensor([4, 0])
tensor([4, 0])
Parameter containing:
tensor([[0.2431, 0.2474, 0.4870, 0.4537, 0.7766, 0.6448, 0.3625, 0.6731],
        [0.9500, 0.2889, 0.0520, 0.5451, 0.3100, 0.8655, 0.7180, 0.1115]],
       requires_grad=True)
Parameter containing:
tensor([[0.2724, 0.2762, 0.4988, 0.4884, 0.8115, 0.6825, 0.3928, 0.7001],
        [1.1163, 0.4585, 0.2229, 0.7161, 0.4795, 1.0297, 0.8803, 0.2991]],
       requires_grad=True)
Parameter containing:
tensor([[0.3415, 0.3467, 0.5956, 0.5510, 0.8773, 0.7480, 0.4636, 0.7755],
        [1.1377, 0.4755, 0.2424, 0.7342, 0.5116, 1.0486, 0.9054, 0.3137]],
       requires_grad=True)
acc  0 1.2815098762512207
acc  1 1.2071973085403442
acc  2 1.3612674474716187
1.2833248774210613 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 7])
tensor([3, 7])
tensor([3, 7])
Parameter containing:
tensor([[-0.0477,  0.5561,  0.0016,  0.6356,  0.3017,  0.3652,  0.4495,  0.5808],
        [ 0.7092,  0.3425,  0.2753,  0.5515,  0.3180,  0.4294,  0.6041,  0.8895]],
       requires_grad=True)
Parameter containing:
tensor([[0.1603, 0.7610, 0.2121, 0.8338, 0.5065, 0.5749, 0.6572, 0.7771],
        [0.6551, 0.2902, 0.2211, 0.4969, 0.2684, 0.3746, 0.5574, 0.8411]],
       requires_grad=True)
Parameter containing:
tensor([[0.0806, 0.6877, 0.1270, 0.7665, 0.4299, 0.4939, 0.5788, 0.7106],
        [0.6195, 0.2509, 0.1862, 0.4619, 0.2173, 0.3493, 0.5148, 0.8028]],
       requires_grad=True)
acc  0 2.0302228927612305
acc  1 1.7713088989257812
acc  2 2.1321146488189697
1.9778821468353271 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([3, 0])
tensor([3, 0])
Parameter containing:
tensor([[ 0.2968,  0.0996,  0.1412,  0.8237,  0.4161,  0.4095,  0.2091,  0.2140],
        [ 1.1148,  0.5856,  0.5720,  0.9738,  0.0960,  0.7144,  0.6729, -0.0207]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3773,  0.1863,  0.2255,  0.9171,  0.5059,  0.4887,  0.2937,  0.2992],
        [ 0.8855,  0.3491,  0.3476,  0.7524, -0.1149,  0.4828,  0.4430, -0.2529]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4867,  0.2808,  0.3382,  1.0095,  0.6024,  0.5963,  0.3953,  0.4020],
        [ 0.8913,  0.3583,  0.3418,  0.7499, -0.1074,  0.4875,  0.4526, -0.2435]],
       requires_grad=True)
acc  0 1.2521907091140747
acc  1 1.1936591863632202
acc  2 1.1427068710327148
1.19618558883667 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[ 0.3633,  1.6595,  0.8635, -0.5432,  0.9682,  0.0675, -0.0151,  1.4897],
        [-0.4534,  2.5932,  0.7541, -0.8770, -0.6832,  1.0324,  0.6358,  0.5270]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3974,  1.6808,  0.8630, -0.4986,  0.9702,  0.0774, -0.0038,  1.4933],
        [-0.4935,  2.5409,  0.7034, -0.9380, -0.7118,  0.9807,  0.6124,  0.4813]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2716,  1.5617,  0.7184, -0.6183,  0.8241, -0.0433, -0.1214,  1.3847],
        [-1.2709,  2.7175,  0.8953, -1.1038, -0.5737,  1.1750,  0.9880,  0.6698]],
       requires_grad=True)
acc  0 1.320845365524292
acc  1 1.1326897144317627
acc  2 0.6674624085426331
1.0403324961662292 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 3])
tensor([4, 3])
tensor([4, 3])
Parameter containing:
tensor([[ 0.7735,  0.9602,  0.6840,  0.8329,  1.0727,  0.4709,  0.1479,  0.5817],
        [-0.7109, -0.3569, -0.3232,  2.0724,  1.3613,  0.9960,  1.4177,  0.2777]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7668,  0.9752,  0.6697,  0.8659,  1.1012,  0.4800,  0.1357,  0.5874],
        [-0.8805, -0.4493, -0.4427,  2.0247,  1.3232,  0.9647,  1.3807,  0.2488]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4979,  0.7139,  0.4027,  0.5850,  0.8119,  0.2016, -0.1228,  0.2916],
        [-0.8538, -0.4966, -0.4767,  1.9026,  1.1970,  0.8351,  1.2635,  0.1137]],
       requires_grad=True)
acc  0 1.2552595138549805
acc  1 1.2956440448760986
acc  2 0.9477642774581909
1.16622261206309 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 6])
tensor([0, 6])
tensor([0, 6])
Parameter containing:
tensor([[0.6858, 0.6818, 0.5141, 0.4244, 0.5135, 0.5277, 0.4329, 0.5630],
        [0.6935, 0.2945, 0.5107, 0.8102, 0.2899, 0.3113, 0.9620, 0.6411]],
       requires_grad=True)
Parameter containing:
tensor([[0.5495, 0.5398, 0.3714, 0.2764, 0.3776, 0.3761, 0.2843, 0.4165],
        [0.7057, 0.2840, 0.5000, 0.7950, 0.2149, 0.3006, 0.9310, 0.6324]],
       requires_grad=True)
Parameter containing:
tensor([[0.6673, 0.6562, 0.4961, 0.4018, 0.5064, 0.5059, 0.4048, 0.5379],
        [0.5661, 0.1834, 0.3932, 0.6887, 0.1570, 0.1949, 0.8270, 0.5245]],
       requires_grad=True)
acc  0 0.9695658087730408
acc  1 0.8407106995582581
acc  2 1.4314461946487427
1.0805742343266804 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 7])
tensor([2, 7])
tensor([2, 7])
Parameter containing:
tensor([[ 0.5265,  0.8060,  0.9123,  0.4735,  0.4316,  0.6968,  0.7772,  0.5459],
        [ 0.3538,  0.7930, -0.0166,  0.1690,  0.5014,  0.3164,  0.3627,  0.8080]],
       requires_grad=True)
Parameter containing:
tensor([[0.4520, 0.7445, 0.8503, 0.4107, 0.3716, 0.6379, 0.7128, 0.5032],
        [0.4279, 0.8723, 0.0654, 0.2466, 0.5758, 0.3896, 0.4393, 0.8852]],
       requires_grad=True)
Parameter containing:
tensor([[0.2764, 0.5775, 0.6875, 0.2447, 0.2156, 0.4709, 0.5442, 0.3321],
        [0.5724, 1.0164, 0.2112, 0.3919, 0.7298, 0.5315, 0.5780, 1.0205]],
       requires_grad=True)
acc  0 2.343937635421753
acc  1 1.6017051935195923
acc  2 2.287123918533325
2.07758891582489 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 7])
tensor([5, 7])
tensor([5, 7])
Parameter containing:
tensor([[ 0.2350,  0.3592,  0.7046,  0.7069,  0.8619,  1.3804,  0.3071,  0.7344],
        [ 0.7998, -0.0738, -0.0992, -0.0499,  0.4109,  0.8024,  0.7216,  1.7426]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0551,  0.2112,  0.5722,  0.5501,  0.6901,  1.1518,  0.1259,  0.5385],
        [ 0.6120, -0.2425, -0.2361, -0.2331,  0.2233,  0.6177,  0.5539,  1.5776]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.7089e-04,  2.5026e-01,  7.0400e-01,  6.0596e-01,  7.4795e-01,
          1.1489e+00,  1.8191e-01,  5.9302e-01],
        [ 6.9391e-01, -1.5665e-01, -1.3225e-01, -1.3747e-01,  3.3332e-01,
          7.2052e-01,  6.5959e-01,  1.6615e+00]], requires_grad=True)
acc  0 1.0215680599212646
acc  1 1.5393320322036743
acc  2 1.1535624265670776
1.2381541728973389 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 1.1040,  0.3528,  0.7322,  0.5134,  0.3591,  0.8371,  0.0077,  0.8517],
        [ 0.7213,  0.8381,  0.4279,  0.6878,  0.4484, -0.3889,  0.6195,  0.6609]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8635,  0.1059,  0.4852,  0.2516,  0.0914,  0.6041, -0.2441,  0.6005],
        [ 0.7932,  0.9114,  0.5367,  0.7500,  0.5256, -0.3626,  0.6677,  0.7328]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0726,  0.3300,  0.6994,  0.4938,  0.3368,  0.8149, -0.0268,  0.8372],
        [ 0.6224,  0.7488,  0.3198,  0.5833,  0.3416, -0.4925,  0.5098,  0.5543]],
       requires_grad=True)
acc  0 0.9860100746154785
acc  1 1.0850918292999268
acc  2 1.1982344388961792
1.0897787809371948 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 2])
tensor([7, 2])
tensor([7, 2])
Parameter containing:
tensor([[-0.1800,  0.5129, -1.0032,  0.3196,  0.0514, -0.5954,  0.0855,  4.2833],
        [ 0.5888, -1.0912,  3.9582, -0.0523, -1.0889, -0.3947, -0.2580,  2.8419]],
       requires_grad=True)
Parameter containing:
tensor([[-0.3280,  0.4627, -1.0337,  0.4119, -0.0867, -0.6552, -0.0402,  4.0055],
        [ 0.4029, -1.2030,  3.7964, -0.2006, -0.8771, -0.4476, -0.3315,  2.6583]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2142,  0.5780, -0.9209,  0.5226,  0.0307, -0.5437,  0.0750,  4.1148],
        [ 0.4998, -1.1193,  3.8874, -0.1092, -0.7901, -0.3519, -0.2455,  2.7524]],
       requires_grad=True)
acc  0 0.7494773864746094
acc  1 0.9880102276802063
acc  2 0.9205212593078613
0.8860029578208923 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 4])
tensor([1, 4])
tensor([1, 4])
Parameter containing:
tensor([[ 1.1638,  2.9612,  0.3002,  1.0268, -0.4601, -1.2295, -0.2754, -0.1593],
        [-0.5226, -0.1622,  1.0734, -0.6978,  5.7123, -0.9892,  0.3868, -0.3777]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.2954e+00,  3.1432e+00,  4.3045e-01,  1.1759e+00, -3.1820e-01,
         -1.1089e+00, -1.1387e-01,  1.3619e-03],
        [-5.0574e-01, -2.6818e-01,  1.2723e+00, -1.9441e+00,  5.8857e+00,
         -7.9839e-01,  4.3198e-01, -1.0636e-01]], requires_grad=True)
Parameter containing:
tensor([[ 1.2566,  3.0585,  0.4070,  1.1122, -0.3642, -1.1472, -0.2146, -0.0762],
        [-0.5977, -0.6234,  0.9630, -1.0134,  5.7826, -1.7748,  0.4028, -0.5578]],
       requires_grad=True)
acc  0 0.790428876876831
acc  1 0.8824794888496399
acc  2 0.8306794166564941
0.8345292607943217 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 3])
tensor([7, 3])
tensor([7, 3])
Parameter containing:
tensor([[0.8240, 0.9963, 0.4517, 0.4505, 0.4386, 0.1903, 0.5942, 1.1747],
        [0.2062, 0.5924, 0.5659, 1.0663, 0.9862, 0.2994, 0.7158, 0.0989]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6550,  0.8166,  0.2723,  0.2571,  0.2644,  0.0154,  0.4117,  0.9904],
        [-0.0028,  0.3813,  0.3526,  0.8576,  0.7809,  0.0866,  0.5071, -0.1106]],
       requires_grad=True)
Parameter containing:
tensor([[0.6248, 0.7803, 0.2612, 0.2015, 0.2279, 0.0062, 0.3809, 0.9436],
        [0.1215, 0.5551, 0.5395, 1.0178, 0.9532, 0.2798, 0.6494, 0.0851]],
       requires_grad=True)
acc  0 0.9024575352668762
acc  1 0.7893396019935608
acc  2 0.8772285580635071
0.8563418984413147 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 4])
tensor([7, 4])
tensor([7, 4])
Parameter containing:
tensor([[ 0.3641,  0.2062,  0.7148,  0.1205, -0.6001,  0.9145,  1.0542,  1.4966],
        [-0.3702,  0.8257,  0.6663,  0.8872,  1.6785, -0.9537,  0.0463,  1.1996]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4240,  0.3414,  0.8119,  0.2113, -0.8569,  0.9616,  1.1414,  1.5412],
        [-0.5024,  0.8075,  0.3462,  0.4886,  1.5675, -1.0724, -0.0857,  1.3160]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3369,  0.2014,  0.7308,  0.1264, -0.6037,  0.8976,  1.0405,  1.4876],
        [-0.1351,  1.1360,  0.9005,  1.1177,  1.9490, -0.7150,  0.3013,  1.4366]],
       requires_grad=True)
acc  0 1.198880672454834
acc  1 1.0931272506713867
acc  2 0.7807074189186096
1.0242384473482768 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 3])
tensor([5, 3])
tensor([5, 3])
Parameter containing:
tensor([[ 1.5433, -0.0578, -0.2508,  0.7573,  1.2030,  1.7682,  0.5166, -0.3561],
        [ 0.8276, -0.3706,  1.7141,  2.2412,  1.1417, -0.6572, -0.7030, -0.3757]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4546, -0.1763, -0.3762,  0.6634,  1.1019,  1.6677,  0.3784, -0.4479],
        [ 1.0171, -0.2693,  1.9399,  2.4217,  1.3607, -0.4737, -0.6033, -0.3300]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4941, -0.0948, -0.2990,  0.6988,  1.1633,  1.7319,  0.4788, -0.3714],
        [ 0.7594, -0.4507,  1.6547,  2.1606,  1.0675, -0.6774, -0.7747, -0.4120]],
       requires_grad=True)
acc  0 1.0164722204208374
acc  1 1.0288403034210205
acc  2 1.005828857421875
1.017047127087911 10
[Parameter containing:
tensor([[ 1.5433, -0.0578, -0.2508,  0.7573,  1.2030,  1.7682,  0.5166, -0.3561],
        [ 0.8276, -0.3706,  1.7141,  2.2412,  1.1417, -0.6572, -0.7030, -0.3757]],
       requires_grad=True),

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[ 1.1700,  1.3647,  0.3021,  0.3026],
        [ 0.7164,  2.7826,  0.1501, -1.6075]], requires_grad=True)
Parameter containing:
tensor([[ 1.0388,  1.2385,  0.1650,  0.1598],
        [ 0.8687,  2.9326,  0.3078, -1.5014]], requires_grad=True)
Parameter containing:
tensor([[ 0.9813,  1.1704,  0.1050,  0.0967],
        [ 0.6785,  2.7696,  0.1134, -1.7219]], requires_grad=True)
Parameter containing:
tensor([[ 0.6461,  0.8507, -0.2876, -0.2762],
        [ 0.6046,  2.6696,  0.0284, -1.7223]], requires_grad=True)
acc  0 1.0474865436553955
acc  1 1.0395358800888062
acc  2 1.1622310876846313
acc  3 0.998468279838562
1.0619304478168488 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 0])
tensor([0, 0])
tensor([0, 0])
Parameter containing:
tensor([[ 0.9449,  0.0418,  0.8317,  0.0993],
        [ 0.6681,  0.1401,  0.1022, -0.0196]], requires_grad=True)
Parameter containing:
tensor([[1.0936, 0.1559, 0.9600, 0.2365],
        [1.0190, 0.4897, 0.4602, 0.3271]], requires_grad=True)
Parameter containing:
tensor([[ 0.8904, -0.0075,  0.7755,  0.0410],
        [ 0.9566,  0.4303,  0.3948,  0.2650]], requires_grad=True)
Parameter containing:
tensor([[0.9477, 0.0575, 0.8364, 0.1040],
        [0.9585, 0.4266, 0.3981, 0.2703]], requires_grad=True)
acc  0 0.873377799987793
acc  1 1.043276071548462
acc  2 1.3920038938522339
acc  3 1.1825357675552368
1.1227983832359314 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[0.6005, 0.6458, 0.6657, 0.5004],
        [0.7512, 0.2020, 0.4324, 0.4714]], requires_grad=True)
Parameter containing:
tensor([[0.9000, 0.9410, 0.9671, 0.7964],
        [0.7688, 0.2173, 0.4507, 0.4896]], requires_grad=True)
Parameter containing:
tensor([[0.5820, 0.6214, 0.6477, 0.4718],
        [1.0162, 0.4621, 0.6939, 0.7349]], requires_grad=True)
Parameter containing:
tensor([[0.4077, 0.4491, 0.4696, 0.3004],
        [0.8340, 0.2756, 0.5084, 0.5477]], requires_grad=True)
acc  0 0.739852786064148
acc  1 1.006553053855896
acc  2 1.2229981422424316
acc  3 0.8764556050300598
0.9614648967981339 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 2])
tensor([2, 3])
tensor([2, 2])
Parameter containing:
tensor([[0.6359, 0.7294, 0.7327, 0.6325],
        [0.0245, 0.4149, 0.5091, 0.5001]], requires_grad=True)
Parameter containing:
tensor([[0.2225, 0.3135, 0.3184, 0.2203],
        [0.1661, 0.5544, 0.6471, 0.6364]], requires_grad=True)
Parameter containing:
tensor([[0.6910, 0.7746, 0.7827, 0.6836],
        [0.0816, 0.4882, 0.5664, 0.5683]], requires_grad=True)
Parameter containing:
tensor([[0.4943, 0.5881, 0.5922, 0.4892],
        [0.1675, 0.5590, 0.6511, 0.6450]], requires_grad=True)
acc  0 1.5188595056533813
acc  1 1.4469654560089111
acc  2 1.6398018598556519
acc  3 1.1725265979766846
1.4445383548736572 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[ 0.0667,  1.0150, -0.1123,  0.4287],
        [-0.9098,  4.0678, -0.6110, -0.4224]], requires_grad=True)
Parameter containing:
tensor([[ 0.0852,  1.0432, -0.0998,  0.4286],
        [-0.8046,  4.3843, -0.4290, -0.0244]], requires_grad=True)
Parameter containing:
tensor([[ 0.4047,  1.3288,  0.2136,  0.7448],
        [-1.0455,  4.0203, -0.8562, -0.5382]], requires_grad=True)
Parameter containing:
tensor([[ 0.2197,  1.1641,  0.0249,  0.5656],
        [-0.4615,  4.4954, -0.1484, -0.1378]], requires_grad=True)
acc  0 1.3364744186401367
acc  1 1.0579181909561157
acc  2 0.9461362361907959
acc  3 1.1324501037597656
1.1182447373867035 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
Parameter containing:
tensor([[ 1.3859,  0.4127, -0.3809,  0.0286],
        [-0.4727,  1.6747,  1.5969, -0.2204]], requires_grad=True)
Parameter containing:
tensor([[ 1.5099,  0.4861, -0.1855,  0.3580],
        [-0.6947,  1.1680,  1.2256, -0.5707]], requires_grad=True)
Parameter containing:
tensor([[ 1.2913,  0.3312, -0.3907,  0.0951],
        [-0.5803,  1.2557,  1.3128, -0.4250]], requires_grad=True)
Parameter containing:
tensor([[ 1.4622,  0.5416, -0.1978,  0.3441],
        [-0.5882,  1.2437,  1.3067, -0.4469]], requires_grad=True)
acc  0 0.6863530278205872
acc  1 1.3414337635040283
acc  2 1.07477605342865
acc  3 1.033643364906311
1.034051552414894 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 1])
tensor([3, 1])
tensor([3, 1])
tensor([3, 1])
Parameter containing:
tensor([[0.3646, 0.2339, 0.0632, 0.4550],
        [0.7365, 0.8719, 0.4148, 0.7483]], requires_grad=True)
Parameter containing:
tensor([[0.5632, 0.4358, 0.2533, 0.6464],
        [0.5290, 0.6636, 0.2076, 0.5420]], requires_grad=True)
Parameter containing:
tensor([[0.6417, 0.5211, 0.3322, 0.7254],
        [0.5142, 0.6536, 0.2053, 0.5291]], requires_grad=True)
Parameter containing:
tensor([[0.7282, 0.5974, 0.4198, 0.8206],
        [0.7018, 0.8384, 0.3794, 0.7168]], requires_grad=True)
acc  0 1.0114212036132812
acc  1 0.8525888919830322
acc  2 1.0628743171691895
acc  3 1.154809832572937
1.02042356133461 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 1.1317,  0.5264,  0.1814, -0.3413],
        [ 0.0173,  2.2170,  0.3581, -1.2477]], requires_grad=True)
Parameter containing:
tensor([[ 1.2505,  0.6430,  0.2931, -0.2285],
        [ 0.2221,  2.4249,  0.5578, -1.0434]], requires_grad=True)
Parameter containing:
tensor([[ 1.0644,  0.5061,  0.1377, -0.3578],
        [ 0.5644,  2.8757,  0.7738, -1.0816]], requires_grad=True)
Parameter containing:
tensor([[ 1.1868,  0.5814,  0.2327, -0.2863],
        [ 0.1806,  2.3794,  0.5254, -1.0951]], requires_grad=True)
acc  0 0.9416921734809875
acc  1 0.9682577848434448
acc  2 0.8785647749900818
acc  3 0.795127272605896
0.8959105014801025 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([2, 3])
tensor([2, 3])
tensor([2, 3])
Parameter containing:
tensor([[ 0.2216,  0.3141,  0.6677, -0.0869],
        [ 0.2312, -0.2162,  0.4639,  1.0147]], requires_grad=True)
Parameter containing:
tensor([[0.4494, 0.5766, 0.8983, 0.1596],
        [0.4951, 0.0640, 0.7444, 1.3214]], requires_grad=True)
Parameter containing:
tensor([[ 0.3865,  0.4896,  0.8232,  0.0731],
        [ 0.2400, -0.2127,  0.4689,  1.0251]], requires_grad=True)
Parameter containing:
tensor([[ 0.2803,  0.3739,  0.7207, -0.0251],
        [ 0.2470, -0.2106,  0.4744,  1.0320]], requires_grad=True)
acc  0 1.2247847318649292
acc  1 0.947857141494751
acc  2 0.7873382568359375
acc  3 1.196722149848938
1.039175570011139 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([1, 3])
tensor([1, 3])
tensor([1, 3])
Parameter containing:
tensor([[0.6599, 0.7320, 0.6119, 0.5256],
        [0.2663, 0.4794, 0.7555, 0.8228]], requires_grad=True)
Parameter containing:
tensor([[0.4418, 0.5135, 0.3808, 0.2992],
        [0.1617, 0.3662, 0.6336, 0.7321]], requires_grad=True)
Parameter containing:
tensor([[0.4751, 0.5523, 0.4296, 0.3410],
        [0.4836, 0.6905, 0.9605, 1.0474]], requires_grad=True)
Parameter containing:
tensor([[ 0.6097,  0.6833,  0.5570,  0.4686],
        [-0.0974,  0.1095,  0.3799,  0.4647]], requires_grad=True)
acc  0 1.0043009519577026
acc  1 0.9495274424552917
acc  2 0.7484109401702881
acc  3 0.8485256433486938
0.8876912444829941 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[0.3820, 1.4295, 0.3827, 0.4329],
        [1.1540, 0.0780, 0.8226, 0.8421]], requires_grad=True)
Parameter containing:
tensor([[0.1525, 1.1997, 0.1518, 0.1990],
        [1.1915, 0.1197, 0.8540, 0.8658]], requires_grad=True)
Parameter containing:
tensor([[0.3185, 1.3713, 0.3232, 0.3673],
        [1.2421, 0.1395, 0.9007, 0.9193]], requires_grad=True)
Parameter containing:
tensor([[ 0.4278,  1.4224,  0.4159,  0.4988],
        [ 0.7863, -0.2860,  0.4431,  0.4614]], requires_grad=True)
acc  0 1.1553704738616943
acc  1 1.231768012046814
acc  2 0.8300163745880127
acc  3 0.765038251876831
0.995548278093338 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
Parameter containing:
tensor([[0.9474, 0.5431, 0.5933, 0.7254],
        [0.4680, 0.2321, 0.4825, 0.3514]], requires_grad=True)
Parameter containing:
tensor([[0.8642, 0.4596, 0.5099, 0.6456],
        [0.5955, 0.3602, 0.6004, 0.4778]], requires_grad=True)
Parameter containing:
tensor([[0.5206, 0.1192, 0.1690, 0.3048],
        [0.6031, 0.3649, 0.6214, 0.4885]], requires_grad=True)
Parameter containing:
tensor([[0.8182, 0.4131, 0.4720, 0.5965],
        [0.6002, 0.3665, 0.6202, 0.4769]], requires_grad=True)
acc  0 1.03626549243927
acc  1 0.9637779593467712
acc  2 1.1049296855926514
acc  3 1.1697195768356323
1.0686731785535812 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[0.7432, 0.8688, 0.7956, 0.4398],
        [0.3114, 0.7917, 0.5874, 0.6320]], requires_grad=True)
Parameter containing:
tensor([[ 0.2730,  0.4014,  0.3280, -0.0281],
        [ 0.3095,  0.7894,  0.5848,  0.6297]], requires_grad=True)
Parameter containing:
tensor([[0.5291, 0.6572, 0.5851, 0.2268],
        [0.4001, 0.8837, 0.6780, 0.7229]], requires_grad=True)
Parameter containing:
tensor([[0.4527, 0.5628, 0.5083, 0.1468],
        [0.2881, 0.7679, 0.5668, 0.6088]], requires_grad=True)
acc  0 1.4125264883041382
acc  1 1.242576003074646
acc  2 1.2005776166915894
acc  3 1.0709662437438965
1.2316615879535675 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 3])
tensor([0, 3])
tensor([2, 3])
tensor([0, 3])
Parameter containing:
tensor([[ 1.2652,  0.1611,  1.1216,  0.5369],
        [ 0.5815,  0.8251, -0.1796,  0.9775]], requires_grad=True)
Parameter containing:
tensor([[ 0.8204, -0.2747,  0.6802,  0.0896],
        [ 0.7598,  1.0067,  0.0128,  1.1594]], requires_grad=True)
Parameter containing:
tensor([[ 0.9819, -0.3836,  0.9837,  0.1735],
        [ 0.5625,  0.8112, -0.1942,  0.9601]], requires_grad=True)
Parameter containing:
tensor([[ 0.7546, -0.3424,  0.6100,  0.0244],
        [ 0.4854,  0.7293, -0.2697,  0.8808]], requires_grad=True)
acc  0 0.9249156713485718
acc  1 0.6123517751693726
acc  2 0.7485234141349792
acc  3 1.3463828563690186
0.9080434292554855 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 1])
tensor([3, 1])
tensor([3, 1])
tensor([3, 1])
Parameter containing:
tensor([[ 0.7848,  0.7784,  0.0824,  1.1672],
        [ 0.5012,  1.1512,  0.5215, -0.2287]], requires_grad=True)
Parameter containing:
tensor([[ 0.6495,  0.6370, -0.0552,  1.0314],
        [ 0.4532,  1.1007,  0.4670, -0.2686]], requires_grad=True)
Parameter containing:
tensor([[ 0.5066,  0.4986, -0.2003,  0.8889],
        [ 0.2808,  0.9284,  0.2994, -0.4444]], requires_grad=True)
Parameter containing:
tensor([[ 0.6593,  0.6511, -0.0561,  1.0338],
        [ 0.5848,  1.2339,  0.6026, -0.1546]], requires_grad=True)
acc  0 1.0141016244888306
acc  1 0.983288049697876
acc  2 0.8903053402900696
acc  3 0.999933123588562
0.9719070345163345 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 0])
tensor([0, 0])
tensor([0, 0])
Parameter containing:
tensor([[ 2.2161, -1.1833,  2.1457, -1.7451],
        [ 4.0821,  0.2271, -1.7639, -0.5047]], requires_grad=True)
Parameter containing:
tensor([[ 2.5113, -0.8230,  2.4668, -1.4343],
        [ 4.0519,  0.1529, -1.8240, -0.5980]], requires_grad=True)
Parameter containing:
tensor([[ 2.3360, -1.0162,  2.2564, -1.6162],
        [ 3.7404, -0.0774, -2.0044, -0.8823]], requires_grad=True)
Parameter containing:
tensor([[ 2.1607, -1.1548,  2.1412, -1.7647],
        [ 4.1605,  0.3175, -1.5861, -0.4766]], requires_grad=True)
acc  0 0.8354969024658203
acc  1 0.8043392300605774
acc  2 0.9268838167190552
acc  3 0.9692284464836121
0.8839870989322662 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
Parameter containing:
tensor([[ 2.5228, -0.7443, -0.5193,  0.2613],
        [ 2.0728, -2.0937,  3.2841, -1.4561]], requires_grad=True)
Parameter containing:
tensor([[ 2.6656, -0.6196, -0.3921,  0.3866],
        [ 2.2884, -2.1249,  3.4728, -1.2538]], requires_grad=True)
Parameter containing:
tensor([[ 2.5675, -0.6877, -0.4696,  0.3318],
        [ 2.3218, -1.8699,  3.5308, -1.2193]], requires_grad=True)
Parameter containing:
tensor([[ 2.5623, -0.6411, -0.5296,  0.3136],
        [ 2.2300, -2.3169,  3.5712, -1.2223]], requires_grad=True)
acc  0 1.1183773279190063
acc  1 1.2191166877746582
acc  2 0.9135628342628479
acc  3 0.8041596412658691
1.0138041228055954 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 3])
tensor([0, 3])
tensor([0, 3])
tensor([0, 3])
Parameter containing:
tensor([[ 0.9450,  0.6997, -0.0172,  0.5020],
        [ 0.7826,  0.0332,  0.8289,  1.3461]], requires_grad=True)
Parameter containing:
tensor([[ 1.1085,  0.8798,  0.1538,  0.6807],
        [ 0.4608, -0.2938,  0.5177,  1.0236]], requires_grad=True)
Parameter containing:
tensor([[ 0.8302,  0.6031, -0.1337,  0.3898],
        [ 0.5335, -0.2347,  0.5957,  1.0990]], requires_grad=True)
Parameter containing:
tensor([[ 0.5416,  0.3169, -0.4004,  0.1128],
        [ 0.6023, -0.1482,  0.6555,  1.1723]], requires_grad=True)
acc  0 1.3636822700500488
acc  1 1.7388947010040283
acc  2 1.47151780128479
acc  3 1.3593387603759766
1.483358383178711 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 2])
tensor([3, 2])
tensor([3, 2])
tensor([3, 2])
Parameter containing:
tensor([[ 0.3835, -0.1605,  0.0768,  1.3917],
        [ 0.4896, -0.3358,  2.2852,  0.1402]], requires_grad=True)
Parameter containing:
tensor([[ 0.5761,  0.0394,  0.2701,  1.5914],
        [ 0.2233, -0.6147,  2.0155, -0.1213]], requires_grad=True)
Parameter containing:
tensor([[ 0.4878, -0.0303,  0.2005,  1.5094],
        [ 0.4519, -0.3840,  2.2386,  0.1023]], requires_grad=True)
Parameter containing:
tensor([[ 0.2761, -0.2633, -0.0550,  1.2993],
        [ 0.2206, -0.5758,  2.0562, -0.0928]], requires_grad=True)
acc  0 0.9576551914215088
acc  1 1.0009208917617798
acc  2 0.7426429390907288
acc  3 0.8096965551376343
0.8777288943529129 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([2, 3])
tensor([2, 3])
tensor([2, 3])
Parameter containing:
tensor([[0.5164, 0.2632, 0.8780, 0.3251],
        [0.2361, 1.0737, 0.7273, 1.1040]], requires_grad=True)
Parameter containing:
tensor([[ 0.6644,  0.3930,  1.0365,  0.4874],
        [-0.2304,  0.5921,  0.2495,  0.6256]], requires_grad=True)
Parameter containing:
tensor([[ 0.7350,  0.4877,  1.0992,  0.5534],
        [-0.1951,  0.6344,  0.2794,  0.6716]], requires_grad=True)
Parameter containing:
tensor([[0.6945, 0.4440, 1.0593, 0.5137],
        [0.0432, 0.8738, 0.5244, 0.9102]], requires_grad=True)
acc  0 1.4239662885665894
acc  1 1.1132982969284058
acc  2 1.107771396636963
acc  3 1.3078858852386475
1.2382304668426514 10
[Parameter containing:
tensor([[0.5164, 0.2632, 0.8780, 0.3251],
        [0.2361, 1.0737, 0.7273, 1.1040]], requires_grad=True), Parameter containing:
tensor([[ 0.6644,  0.3930,  1.0365,  0.4874],
        [-0.2304,  0.5921,  0.2495,  0.6256]], requires_grad=True), Parameter containing:
tens

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 7])
tensor([5, 7])
tensor([5, 7])
tensor([5, 7])
Parameter containing:
tensor([[ 0.6023,  0.5525,  0.7372,  0.1852,  0.2934,  1.7781,  0.7992,  0.2331],
        [ 0.6625,  0.9877,  0.7206, -0.4070,  0.6601,  0.0659,  1.0881,  1.4318]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4822,  0.4177,  0.6169,  0.0603,  0.1765,  1.6524,  0.6747,  0.1123],
        [ 0.3255,  0.6415,  0.3875, -0.7425,  0.3196, -0.2787,  0.7587,  1.1015]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5855,  0.5745,  0.6761,  0.1440,  0.2863,  1.7743,  0.7719,  0.1821],
        [ 0.6390,  1.0053,  0.8411, -0.3754,  0.6758, -0.1363,  1.0649,  1.3898]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3527,  0.3651,  0.5650, -0.0678,  0.1631,  1.6480,  0.5933,  0.0195],
        [ 0.5147,  0.8297,  0.5604, -0.6059,  0.5126, -0.1535,  0.9295,  1.2701]],
       requires_grad=True)
acc  0 1.0655183792114258
acc  1 1.0416829586029053
acc  2 1.3087656497955322
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 2])
tensor([5, 2])
tensor([5, 2])
tensor([5, 2])
Parameter containing:
tensor([[ 0.5185,  0.2133,  0.5646,  0.4897,  0.1079,  0.7689,  0.1742, -0.0286],
        [ 0.4512,  0.0806,  1.5714,  0.4507,  0.6789,  0.6974,  0.6341,  0.1775]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6550,  0.3303,  0.6563,  0.6035,  0.2247,  0.8832,  0.3045,  0.0888],
        [ 0.3679, -0.1011,  1.5050,  0.3494,  0.5903,  0.6186,  0.5726,  0.0965]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7531,  0.4352,  0.7870,  0.7280,  0.3354,  1.0065,  0.4151,  0.1989],
        [ 0.3670, -0.0173,  1.4848,  0.3583,  0.5944,  0.6227,  0.5587,  0.0725]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6999,  0.3945,  0.7586,  0.6834,  0.2925,  0.9544,  0.3744,  0.1568],
        [ 0.2346, -0.1467,  1.3466,  0.2282,  0.4578,  0.4838,  0.4233, -0.0506]],
       requires_grad=True)
acc  0 0.9992592930793762
acc  1 1.3077486753463745
acc  2 1.7362735271453857
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[0.4582, 0.6373, 0.5713, 0.3484, 0.5977, 0.5903, 0.4988, 0.5675],
        [0.4601, 0.9486, 0.7078, 0.7731, 0.7018, 0.5303, 0.6430, 0.7662]],
       requires_grad=True)
Parameter containing:
tensor([[0.5901, 0.7595, 0.6933, 0.4460, 0.7252, 0.7092, 0.6206, 0.6861],
        [0.2399, 0.7346, 0.4913, 0.5505, 0.4811, 0.3177, 0.4281, 0.5494]],
       requires_grad=True)
Parameter containing:
tensor([[0.4096, 0.5800, 0.5123, 0.2809, 0.5283, 0.5365, 0.4410, 0.5096],
        [0.1409, 0.6417, 0.4021, 0.4638, 0.3944, 0.2233, 0.3444, 0.4629]],
       requires_grad=True)
Parameter containing:
tensor([[0.3753, 0.5457, 0.4940, 0.2699, 0.5134, 0.5281, 0.4189, 0.4910],
        [0.3123, 0.7974, 0.5472, 0.6166, 0.5410, 0.3713, 0.4877, 0.6164]],
       requires_grad=True)
acc  0 0.8645411729812622
acc  1 1.0436604022979736
acc  2 1.0680620670318604
acc  3 0.8577417135238647
0.9585013389587402 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[0.4964, 0.7086, 0.4220, 0.4529, 0.5498, 0.4627, 0.6524, 0.4344],
        [0.6753, 0.3696, 0.3864, 0.1622, 0.4075, 0.4670, 0.3874, 0.2234]],
       requires_grad=True)
Parameter containing:
tensor([[0.5532, 0.7649, 0.4826, 0.5061, 0.6142, 0.5180, 0.7075, 0.4898],
        [0.8141, 0.5073, 0.5269, 0.3027, 0.5451, 0.5993, 0.5276, 0.3635]],
       requires_grad=True)
Parameter containing:
tensor([[0.4583, 0.6622, 0.3827, 0.4044, 0.4902, 0.4223, 0.6116, 0.3950],
        [0.6533, 0.3505, 0.3651, 0.1433, 0.3807, 0.4534, 0.3665, 0.2082]],
       requires_grad=True)
Parameter containing:
tensor([[0.4423, 0.6535, 0.3651, 0.3971, 0.4974, 0.4059, 0.5959, 0.3791],
        [0.8392, 0.5272, 0.5488, 0.3281, 0.5677, 0.6314, 0.5523, 0.3930]],
       requires_grad=True)
acc  0 1.2656891345977783
acc  1 1.0238151550292969
acc  2 0.7818084359169006
acc  3 0.9780818223953247
1.0123486369848251 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 7])
tensor([3, 7])
tensor([3, 7])
tensor([3, 7])
Parameter containing:
tensor([[ 0.1809, -0.1240,  0.6652,  1.8786, -0.0177, -0.1749,  0.6475,  0.4671],
        [ 0.3585,  0.2505,  2.0458, -1.1857, -0.5015,  0.6736, -0.6018,  3.5441]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.8051e-01, -1.9141e-03,  7.6657e-01,  1.9823e+00,  5.7039e-02,
         -5.9253e-02,  7.0611e-01,  5.4098e-01],
        [ 4.1102e-01,  5.4990e-01,  2.3126e+00, -1.1912e+00, -6.2156e-01,
          7.8930e-01, -3.2545e-01,  3.7974e+00]], requires_grad=True)
Parameter containing:
tensor([[ 2.2016e-01, -8.4269e-02,  6.4840e-01,  1.9307e+00,  2.4713e-03,
         -1.3661e-01,  6.6509e-01,  4.2879e-01],
        [ 1.7785e-01,  6.2617e-02,  1.8776e+00, -1.4639e+00, -6.0786e-01,
          4.8575e-01, -7.6982e-01,  3.3701e+00]], requires_grad=True)
Parameter containing:
tensor([[ 0.1825, -0.1286,  0.6829,  1.8966,  0.0136, -0.1802,  0.6713,  0.4948],
        [ 0.3765,  0.2265,  2.0467, -1.1909,

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 6])
tensor([0, 6])
tensor([0, 6])
tensor([0, 6])
Parameter containing:
tensor([[ 1.0292,  0.6372,  0.5804,  0.8861,  0.1630,  0.2568,  0.4161,  0.0560],
        [-0.3020, -1.1238, -0.3087,  1.9454, -0.6646,  0.1172,  3.4048,  0.2285]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9610,  0.4892,  0.1636,  0.9106,  0.1056,  0.0413,  0.2740, -0.0554],
        [-0.2168, -1.1952, -0.2135,  2.1111, -0.3423, -0.4607,  3.6668,  0.3161]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9884,  0.5026,  0.4229,  0.8279,  0.1384,  0.1596,  0.2925, -0.0161],
        [-0.2933, -1.1661, -0.3274,  1.8310, -0.4488,  0.0045,  3.3102,  0.1220]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9270,  0.4685,  0.3614,  0.7907,  0.0774,  0.1075,  0.2444, -0.0607],
        [-0.2774, -1.0996, -0.2484,  1.9011, -0.3642,  0.0352,  3.3882,  0.1868]],
       requires_grad=True)
acc  0 0.7483898401260376
acc  1 0.9507056474685669
acc  2 1.053282618522644
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 0])
tensor([7, 0])
tensor([7, 0])
tensor([7, 0])
Parameter containing:
tensor([[0.3980, 0.3613, 0.3884, 0.4808, 0.5880, 0.5668, 0.3983, 0.7067],
        [1.0404, 0.2176, 0.0175, 0.7575, 0.5662, 0.7904, 0.3447, 0.1067]],
       requires_grad=True)
Parameter containing:
tensor([[0.4259, 0.3879, 0.4157, 0.5093, 0.6229, 0.5966, 0.4264, 0.7379],
        [1.2104, 0.3866, 0.1922, 0.9263, 0.7401, 0.9593, 0.5178, 0.2699]],
       requires_grad=True)
Parameter containing:
tensor([[0.4967, 0.4683, 0.4864, 0.5817, 0.6926, 0.6718, 0.4949, 0.8068],
        [1.2289, 0.4026, 0.2176, 0.9515, 0.7607, 0.9758, 0.5397, 0.2922]],
       requires_grad=True)
Parameter containing:
tensor([[0.5079, 0.4754, 0.4967, 0.5849, 0.6918, 0.6875, 0.4973, 0.8170],
        [1.1304, 0.3046, 0.1147, 0.8445, 0.6568, 0.8799, 0.4401, 0.1922]],
       requires_grad=True)
acc  0 1.020025372505188
acc  1 0.8260501623153687
acc  2 1.051666259765625
acc  3 1.1722925901412964
1.0175085961818695 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 5])
tensor([5, 5])
tensor([5, 5])
tensor([5, 5])
Parameter containing:
tensor([[-0.2524,  0.1486,  0.3973, -0.3461,  0.4901,  2.0243,  1.2288, -0.8477],
        [ 1.5733, -0.7641,  0.1219, -0.7950, -0.1227,  4.7241, -0.0058, -0.6123]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0537,  0.3529,  0.6030, -0.1394,  0.6966,  2.2308,  1.4342, -0.6417],
        [ 1.5676, -0.9158,  0.0739, -0.8596, -0.1721,  4.7010, -0.0372, -0.6530]],
       requires_grad=True)
Parameter containing:
tensor([[-0.3898,  0.2981,  0.5634, -0.2805,  0.6745,  2.2512,  1.4680, -0.7098],
        [ 1.9328, -0.5671,  0.3789, -2.1363,  0.2665,  5.0570, -0.4500, -1.0792]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1794,  0.2303,  0.4762, -0.2670,  0.5757,  2.0960,  1.3222, -0.7641],
        [ 1.4619, -0.8772,  0.0125, -0.9217, -0.2268,  4.6151, -0.1164, -0.7280]],
       requires_grad=True)
acc  0 0.8880710601806641
acc  1 0.9163710474967957
acc  2 0.8292844891548157
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 5])
tensor([0, 5])
tensor([0, 5])
tensor([0, 5])
Parameter containing:
tensor([[ 0.7897, -0.0425,  0.1366,  0.4025,  0.4923,  0.4472, -0.1821,  0.5664],
        [ 0.1580,  0.7428, -0.2846,  0.9807,  0.7440,  0.9931,  0.9671,  0.4076]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8630,  0.0462,  0.2452,  0.5068,  0.5656,  0.4558, -0.0973,  0.7084],
        [-0.1156,  0.5149, -0.5209,  0.7606,  0.5309,  0.7727,  0.7423,  0.2078]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9706,  0.1451,  0.3214,  0.6035,  0.6792,  0.6354,  0.0076,  0.7483],
        [-0.0688,  0.5135, -0.5099,  0.7639,  0.5210,  0.7716,  0.7476,  0.1918]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0351,  0.2087,  0.3844,  0.6501,  0.7397,  0.6950,  0.0672,  0.8111],
        [-0.0359,  0.5518, -0.4748,  0.7889,  0.5542,  0.8053,  0.7781,  0.2214]],
       requires_grad=True)
acc  0 1.2502727508544922
acc  1 0.986155092716217
acc  2 0.8206369280815125
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 2])
tensor([2, 2])
tensor([2, 2])
Parameter containing:
tensor([[ 0.5187,  0.4434,  1.0625,  0.4354,  0.6627,  0.6184,  0.4848,  0.6275],
        [-0.2902,  0.1373,  1.4003,  0.8750,  0.5971,  0.1489, -0.0564,  0.7170]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5399,  0.4693,  1.0551,  0.4489,  0.7045,  0.6350,  0.4872,  0.6400],
        [-0.3557,  0.0913,  1.3652,  0.8343,  0.5644,  0.0913, -0.0838,  0.6683]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4279,  0.3262,  0.9410,  0.3240,  0.5606,  0.5080,  0.3661,  0.5236],
        [-0.2789,  0.1274,  1.3996,  0.8686,  0.5943,  0.1387, -0.0560,  0.7035]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4325,  0.3443,  0.9555,  0.3334,  0.5695,  0.5188,  0.3849,  0.5331],
        [-0.1098,  0.2984,  1.5599,  1.0354,  0.7632,  0.3048,  0.1238,  0.8745]],
       requires_grad=True)
acc  0 0.962773859500885
acc  1 0.9194109439849854
acc  2 0.7240646481513977
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 1])
tensor([4, 1])
tensor([4, 1])
tensor([4, 1])
Parameter containing:
tensor([[ 0.8947,  0.9563,  0.0727,  0.4632,  1.4349,  0.8270,  0.6282,  0.2469],
        [-0.7370,  1.0772,  0.9295,  0.9938,  0.7599,  0.5550,  0.7495,  0.4063]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8894,  0.9571,  0.0782,  0.4730,  1.4470,  0.8453,  0.6264,  0.2657],
        [-0.8072,  1.0077,  0.8659,  0.9182,  0.6857,  0.4728,  0.6658,  0.3608]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6252,  0.6972, -0.1976,  0.1890,  1.1722,  0.5617,  0.3492, -0.0151],
        [-0.8729,  0.9477,  0.7906,  0.8391,  0.5710,  0.3654,  0.5672,  0.2767]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6908,  0.7439, -0.1134,  0.2609,  1.2232,  0.6632,  0.3542,  0.0238],
        [-0.8055,  0.9960,  0.8574,  0.9070,  0.6710,  0.4602,  0.6547,  0.3458]],
       requires_grad=True)
acc  0 0.9770259857177734
acc  1 1.0732537508010864
acc  2 0.9063047766685486
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 7])
tensor([1, 7])
tensor([1, 7])
tensor([1, 7])
Parameter containing:
tensor([[0.3590, 1.2133, 0.9111, 0.4199, 0.5085, 0.2000, 0.0872, 0.6442],
        [0.8424, 0.5097, 0.1059, 0.2380, 0.0758, 0.5355, 0.8752, 1.3308]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2173,  1.0656,  0.7601,  0.2787,  0.3654,  0.0564, -0.0541,  0.5022],
        [ 0.8270,  0.4886,  0.0886,  0.2175,  0.0579,  0.5141,  0.8581,  1.3119]],
       requires_grad=True)
Parameter containing:
tensor([[ 4.2650e-01,  1.2641e+00,  8.8839e-01,  4.0853e-01,  4.9445e-01,
          7.4845e-02,  9.9234e-02,  5.2014e-01],
        [ 7.2259e-01,  3.8794e-01, -1.2302e-03,  1.0598e-01, -4.4660e-02,
          3.9146e-01,  7.5736e-01,  1.2154e+00]], requires_grad=True)
Parameter containing:
tensor([[ 4.3812e-01,  1.2898e+00,  9.9097e-01,  5.2048e-01,  6.0923e-01,
          2.8247e-01,  1.9428e-01,  6.8772e-01],
        [ 6.3506e-01,  3.0456e-01, -1.0535e-01,  6.0464e-04, -1.3480e-01,
          3.5404e-01

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 7])
tensor([2, 7])
tensor([2, 7])
tensor([2, 7])
Parameter containing:
tensor([[0.6167, 0.7305, 0.9341, 0.5334, 0.5372, 0.6558, 0.6110, 0.5511],
        [0.2711, 0.5952, 0.2182, 0.3602, 0.6124, 0.1664, 0.4228, 0.6416]],
       requires_grad=True)
Parameter containing:
tensor([[0.5562, 0.6665, 0.8749, 0.4712, 0.4744, 0.5940, 0.5522, 0.4934],
        [0.3472, 0.6720, 0.2902, 0.4396, 0.6909, 0.2699, 0.4916, 0.7006]],
       requires_grad=True)
Parameter containing:
tensor([[0.3918, 0.5026, 0.7024, 0.3088, 0.3091, 0.4238, 0.3861, 0.3243],
        [0.4919, 0.8188, 0.4367, 0.5804, 0.8326, 0.4069, 0.6365, 0.8481]],
       requires_grad=True)
Parameter containing:
tensor([[0.5117, 0.6273, 0.8339, 0.4361, 0.4337, 0.5487, 0.5096, 0.4473],
        [0.3328, 0.6609, 0.2785, 0.4231, 0.6793, 0.2545, 0.4754, 0.6893]],
       requires_grad=True)
acc  0 0.8897070288658142
acc  1 0.8439292311668396
acc  2 0.9598750472068787
acc  3 1.020612120628357
0.9285308569669724 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 4])
tensor([7, 4])
tensor([7, 4])
tensor([7, 4])
Parameter containing:
tensor([[ 0.3579,  0.3141,  0.4016,  0.8207,  0.8488,  0.9839,  0.4999,  1.0624],
        [ 0.6006,  0.3792,  0.4342,  0.2305,  1.3822, -0.0036,  0.3698,  0.8615]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1751,  0.1536,  0.2203,  0.6430,  0.6705,  0.8204,  0.3221,  0.8900],
        [ 0.4369,  0.1985,  0.2659,  0.0560,  1.2051, -0.1747,  0.1989,  0.6860]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1772,  0.2128,  0.2524,  0.6765,  0.7218,  0.8756,  0.3811,  0.9348],
        [ 0.5198,  0.3017,  0.3931,  0.1599,  1.3146, -0.1313,  0.2907,  0.7939]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2360,  0.2143,  0.2810,  0.7021,  0.7321,  0.8737,  0.3837,  0.9424],
        [ 0.5885,  0.3539,  0.4219,  0.2139,  1.3587, -0.0239,  0.3499,  0.8414]],
       requires_grad=True)
acc  0 0.8368511199951172
acc  1 1.0707651376724243
acc  2 1.0352365970611572
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
tensor([4, 2])
Parameter containing:
tensor([[ 1.1560,  0.6572,  0.8625, -0.2279,  1.1242,  0.6120,  0.2982,  0.2757],
        [-0.0661,  0.5679,  1.6214,  0.5108, -0.0072,  0.0609,  1.4651, -0.1378]],
       requires_grad=True)
Parameter containing:
tensor([[ 8.9792e-01,  4.1009e-01,  6.1717e-01, -4.6179e-01,  8.9040e-01,
          3.6734e-01,  4.0840e-02, -3.9564e-03],
        [-7.4208e-04,  6.2680e-01,  1.6555e+00,  5.7769e-01,  6.7301e-02,
          1.9819e-01,  1.5233e+00, -9.3291e-02]], requires_grad=True)
Parameter containing:
tensor([[ 1.1203,  0.6375,  0.8408, -0.2374,  1.1154,  0.5858,  0.2667,  0.2290],
        [-0.1755,  0.4515,  1.4757,  0.4061, -0.1044,  0.0305,  1.3471, -0.2434]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1489e+00,  6.8060e-01,  8.7957e-01, -2.3146e-01,  1.1506e+00,
          6.1082e-01,  2.4591e-01,  2.5386e-01],
        [-4.0781e-04,  6.3686e-01,  1.6717e+00,  5.8997e-01,  9.7464e-02,
    

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 0])
tensor([5, 0])
tensor([5, 0])
tensor([5, 0])
Parameter containing:
tensor([[ 0.0816, -0.1388,  1.5774,  0.3042, -1.6177,  1.7850, -0.1992,  1.6816],
        [ 6.3520, -1.3646,  0.2433, -0.7744, -0.3350, -0.1854, -0.9929,  1.5608]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1221, -0.2372,  1.6150,  0.0543, -1.7821,  1.7633, -0.4704,  1.6712],
        [ 6.6055, -1.5620, -0.0544, -2.1368, -0.2428,  0.2152, -0.9568,  1.9298]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0826, -0.1804,  1.5986,  0.2196, -1.1862,  1.7092, -0.2354,  1.6344],
        [ 6.1105, -1.3800,  0.1175, -0.2391, -0.4120, -0.0465, -0.9294,  1.3024]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2851,  0.0268,  1.7638,  0.4270, -0.9948,  1.9238, -0.0160,  1.8439],
        [ 6.2213, -1.2872,  0.1941, -0.1297, -0.2583,  0.0889, -0.8178,  1.3825]],
       requires_grad=True)
acc  0 0.8275737762451172
acc  1 0.8625928163528442
acc  2 0.9032430052757263
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 5])
tensor([6, 5])
tensor([6, 5])
tensor([6, 5])
Parameter containing:
tensor([[ 0.2251,  0.4617,  0.1441, -0.5605,  0.9710, -0.0880,  1.3230,  0.8513],
        [ 1.7145, -0.2633, -0.2490, -0.9118, -0.5391,  4.5174,  0.3800, -0.2257]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3013,  0.6041,  0.2627, -0.3703,  1.1276,  0.0575,  1.5096,  1.0130],
        [ 1.8509, -0.2255, -0.1386, -1.7434, -1.0241,  5.1872,  0.2731, -0.2124]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2844,  0.5307,  0.2587, -0.5124,  1.0613,  0.0096,  1.4505,  0.9493],
        [ 1.4929, -0.4883, -0.4880, -1.1325, -0.7792,  4.3041,  0.1263, -0.4537]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4214,  0.4997,  0.3844, -0.6174,  1.0390, -0.0086,  1.5150,  1.0000],
        [ 1.7293, -0.3458, -0.4475, -0.8192, -1.0320,  4.4959,  0.3103, -0.2441]],
       requires_grad=True)
acc  0 0.8888953924179077
acc  1 1.0243977308273315
acc  2 0.9320665597915649
acc  3

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 0])
tensor([4, 1])
tensor([4, 0])
tensor([4, 0])
Parameter containing:
tensor([[0.2231, 0.6451, 0.1599, 1.0632, 1.1975, 0.3941, 0.8016, 0.6358],
        [1.0265, 0.9760, 0.1466, 0.5220, 0.0758, 0.2187, 0.7111, 0.8543]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0510,  0.4626, -0.0129,  0.8786,  1.0162,  0.2058,  0.6236,  0.4581],
        [ 0.8176,  0.8398, -0.0425,  0.3166, -0.1394, -0.0124,  0.4264,  0.6467]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0114,  0.4291, -0.0523,  0.8438,  0.9863,  0.1795,  0.5952,  0.4332],
        [ 0.9893,  0.9448,  0.1029,  0.5034,  0.0310,  0.1422,  0.6745,  0.8132]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0357,  0.4715, -0.0308,  0.9107,  1.0049,  0.2176,  0.6301,  0.4642],
        [ 0.8134,  0.7670, -0.0762,  0.2867, -0.1552, -0.0052,  0.4839,  0.6240]],
       requires_grad=True)
acc  0 1.0604045391082764
acc  1 1.7589465379714966
acc  2 1.002719521522522
acc  3 0.65746963024139

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 2])
tensor([7, 2])
tensor([7, 2])
tensor([7, 2])
Parameter containing:
tensor([[ 0.2782,  0.3852,  0.8200,  0.1958, -0.4220,  1.4225,  0.1219,  1.4692],
        [-0.0226, -0.3431,  1.4425,  0.8254,  0.2202, -0.0180,  0.7402,  1.1352]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2991,  0.4241,  0.8683,  0.2361, -0.3929,  1.4609,  0.1759,  1.5045],
        [-0.1607, -0.4800,  1.2842,  0.6778,  0.0602, -0.0934,  0.5862,  0.9910]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2654,  0.4235,  0.8018,  0.1933, -0.4999,  1.4192,  0.1326,  1.4814],
        [ 0.2231, -0.0742,  1.6636,  1.0642,  0.4414,  0.3033,  0.9806,  1.3889]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2057,  0.3114,  0.7319,  0.1224, -0.5010,  1.3564,  0.0676,  1.3781],
        [-0.1764, -0.4722,  1.2863,  0.6731,  0.0621, -0.1031,  0.5805,  0.9806]],
       requires_grad=True)
acc  0 0.9519301056861877
acc  1 0.9958717226982117
acc  2 0.738353431224823
acc  3 

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
Parameter containing:
tensor([[0.7095, 0.9778, 0.6214, 0.5213, 0.3035, 0.6579, 0.6041, 0.7281],
        [0.2695, 0.6042, 0.7235, 0.3472, 0.6565, 0.2977, 0.2261, 0.6934]],
       requires_grad=True)
Parameter containing:
tensor([[0.5924, 0.8639, 0.5130, 0.4060, 0.1992, 0.5542, 0.5058, 0.6312],
        [0.4211, 0.7662, 0.8747, 0.5061, 0.8222, 0.4535, 0.3712, 0.8480]],
       requires_grad=True)
Parameter containing:
tensor([[0.6672, 0.9300, 0.5794, 0.4891, 0.2677, 0.6215, 0.5584, 0.6885],
        [0.2092, 0.5429, 0.6611, 0.2875, 0.5985, 0.2371, 0.1603, 0.6308]],
       requires_grad=True)
Parameter containing:
tensor([[0.4348, 0.6940, 0.3483, 0.2509, 0.0381, 0.3845, 0.3332, 0.4607],
        [0.3902, 0.7257, 0.8454, 0.4689, 0.7781, 0.4183, 0.3343, 0.8143]],
       requires_grad=True)
acc  0 1.2431328296661377
acc  1 0.8567472100257874
acc  2 0.6469011306762695
acc  3 1.1760374307632446
0.9807046502828598 10
[Parameter containing:

In [14]:
def rl(fname, lr=1e-3, lr0=0.1, lr2=.01):
    h = []
    accs = []
    param_num = []

    
    for k in range(RUN_NUM):
        torch.manual_seed(42+k)
        x_train, y_train, x_val, y_val, x_test, y_test = [],[],[],[],[],[]
        for angle in make_angles():
            (_x_train, _y_train), (_x_val, _y_val), (_x_test, _y_test) = gen(TRAIN_NUM, TEST_NUM, seed=42+k+int(angle),
                                                                             angle=angle)
            x_train.append(_x_train)
            y_train.append(_y_train)
            x_val.append(_x_val)
            y_val.append(_y_val)
            x_test.append(_x_test)
            y_test.append(_y_test)
        model = ArchModel()
        
    
        rs = np.random.RandomState(42+k)
        
        opt = torch.optim.SGD([{'params': model.rots.parameters(), 'lr': lr0},
                              {'params': model.models.parameters(), 'lr': lr}],
                              lr=lr)
                              
                              
        opt2 = torch.optim.SGD(model.gammas, lr=lr2)

        crit = torch.nn.MSELoss()
        tq = tqdm.tqdm_notebook(range(EPOCH_NUM )) # taking half of epoch num for NAS
        losses = []
        losses2 = []
        for e in tq:
            
            
            for d in range(D):
                opt2.zero_grad()
                gammas = torch.nn.functional.softmax(model.gammas[d])
                m = torch.distributions.Categorical(gammas)
                
            
                x = torch.tensor(x_val[d]).float()
                y = torch.tensor(y_val[d]).float()
                gamma_id = m.sample((x.shape[0],))
                log_prob = m.log_prob(gamma_id)
                onehot = torch.nn.functional.one_hot(gamma_id, HEAD_NUM)
            
            
                
                out = model(x, d, gammas = onehot, use_softmax = False)[:,0]
                loss2 = crit(out, y) 
                
                opt2.zero_grad()
                reward = -loss2 # more -> better
                loss_to_min = - (reward * log_prob) 
                loss_to_min.sum().backward()
                torch.nn.utils.clip_grad_value_(model.gammas, GRAD_CLIP)
                opt2.step()

                            
                opt.zero_grad()
                x = torch.tensor(x_train[d]).float()
                y = torch.tensor(y_train[d]).float()
                gamma_id = m.sample((x.shape[0],))
                log_prob = m.log_prob(gamma_id)
                onehot = torch.nn.functional.one_hot(gamma_id, HEAD_NUM)
            
                
                out = model(x, d, gammas = onehot, use_softmax = False)[:,0]
                loss = crit(out, y)
                
                loss.backward()
                torch.nn.utils.clip_grad_value_(model.parameters(), GRAD_CLIP)
                opt.step()
                losses.append(loss.item())
                losses2.append(loss2.item())
            
            tq.set_description(str(np.mean(losses))+';'+str(np.mean(losses2)))

        tq = tqdm.tqdm_notebook(range(EPOCH_NUM))
        
        # search step. Now we concatenate train and validation
        x_train = np.concatenate([x_train, x_val], axis=1)
        y_train =  np.concatenate([y_train, y_val], axis=1)
        
        
        for g in model.gammas:
            print (g.argmax(1))
        
        for g in model.gammas:
            print (g)
        
        opt = torch.optim.SGD([{'params': model.rots.parameters(), 'lr': lr0},
                              {'params': model.models.parameters(), 'lr': lr}],
                              lr=lr)
        
        for e in tq:

            for d in range(D):
                loss = 0.0
                opt.zero_grad()

                d2 = rs.choice([i for i in range(D) if i != d])

                x = torch.tensor(x_train[d]).float()
                y = torch.tensor(y_train[d]).float()
                out = model(x, d, use_softmax=False)[:,0]

               
                loss = crit(out, y) 
                
                
                loss.backward()
                torch.nn.utils.clip_grad_value_(model.parameters(), GRAD_CLIP)
                opt.step()
            angles= [str(r.angle.item()) for r in model.rots]
            #print (angles)
            tq.set_description(' '.join(angles))
        total = 0
        for d in range(D):

            out = model(torch.tensor(x_test[d]).float(), d, use_softmax=False)
            total += crit2(out[:,0], torch.tensor(y_test[d]).float()).item()
        
        acc = total/D
        accs.append(acc)
        param_num.append(calc_params(model.gammas))
        print (acc, param_num[-1])
        
    with open(fname+'.txt', 'a') as out:
        out.write(f'D={D}, HEAD_NUM={HEAD_NUM}, RUN_NUM={RUN_NUM}, CRIT={np.mean(accs)}+-{np.std(accs)}.  MIN={np.min(accs)}, MAX={np.max(accs)}')
        out.write(f' PARAMS={np.mean(param_num)}+-{np.std(param_num)}. MIN={np.min(param_num)}, MAX={np.max(param_num)}\n')
    
    with open(fname+'.jsonl', 'a') as out:
        out.write(json.dumps({'D': D, 'HEAD_NUM': HEAD_NUM, 'RUN_NUM': RUN_NUM, 'accs': accs, 'params': param_num})+'\n')
    print (model.gammas)
    for r in model.rots:
        print (r.angle)
    return (np.mean(accs), np.mean(param_num))


In [15]:
# helper for make inference for a model

for D in [2,3,4]:
    for HEAD_NUM in [D, 8]:
        rl("rl")

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM )) # taking half of epoch num for NAS


  0%|          | 0/1000 [00:00<?, ?it/s]

  gammas = torch.nn.functional.softmax(model.gammas[d])
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(EPOCH_NUM))


  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([1, 1])
Parameter containing:
tensor([[1.0206, 0.7767],
        [0.0293, 1.3129]], requires_grad=True)
Parameter containing:
tensor([[ 0.4217,  0.5696],
        [-0.2292,  1.2794]], requires_grad=True)
1.55710107088089 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 0])
Parameter containing:
tensor([[0.4622, 0.1883],
        [0.8286, 0.4386]], requires_grad=True)
Parameter containing:
tensor([[-0.4587,  0.6926],
        [ 0.4955,  0.1614]], requires_grad=True)
1.815471112728119 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 1])
Parameter containing:
tensor([[ 0.5188,  0.9315],
        [ 1.0206, -0.0584]], requires_grad=True)
Parameter containing:
tensor([[-0.1822,  1.5399],
        [ 0.0456,  0.4538]], requires_grad=True)
0.5248137563467026 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[0.1558, 0.9925],
        [0.4280, 1.1543]], requires_grad=True)
Parameter containing:
tensor([[0.0344, 0.5716],
        [0.7937, 0.0491]], requires_grad=True)
0.5438301414251328 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([0, 1])
Parameter containing:
tensor([[0.3465, 0.3746],
        [0.6338, 0.0433]], requires_grad=True)
Parameter containing:
tensor([[ 1.5087,  0.0853],
        [-0.9572,  1.4878]], requires_grad=True)
1.3527682423591614 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 0])
Parameter containing:
tensor([[ 3.0836, -2.9806],
        [ 4.4168, -3.0735]], requires_grad=True)
Parameter containing:
tensor([[0.1445, 1.2100],
        [1.2131, 0.0109]], requires_grad=True)
1.1170843243598938 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 1])
Parameter containing:
tensor([[ 3.5174, -2.8601],
        [ 2.3799, -1.9205]], requires_grad=True)
Parameter containing:
tensor([[0.5224, 1.0089],
        [0.2005, 1.0396]], requires_grad=True)
0.7202798128128052 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 1])
Parameter containing:
tensor([[0.2468, 0.0768],
        [0.6305, 0.5440]], requires_grad=True)
Parameter containing:
tensor([[-2.0885,  2.9495],
        [ 0.0977,  0.3860]], requires_grad=True)
0.5993018597364426 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([0, 1])
Parameter containing:
tensor([[-0.1162,  0.8030],
        [ 0.1511,  0.2786]], requires_grad=True)
Parameter containing:
tensor([[ 2.0536, -1.5081],
        [-0.9280,  1.8762]], requires_grad=True)
0.5912598073482513 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([1, 0])
Parameter containing:
tensor([[ 1.3694,  0.3582],
        [ 2.3718, -1.5699]], requires_grad=True)
Parameter containing:
tensor([[0.5768, 0.6459],
        [0.7195, 0.3817]], requires_grad=True)
1.338460624217987 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([0, 1])
Parameter containing:
tensor([[0.5734, 0.9266],
        [0.5997, 0.5274]], requires_grad=True)
Parameter containing:
tensor([[ 1.4065,  0.2460],
        [-0.1448,  1.3891]], requires_grad=True)
0.4611408859491348 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 1])
Parameter containing:
tensor([[0.5233, 0.9836],
        [0.7517, 0.5506]], requires_grad=True)
Parameter containing:
tensor([[-0.0138,  1.1622],
        [-0.2322,  0.6178]], requires_grad=True)
0.7332600951194763 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 0])
Parameter containing:
tensor([[0.4811, 0.7327],
        [1.0558, 0.5778]], requires_grad=True)
Parameter containing:
tensor([[0.3566, 0.7978],
        [0.7246, 0.4435]], requires_grad=True)
1.5007022023200989 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[-0.1267,  1.9002],
        [-0.6546,  1.9660]], requires_grad=True)
Parameter containing:
tensor([[0.2436, 1.1011],
        [0.7029, 0.1570]], requires_grad=True)
0.8329067230224609 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[0.7238, 0.7900],
        [0.2151, 1.0839]], requires_grad=True)
Parameter containing:
tensor([[0.2781, 0.9177],
        [0.3783, 0.3711]], requires_grad=True)
0.8191429376602173 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 1])
Parameter containing:
tensor([[-3.4501,  4.4686],
        [-3.6795,  4.0945]], requires_grad=True)
Parameter containing:
tensor([[-0.0615,  0.7162],
        [-0.0704,  1.4564]], requires_grad=True)
1.005465567111969 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 1])
Parameter containing:
tensor([[0.4223, 0.4005],
        [0.4655, 0.2321]], requires_grad=True)
Parameter containing:
tensor([[1.0867, 0.3049],
        [0.0946, 0.3210]], requires_grad=True)
0.9471522569656372 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([0, 1])
Parameter containing:
tensor([[0.4198, 1.1151],
        [0.2770, 0.3176]], requires_grad=True)
Parameter containing:
tensor([[1.3545, 0.2411],
        [0.6857, 0.7094]], requires_grad=True)
1.2822698950767517 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[ 0.8512, -0.5168],
        [ 0.3904,  0.9668]], requires_grad=True)
Parameter containing:
tensor([[ 0.9652,  0.4674],
        [-0.2460,  1.3926]], requires_grad=True)
0.8888999223709106 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[0.0709, 0.7757],
        [0.2753, 0.8608]], requires_grad=True)
Parameter containing:
tensor([[0.5369, 0.8961],
        [1.1684, 0.5396]], requires_grad=True)
1.404140830039978 19
[Parameter containing:
tensor([[0.0709, 0.7757],
        [0.2753, 0.8608]], requires_grad=True), Parameter containing:
tensor([[0.5369, 0.8961],
        [1.1684, 0.5396]], requires_grad=True)]
Parameter containing:
tensor([34.0012], requires_grad=True)
Parameter containing:
tensor([61.0099], requires_grad=True)


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([7, 2])
Parameter containing:
tensor([[0.6367, 1.2203, 0.3916, 0.5671, 0.3591, 0.7745, 0.3043, 0.9274],
        [1.3569, 0.1402, 0.8245, 0.4976, 0.6791, 0.6207, 0.7826, 0.3081]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6264,  0.6552,  0.4692,  0.5364, -0.0694,  0.8497,  0.2615,  0.8640],
        [ 0.8414,  0.7306,  1.0623, -0.7283, -0.1178, -0.1557,  0.9252, -0.0446]],
       requires_grad=True)
1.8551229238510132 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([5, 6])
Parameter containing:
tensor([[0.3549, 0.2759, 0.9013, 0.4210, 0.0391, 0.1246, 0.6450, 0.0467],
        [0.6288, 0.5544, 0.9097, 0.1287, 0.6896, 0.5867, 0.5100, 0.7341]],
       requires_grad=True)
Parameter containing:
tensor([[0.4852, 0.2983, 0.6146, 0.2293, 0.2331, 0.8041, 0.3078, 0.7739],
        [0.0136, 0.6246, 0.5479, 0.2226, 0.7865, 0.5419, 0.9068, 0.3553]],
       requires_grad=True)
1.401248723268509 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([6, 7])
Parameter containing:
tensor([[ 1.3190,  0.7653,  0.0778,  0.1456,  0.1413,  1.0238,  0.3042,  0.4925],
        [ 0.9527,  0.9574,  1.3623,  0.7753,  0.6325, -0.1828,  0.6073,  0.4263]],
       requires_grad=True)
Parameter containing:
tensor([[0.4231, 0.4242, 0.9246, 0.4530, 1.0659, 0.6583, 1.1401, 0.1410],
        [0.3814, 0.4531, 0.0435, 0.7119, 0.4977, 0.3931, 0.1092, 1.2028]],
       requires_grad=True)
0.7201334834098816 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 6])
tensor([3, 7])
Parameter containing:
tensor([[0.1761, 0.8130, 0.8601, 0.9404, 0.0978, 0.5588, 0.6678, 0.0651],
        [0.4849, 0.1823, 0.1653, 0.1606, 0.4170, 0.3199, 0.9970, 0.3519]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5867,  0.8885,  0.7480,  2.0405,  0.0481, -0.1657, -0.0429,  0.5330],
        [ 0.3528,  1.0487,  0.2912,  0.3256,  0.2804,  0.6450,  0.1300,  1.1127]],
       requires_grad=True)
0.9628806412220001 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 4])
tensor([6, 4])
Parameter containing:
tensor([[0.6648, 0.0351, 0.5207, 0.1261, 0.7798, 0.8657, 0.1169, 0.4136],
        [0.1729, 0.2935, 0.9030, 0.0680, 1.0077, 0.4482, 0.8602, 0.8300]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7350,  1.5536,  0.5035, -0.1254,  0.0069, -0.2892,  2.4281, -0.5403],
        [-0.6901,  0.5481, -0.0459,  0.0098,  4.4134,  0.3784, -0.1085,  1.2169]],
       requires_grad=True)
1.1898483633995056 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 7])
tensor([0, 7])
Parameter containing:
tensor([[-2.4461, -0.4147,  0.6283,  2.1554,  8.6273, -4.1542,  2.5282, -2.8994],
        [-2.0229, -1.7344, -2.1338,  0.6412,  1.9666, -1.3203,  0.8093,  7.0909]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8545,  0.0206,  0.2032,  0.4474,  0.6760,  0.4025, -0.0059,  0.2917],
        [ 0.2959,  0.6417,  0.6942,  0.0326,  0.0911,  0.3259,  0.5026,  1.0813]],
       requires_grad=True)
0.7696866989135742 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 1])
tensor([7, 3])
Parameter containing:
tensor([[ 0.0645, -2.1680, -1.0176,  0.6047,  5.6034,  0.6173, -0.2325,  0.4164],
        [ 1.1758,  2.4108,  0.0223,  0.6466, -0.2780, -0.5075,  1.7163, -1.3454]],
       requires_grad=True)
Parameter containing:
tensor([[0.4390, 0.4809, 0.5669, 0.8615, 0.2604, 0.0585, 0.4372, 1.0183],
        [0.8345, 0.3962, 0.5193, 1.0395, 0.5854, 0.3887, 0.8926, 0.5463]],
       requires_grad=True)
0.8885329365730286 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([7, 3])
Parameter containing:
tensor([[0.0332, 0.3155, 0.2427, 0.9018, 0.1340, 0.6894, 0.2920, 0.2342],
        [0.8874, 0.6735, 0.1666, 0.2202, 0.0643, 0.4053, 0.8283, 0.8739]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0473,  1.1912, -1.6606, -0.8397,  0.8942, -0.8310,  1.5519,  4.2241],
        [ 0.9633, -1.9880,  0.9933,  3.8401,  0.0645, -1.1188,  0.3219,  0.6285]],
       requires_grad=True)
0.6589390337467194 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 1])
tensor([3, 7])
Parameter containing:
tensor([[ 0.4552,  0.2762,  0.4357,  0.0513,  0.3794,  0.1846,  0.5471,  0.2806],
        [ 0.7089,  1.1150, -0.0427,  0.4645,  0.5812,  0.8189,  0.6025,  0.4605]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.9750e-03, -9.0995e-02, -1.4791e+00,  3.0753e+00,  1.0791e+00,
          1.0400e+00, -3.1600e-01, -1.7466e-02],
        [ 2.5606e-01, -1.1460e+00, -1.3262e+00,  7.4393e-01,  9.2513e-01,
          6.4647e-02,  3.1383e-01,  3.0612e+00]], requires_grad=True)
1.106770247220993 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 7])
tensor([2, 2])
Parameter containing:
tensor([[ 1.2272,  1.1114,  0.5019,  0.1114,  0.6522,  0.1347,  0.7568,  0.3579],
        [ 0.4676,  0.1589, -0.2095,  0.3195,  0.4523,  0.8523,  0.4052,  1.0825]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0660,  0.3988,  1.0953,  0.2348,  0.6562,  1.0553,  1.0015,  0.4720],
        [ 0.3213,  0.5626,  1.0419,  0.3837, -0.0898,  0.6270,  0.2117,  0.1169]],
       requires_grad=True)
0.768829345703125 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([5, 2])
Parameter containing:
tensor([[1.2022, 0.9110, 0.4463, 0.5028, 0.8785, 0.2341, 0.5833, 0.7656],
        [0.3087, 0.5656, 1.0031, 0.2764, 0.7812, 0.9010, 0.7683, 0.1299]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9683,  0.3023,  0.5912,  0.4603,  0.8470,  1.3756,  0.6997,  0.3377],
        [ 0.7807,  0.0400,  1.2736,  1.0803,  0.8309,  0.2547, -0.2382,  0.1476]],
       requires_grad=True)
0.7419363856315613 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 0])
tensor([4, 7])
Parameter containing:
tensor([[-1.5746, -0.0358,  0.2165,  0.4156, -0.6817,  4.7822,  0.3070,  0.9140],
        [ 4.3661, -0.6891,  0.9735,  2.2065, -0.0416, -0.8659, -0.6722, -0.7641]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0113,  0.1800, -0.3210,  0.8246,  0.9619,  0.7324,  0.1812,  0.6210],
        [ 0.2271,  0.1733,  0.6492,  0.7339,  0.5051,  0.6050,  0.6283,  0.8418]],
       requires_grad=True)
0.8581418991088867 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 5])
tensor([0, 7])
Parameter containing:
tensor([[ 0.5474,  0.4785,  1.2351,  0.6580,  0.3059,  0.9562,  0.5060,  0.4827],
        [ 0.8249,  0.4975,  0.1208, -0.3168, -0.0251,  1.1001,  0.9403,  0.1461]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9442,  0.5401,  0.6646, -0.0233,  0.1905,  0.8245,  0.6308,  0.9115],
        [ 0.5254,  0.8070,  0.3620, -0.0273,  0.8472,  0.3775,  0.1526,  0.8578]],
       requires_grad=True)
0.5596269071102142 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([1, 7])
Parameter containing:
tensor([[0.8097, 1.0148, 0.7244, 0.6245, 0.7910, 0.4477, 0.1099, 0.7673],
        [1.0315, 0.2483, 0.0383, 0.0066, 0.8736, 0.7918, 0.4359, 0.8284]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0288,  3.1499, -0.0741, -0.8888,  0.0355, -0.5732,  0.8182,  1.4564],
        [ 0.9301, -0.6105, -0.2772, -0.3758, -0.5707, -0.3330,  0.3573,  3.7524]],
       requires_grad=True)
0.5953093618154526 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 0])
tensor([1, 3])
Parameter containing:
tensor([[0.9452, 0.4135, 0.2159, 0.8585, 0.4693, 1.0025, 0.3677, 0.4852],
        [1.0716, 0.2902, 0.2300, 0.8462, 0.3626, 0.0681, 0.3715, 0.7746]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0976,  0.8193,  0.5962, -0.0588,  0.5235,  0.4805,  0.2005,  0.0993],
        [ 0.6635,  0.3537,  0.6242,  1.2462,  0.4388,  0.2658,  0.4669,  0.4957]],
       requires_grad=True)
0.7458398342132568 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 4])
tensor([2, 4])
Parameter containing:
tensor([[-0.4998, -0.0619,  2.8762, -3.8081, -0.8239,  0.0557,  7.4612, -1.7253],
        [ 2.0372, -0.3727,  0.5707, -1.5434,  7.3680,  0.5102, -5.7050,  1.6389]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5523,  0.1847,  0.8491,  0.4769,  0.4414, -0.0042,  0.0015,  0.2345],
        [ 0.5122,  0.5135, -0.0936,  0.2344,  0.9408,  0.3776,  0.6480,  0.6648]],
       requires_grad=True)
0.7373203635215759 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 6])
tensor([6, 6])
Parameter containing:
tensor([[ 0.1573,  0.5859,  0.3297,  0.2230,  0.6191,  0.4508,  0.6027,  0.3591],
        [ 0.4830,  0.7652,  0.0126,  0.7975, -0.0129,  0.6046,  1.2605,  0.5127]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5935,  0.2930, -0.0344,  0.1544,  0.8301,  1.2160,  1.2238,  0.2289],
        [ 0.3485,  0.2994,  0.0180,  0.4771,  0.8773,  0.3858,  1.2622,  0.2989]],
       requires_grad=True)
1.3891696333885193 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([1, 4])
Parameter containing:
tensor([[0.4815, 1.0550, 0.4460, 0.1004, 0.9761, 0.5569, 0.5176, 0.9869],
        [0.7289, 0.9110, 0.1249, 0.9173, 0.5844, 0.1613, 0.5668, 0.5365]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9524,  1.2928,  0.2392,  0.0442,  0.4881, -0.1281,  0.0958,  0.6983],
        [-0.3323,  0.5943,  0.1019, -0.1057,  1.9129,  0.2289,  0.6121, -0.1594]],
       requires_grad=True)
1.0205858647823334 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 2])
tensor([5, 0])
Parameter containing:
tensor([[0.0307, 0.3884, 0.6967, 0.4663, 0.9542, 0.5740, 0.5623, 0.5982],
        [0.1102, 0.3740, 1.0336, 0.9127, 0.0254, 0.3664, 0.7810, 0.3765]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3138,  0.1040,  0.2107,  0.7352,  0.3427,  1.4685,  0.8404,  0.5606],
        [ 0.8982,  0.2633,  0.3497, -0.3091,  0.3731, -0.0120,  0.4356,  0.8665]],
       requires_grad=True)
1.530773937702179 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([3, 2])
Parameter containing:
tensor([[-0.0743,  1.1157,  0.6476,  0.5723,  0.3935,  0.9053,  0.8261,  0.7376],
        [-0.0470,  0.6342,  0.8024,  0.6238,  0.6763,  0.5326,  0.3028,  0.2932]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9169,  0.7709,  0.2875,  0.9786, -0.0350,  0.6118,  0.6356,  0.0994],
        [ 0.5516,  0.5079,  0.9967,  0.5922,  0.6895,  0.1394,  0.8966,  0.6893]],
       requires_grad=True)
0.9665644764900208 11
[Parameter containing:
tensor([[-0.0743,  1.1157,  0.6476,  0.5723,  0.3935,  0.9053,  0.8261,  0.7376],
        [-0.0470,  0.6342,  0.8024,  0.6238,  0.6763,  0.5326,  0.3028,  0.2932]],
       requires_grad=True), Parameter containing:
tensor([[ 0.9169,  0.7709,  0.2875,  0.9786, -0.0350,  0.6118,  0.6356,  0.0994],
        [ 0.5516,  0.5079,  0.9967,  0.5922,  0.6895,  0.1394,  0.8966,  0.6893]],
       requires_grad=True)]
Parameter containing:
tensor([116.6689], requires_grad=True)
Parameter containing:
tensor

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([1, 1])
tensor([2, 1])
Parameter containing:
tensor([[0.5882, 0.5463, 1.0456],
        [1.2609, 0.0505, 0.6393]], requires_grad=True)
Parameter containing:
tensor([[0.6140, 0.8023, 0.5746],
        [0.3344, 1.1265, 0.2004]], requires_grad=True)
Parameter containing:
tensor([[0.8535, 0.2531, 1.0717],
        [0.6771, 0.9315, 0.2802]], requires_grad=True)
1.3444910844167073 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([1, 1])
Parameter containing:
tensor([[0.6235, 0.0506, 0.8974],
        [0.4720, 0.0528, 0.0552]], requires_grad=True)
Parameter containing:
tensor([[-0.6622, -1.6378,  3.7045],
        [ 4.3239, -1.4180, -1.2076]], requires_grad=True)
Parameter containing:
tensor([[0.4423, 0.6719, 0.4760],
        [0.3001, 0.8517, 0.2577]], requires_grad=True)
1.1264281868934631 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 1])
tensor([0, 2])
Parameter containing:
tensor([[0.6651, 0.7889, 0.8241],
        [0.7335, 0.6995, 0.0591]], requires_grad=True)
Parameter containing:
tensor([[0.2366, 0.2056, 1.0408],
        [0.7754, 0.9590, 0.8866]], requires_grad=True)
Parameter containing:
tensor([[0.6766, 0.3596, 0.5547],
        [0.0314, 0.5250, 1.0257]], requires_grad=True)
0.7440009315808614 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([0, 1])
tensor([0, 1])
Parameter containing:
tensor([[-0.1385,  1.8939,  0.0763],
        [ 1.2557,  0.1385,  0.1106]], requires_grad=True)
Parameter containing:
tensor([[ 0.7102,  0.3085,  0.6082],
        [-0.4482,  0.7798, -0.0410]], requires_grad=True)
Parameter containing:
tensor([[ 1.1166,  0.2768,  0.0766],
        [-0.6090,  1.6705,  0.8087]], requires_grad=True)
0.9262683987617493 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 1])
tensor([0, 2])
Parameter containing:
tensor([[0.7143, 0.0686, 0.4556],
        [0.3133, 0.6738, 0.7666]], requires_grad=True)
Parameter containing:
tensor([[0.3664, 0.2082, 0.1037],
        [0.1018, 0.8134, 0.3943]], requires_grad=True)
Parameter containing:
tensor([[ 1.4691, -0.1830,  0.9603],
        [ 0.4833,  0.4028,  1.4660]], requires_grad=True)
1.2255741953849792 19


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 0])
tensor([1, 1])
Parameter containing:
tensor([[ 2.4910, -0.9741, -0.9463],
        [ 0.3242, -0.9854,  2.8913]], requires_grad=True)
Parameter containing:
tensor([[ 1.6240,  0.4461, -0.6992],
        [ 0.8129,  0.4426,  0.7661]], requires_grad=True)
Parameter containing:
tensor([[ 0.1638,  0.5509, -0.0476],
        [-0.2326,  1.1400,  0.3326]], requires_grad=True)
1.0511013269424438 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([2, 1])
tensor([2, 2])
Parameter containing:
tensor([[0.4609, 0.2022, 0.2370],
        [0.0655, 1.0863, 0.5962]], requires_grad=True)
Parameter containing:
tensor([[0.3249, 0.6472, 1.0288],
        [0.3791, 0.3919, 0.3669]], requires_grad=True)
Parameter containing:
tensor([[0.4230, 0.6239, 0.6516],
        [0.2595, 0.2915, 0.7420]], requires_grad=True)
1.059049367904663 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[0.1666, 0.2150, 0.1899],
        [0.8438, 0.1583, 0.7857]], requires_grad=True)
Parameter containing:
tensor([[0.2086, 0.3756, 0.8183],
        [0.4868, 0.2378, 0.3147]], requires_grad=True)
Parameter containing:
tensor([[-0.0728,  0.4629,  0.8750],
        [ 0.8955,  0.0812,  0.4272]], requires_grad=True)
0.814871609210968 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([1, 0])
Parameter containing:
tensor([[ 0.2350,  0.2288,  0.6122],
        [ 0.5026,  0.2813, -0.1981]], requires_grad=True)
Parameter containing:
tensor([[-0.4663,  0.8144,  1.1927],
        [ 0.9849, -0.2450,  0.7512]], requires_grad=True)
Parameter containing:
tensor([[ 0.3805,  1.0598,  0.6618],
        [ 0.8433,  0.6594, -0.0320]], requires_grad=True)
0.8727778593699137 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 0])
tensor([0, 2])
Parameter containing:
tensor([[ 2.0590, -0.2419,  0.3569],
        [ 0.4687,  0.8556,  0.2540]], requires_grad=True)
Parameter containing:
tensor([[0.9916, 0.1557, 0.7125],
        [0.5194, 0.0710, 0.2863]], requires_grad=True)
Parameter containing:
tensor([[ 0.6038,  0.0178,  0.5679],
        [-0.3163, -1.3074,  2.7292]], requires_grad=True)
1.287734031677246 28


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([1, 1])
tensor([2, 1])
Parameter containing:
tensor([[0.4433, 0.3564, 1.3865],
        [0.2992, 0.6048, 1.1894]], requires_grad=True)
Parameter containing:
tensor([[-0.0683,  0.9962,  0.4772],
        [ 0.8150,  0.9477, -0.2204]], requires_grad=True)
Parameter containing:
tensor([[0.6653, 0.9430, 1.1116],
        [0.4123, 0.9094, 0.3114]], requires_grad=True)
0.8971735835075378 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([1, 2])
tensor([0, 1])
Parameter containing:
tensor([[-0.5336, -0.0963,  2.7951],
        [-0.4780, -1.1493,  3.4197]], requires_grad=True)
Parameter containing:
tensor([[-2.4605,  4.5755, -0.7516],
        [-3.4694,  2.2252,  2.7456]], requires_grad=True)
Parameter containing:
tensor([[0.8240, 0.0703, 0.7551],
        [0.3830, 0.4557, 0.3387]], requires_grad=True)
1.2085296312967937 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([0, 2])
tensor([0, 0])
Parameter containing:
tensor([[0.6390, 0.7449, 0.8192],
        [0.5713, 0.3026, 0.9248]], requires_grad=True)
Parameter containing:
tensor([[0.7331, 0.4206, 0.2174],
        [0.2766, 0.1998, 0.2949]], requires_grad=True)
Parameter containing:
tensor([[0.8671, 0.4480, 0.4552],
        [0.8603, 0.5700, 0.5171]], requires_grad=True)
0.4739286204179128 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([2, 0])
tensor([0, 1])
Parameter containing:
tensor([[0.7276, 1.2009, 0.5889],
        [0.6255, 0.5837, 0.7028]], requires_grad=True)
Parameter containing:
tensor([[ 0.1395,  0.8120,  0.8761],
        [ 0.3019,  0.0772, -0.0313]], requires_grad=True)
Parameter containing:
tensor([[ 1.5743,  0.9386, -0.3744],
        [ 0.6815,  1.0145,  0.1538]], requires_grad=True)
1.4140515128771465 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([2, 2])
tensor([2, 0])
Parameter containing:
tensor([[0.8380, 0.8554, 0.1739],
        [0.9209, 0.5799, 0.6404]], requires_grad=True)
Parameter containing:
tensor([[0.0878, 0.5906, 0.8092],
        [0.6677, 0.1718, 0.6848]], requires_grad=True)
Parameter containing:
tensor([[0.2545, 0.1122, 0.7598],
        [0.9391, 0.6871, 0.1031]], requires_grad=True)
0.6367211838563284 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 2])
tensor([1, 1])
Parameter containing:
tensor([[-1.5206, -2.0044,  4.5456],
        [ 5.1178, -2.2872, -1.7631]], requires_grad=True)
Parameter containing:
tensor([[0.5467, 0.8078, 0.9162],
        [0.4076, 0.5467, 0.8819]], requires_grad=True)
Parameter containing:
tensor([[ 0.3424,  0.9354, -0.0024],
        [ 0.1737,  0.7310,  0.3188]], requires_grad=True)
0.9922994573911031 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([2, 0])
tensor([2, 1])
Parameter containing:
tensor([[-0.5132,  1.7755,  0.0694],
        [-0.1414,  1.2551,  0.4668]], requires_grad=True)
Parameter containing:
tensor([[ 0.1908, -1.7572,  2.3531],
        [ 3.4228, -1.0899, -0.6635]], requires_grad=True)
Parameter containing:
tensor([[-1.1391, -0.9045,  3.9080],
        [-2.3320,  4.0967, -0.1510]], requires_grad=True)
0.6647140383720398 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([1, 2])
tensor([0, 0])
Parameter containing:
tensor([[0.6160, 1.2014, 0.1324],
        [0.0818, 1.1698, 0.5238]], requires_grad=True)
Parameter containing:
tensor([[0.4612, 0.9265, 0.8423],
        [0.9301, 0.0919, 0.9660]], requires_grad=True)
Parameter containing:
tensor([[ 1.2180, -0.0144, -0.0599],
        [ 1.0848, -0.3711,  0.7920]], requires_grad=True)
0.7438644369443258 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([1, 1])
tensor([2, 2])
Parameter containing:
tensor([[-0.0277,  0.2424,  0.9777],
        [ 0.4662,  0.9181,  0.5475]], requires_grad=True)
Parameter containing:
tensor([[-0.4678,  2.2914, -0.5324],
        [ 1.5016,  1.8787, -1.0479]], requires_grad=True)
Parameter containing:
tensor([[ 0.1593,  0.2353,  0.7865],
        [-0.8572,  0.4625,  1.3812]], requires_grad=True)
1.1285860935846965 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 0])
tensor([0, 2])
tensor([0, 1])
Parameter containing:
tensor([[0.2512, 0.6563, 0.3647],
        [1.0616, 0.7187, 0.3633]], requires_grad=True)
Parameter containing:
tensor([[ 2.5382, -0.9684,  0.1593],
        [-2.6303,  1.6496,  3.5409]], requires_grad=True)
Parameter containing:
tensor([[ 0.6531,  0.4098,  0.1049],
        [-0.0522,  1.1570,  0.6642]], requires_grad=True)
1.2117284536361694 29
[Parameter containing:
tensor([[0.2512, 0.6563, 0.3647],
        [1.0616, 0.7187, 0.3633]], requires_grad=True), Parameter containing:
tensor([[ 2.5382, -0.9684,  0.1593],
        [-2.6303,  1.6496,  3.5409]], requires_grad=True), Parameter containing:
tensor([[ 0.6531,  0.4098,  0.1049],
        [-0.0522,  1.1570,  0.6642]], requires_grad=True)]
Parameter containing:
tensor([13.6417], requires_grad=True)
Parameter containing:
tensor([62.6022], requires_grad=True)
Parameter containing:
tensor([-64.4072], requires_grad=True)


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 2])
tensor([0, 6])
tensor([5, 2])
Parameter containing:
tensor([[0.9592, 0.8414, 0.4616, 1.0198, 0.2665, 0.5953, 0.2492, 0.7881],
        [0.9889, 0.1281, 1.0576, 0.6350, 0.7801, 0.5440, 0.7815, 0.2946]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.5863e+00,  5.2789e-03,  1.0739e-01,  4.8324e-01,  1.0035e-01,
          3.3150e-01,  2.4288e-01,  1.3361e+00],
        [-8.7098e-03,  7.0918e-01, -2.3276e-01, -2.5055e-04,  2.6642e-01,
         -1.3004e-01,  1.8913e+00,  1.8023e-02]], requires_grad=True)
Parameter containing:
tensor([[0.7780, 0.7549, 0.2704, 0.8653, 0.6721, 0.8920, 0.6033, 0.1587],
        [0.7624, 0.3307, 0.9308, 0.7238, 0.7203, 0.1841, 0.8601, 0.5920]],
       requires_grad=True)
0.825905978679657 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 2])
tensor([7, 6])
tensor([0, 0])
Parameter containing:
tensor([[ 0.3205,  0.1782,  0.7599,  0.2827,  0.1202,  0.2487,  0.9327, -0.0345],
        [ 0.6570,  0.4239,  1.0330,  0.2005,  0.4702,  0.4165,  0.5459,  0.9949]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0380, -0.8651,  1.0708, -0.3090,  0.2998, -0.8335,  0.0670,  4.2781],
        [-1.0933,  1.0169, -0.3795,  1.1033, -0.0894,  0.2238,  3.9341, -0.7167]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4641,  0.8329, -0.7831,  0.3727,  1.2164,  0.4921,  0.4867,  0.5777],
        [ 1.3694,  0.6171,  0.7961,  0.4950,  0.1605,  0.8752, -0.5672,  0.2949]],
       requires_grad=True)
0.6814954578876495 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([6, 7])
tensor([5, 5])
Parameter containing:
tensor([[ 0.9654,  0.4416,  0.6019,  0.4039,  0.8192,  0.8206,  0.0104,  0.2066],
        [ 0.6682,  1.4936,  0.9145,  0.8990,  0.8494, -0.1330,  0.4200,  0.4192]],
       requires_grad=True)
Parameter containing:
tensor([[0.5292, 0.6294, 0.7742, 0.2799, 0.8972, 0.6247, 0.9500, 0.5454],
        [0.5266, 0.3180, 0.0842, 0.7748, 0.7817, 0.4101, 0.0674, 0.8298]],
       requires_grad=True)
Parameter containing:
tensor([[-0.2985, -0.2570,  0.4862, -0.1811,  0.3246,  4.3507, -0.7535,  0.1269],
        [-0.2809,  1.8954, -0.6749, -0.6624, -0.5638,  3.4099,  0.2749, -0.3247]],
       requires_grad=True)
0.9583070377508799 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 0])
tensor([1, 1])
tensor([1, 0])
Parameter containing:
tensor([[0.1416, 0.7172, 0.8361, 0.8994, 0.0056, 0.6211, 0.9132, 0.0451],
        [0.9855, 0.2163, 0.1034, 0.1248, 0.3528, 0.2641, 0.5045, 0.5275]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7287,  3.0156,  0.1357, -0.1019,  0.2348, -0.2404,  0.4335,  0.4302],
        [ 0.1759,  1.4161,  0.7742,  0.8773,  0.5315, -0.0935, -0.2687,  0.7735]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.2134,  1.4075,  0.1138,  0.1285,  0.3599,  0.4200,  0.8389, -0.6554],
        [ 1.3519, -0.2940,  0.9631,  0.3537, -0.5432,  0.5970,  0.7352, -0.2427]],
       requires_grad=True)
0.6672248244285583 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 4])
tensor([3, 6])
tensor([0, 5])
Parameter containing:
tensor([[0.6410, 0.0738, 0.4511, 0.1376, 0.8735, 0.7832, 0.0410, 0.5215],
        [0.1834, 0.3572, 0.9601, 0.0686, 0.9730, 0.4138, 0.7740, 0.8535]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0917,  0.2560,  0.3851,  1.7034,  0.2536,  0.9389,  0.7680, -0.1243],
        [ 0.0047,  0.2307,  0.6995,  0.4787,  1.0233,  1.0845,  1.1310,  1.0698]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.8547,  0.3407,  0.4150,  0.6733,  0.5952,  0.2933,  0.1515,  0.3509],
        [ 0.1310,  0.5058,  0.8298,  0.1849,  0.5199,  0.8464,  0.1874, -0.0728]],
       requires_grad=True)
1.190451721350352 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([4, 6])
tensor([7, 7])
Parameter containing:
tensor([[-0.0288, -1.7549,  5.3845, -0.1632,  0.4770, -0.5502,  0.0890,  0.5714],
        [-0.2791, -1.1796, -1.3197,  4.6192,  0.2074, -0.0517,  0.1883,  1.1117]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6865,  0.0284, -0.3459,  0.0926,  1.6782, -0.1894, -0.0438,  0.9834],
        [ 0.6715, -0.1889,  0.6468,  0.5572,  0.0970, -0.4250,  1.4699,  0.8368]],
       requires_grad=True)
Parameter containing:
tensor([[  6.2388,   2.9743, -13.5052,   2.9998,   2.8817,  -7.5142,   9.0624,
          15.4194],
        [-12.3358, -14.2301,   9.0296,  12.3296,  12.6762,   7.7643,  -0.6609,
          13.3495]], requires_grad=True)
0.8297267556190491 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 6])
tensor([2, 0])
tensor([5, 3])
Parameter containing:
tensor([[ 0.5619, -0.4887, -1.3923,  0.2175,  0.2931,  0.3860,  2.8554,  1.4553],
        [ 1.6183, -1.8336, -1.3480, -0.1470,  1.6507,  0.7674,  2.3858,  0.7473]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.3891, -0.3885,  7.5619,  2.7712, -2.2621, -0.8622, -3.3888, -0.6979],
        [12.1851, -1.4900, -0.2231, -1.9282, -0.4707, -1.5552, -1.1939, -0.1217]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.9024,  0.7231,  0.2812,  0.4169, -0.4677,  1.4364,  1.2443,  0.1625],
        [ 1.1283,  1.0334,  0.5744,  1.2609,  0.3428,  1.1220, -0.8211,  0.7285]],
       requires_grad=True)
2.538789987564087 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 7])
tensor([4, 7])
tensor([6, 1])
Parameter containing:
tensor([[ 0.0311,  0.2749,  0.2342,  0.8979,  0.2637,  0.6259,  0.3122,  0.2030],
        [ 0.8020,  0.6788,  0.2874,  0.2974, -0.0591,  0.2679,  0.8910,  0.9542]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0143,  0.3075,  0.3041,  0.5121,  0.9663,  0.8926,  0.6817,  0.8329],
        [ 0.1107,  0.2144,  0.5389,  0.5270,  0.3056,  0.3687,  0.8074,  0.8324]],
       requires_grad=True)
Parameter containing:
tensor([[0.3277, 0.7503, 0.0672, 0.3822, 0.2978, 0.3562, 0.8805, 0.8131],
        [0.7429, 0.9402, 0.0879, 0.1376, 0.2829, 0.3511, 0.3693, 0.4908]],
       requires_grad=True)
1.2246835033098857 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([4, 4])
tensor([7, 0])
Parameter containing:
tensor([[0.6272, 0.0317, 0.3501, 0.0513, 0.4491, 0.1347, 0.4538, 0.5122],
        [0.5881, 0.9895, 0.0904, 0.3605, 0.4949, 0.9841, 0.6767, 0.5246]],
       requires_grad=True)
Parameter containing:
tensor([[0.4960, 0.2432, 0.3516, 0.2540, 0.9988, 0.3916, 0.1431, 0.4155],
        [0.2039, 0.4629, 0.2736, 0.4573, 0.8273, 0.4170, 0.1067, 0.1438]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2505,  0.2215,  0.4966,  0.7419,  0.5362,  0.5968,  0.2566,  1.0112],
        [ 1.0577, -0.0011,  0.0272,  0.4002,  0.4630,  0.0271,  0.1738,  0.7828]],
       requires_grad=True)
0.5923577745755514 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 5])
tensor([6, 2])
tensor([1, 6])
Parameter containing:
tensor([[-0.1862,  1.7120, -0.3002,  1.6185, -0.5292,  0.3237,  0.3872,  1.8277],
        [ 1.1667,  0.1454,  0.3866,  0.5749, -0.8258,  1.7547, -0.1888,  0.5151]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0344,  0.4266,  0.8226,  0.3963,  0.5085,  0.9945,  1.0579,  0.8080],
        [ 0.6228,  0.4208,  0.8606,  0.3489, -0.0181,  0.4644,  0.1668,  0.3092]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0425,  1.1399,  0.8739,  0.8259, -0.1169,  0.0321,  0.3236,  0.9413],
        [-0.0745, -0.8120,  0.9745, -0.0813,  1.2966,  0.1296,  2.4826, -0.4184]],
       requires_grad=True)
0.7961198488871256 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 6])
tensor([4, 4])
tensor([6, 3])
Parameter containing:
tensor([[0.7391, 1.0297, 0.8188, 0.4238, 0.9679, 0.8497, 0.2315, 0.4634],
        [0.1639, 0.6363, 0.8063, 0.1560, 0.8168, 0.9652, 1.0292, 0.1603]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7947, -0.0682,  0.4184,  0.8319,  1.1671,  0.2410,  1.0544,  1.1428],
        [ 1.0562, -0.1698,  0.5086,  0.8176,  1.1920,  0.7529,  0.2751, -0.2630]],
       requires_grad=True)
Parameter containing:
tensor([[0.2378, 0.4823, 0.4984, 0.4057, 0.2462, 0.0836, 1.0759, 0.3519],
        [0.5026, 0.4347, 0.2764, 1.0952, 0.1305, 0.7675, 0.1600, 0.1179]],
       requires_grad=True)
0.7907640337944031 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 1])
tensor([0, 7])
tensor([2, 7])
Parameter containing:
tensor([[ 0.8374,  3.2104,  0.8213,  0.5740, -0.2409,  0.8877, -0.6029, -1.1438],
        [-0.1794,  3.9900, -1.5387,  0.6392,  0.7061,  1.2422, -1.1945,  0.8484]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6263,  0.1737, -0.0984,  0.2936,  0.6016,  0.5967,  0.5811,  0.4169],
        [ 0.2397,  0.4591,  0.9799,  0.7060,  0.0993,  0.4449,  0.4226,  1.0120]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4751,  0.8338,  1.1870,  0.2383,  0.2516,  0.1422,  0.0309,  1.0174],
        [ 0.5422,  0.8512, -0.2142,  0.0880,  0.0412,  0.4175,  0.8237,  0.9854]],
       requires_grad=True)
0.8972276051839193 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 6])
tensor([0, 7])
tensor([1, 0])
Parameter containing:
tensor([[0.6552, 0.6492, 0.9920, 0.5104, 0.3728, 0.8930, 0.7521, 0.3451],
        [0.2254, 0.5087, 0.1382, 0.1621, 0.2569, 0.6164, 0.8089, 0.5710]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1234,  0.4397,  0.4847, -0.1533,  0.3209,  0.9658,  0.8424,  0.6593],
        [ 0.5146,  0.7212,  0.5614,  0.0693,  0.5364,  0.4272,  0.1289,  0.9430]],
       requires_grad=True)
Parameter containing:
tensor([[0.4644, 0.9334, 0.4776, 0.5931, 0.3434, 0.1749, 0.0625, 0.2996],
        [1.1240, 0.3473, 0.0650, 0.5534, 0.7154, 0.3332, 0.9458, 0.9675]],
       requires_grad=True)
1.273648977279663 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 5])
tensor([5, 7])
tensor([0, 5])
Parameter containing:
tensor([[ 0.6880,  6.2486,  0.8523,  0.4268,  0.5984, -2.5869, -0.1083, -0.8295],
        [-1.2879, -1.0065, -0.0128,  0.1277, -2.7553,  7.7448,  1.6255, -0.1810]],
       requires_grad=True)
Parameter containing:
tensor([[0.7061, 0.3756, 0.2922, 0.3888, 0.2147, 0.8301, 0.5722, 0.5154],
        [0.3940, 0.2035, 0.4032, 0.0967, 0.5909, 0.4127, 0.0265, 0.7451]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.0895, -1.4866,  0.6748,  0.4940,  0.7038,  0.3171,  0.3191,  1.1204],
        [-0.9704,  0.1523,  0.8733,  0.7360, -0.6844,  2.7988,  0.2170,  0.5199]],
       requires_grad=True)
2.2572301626205444 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 3])
tensor([4, 0])
tensor([0, 3])
Parameter containing:
tensor([[0.9220, 0.6225, 0.3598, 0.9784, 0.5239, 0.7275, 0.2029, 0.4209],
        [0.7211, 0.5317, 0.1443, 0.7923, 0.4380, 0.2654, 0.4294, 0.6927]],
       requires_grad=True)
Parameter containing:
tensor([[0.4844, 0.4565, 0.4757, 0.3162, 0.5119, 0.0217, 0.1305, 0.3610],
        [0.7796, 0.4513, 0.6106, 0.7481, 0.6511, 0.2512, 0.6133, 0.4495]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4404, -0.1325,  0.8343,  0.7008,  0.5859,  0.6243, -0.1242,  0.6290],
        [ 0.2413,  0.8857,  0.8323,  1.1287, -0.1544,  0.1552,  0.0382,  0.0605]],
       requires_grad=True)
1.2132111191749573 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 5])
tensor([2, 7])
tensor([0, 0])
Parameter containing:
tensor([[ 0.3195,  6.2223, -0.4623, -2.1542,  0.4725, -0.5966, -0.6237,  0.2966],
        [-1.5115, -2.9724,  1.1729,  2.5331, -3.7749,  8.0542,  0.3930,  0.6093]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5156, -0.0060,  0.7290,  0.5657,  0.4292,  0.2422, -0.0072,  0.2679],
        [ 0.6002,  0.2915, -0.0135,  0.2060,  0.7195,  0.2221,  0.7126,  1.0592]],
       requires_grad=True)
Parameter containing:
tensor([[0.7572, 0.5240, 0.7258, 0.5537, 0.4133, 0.1504, 0.2047, 0.3133],
        [0.9373, 0.3368, 0.3826, 0.9207, 0.0627, 0.5318, 0.8797, 0.4720]],
       requires_grad=True)
1.0467012325922649 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 4])
tensor([0, 1])
tensor([6, 1])
Parameter containing:
tensor([[ 0.3135,  0.7193,  0.8950, -0.1793,  0.6034,  0.6148,  0.3511,  0.0099],
        [ 0.3720,  0.2821,  0.3850,  0.7412,  0.7906,  0.6068,  0.6098,  0.6355]],
       requires_grad=True)
Parameter containing:
tensor([[ 8.1947, -0.9671,  0.2515,  0.1140,  5.0375, -1.5209, -1.6865, -4.9179],
        [-2.8600, 12.0383, -1.0578, -1.9121,  2.3586, -2.3550,  1.8448, -4.0896]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6779, -2.0417, -0.4954, -0.2813, -0.7294,  1.1271,  6.1425, -0.3675],
        [ 0.1776,  7.1846,  1.7894, -1.1307, -1.1123, -0.6149, -2.0695, -1.6427]],
       requires_grad=True)
1.371602217356364 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([7, 4])
tensor([7, 3])
Parameter containing:
tensor([[0.5741, 1.0106, 0.4238, 0.2245, 0.9409, 0.6718, 0.5197, 0.7547],
        [0.8878, 0.7887, 0.2043, 0.9430, 0.5977, 0.1419, 0.4050, 0.5627]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1984,  0.7091,  0.0652,  0.5808,  0.6727,  0.0980,  0.2566,  1.1020],
        [ 0.0122, -0.0492,  0.2765,  0.1304,  0.9701,  0.9040,  0.3596,  0.2491]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4213, -0.6894, -0.5078,  0.5413,  0.7048,  0.5393, -0.1351,  2.5519],
        [ 0.7991,  1.0593, -0.2352,  1.6670,  1.2240, -0.2186, -0.0115, -0.0828]],
       requires_grad=True)
0.8394970496495565 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([5, 7])
tensor([4, 6])
Parameter containing:
tensor([[-0.0319,  0.2405,  0.8812,  0.5542,  0.6697,  0.6379,  0.4850,  0.8343],
        [ 0.1696,  0.6677,  0.8611,  0.7660,  0.1352,  0.3483,  0.6564,  0.3755]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6992,  0.0420,  0.6765,  0.8064,  0.2540,  1.0301,  0.6514,  0.4166],
        [ 0.3504,  0.5595, -0.0046,  0.3418,  0.6384,  0.0200, -0.0118,  0.9717]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2126,  0.4613,  0.1112, -0.3821,  1.7685,  0.9562,  1.0773,  0.0123],
        [ 1.0041,  0.2069,  1.3412,  0.9637,  0.3141,  1.1055,  1.3560, -0.3006]],
       requires_grad=True)
0.9659113089243571 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 1])
tensor([4, 2])
tensor([1, 1])
Parameter containing:
tensor([[-0.0102,  0.9240,  0.3680,  0.6052,  0.4565,  0.9093,  0.9300,  0.9409],
        [ 0.1252,  1.0487,  0.5478,  0.6492,  0.7200,  0.6276, -0.0235,  0.1231]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4612,  2.0181, -0.8788, -0.8536,  2.2070,  0.1916,  1.3755, -0.2554],
        [-0.1959, -1.0194,  6.7859, -0.1973,  1.0975, -0.4400, -0.0080, -0.9598]],
       requires_grad=True)
Parameter containing:
tensor([[0.8172, 0.8789, 0.7030, 0.1784, 0.0780, 0.7663, 0.5264, 0.8536],
        [0.2652, 1.0739, 0.2192, 0.1550, 0.4041, 0.8497, 0.0780, 0.2824]],
       requires_grad=True)
0.7287692030270895 21
[Parameter containing:
tensor([[-0.0102,  0.9240,  0.3680,  0.6052,  0.4565,  0.9093,  0.9300,  0.9409],
        [ 0.1252,  1.0487,  0.5478,  0.6492,  0.7200,  0.6276, -0.0235,  0.1231]],
       requires_grad=True), Parameter containing:
tensor([[ 0.4612,  2.0181, -0.8788, -0.8536,  2.2070,  0.1916,  1.375

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([2, 1])
tensor([0, 3])
tensor([0, 0])
Parameter containing:
tensor([[0.9310, 0.9689, 0.2900, 0.9495],
        [0.2862, 0.6840, 0.0937, 0.9777]], requires_grad=True)
Parameter containing:
tensor([[ 0.0195,  0.3864,  2.4323, -0.2360],
        [ 0.8867,  3.3378, -0.6384, -0.9785]], requires_grad=True)
Parameter containing:
tensor([[ 3.5931, -0.2584, -0.9031, -0.0782],
        [-0.9535,  0.1122,  0.6523,  2.0287]], requires_grad=True)
Parameter containing:
tensor([[ 4.6011, -0.9525, -1.1873, -1.5284],
        [ 4.3398, -0.7320, -0.3245, -1.7032]], requires_grad=True)
1.1222872883081436 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([2, 0])
tensor([2, 3])
tensor([1, 0])
Parameter containing:
tensor([[ 0.7089,  0.0469,  0.5995,  0.5623],
        [ 0.3124, -0.1366,  0.5274,  0.1876]], requires_grad=True)
Parameter containing:
tensor([[0.7517, 0.4779, 1.0390, 0.1773],
        [0.8977, 0.3063, 0.2531, 0.8389]], requires_grad=True)
Parameter containing:
tensor([[ 0.1546,  0.2018,  0.9011,  0.4419],
        [-0.2145,  0.1773,  0.4281,  1.6558]], requires_grad=True)
Parameter containing:
tensor([[0.1106, 0.8953, 0.5289, 0.4108],
        [0.9022, 0.2796, 0.6017, 0.2701]], requires_grad=True)
1.1199121177196503 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([0, 2])
tensor([2, 2])
tensor([3, 0])
Parameter containing:
tensor([[0.9679, 0.7222, 0.3666, 0.3557],
        [0.5207, 0.8788, 0.3902, 0.0674]], requires_grad=True)
Parameter containing:
tensor([[ 0.9617,  0.9457,  0.7969,  0.9002],
        [ 0.6486, -0.1073,  0.7992,  0.5859]], requires_grad=True)
Parameter containing:
tensor([[0.5109, 0.5744, 0.7532, 0.4845],
        [0.8711, 0.8245, 0.9894, 0.2220]], requires_grad=True)
Parameter containing:
tensor([[-0.5710, -0.1745,  0.3789,  1.9933],
        [ 1.2374,  0.0385,  0.1070,  0.7829]], requires_grad=True)
0.5684142112731934 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 1])
tensor([0, 2])
tensor([3, 3])
tensor([1, 2])
Parameter containing:
tensor([[ 0.2069,  0.7475,  0.3918,  1.3843],
        [ 0.1901,  0.7672,  0.6916, -0.2001]], requires_grad=True)
Parameter containing:
tensor([[ 0.7351,  0.1081,  0.2885, -0.0569],
        [ 0.4572,  0.2204,  0.7568,  0.5697]], requires_grad=True)
Parameter containing:
tensor([[ 0.8734,  0.4858,  0.6131,  0.9596],
        [ 0.2289, -0.0190,  0.3051,  1.1894]], requires_grad=True)
Parameter containing:
tensor([[0.3202, 0.8792, 0.6861, 0.2783],
        [0.2627, 0.5825, 0.6707, 0.5067]], requires_grad=True)
0.9854388236999512 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([3, 3])
tensor([1, 2])
tensor([1, 1])
Parameter containing:
tensor([[0.6243, 0.0842, 0.5396, 0.1501],
        [0.7853, 0.8223, 0.0591, 0.4578]], requires_grad=True)
Parameter containing:
tensor([[-1.1993, -4.2257,  1.9976,  4.8845],
        [-1.6062, -2.6679, -0.1648,  7.5652]], requires_grad=True)
Parameter containing:
tensor([[-0.2630,  4.8113, -0.9296, -0.9268],
        [ 0.2117, -0.8897,  3.5255, -1.2672]], requires_grad=True)
Parameter containing:
tensor([[-0.1406,  1.7430,  0.2026,  0.1692],
        [ 1.2721,  1.6195,  0.4271,  0.4291]], requires_grad=True)
0.6114194691181183 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([3, 0])
tensor([2, 0])
tensor([1, 1])
Parameter containing:
tensor([[-1.5004, -0.0913,  2.7048,  0.3333],
        [-1.8526,  0.2724,  4.6035, -0.4448]], requires_grad=True)
Parameter containing:
tensor([[-1.1546, -0.3425,  0.9211,  2.7445],
        [ 1.0007,  0.7580,  0.0080, -0.6386]], requires_grad=True)
Parameter containing:
tensor([[0.4337, 0.1506, 0.5430, 0.1995],
        [0.9143, 0.1883, 0.1478, 0.3128]], requires_grad=True)
Parameter containing:
tensor([[0.2249, 0.9647, 0.5424, 0.4181],
        [0.0425, 0.5935, 0.5140, 0.3652]], requires_grad=True)
1.029593512415886 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([0, 1])
tensor([1, 2])
tensor([3, 3])
Parameter containing:
tensor([[0.4965, 0.1206, 0.2875, 0.2122],
        [0.8690, 0.5384, 0.7987, 0.5654]], requires_grad=True)
Parameter containing:
tensor([[0.8066, 0.5785, 0.0795, 0.4341],
        [0.3632, 0.8343, 0.5003, 0.2445]], requires_grad=True)
Parameter containing:
tensor([[ 0.8277,  1.3605,  0.1945, -0.1623],
        [ 0.8672, -0.0661,  1.2094, -0.1082]], requires_grad=True)
Parameter containing:
tensor([[0.7007, 0.3389, 0.5150, 1.0113],
        [0.2775, 0.6973, 0.8097, 0.8519]], requires_grad=True)
0.8237058445811272 39


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 1])
tensor([0, 1])
tensor([1, 2])
tensor([2, 3])
Parameter containing:
tensor([[-0.0604,  0.2808,  0.3274,  0.9504],
        [ 0.1366,  0.6478,  0.2794,  0.2809]], requires_grad=True)
Parameter containing:
tensor([[ 0.7216,  0.3341,  0.3428,  0.5596],
        [-0.0266,  0.9984,  0.3415,  0.8480]], requires_grad=True)
Parameter containing:
tensor([[ 2.3753,  9.2067, -9.3544, -0.8771],
        [ 0.1145, -6.5029,  7.8072,  1.7135]], requires_grad=True)
Parameter containing:
tensor([[0.1481, 0.2495, 0.6749, 0.6420],
        [0.3805, 0.1940, 0.5324, 0.8833]], requires_grad=True)
0.6632617115974426 31


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([2, 1])
tensor([1, 0])
tensor([3, 0])
Parameter containing:
tensor([[0.4960, 0.1986, 0.2717, 0.1501],
        [0.4625, 0.1276, 0.6041, 0.2994]], requires_grad=True)
Parameter containing:
tensor([[ 0.2556,  0.5590,  0.8017,  0.4675],
        [ 0.8957,  3.4452, -1.1210, -0.5950]], requires_grad=True)
Parameter containing:
tensor([[ 0.6771,  0.7834, -0.0034,  0.3152],
        [ 1.1524, -0.1652,  0.4946,  0.0396]], requires_grad=True)
Parameter containing:
tensor([[0.3410, 0.4291, 0.0544, 0.5252],
        [0.5266, 0.1337, 0.4920, 0.3906]], requires_grad=True)
0.7494066208600998 31


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 3])
tensor([3, 3])
tensor([2, 3])
tensor([2, 1])
Parameter containing:
tensor([[ 4.1530, -0.7097, -0.4336, -0.4802],
        [ 0.3970, -1.2362, -0.2551,  3.4182]], requires_grad=True)
Parameter containing:
tensor([[ 0.6466,  0.1758, -0.0364,  0.8492],
        [ 0.4849,  0.5263,  0.1530,  0.7294]], requires_grad=True)
Parameter containing:
tensor([[ 0.1582, -0.0084,  1.1507,  0.4976],
        [ 0.6216,  0.8142,  0.7517,  0.9945]], requires_grad=True)
Parameter containing:
tensor([[0.5627, 0.5552, 0.9584, 0.2424],
        [0.0347, 0.4427, 0.2193, 0.1600]], requires_grad=True)
0.7592117786407471 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([2, 2])
tensor([2, 0])
tensor([2, 2])
Parameter containing:
tensor([[0.6122, 1.0965, 0.4400, 0.4785],
        [0.5669, 0.7167, 0.4623, 1.1508]], requires_grad=True)
Parameter containing:
tensor([[0.1235, 0.5529, 0.9391, 0.0875],
        [0.8703, 0.9168, 0.9874, 0.2565]], requires_grad=True)
Parameter containing:
tensor([[ 0.7799,  0.8655,  1.0550, -0.3200],
        [ 1.0022,  0.7936,  0.6077,  0.7982]], requires_grad=True)
Parameter containing:
tensor([[0.4218, 0.0752, 1.1760, 1.0918],
        [0.3335, 0.1995, 0.5775, 0.2942]], requires_grad=True)
1.5651326328516006 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 3])
tensor([3, 2])
tensor([1, 0])
tensor([1, 1])
Parameter containing:
tensor([[ 2.6215,  0.2633, -0.0716, -0.0041],
        [ 1.8506, -1.9175, -1.1624,  2.7633]], requires_grad=True)
Parameter containing:
tensor([[ 1.1764,  0.3804, -0.2703,  1.1929],
        [ 0.3871, -0.1400,  1.1090,  0.6777]], requires_grad=True)
Parameter containing:
tensor([[ 0.1248,  1.0374,  0.5681, -0.6166],
        [ 0.9712,  0.5728,  0.4976,  0.0364]], requires_grad=True)
Parameter containing:
tensor([[ 0.4680,  3.2747, -0.1757, -1.2672],
        [-0.1131,  2.9979,  0.2284, -1.0494]], requires_grad=True)
0.852791354060173 39


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([0, 2])
tensor([0, 1])
tensor([0, 3])
Parameter containing:
tensor([[0.6656, 0.6255, 0.9533, 0.6030],
        [0.3872, 0.8697, 0.6237, 0.4418]], requires_grad=True)
Parameter containing:
tensor([[0.3621, 0.2919, 0.2116, 0.1086],
        [0.1596, 0.6460, 0.8047, 0.7032]], requires_grad=True)
Parameter containing:
tensor([[0.8802, 0.4918, 0.6119, 0.0144],
        [0.2460, 0.9559, 0.5900, 0.8928]], requires_grad=True)
Parameter containing:
tensor([[0.7839, 0.6291, 0.2224, 0.0353],
        [0.7923, 0.4619, 0.1210, 0.8562]], requires_grad=True)
1.0245967507362366 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([0, 3])
tensor([0, 2])
tensor([2, 3])
Parameter containing:
tensor([[0.9543, 1.0469, 0.8883, 0.1953],
        [0.7812, 0.3755, 0.0944, 0.9533]], requires_grad=True)
Parameter containing:
tensor([[0.8865, 0.2376, 0.1015, 0.0899],
        [0.8110, 0.6595, 0.6331, 0.8352]], requires_grad=True)
Parameter containing:
tensor([[ 1.5412,  1.0199, -0.2823, -0.5234],
        [ 0.0811,  1.2300,  1.6231, -0.7946]], requires_grad=True)
Parameter containing:
tensor([[0.2300, 0.2098, 0.4283, 0.1786],
        [0.2556, 0.4856, 0.2563, 0.8284]], requires_grad=True)
1.368780218064785 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([3, 3])
tensor([0, 0])
tensor([0, 2])
Parameter containing:
tensor([[0.9634, 0.5659, 0.4183, 0.8654],
        [0.5786, 0.7104, 0.2010, 0.4550]], requires_grad=True)
Parameter containing:
tensor([[ 0.8385,  0.2190, -0.2634,  1.4686],
        [ 0.3839,  0.5811, -0.0273,  0.8145]], requires_grad=True)
Parameter containing:
tensor([[0.7283, 0.6006, 0.2121, 0.1528],
        [0.5367, 0.1967, 0.1549, 0.1760]], requires_grad=True)
Parameter containing:
tensor([[ 1.0312,  0.4163,  0.4571,  0.3834],
        [ 0.6784, -0.1309,  1.2060,  0.5132]], requires_grad=True)
1.0554072856903076 38


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([2, 0])
tensor([2, 0])
tensor([0, 3])
Parameter containing:
tensor([[ 3.3161, -1.6230,  1.2301, -1.4897],
        [-1.5449,  4.2253,  0.2393, -0.8791]], requires_grad=True)
Parameter containing:
tensor([[ 1.0653,  0.4137,  6.5243, -5.2825],
        [ 5.3304, -6.0240,  1.5057,  0.9708]], requires_grad=True)
Parameter containing:
tensor([[ 0.6264,  0.0399,  0.8003,  0.4934],
        [ 0.4322,  0.0233, -0.0141,  0.3349]], requires_grad=True)
Parameter containing:
tensor([[ 6.8343, -1.2223, -2.4625, -1.7672],
        [-1.4727, -2.5599, -1.1255,  7.5734]], requires_grad=True)
0.7152110114693642 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 0])
tensor([3, 3])
tensor([3, 0])
tensor([0, 3])
Parameter containing:
tensor([[ 0.4181,  0.3490,  0.3990,  0.3544],
        [ 1.4882,  0.0419,  0.4469, -0.1698]], requires_grad=True)
Parameter containing:
tensor([[ 0.0345, -1.5158, -0.4925,  4.0143],
        [ 1.2043, -1.7681,  1.1019,  1.8445]], requires_grad=True)
Parameter containing:
tensor([[ 0.5349, -0.3660,  0.4165,  1.1565],
        [ 1.2562,  0.8764,  0.8051, -0.1742]], requires_grad=True)
Parameter containing:
tensor([[ 5.6011, -4.5129,  1.2439, -0.6268],
        [-1.8058, -1.6265, -3.9224,  9.6166]], requires_grad=True)
1.3375451266765594 20


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 3])
tensor([0, 0])
tensor([1, 0])
tensor([0, 2])
Parameter containing:
tensor([[ 0.5149,  1.1652,  0.4630, -0.0136],
        [ 0.8168,  0.8389,  0.2754,  1.0597]], requires_grad=True)
Parameter containing:
tensor([[0.9833, 0.8271, 0.0996, 0.9129],
        [0.6261, 0.2247, 0.3635, 0.4940]], requires_grad=True)
Parameter containing:
tensor([[ 0.2915,  0.7532, -0.0227,  0.6673],
        [ 0.7845,  0.2849,  0.2163,  0.7077]], requires_grad=True)
Parameter containing:
tensor([[ 0.6145,  0.1786,  0.0989, -0.3211],
        [ 0.4640,  0.9046,  1.3810, -0.4678]], requires_grad=True)
0.8830416053533554 29


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([1, 2])
tensor([1, 2])
tensor([3, 0])
Parameter containing:
tensor([[0.0684, 0.2054, 0.9357, 0.4822],
        [0.8115, 0.7331, 0.3474, 0.6873]], requires_grad=True)
Parameter containing:
tensor([[ 0.1967,  1.3489,  0.5659,  0.3654],
        [-0.1269,  0.2079,  1.0049,  0.4170]], requires_grad=True)
Parameter containing:
tensor([[-1.4686,  3.1350,  0.3452,  0.1557],
        [ 0.5873,  0.0612,  2.7366, -0.9763]], requires_grad=True)
Parameter containing:
tensor([[ 0.4256,  0.4505, -0.7688,  1.1498],
        [ 0.6530,  0.3558,  0.5806,  0.0188]], requires_grad=True)
0.7159275412559509 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 3])
tensor([0, 2])
tensor([2, 2])
Parameter containing:
tensor([[0.1429, 0.7683, 0.3851, 0.6864],
        [0.5739, 0.8099, 0.9651, 0.7921]], requires_grad=True)
Parameter containing:
tensor([[ 0.6547,  1.1316,  0.8189, -0.0239],
        [ 0.5865, -0.0592, -0.7587,  1.4681]], requires_grad=True)
Parameter containing:
tensor([[1.0789, 0.7796, 0.3300, 0.6867],
        [0.1205, 0.4365, 0.6460, 0.1873]], requires_grad=True)
Parameter containing:
tensor([[0.4610, 0.6569, 1.0300, 0.5636],
        [0.6355, 0.0368, 0.9366, 0.7427]], requires_grad=True)
0.7999183535575867 21
[Parameter containing:
tensor([[0.1429, 0.7683, 0.3851, 0.6864],
        [0.5739, 0.8099, 0.9651, 0.7921]], requires_grad=True), Parameter containing:
tensor([[ 0.6547,  1.1316,  0.8189, -0.0239],
        [ 0.5865, -0.0592, -0.7587,  1.4681]], requires_grad=True), Parameter containing:
tensor([[1.0789, 0.7796, 0.3300, 0.6867],
        [0.1205, 0.4365, 0.6460, 0.1873]], requires_grad=True), Parameter

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([7, 6])
tensor([5, 5])
tensor([5, 2])
Parameter containing:
tensor([[0.7100, 1.0098, 0.3576, 1.1294, 0.4189, 0.4979, 0.3832, 0.6741],
        [1.0366, 0.1800, 0.9146, 0.6533, 0.8776, 0.5981, 0.7257, 0.2238]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0126,  0.3844, -0.3293,  0.1898,  0.0563,  0.2788,  0.5917,  2.0085],
        [-0.1669, -0.5751,  0.8017,  0.3112, -0.3089,  0.4947,  2.3026, -0.3461]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1473,  1.1385,  0.1667,  0.2670,  0.4476,  1.3771,  0.6404, -0.1898],
        [ 0.4963,  0.2462,  0.8606,  1.1858,  0.4123,  1.3193,  0.3900,  0.1936]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5283,  0.6877, -0.1377,  0.0888,  0.3242,  0.8946,  0.6276,  0.6254],
        [ 0.2145,  1.0163,  1.0413, -0.0611,  0.1519,  0.1690,  0.9700,  0.3559]],
       requires_grad=True)
1.1629900485277176 39


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([7, 6])
tensor([2, 1])
tensor([5, 7])
Parameter containing:
tensor([[0.5021, 0.2003, 0.8591, 0.3233, 0.1290, 0.1031, 0.6123, 0.0792],
        [0.8229, 0.6642, 0.8787, 0.0834, 0.4537, 0.5682, 0.5549, 0.7160]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1873,  0.4384, -0.2857,  0.0475,  0.1336,  1.4717,  0.1947,  1.5587],
        [ 0.1722,  0.3900,  0.9295,  1.0609,  0.7576, -0.5055,  1.0800,  0.1146]],
       requires_grad=True)
Parameter containing:
tensor([[0.7187, 0.6882, 0.9679, 0.3220, 0.6176, 0.3951, 0.8546, 0.0953],
        [0.2864, 0.8349, 0.7869, 0.3723, 0.7042, 0.6646, 0.2782, 0.1136]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0921,  1.0148,  0.2489,  0.7583,  0.1502,  1.0713,  0.7776,  0.3856],
        [ 0.4714,  0.2234, -0.0282, -0.0353,  0.3092,  0.8633,  0.1073,  1.0659]],
       requires_grad=True)
1.161688432097435 39


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 4])
tensor([6, 7])
tensor([2, 7])
tensor([1, 7])
Parameter containing:
tensor([[ 0.7915,  0.4649,  1.1546, -0.1156,  0.4924,  0.8447,  0.4932,  0.1438],
        [ 0.6886,  0.7315,  1.0144,  0.5644,  1.2060,  0.4711,  0.5233,  0.3316]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6358,  0.6046,  0.6848,  0.4217,  0.9107,  0.7735,  0.9717,  0.2273],
        [ 0.2568,  0.2260, -0.0091,  0.9153,  0.5403,  0.5203,  0.2051,  1.1380]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6764,  0.1065,  0.8666, -0.0546,  0.4764,  0.5051,  0.8488,  0.3729],
        [ 0.0839,  0.7914,  0.6625, -0.2065,  0.1825, -0.0958,  0.7009,  0.9547]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0847,  2.1536, -0.0047, -0.5131, -0.4028,  0.1233,  0.1657,  2.0296],
        [-0.7920,  0.5765,  0.8974,  0.5296,  0.3402,  0.4228,  0.4216,  1.8939]],
       requires_grad=True)
0.6804591417312622 21


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 6])
tensor([7, 7])
tensor([2, 7])
tensor([1, 4])
Parameter containing:
tensor([[ 0.2281,  0.9568,  0.6779,  0.9360, -0.0658,  0.6195,  0.7884,  0.0383],
        [ 0.7555,  0.2121,  0.0521, -0.1126,  0.4225,  0.2659,  0.9870,  0.4963]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4917,  0.9562,  0.5459,  0.9402, -0.1155,  0.0358,  0.7964,  0.9855],
        [ 0.2113,  0.6326,  0.5536,  0.2213,  0.3021,  0.8229,  0.2653,  1.1774]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1497,  0.6316,  1.0351, -0.1688,  0.8993,  0.3400, -0.0662,  1.0058],
        [-0.2081,  0.0900,  0.6751, -0.2569,  0.0823,  0.1683,  1.0776,  1.2928]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1483,  0.7644,  0.3725,  0.4012,  0.6407,  0.7585, -0.0108,  0.6615],
        [ 0.3090,  0.5028,  0.5673,  0.8449,  0.8637,  0.8178,  0.2849,  0.1972]],
       requires_grad=True)
1.6094705164432526 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 4])
tensor([3, 7])
tensor([1, 7])
tensor([6, 6])
Parameter containing:
tensor([[0.6504, 0.0821, 0.4714, 0.1219, 0.7879, 0.8519, 0.0404, 0.5167],
        [0.1530, 0.3467, 0.9586, 0.0022, 0.9692, 0.4019, 0.9132, 0.8386]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.0104,  1.7770,  0.3851,  4.3935, -0.9983,  3.0157, -4.4000, -0.9112],
        [-1.1877,  1.5941, -1.6297,  0.6071,  1.2065,  1.2323, -1.6672,  5.5667]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1573,  1.8259,  0.0303,  1.3128,  0.0603, -0.5621, -0.2286,  0.0788],
        [ 0.2253,  0.4932,  0.6301,  0.0733,  0.7132,  0.4124, -0.3323,  0.9171]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.6931, -0.0620, -0.6177,  0.6959,  0.9217,  0.8108,  1.2929, -0.1017],
        [ 0.2010,  0.0614,  0.9958,  0.7024,  0.4662,  0.2177,  1.0361,  0.9731]],
       requires_grad=True)
1.3018960356712341 31


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 2])
tensor([0, 7])
tensor([2, 1])
tensor([7, 2])
Parameter containing:
tensor([[ 4.5161e-01, -6.7295e-01, -1.3404e+00,  1.3458e-01,  2.4705e-03,
         -1.9186e-01,  2.6577e-01,  5.3755e+00],
        [ 1.1881e+00,  3.5188e-01,  4.0515e+00, -1.3266e+00, -6.9661e-01,
         -4.6566e-01,  4.3266e-01, -2.3863e-01]], requires_grad=True)
Parameter containing:
tensor([[ 2.2657, -0.7659, -0.2140,  0.6397,  0.7235, -0.0076, -0.1636,  0.4123],
        [ 0.8885,  1.1905,  0.6471,  0.3640, -0.9889, -1.3432,  0.2159,  2.6915]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5610,  0.0520,  1.2523,  0.5688, -0.0236,  0.4772,  0.1444,  0.2840],
        [ 0.2171,  0.8439,  0.5523,  0.2281,  0.4470,  0.1862,  0.0811,  0.4765]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.1911,  0.4919,  0.8452,  0.0065,  0.8085, -0.2515, -0.1577,  0.9822],
        [-0.4074,  0.0132,  2.7082,  1.1178, -0.1357, -0.1222, -0.2765,  0.6244]],
       requires_grad=True)
1.2098337

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 0])
tensor([7, 0])
tensor([4, 3])
tensor([5, 5])
Parameter containing:
tensor([[0.4137, 0.3519, 0.3438, 0.3442, 0.6428, 0.5466, 0.5481, 0.6970],
        [0.8652, 0.6462, 0.0302, 0.7107, 0.1000, 0.8113, 0.4239, 0.2533]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.5713,  0.6225,  0.2057,  0.8426,  0.9003, -0.1295,  0.2026,  0.9071],
        [ 0.9441,  0.1480,  0.2708,  0.9218,  0.8005,  0.9317,  0.5394,  0.6461]],
       requires_grad=True)
Parameter containing:
tensor([[0.1325, 0.1594, 0.3555, 0.6783, 1.0335, 0.9270, 0.6743, 0.7386],
        [0.6995, 0.2217, 0.5902, 1.0326, 0.7791, 0.9328, 0.7317, 0.3814]],
       requires_grad=True)
Parameter containing:
tensor([[0.6058, 0.4655, 0.2406, 0.8231, 0.8090, 0.8835, 0.4806, 0.4503],
        [0.4636, 0.1589, 0.2164, 0.9422, 0.7935, 0.9642, 0.8425, 0.1820]],
       requires_grad=True)
0.8484278470277786 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 0])
tensor([6, 6])
tensor([5, 0])
tensor([1, 3])
Parameter containing:
tensor([[0.0603, 0.2818, 0.2863, 0.9105, 0.1379, 0.6365, 0.2687, 0.2609],
        [1.0075, 0.7423, 0.0674, 0.2195, 0.0145, 0.3575, 0.8514, 0.8593]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1832, -0.2393,  0.7182,  0.6989,  0.9420,  0.6169,  1.2294,  0.6999],
        [ 0.2252, -0.3035,  0.4622,  0.5997, -0.1499,  0.0685,  1.5848,  1.2180]],
       requires_grad=True)
Parameter containing:
tensor([[-0.8826, -1.5794,  2.1765,  2.0598, -0.6107,  4.0618,  0.9427, -2.2932],
        [ 3.9417, -1.8110,  1.1147,  0.6935, -1.9206, -1.0472, -0.6756,  3.1070]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3699,  1.0439,  0.8887,  0.6309, -0.1403,  0.2720,  0.9075, -0.4828],
        [ 0.5453,  0.4306, -0.2839,  0.9223,  0.4474,  0.7471, -0.0643,  0.4749]],
       requires_grad=True)
0.4628312587738037 31


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 5])
tensor([0, 1])
tensor([5, 5])
tensor([3, 3])
Parameter containing:
tensor([[ 0.6325,  0.0788,  0.4384, -0.0789,  0.3845,  0.0384,  0.5209,  0.5955],
        [ 0.6776,  0.9448,  0.1439,  0.3708,  0.5589,  1.0681,  0.5364,  0.4084]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1733, -0.0351,  0.7097,  0.3083,  0.5359,  0.5212, -0.0055,  0.0858],
        [ 0.1019,  1.4259, -0.0579,  0.5538, -0.0676,  0.1304,  0.6451,  0.1610]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1898, -0.2217,  0.7108, -0.9940,  2.1369,  2.3762, -0.3733,  0.6661],
        [ 1.3914, -1.8735, -0.1740,  0.0713,  0.7465,  2.3247, -1.0105,  1.4548]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0238,  0.8605,  0.3356,  1.1192,  0.7460,  0.8955,  0.1893,  0.4691],
        [-0.1480,  0.4746, -0.0113,  0.9984, -0.0915,  0.3251,  0.8920,  0.7498]],
       requires_grad=True)
0.8366266787052155 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 5])
tensor([6, 4])
tensor([0, 3])
tensor([7, 7])
Parameter containing:
tensor([[ 1.3384,  0.1997,  1.0388,  0.2919,  0.2164,  0.7924, -0.2417,  1.2175],
        [ 0.2239, -0.1215, -0.2116,  1.6479, -0.4656,  2.3700,  0.3019, -0.2163]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4876,  0.0846, -0.1968,  0.7962, -0.3985,  0.4550,  2.3306,  1.4212],
        [ 1.0960, -0.4510,  0.0928, -0.4310,  1.8515,  1.6798, -0.0172, -0.6456]],
       requires_grad=True)
Parameter containing:
tensor([[0.9037, 0.3332, 0.7476, 0.8702, 0.0240, 0.1664, 0.3933, 0.5391],
        [0.0920, 0.1118, 0.6425, 0.8372, 0.6608, 0.1364, 0.6845, 0.3320]],
       requires_grad=True)
Parameter containing:
tensor([[0.0902, 0.6765, 0.5832, 0.0445, 0.6218, 0.7238, 0.5779, 0.7543],
        [0.5828, 0.6308, 0.8044, 0.8664, 0.5789, 0.1301, 0.2479, 1.0090]],
       requires_grad=True)
1.0780232101678848 39


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 6])
tensor([0, 2])
tensor([6, 5])
tensor([6, 5])
Parameter containing:
tensor([[0.7592, 0.8997, 0.6371, 0.4843, 0.8359, 0.7514, 0.3458, 0.8105],
        [0.1921, 0.6266, 0.7300, 0.1034, 0.8707, 0.8454, 0.9150, 0.4509]],
       requires_grad=True)
Parameter containing:
tensor([[0.9803, 0.2719, 0.4369, 0.7185, 0.9043, 0.8588, 0.7494, 0.6620],
        [0.8420, 0.2173, 0.8950, 0.7874, 0.8274, 0.1748, 0.3535, 0.0721]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.0962, -0.0558,  0.7853,  0.0887,  0.0673,  0.5764,  1.5268,  0.2969],
        [-0.1877,  0.6961,  0.0552,  0.9264,  0.1880,  0.9762,  0.3379,  0.4927]],
       requires_grad=True)
Parameter containing:
tensor([[-0.1349,  0.8189,  0.6130,  0.3574,  0.4055,  0.1098,  0.9731,  0.7039],
        [ 0.1625,  0.8057, -0.1104,  0.2386,  0.9265,  1.1642,  0.7561,  0.1435]],
       requires_grad=True)
0.8754342794418335 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 4])
tensor([5, 7])
tensor([4, 7])
tensor([1, 6])
Parameter containing:
tensor([[ 2.4737,  1.2241, -0.7953,  1.7034,  0.6484,  1.0640, -0.0241, -1.9510],
        [-0.5988, -0.7043, -0.0378,  0.2777,  5.0986, -1.0407,  0.5729,  0.9456]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3033,  0.2073, -0.0074,  0.5659,  0.2135,  0.8224,  0.6115,  0.4751],
        [ 0.0038,  0.5663,  0.8735,  0.8650,  0.2259,  0.1692,  0.3632,  1.2969]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3047,  0.4531,  0.9074,  0.4359,  1.0691, -0.0348,  0.4519,  0.5889],
        [ 0.1575,  0.3886, -0.1707,  0.2986,  0.1957,  0.7415,  0.7401,  1.1837]],
       requires_grad=True)
Parameter containing:
tensor([[ 2.9821,  3.3510, -3.7634,  0.4967,  1.5833,  1.4324, -1.1964,  0.1273],
        [-2.6979,  1.3162,  0.8453,  1.9095,  0.5143, -0.3986,  2.6030, -1.2361]],
       requires_grad=True)
0.9360440373420715 31


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 6])
tensor([0, 4])
tensor([1, 6])
tensor([3, 4])
Parameter containing:
tensor([[0.5837, 0.6136, 0.9763, 0.6439, 0.3469, 0.8493, 0.6723, 0.4839],
        [0.2030, 0.5967, 0.0661, 0.1512, 0.1249, 0.7316, 0.8376, 0.5765]],
       requires_grad=True)
Parameter containing:
tensor([[1.1445, 0.3379, 0.7228, 0.2245, 0.1386, 0.8746, 0.3824, 0.8575],
        [0.5157, 0.6087, 0.4977, 0.0160, 0.8232, 0.1966, 0.4868, 0.7575]],
       requires_grad=True)
Parameter containing:
tensor([[0.2543, 0.7931, 0.7765, 0.4306, 0.4027, 0.1089, 0.2865, 0.2963],
        [0.8089, 0.6983, 0.0281, 0.5522, 0.8398, 0.1514, 1.0308, 0.9422]],
       requires_grad=True)
Parameter containing:
tensor([[0.8103, 0.3094, 0.1607, 1.0028, 0.8369, 0.7628, 0.3048, 0.1607],
        [0.1681, 0.6490, 0.6033, 0.7138, 0.8944, 0.1502, 0.1911, 0.4239]],
       requires_grad=True)
0.6378799676895142 22


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([5, 0])
tensor([5, 7])
tensor([6, 2])
tensor([0, 2])
Parameter containing:
tensor([[ 0.5569,  0.5318, -0.1228,  0.6157,  1.3162,  1.9722, -0.1830,  0.6023],
        [ 2.0379, -0.2265,  0.0298, -0.2661,  0.5265,  1.2256,  0.6658,  0.2612]],
       requires_grad=True)
Parameter containing:
tensor([[0.7736, 0.2813, 0.3989, 0.2868, 0.1766, 0.9087, 0.5846, 0.4845],
        [0.4343, 0.3058, 0.2783, 0.1251, 0.5645, 0.2986, 0.0519, 0.8140]],
       requires_grad=True)
Parameter containing:
tensor([[-0.0793,  0.7338,  0.1233,  0.9948,  0.1063,  0.2780,  1.2285,  0.8469],
        [-0.0243,  0.3121,  0.9858,  0.2761,  0.7918,  0.5533,  0.1033,  0.6444]],
       requires_grad=True)
Parameter containing:
tensor([[0.9953, 0.8936, 0.6983, 0.6526, 0.5429, 0.1843, 0.0425, 0.3559],
        [0.3879, 0.7540, 0.8224, 0.4883, 0.2285, 0.0392, 0.7229, 0.6611]],
       requires_grad=True)
1.0285494476556778 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 6])
tensor([1, 6])
tensor([2, 5])
tensor([0, 3])
Parameter containing:
tensor([[ 1.2397,  0.5115,  0.0237,  0.6133,  0.5661,  1.1807,  0.3770,  0.2459],
        [ 0.6212,  0.5522, -0.0673,  0.3815,  0.4443,  0.3353,  1.1401,  0.6077]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.3572,  0.6842,  0.4742,  0.2268,  0.5734, -0.1771,  0.1016,  0.5178],
        [ 0.5426,  0.1600,  0.7864,  0.4610,  0.8520,  0.2233,  1.0554,  0.4740]],
       requires_grad=True)
Parameter containing:
tensor([[0.5645, 0.1209, 0.9867, 0.6087, 0.8649, 0.3739, 0.4078, 0.6305],
        [0.4567, 0.5120, 0.4178, 0.5178, 0.2002, 0.7662, 0.1676, 0.1492]],
       requires_grad=True)
Parameter containing:
tensor([[ 7.1186, -0.5631,  0.3118, -0.5048, -1.7628, -1.1212,  1.3026, -0.0424],
        [ 0.7136,  0.4975, -0.7969,  5.2005, -0.6287,  0.1981,  0.8253, -1.3894]],
       requires_grad=True)
0.7829696238040924 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 1])
tensor([3, 6])
tensor([2, 6])
tensor([0, 3])
Parameter containing:
tensor([[-0.7647, -0.6798, -1.2269, -1.0373, -1.4445, -0.2149, 10.7110, -1.8688],
        [-0.5920, 13.2491, -1.0334, -2.8989, -1.1707, -0.3846,  0.2559, -2.9217]],
       requires_grad=True)
Parameter containing:
tensor([[-2.2005, -4.0567,  1.3039,  5.6736, -0.6186, -0.1756, -0.2952,  3.1052],
        [ 2.4285, -2.0506,  0.6266, -0.1101, -1.7446,  0.8244,  2.4512,  1.3723]],
       requires_grad=True)
Parameter containing:
tensor([[0.6432, 0.5462, 0.7793, 0.4250, 0.3561, 0.2657, 0.2790, 0.3480],
        [0.8824, 0.5088, 0.2729, 0.9342, 0.0467, 0.3607, 0.9610, 0.5568]],
       requires_grad=True)
Parameter containing:
tensor([[ 3.4250, -0.6457, -0.7435,  0.8838, -0.1258,  0.2491,  1.1377,  1.0793],
        [-0.4601, -0.2100,  1.1491,  3.2700,  0.2598, -0.7987,  1.0831,  1.1007]],
       requires_grad=True)
0.7227747477591038 31


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 6])
tensor([6, 0])
tensor([6, 5])
tensor([0, 2])
Parameter containing:
tensor([[ 0.0303,  1.0493, -0.0733,  0.2931,  0.9463, -0.1980,  1.2516,  0.0283],
        [-0.0073,  0.9105,  0.1143,  0.2481, -0.5277,  0.1744,  3.6898, -0.1791]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.9566e-01,  1.5071e-03, -2.3843e-02,  6.6952e-02, -5.1659e-02,
          6.3395e-01,  4.0185e+00, -3.3571e-01],
        [ 1.6462e+00, -6.5123e-01,  6.9388e-01,  6.9733e-01,  5.0965e-01,
          7.8552e-01,  1.3050e+00, -1.0191e+00]], requires_grad=True)
Parameter containing:
tensor([[-0.2550,  0.9344,  0.7909,  0.7698,  0.1716,  0.2406,  1.1920,  0.1879],
        [ 0.3396,  0.2713, -0.1663,  0.0346,  0.0828,  0.8748,  0.6312,  0.5134]],
       requires_grad=True)
Parameter containing:
tensor([[ 9.1275, -0.8008,  0.8003, -1.9259, -0.2778,  0.0206, -3.5783,  0.8681],
        [-3.4311, -1.7041,  9.3452, -2.1896, -1.6230,  2.3487,  2.2516, -1.3507]],
       requires_grad=True)
1.1105342

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([7, 0])
tensor([7, 4])
tensor([5, 3])
tensor([7, 2])
Parameter containing:
tensor([[0.6629, 0.8565, 0.1753, 0.1651, 1.2677, 0.4114, 0.2718, 1.3095],
        [1.2827, 0.8014, 0.1745, 0.7240, 0.7729, 0.1435, 0.5195, 0.1124]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2094,  0.9718,  0.4092,  0.7966,  0.5562, -0.1703, -0.0862,  0.9960],
        [ 0.0645, -0.2362,  0.2618,  0.5179,  0.9498,  0.7008,  0.3787,  0.2155]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.7444,  0.2654,  0.2945,  0.3506, -0.1135,  1.0369,  0.0183,  0.8297],
        [ 0.1979,  0.0166,  0.9369,  1.4111,  0.3598, -0.2018,  1.3027,  0.1781]],
       requires_grad=True)
Parameter containing:
tensor([[-4.2708e-01, -1.6268e-01, -1.8798e-01,  7.2806e-01,  4.5361e-01,
          4.0737e-01, -3.4571e-01,  3.2383e+00],
        [-7.8772e-01,  1.5038e+00,  1.7031e+00, -7.0151e-01, -1.6780e-01,
          1.6795e-01,  1.0223e+00, -1.7486e-03]], requires_grad=True)
0.7268946319818497 38


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([4, 3])
tensor([6, 4])
tensor([4, 5])
tensor([2, 4])
Parameter containing:
tensor([[-0.2450,  0.2589,  0.7618,  0.5315,  0.9201,  0.7319,  0.5521,  0.7595],
        [-0.2614,  0.7526,  0.5243,  0.7707,  0.5784,  0.6437,  0.7142,  0.2572]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4618,  0.0492,  0.8811,  0.8833,  0.0243,  0.8038,  1.1544,  0.3182],
        [ 0.6853,  0.2737,  0.0677,  0.1280,  0.9281,  0.2208, -0.3089,  0.8706]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.4823,  0.1989,  0.0178, -0.2912,  3.1611, -0.8395,  0.1740,  0.3139],
        [ 1.1259,  0.7875, -0.0514,  0.6762, -0.3634,  2.8317,  1.9746, -0.9903]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.2102,  0.7942,  0.8442,  0.2588,  0.1297,  0.6320,  0.5955,  0.2078],
        [ 0.3477,  0.2838,  0.4341,  0.1303,  0.9156,  0.6080,  0.1817, -0.0703]],
       requires_grad=True)
0.8680248707532883 30


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([6, 2])
tensor([0, 7])
tensor([0, 5])
tensor([7, 6])
Parameter containing:
tensor([[-0.0407,  0.7823,  0.4713,  0.6215,  0.6140,  0.8036,  0.9535,  0.9182],
        [ 0.0551,  0.9357,  0.9961,  0.6249,  0.3865,  0.5871,  0.1694,  0.0634]],
       requires_grad=True)
Parameter containing:
tensor([[ 1.1341,  0.9073,  0.3252,  0.5982, -0.0440,  1.0708,  0.6163, -0.3422],
        [ 0.5999,  0.7295,  0.9327,  0.6321,  0.6991, -0.1990,  0.7195,  0.9493]],
       requires_grad=True)
Parameter containing:
tensor([[1.0334, 0.6726, 0.9499, 0.3013, 0.0438, 0.7300, 0.3600, 0.7107],
        [0.3030, 0.8018, 0.0827, 0.1560, 0.3716, 1.0227, 0.0515, 0.5381]],
       requires_grad=True)
Parameter containing:
tensor([[ 0.4631,  0.5475,  0.4734,  0.1090,  0.2091,  0.2701,  0.2372,  0.6350],
        [ 0.5768,  0.8937,  0.6198,  0.4600,  0.7633, -0.0101,  0.9276,  0.5441]],
       requires_grad=True)
1.0500913262367249 39
[Parameter containing:
tensor([[-0.0407,  0.7823,  0.4713,  0.6215,  0.6140,  