In [63]:
import matplotlib.pylab as plt
%matplotlib inline
import numpy as np
import torch
import tqdm
from sklearn.model_selection import StratifiedShuffleSplit

In [101]:
epoch_num = 2000
train_num = 32# 8
DIM = 32 # 4
D = 4
crit2 = torch.nn.L1Loss()

In [50]:
def make_angles():
    if D == 2:
        return [0, 180]
    step = 360/D
    return [i*step for i in range(D)]
make_angles()

[0, 180]

In [51]:
from scipy.ndimage import rotate
def gen(N1, N2, sigma=.1, seed=42, angle = 0.0):
    angle = np.pi/2 * angle/90
    N = N1+N2
    rs = np.random.RandomState(seed)
    x = rs.randn(N, DIM) * sigma
    y = x[:,0]*0.25 + x[:,1]*0.75
    
    if angle:
        M = np.array([[np.cos(angle), -np.sin(angle)], [np.sin(angle), np.cos(angle)]])
        x[:,:2] = (M@x[:,:2].T).T
    elems = list(range(len(x)))
    rs.shuffle(elems)
    trainval, test = elems[:N1], elems[N1:]
    xtrainval, ytrainval = x[trainval], y[trainval]
    xtest,ytest = x[test], y[test]
    elems = list(range(len(xtrainval)))
    rs.shuffle(elems)
    train, val = elems[:N1//2], elems[N1//2:]
    
    xtrain, ytrain = xtrainval[train], ytrainval[train]
    xval, yval = xtrainval[val], ytrainval[val]
    return (xtrain, ytrain), (xval, yval), (xtest, ytest)
xy, _,_ = gen(4, 2, angle=0)


In [66]:
# one-model
accs = []
for k in range(5):
    torch.manual_seed(42+k)
    (x_train, y_train), (x_val, y_val), (x_test, y_test) = gen(train_num, 2000, seed=42+k)
    x_train = np.concatenate([x_train, x_val])
    y_train = np.concatenate([y_train, y_val])
    
    model = torch.nn.Linear(DIM,1)
    opt = torch.optim.SGD(model.parameters(), lr=1e-1)
    crit = torch.nn.MSELoss()
    tq = tqdm.tqdm_notebook(range(epoch_num))
    
    for e in tq:
        opt.zero_grad()
        out = model(torch.tensor(x_train).float())[:,0]
        loss = crit(out, torch.tensor(y_train).float())
        loss.backward()
        opt.step()
        tq.set_description(str(loss.item()))
    acc = crit2(model(torch.tensor(x_test).float())[:,0], torch.tensor(y_test).float()).item()
    #acc = torch.eq(model(torch.tensor(x_test).float()).argmax(1), torch.tensor(y_test)).float().mean()
    accs.append(acc)
accs, np.mean(accs) 
# old: 2d: 0.02
# 2d: 0.03

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num))


  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

([0.02337760105729103,
  0.03822921961545944,
  0.032760124653577805,
  0.036191198974847794,
  0.030610783025622368],
 0.03223378546535969)

In [91]:
# shared-model
accs = []
for k in range(5):
    torch.manual_seed(42+k)
    x_train, y_train, x_test, y_test = [],[],[],[]
    for angle in make_angles():
        (_x_train, _y_train), (x_val, y_val), (_x_test, _y_test) = gen(train_num, 2000, seed=42+k+int(angle),
                                                                       angle=angle)
        
        _x_train = np.concatenate([_x_train, x_val])
        _y_train = np.concatenate([_y_train, y_val])
        x_train.extend(_x_train)
        y_train.extend(_y_train)
        x_test.extend(_x_test)
        y_test.extend(_y_test)
    
    model = torch.nn.Linear(DIM,1) 
    opt = torch.optim.SGD(model.parameters(), lr=1e-1)
    crit = torch.nn.MSELoss()
    tq = tqdm.tqdm_notebook(range(epoch_num))
    
    for e in tq:
        opt.zero_grad()
        out = model(torch.tensor(x_train).float())[:,0]
        loss = crit(out, torch.tensor(y_train).float())
        loss.backward()
        opt.step()
        tq.set_description(str(loss.item()))
    acc = crit2(model(torch.tensor(x_test).float())[:,0], torch.tensor(y_test).float()).item()
    #acc = torch.eq(model(torch.tensor(x_test).float()).argmax(1), torch.tensor(y_test)).float().mean()
    accs.append(acc)
accs, np.mean(accs) 
# old: 
# 2d: 0.07
# 3d: 0.07

# 2d: 0.08
# 3d: 0.07
# 4d: 0.07

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num))


  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

([0.06856674700975418,
  0.07092997431755066,
  0.06963121145963669,
  0.07302738726139069,
  0.07072072476148605],
 0.07057520896196365)

In [102]:
# perfect-case
accs = []
for k in range(5):
    torch.manual_seed(42+k)
    x_train, y_train, x_test, y_test = [],[],[],[]
    for angle in make_angles():
        (_x_train, _y_train), (x_val, y_val), (_x_test, _y_test) = gen(train_num, 2000, seed=42+k+int(angle),
                                                                       angle=0.0)
        
        _x_train = np.concatenate([_x_train, x_val])
        _y_train = np.concatenate([_y_train, y_val])
        x_train.extend(_x_train)
        y_train.extend(_y_train)
        x_test.extend(_x_test)
        y_test.extend(_y_test)
    
    model = torch.nn.Linear(DIM,1) 
    opt = torch.optim.SGD(model.parameters(), lr=1e-1)
    crit = torch.nn.MSELoss()
    tq = tqdm.tqdm_notebook(range(epoch_num))
    
    for e in tq:
        opt.zero_grad()
        out = model(torch.tensor(x_train).float())[:,0]
        loss = crit(out, torch.tensor(y_train).float())
        loss.backward()
        opt.step()
        tq.set_description(str(loss.item()))
    acc = crit2(model(torch.tensor(x_test).float())[:,0], torch.tensor(y_test).float()).item()
    #acc = torch.eq(model(torch.tensor(x_test).float()).argmax(1), torch.tensor(y_test)).float().mean()
    accs.append(acc)
accs, np.mean(accs) 
# old: 
#2d: 0.006820007972419262
#3d: 0.0062

# 2d: 0.016 
# 3d: 0.008
# 4d: 0.009

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num))


  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

  0%|          | 0/2000 [00:00<?, ?it/s]

([0.010324080474674702,
  0.01435939408838749,
  0.01239052601158619,
  0.010584849864244461,
  0.018240517005324364],
 0.013179873488843441)

In [93]:
class Rot(torch.nn.Module):
    def __init__(self, angle = None):
        super().__init__()
        if angle is None:
            angle = torch.randn(1)*90
        else:
            angle = torch.tensor(angle).float()
        self.angle = torch.nn.Parameter(angle)
    
    def forward(self, x):
        angle = self.angle * np.pi/2 / 90
        c = torch.cos(angle)
        s = torch.sin(angle)
        M = torch.vstack([torch.hstack([c, -s]), torch.hstack([s, c])])
        result = x * 1.0
        result[:,:2] = (M@x[:,:2].T).T
        return result


In [94]:
def make_inference(x, rots, models, gammas, use_softmax=True, only_angle=False, c_head = False):
    if use_softmax:
        g0 = torch.nn.functional.softmax(gammas[0])
    else:
        g0 = (gammas[0] == gammas[0].max()) * 1
        
    x = torch.sum(torch.cat([(r(x)*g0[i]).unsqueeze(2) for i, r in enumerate(rots)], 2), 2)
    if only_angle:
        return x 
    if c_head:
        g1 = torch.zeros(gammas[1].shape)
        g1[0] += 0
    if use_softmax:
        g1 = torch.nn.functional.softmax(gammas[1])
    else:
        g1 = (gammas[1] == gammas[1].max()) * 1

    x = torch.sum(torch.cat([(m(x)*g1[i]).unsqueeze(2) for i, m in enumerate(models)], 2), 2)
    
    return x


In [95]:
nn = torch.nn
class MdTripletLoss(nn.Module):
    def __init__(self, m =0.0, p=2, subset_size = 8, k = 1.0):
        super(MdTripletLoss, self).__init__()
        self.triplet_loss = nn.TripletMarginLoss(margin=m, p=p)
        self.rs = np.random.RandomState(42)
        self.subset_size = subset_size
        self.k = k
        
    def forward(self, h1: torch.Tensor, h2: torch.Tensor, labels1: torch.LongTensor, labels2: torch.LongTensor):
        """
        :param: h1: hidden representations of size (bs, *), anchors
        :param: h1: hidden representations of size (bs, *), positives and negatives candidates
        """
        
        bs = h1.size(0)
        if bs > self.subset_size:
            elems = list(range(bs))
            self.rs.shuffle(elems)
            h1 = h1[elems[:self.subset_size]]
            labels1 = labels1[elems[:self.subset_size]]
            self.rs.shuffle(elems)
            h2 = h2[elems[:self.subset_size]]
            labels2 = labels2[elems[:self.subset_size]]
            bs = self.subset_size
            
        
        h1 = h1.view(bs, -1)
        h2 = h2.view(bs, -1)
        
        loss = 0.0
        anch =  []
        pos = []
        neg = []
        
        for i in range(len(h1)):
            found = False 
            for j in range(len(h2)):
                if found:
                    break
                for k in range(j+1, len(h2)):
                    if   abs(labels1[i] - labels2[j]) > abs(labels1[i] - labels2[k]):\
                         #torch.linalg.norm(h1[i] - h2[j]) < torch.linalg.norm(h1[i] - h2[k]) and\
                        anch.append(h1[i])
                        pos.append(h2[k])
                        neg.append(h2[j])
                        found = True
                        break
                
        if len(anch) >0:
            a = torch.vstack(anch)
            p = torch.vstack(pos)
            n = torch.vstack(neg)
            loss = self.triplet_loss(a, p, n)
            return loss
        else:
            return 0.0
    
def calc_params(gammas):
    cnt = 0
    rots = set()
    for i in range(D):
        rots.add(gammas[i][0].argmax().item())
    
    linears = set()
    for i in range(D):
        linears.add(gammas[i][1].argmax().item())
    return len(rots) + len(linears)*2


In [96]:
class ArchModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        
        self.gammas = [torch.nn.Parameter(torch.rand(2, D)) for _ in range(D)]
        self.rots = torch.nn.ModuleList([Rot() for _ in range(D)])
        self.models = torch.nn.ModuleList([torch.nn.Linear(DIM, 1) for _ in range(D)])
    def forward(self, x, d, only_angle = False, use_softmax = True):
        return make_inference(x, self.rots, self.models, self.gammas[d],
                              use_softmax=use_softmax, only_angle=only_angle)
   
def multi_js_divergence(alphas) -> torch.tensor:
    alpha_full = torch.stack(alphas, dim=0)
    centroid = torch.mean(alpha_full, 0)
    d_centroid = torch.distributions.Categorical(probs=centroid)
    js = 0.0
    for alpha in alphas:
        d = torch.distributions.Categorical(probs=alpha)
        js += 1.0/len(alphas) * \
            torch.distributions.kl.kl_divergence(d, d_centroid)
    return js



In [97]:
def dartslike2(triplet_coef_search = 0.0, triplet_coef_train = 0.0, js_coef = 0.0, unroll: bool = True):
    lr = 0.1 # for models
    lr0 = 0.01 # for alpha
    lr2 = 0.01
    accs = []
    param_num = []
    tl = MdTripletLoss(p=1.0, subset_size=9999)
    for k in range(5):
        torch.manual_seed(42+k)
        x_train, y_train, x_val, y_val, x_test, y_test = [],[],[],[],[],[]
        for angle in make_angles():
            (_x_train, _y_train), (_x_val, _y_val), (_x_test, _y_test) = gen(train_num, 2000, seed=42+k+int(angle),
                                                                             angle=angle)
            x_train.append(_x_train)
            y_train.append(_y_train)
            x_val.append(_x_val)
            y_val.append(_y_val)
            x_test.append(_x_test)
            y_test.append(_y_test)
        model = ArchModel()
        rs = np.random.RandomState(42+k)
        
        opt = torch.optim.SGD([{'params': model.rots.parameters(), 'lr': lr0},
                              {'params': model.models.parameters(), 'lr': lr}],
                              lr=lr)
                              
                              
        opt2 = torch.optim.SGD(model.gammas, lr=lr2)

        crit = torch.nn.MSELoss()
        tq = tqdm.tqdm_notebook(range(epoch_num//2))
        losses = []
        losses2 = []
        for e in tq:
            
            
            for d in range(D):
            
                opt2.zero_grad()
                d2 = rs.choice([i for i in range(D) if i != d])

                if unroll:
                    opt.zero_grad()
                    x = torch.tensor(x_train[d]).float()
                    y = torch.tensor(y_train[d]).float()

                    x2 = torch.tensor(x_train[d2]).float()
                    y2 = torch.tensor(y_train[d2]).float()

                    out_angle1 = model(x, d, only_angle=True)
                    out_angle2 = model(x2, d2, only_angle=True)

                    out = model(x, d)[:,0]
                    loss = crit(out, y)
                    if triplet_coef_search:
                        tloss = tl(out_angle1, out_angle2, y, y2) * triplet_coef_search
                        loss += tloss
                    grads = torch.autograd.grad(loss, model.parameters())
                    with torch.no_grad():
                        for p,g in zip(model.parameters(), grads):
                            #print ('g', g)
                            #print ('v0', p)
                            p = p + lr * g
                            #print ('v1', p)
                            #break
                x = torch.tensor(x_val[d]).float()
                y = torch.tensor(y_val[d]).float()

                x2 = torch.tensor(x_val[d2]).float()
                y2 = torch.tensor(y_val[d2]).float()

                out_angle1 = model(x, d, only_angle=True)
                out_angle2 = model(x2, d2, only_angle=True)
                
                

                out = model(x, d)[:,0]
                loss2 = crit(out, y) 
                if triplet_coef_search:
                        tloss = tl(out_angle1, out_angle2, y, y2) * triplet_coef_search

                        loss2 += tloss
                if js_coef:
                    loss2 += multi_js_divergence(model.gammas).mean() * js_coef
                #print ('gg', grads[0])
                loss2.backward()
                #print ('gg', grads[0])
                #if triplet_coef_search:
                #    print ([(r.angle.item(), r.angle.grad) for r in rots]) 
                opt2.step()
                
                if unroll:
                    with torch.no_grad():
                        for i,g in zip(model.parameters(), grads):
                            #print ('g', g)
                            p = p - lr * g
                            #print ('v2', p)
                            #break
                #1/0
                opt.zero_grad()
                x = torch.tensor(x_train[d]).float()
                y = torch.tensor(y_train[d]).float()

                x2 = torch.tensor(x_train[d2]).float()
                y2 = torch.tensor(y_train[d2]).float()

                out_angle1 = model(x, d, only_angle=True)
                out_angle2 = model(x2, d2, only_angle=True)
                
                out = model(x, d)[:,0]
                loss = crit(out, y)
                if triplet_coef_search:
                    tloss = tl(out_angle1, out_angle2, y, y2) * triplet_coef_search
                    loss += tloss
                loss.backward()
                opt.step()
                losses.append(loss.item())
                losses2.append(loss2.item())
            
            tq.set_description(str(np.mean(losses))+';'+str(np.mean(losses2)))

        tq = tqdm.tqdm_notebook(range(epoch_num//2))
        
        
        x_train = np.concatenate([x_train, x_val], axis=1)
        y_train =  np.concatenate([y_train, y_val], axis=1)
        """
        new_model = ArchModel()
        for g_old, g_new in zip(model.gammas, new_model.gammas):
            g_new.data *= 0.0
            g_new.data += g_old.data 
        model = new_model
        
        gammas = model.gammas
        gammas[0][1].data *=0 
        gammas[1][1].data *=0 
        
        gammas[0][1, 1].data +=1 
        gammas[1][1, 1].data +=1
        
        
        gammas[0][0].data *=0 
        gammas[1][0].data *=0 
        
        gammas[0][0, 1].data +=1 
        gammas[1][0, 0].data +=1
        
        model.rots[0].angle.data *= 0.0
        model.rots[1].angle.data *= 0.0
        model.rots[1].angle.data += 180.0
        """
        
        for g in model.gammas:
            print (g.argmax(1))
        
        for g in model.gammas:
            print (g)
        
        opt = torch.optim.SGD([{'params': model.rots.parameters(), 'lr': lr0},
                              {'params': model.models.parameters(), 'lr': lr}],
                              lr=lr)
        
        for e in tq:

            for d in range(D):
                loss = 0.0
                opt.zero_grad()

                d2 = rs.choice([i for i in range(D) if i != d])

                x = torch.tensor(x_train[d]).float()
                y = torch.tensor(y_train[d]).float()
                out = model(x, d, use_softmax=False)[:,0]

               
                loss = crit(out, y) 
                if triplet_coef_train:
                    x2 = torch.tensor(x_train[d2]).float()
                    y2 = torch.tensor(y_train[d2]).float()

                    out_angle1 = model(x, d, only_angle=True, use_softmax=False)
                    out_angle2 = model(x2, d2, only_angle=True, use_softmax=False)

                    tloss = tl(out_angle1, out_angle2, y, y2)* triplet_coef_train
                    loss += tloss
                    

                loss.backward()
                
            #print ('before', [r.angle.item() for r in rots])

                opt.step()
            #print ([r.angle.grad for r in rots])
            #print ('after', [r.angle.item() for r in rots])

            angles= [str(r.angle.item()) for r in model.rots]
            #tq.set_description(str(loss.item())+';'+str(loss2.item()))
            tq.set_description(' '.join(angles))
        total = 0
        for d in range(D):

            out = model(torch.tensor(x_test[d]).float(), d, use_softmax=False)
            total = crit2(out[:,0], torch.tensor(y_test[d]).float()).item()

        acc = total/D
        accs.append(acc)
        param_num.append(calc_params(model.gammas))
        print (acc, param_num[-1])
    return (np.mean(accs), np.mean(param_num))
#dartslike2(10000.0, 10000.0)

In [98]:
# no reg
dartslike2(0.0, 0.0)
# old:
# 2d: 0.02343731336295605, 4.0
# 3d: 0.016, 6.6

#2d: 0.03537779226899147, 4.0
#3d: 0.017075569182634354, 6.6
#4d: (0.01488051600754261, 9.2)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num//2))


  0%|          | 0/1000 [00:00<?, ?it/s]

  g0 = torch.nn.functional.softmax(gammas[0])
  g1 = torch.nn.functional.softmax(gammas[1])
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num//2))


  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 3])
tensor([0, 0])
tensor([0, 3])
tensor([2, 2])
Parameter containing:
tensor([[0.8832, 0.9156, 0.3823, 0.9583],
        [0.3901, 0.5953, 0.2587, 0.7974]], requires_grad=True)
Parameter containing:
tensor([[0.9423, 0.1347, 0.9328, 0.5924],
        [0.8704, 0.6003, 0.7208, 0.4160]], requires_grad=True)
Parameter containing:
tensor([[0.8833, 0.5729, 0.2679, 0.6293],
        [0.2820, 0.4298, 0.3076, 0.8201]], requires_grad=True)
Parameter containing:
tensor([[0.1050, 0.2672, 0.3603, 0.2006],
        [0.5554, 0.0548, 0.8893, 0.0806]], requires_grad=True)
0.01534047070890665 9


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 2])
tensor([2, 3])
tensor([2, 3])
tensor([1, 0])
Parameter containing:
tensor([[0.4538, 0.1979, 0.9180, 0.3479],
        [0.1455, 0.1039, 0.5600, 0.0814]], requires_grad=True)
Parameter containing:
tensor([[0.7505, 0.6231, 0.9386, 0.1337],
        [0.5298, 0.5135, 0.5809, 0.6718]], requires_grad=True)
Parameter containing:
tensor([[0.4271, 0.2755, 0.8415, 0.1554],
        [0.1208, 0.7169, 0.3439, 0.8650]], requires_grad=True)
Parameter containing:
tensor([[0.1315, 0.8373, 0.5637, 0.4132],
        [0.8664, 0.2721, 0.5899, 0.3252]], requires_grad=True)
0.017621809616684914 8


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([0, 0])
tensor([2, 2])
tensor([3, 3])
Parameter containing:
tensor([[0.7170, 0.7306, 0.8312, 0.1337],
        [0.6284, 0.7161, 0.2912, 0.2213]], requires_grad=True)
Parameter containing:
tensor([[0.9829, 0.8780, 0.9631, 0.7805],
        [0.8010, 0.0684, 0.7224, 0.3345]], requires_grad=True)
Parameter containing:
tensor([[0.5378, 0.7121, 0.7938, 0.2794],
        [0.8887, 0.6729, 0.8955, 0.4499]], requires_grad=True)
Parameter containing:
tensor([[0.4799, 0.3503, 0.0462, 0.7504],
        [0.7811, 0.4300, 0.1132, 0.8415]], requires_grad=True)
0.00875304639339447 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([0, 2])
tensor([3, 3])
tensor([1, 1])
Parameter containing:
tensor([[0.1854, 0.9671, 0.6828, 0.8952],
        [0.0501, 0.5841, 0.7377, 0.0768]], requires_grad=True)
Parameter containing:
tensor([[0.7844, 0.1480, 0.0381, 0.1041],
        [0.4112, 0.2540, 0.7913, 0.5476]], requires_grad=True)
Parameter containing:
tensor([[0.6850, 0.6488, 0.6122, 0.9858],
        [0.1825, 0.1282, 0.4311, 0.9626]], requires_grad=True)
Parameter containing:
tensor([[0.6246, 0.7819, 0.4108, 0.3465],
        [0.2900, 0.7298, 0.3974, 0.6055]], requires_grad=True)
0.015362891368567944 9


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 1])
tensor([2, 0])
tensor([1, 2])
tensor([2, 0])
Parameter containing:
tensor([[0.6566, 0.0582, 0.5212, 0.1621],
        [0.7452, 0.8399, 0.0739, 0.4655]], requires_grad=True)
Parameter containing:
tensor([[0.1479, 0.3602, 0.9323, 0.0168],
        [0.9702, 0.4104, 0.8734, 0.8723]], requires_grad=True)
Parameter containing:
tensor([[0.6589, 0.8172, 0.4793, 0.7366],
        [0.0287, 0.7103, 0.7319, 0.1095]], requires_grad=True)
Parameter containing:
tensor([[0.1454, 0.2626, 0.9038, 0.6625],
        [0.9824, 0.9360, 0.8630, 0.9663]], requires_grad=True)
0.017324361950159073 9


(0.01488051600754261, 9.2)

In [99]:
# triplets
# old:
#2d: 0.008606004761531949, 4.8

# 2d: 0.0240134846419096, 4.6
# 3d: 0.010908304899930953, 7.0
# 4d: (0.011116203665733338, 9.8)
dartslike2(10000.0, 10000.0)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num//2))


  0%|          | 0/1000 [00:00<?, ?it/s]

  g0 = torch.nn.functional.softmax(gammas[0])
  g1 = torch.nn.functional.softmax(gammas[1])
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num//2))


  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 3])
tensor([0, 0])
tensor([1, 3])
tensor([2, 2])
Parameter containing:
tensor([[-2.5283, -2.7558,  0.6322,  7.7914],
        [ 0.3917,  0.5991,  0.2530,  0.7977]], requires_grad=True)
Parameter containing:
tensor([[ 4.3672, -3.1780,  0.2710,  1.1419],
        [ 0.8674,  0.5886,  0.7350,  0.4166]], requires_grad=True)
Parameter containing:
tensor([[-0.1411,  4.5671, -0.9066, -1.1660],
        [ 0.2822,  0.4345,  0.3020,  0.8210]], requires_grad=True)
Parameter containing:
tensor([[-3.7467, -3.4386,  7.2505,  0.8678],
        [ 0.5448,  0.0396,  0.9222,  0.0737]], requires_grad=True)
0.009076532907783985 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 2])
tensor([0, 3])
tensor([2, 3])
tensor([1, 0])
Parameter containing:
tensor([[-1.5949, -0.8412, -2.7876,  7.1413],
        [ 0.1515,  0.0946,  0.5703,  0.0744]], requires_grad=True)
Parameter containing:
tensor([[ 8.8506, -2.1934, -2.7439, -1.4675],
        [ 0.5220,  0.5232,  0.5605,  0.6903]], requires_grad=True)
Parameter containing:
tensor([[-1.1780, -2.2187,  7.4021, -2.3059],
        [ 0.1146,  0.7202,  0.3350,  0.8769]], requires_grad=True)
Parameter containing:
tensor([[-2.6189,  8.4810, -2.7957, -1.1210],
        [ 0.8628,  0.2783,  0.5840,  0.3284]], requires_grad=True)
0.010668590664863586 10


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([3, 1])
tensor([2, 0])
tensor([1, 2])
tensor([1, 3])
Parameter containing:
tensor([[-1.1542, -2.6737, -1.6668,  7.9071],
        [ 0.6268,  0.7216,  0.2915,  0.2171]], requires_grad=True)
Parameter containing:
tensor([[-0.7223, -1.0273,  6.6966, -1.3425],
        [ 0.8028,  0.0657,  0.7225,  0.3353]], requires_grad=True)
Parameter containing:
tensor([[-3.0036,  5.4971,  2.5948, -2.7653],
        [ 0.8934,  0.6669,  0.8978,  0.4489]], requires_grad=True)
Parameter containing:
tensor([[ 1.2382,  5.6749, -3.1343, -2.1520],
        [ 0.7782,  0.4330,  0.1133,  0.8414]], requires_grad=True)
0.009599536657333374 11


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 2])
tensor([3, 3])
tensor([2, 1])
Parameter containing:
tensor([[-1.9955,  8.2286, -1.4443, -2.0582],
        [ 0.0505,  0.5722,  0.7506,  0.0754]], requires_grad=True)
Parameter containing:
tensor([[-0.3737,  1.5546,  1.0041, -1.1102],
        [ 0.4090,  0.2518,  0.7958,  0.5475]], requires_grad=True)
Parameter containing:
tensor([[-1.0391, -1.8535, -2.1180,  7.9424],
        [ 0.1824,  0.1291,  0.4295,  0.9634]], requires_grad=True)
Parameter containing:
tensor([[-1.5081, -2.0694,  7.7285, -1.9873],
        [ 0.2860,  0.7429,  0.3780,  0.6158]], requires_grad=True)
0.008096055127680302 9


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 1])
tensor([1, 0])
tensor([0, 2])
tensor([0, 0])
Parameter containing:
tensor([[-1.4323, -1.3248,  2.9234,  1.2319],
        [ 0.7404,  0.8466,  0.0721,  0.4656]], requires_grad=True)
Parameter containing:
tensor([[-0.5106,  7.0278, -2.4625, -2.5975],
        [ 0.9712,  0.4126,  0.8701,  0.8724]], requires_grad=True)
Parameter containing:
tensor([[ 6.9504, -0.2166, -2.0633, -1.9785],
        [ 0.0268,  0.7147,  0.7300,  0.1088]], requires_grad=True)
Parameter containing:
tensor([[ 6.7994, -2.0254, -2.1724, -0.6273],
        [ 0.9807,  0.9333,  0.8641,  0.9696]], requires_grad=True)
0.01814030297100544 9


(0.011116203665733338, 9.8)

In [100]:
# js
# old: 2d: 0.029673847928643228, 3.0

# 2d: (0.04007584974169731, 3.0)
# 3d: (0.025106737514336903, 3.0)
# 4d: (0.01823251470923424, 3.0)
dartslike2(js_coef=10.0)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num//2))


  0%|          | 0/1000 [00:00<?, ?it/s]

  g0 = torch.nn.functional.softmax(gammas[0])
  g1 = torch.nn.functional.softmax(gammas[1])
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  tq = tqdm.tqdm_notebook(range(epoch_num//2))


  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
tensor([0, 2])
Parameter containing:
tensor([[1.0081, 0.6950, 0.6811, 0.8435],
        [0.5465, 0.4668, 0.6008, 0.5778]], requires_grad=True)
Parameter containing:
tensor([[0.9021, 0.6220, 0.6094, 0.7548],
        [0.6656, 0.5692, 0.7325, 0.7036]], requires_grad=True)
Parameter containing:
tensor([[0.7723, 0.5325, 0.5219, 0.6464],
        [0.5102, 0.4353, 0.5604, 0.5386]], requires_grad=True)
Parameter containing:
tensor([[0.3138, 0.2163, 0.2120, 0.2626],
        [0.5466, 0.4671, 0.6006, 0.5783]], requires_grad=True)
0.01659386046230793 3


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 3])
tensor([2, 3])
tensor([2, 3])
tensor([2, 3])
Parameter containing:
tensor([[0.4698, 0.5202, 0.8046, 0.2793],
        [0.2964, 0.2886, 0.3259, 0.3529]], requires_grad=True)
Parameter containing:
tensor([[0.5782, 0.6403, 0.9900, 0.3438],
        [0.5400, 0.5263, 0.5959, 0.6434]], requires_grad=True)
Parameter containing:
tensor([[0.4239, 0.4693, 0.7259, 0.2520],
        [0.5564, 0.5415, 0.6116, 0.6620]], requires_grad=True)
Parameter containing:
tensor([[0.4683, 0.5185, 0.8020, 0.2784],
        [0.5286, 0.5150, 0.5825, 0.6301]], requires_grad=True)
0.021483253687620163 3


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[0.6970, 0.6830, 0.7207, 0.5248],
        [0.6653, 0.4395, 0.4883, 0.4274]], requires_grad=True)
Parameter containing:
tensor([[0.9547, 0.9357, 0.9865, 0.7186],
        [0.7341, 0.4853, 0.5386, 0.4712]], requires_grad=True)
Parameter containing:
tensor([[0.6463, 0.6331, 0.6677, 0.4865],
        [0.9698, 0.6405, 0.7119, 0.6227]], requires_grad=True)
Parameter containing:
tensor([[0.5092, 0.4988, 0.5262, 0.3833],
        [0.7988, 0.5278, 0.5861, 0.5129]], requires_grad=True)
0.017450008541345596 3


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
tensor([1, 2])
Parameter containing:
tensor([[0.6939, 0.8320, 0.6011, 0.8086],
        [0.2587, 0.4419, 0.5850, 0.5787]], requires_grad=True)
Parameter containing:
tensor([[0.3910, 0.4685, 0.3387, 0.4556],
        [0.2897, 0.4950, 0.6554, 0.6479]], requires_grad=True)
Parameter containing:
tensor([[0.7001, 0.8386, 0.6064, 0.8160],
        [0.2896, 0.4948, 0.6551, 0.6482]], requires_grad=True)
Parameter containing:
tensor([[0.5363, 0.6427, 0.4646, 0.6250],
        [0.2885, 0.4929, 0.6528, 0.6450]], requires_grad=True)
0.016816221177577972 3


  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]

tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
tensor([2, 0])
Parameter containing:
tensor([[0.3659, 0.3642, 0.5757, 0.3899],
        [0.6513, 0.6243, 0.5918, 0.5805]], requires_grad=True)
Parameter containing:
tensor([[0.4320, 0.4299, 0.6796, 0.4602],
        [0.8594, 0.8216, 0.7803, 0.7654]], requires_grad=True)
Parameter containing:
tensor([[0.5729, 0.5701, 0.9010, 0.6102],
        [0.5532, 0.5290, 0.5021, 0.4929]], requires_grad=True)
Parameter containing:
tensor([[0.4911, 0.4887, 0.7725, 0.5232],
        [0.9988, 0.9529, 0.9054, 0.8891]], requires_grad=True)
0.018819229677319527 3


(0.01823251470923424, 3.0)