In [34]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from timeit import default_timer as timer
from scipy.special import softmax



In [35]:
df = pd.read_csv('ESS8_data.csv')

In [36]:
X = df[['SD1', 'PO1', 'UN1', 'AC1', 'SC1',
       'ST1', 'CO1', 'TR1', 'HD1', 'AC2', 'SC2', 'ST2',
       'CO2', 'PO2', 'BE2', 'TR2', 'HD2']].iloc[range(1000),:]
X = X.to_numpy().T
N, M = X.T.shape
K = 5

In [37]:
# applies both for eq. 7 and 8
def error(a_i,Qt,qt):
    return (0.5 * a_i.T @ Qt @ a_i) - (qt.T @ a_i)

## Defining the helper functions applyConstraints and furthestSum

applyConstrants ensures that the constraints of the problem are upheld i.e. $e_{ij} \geq 0$ and $1^T \textbf{e}_j = 1$


furthestSum is an effective way to initialize the values of $\textbf{b}_i$

In [38]:
def applyConstrains(M):
    return softmax(M, axis = 0)

In [39]:
def furthestSum(X):
    # Choose a random point for initialization
    idx = int(np.random.choice(range(0,N)))
    x_j = X[:,idx]

    j_news = list()
    j_news.append(idx)

    excluded = [idx]

    # Loop over the K archetypes
    for n in range(K):
        best_val = 0
        best_idx = 0
        # Loop over all unseen samples
        for i in range(N-len(excluded)):
            if i not in excluded:
                val = 0
                # sum over each element for each point
                for ele in j_news:
                    for j in range(M):
                        
                        val += np.abs(X[j,i] - X[j , ele])
                if val > best_val:
                    best_val = val
                    best_idx = i
                    
        # 
        j_news.append(int(best_idx))
        excluded.append(best_idx)
        # Remove the random initialization
        if n == 0:
            j_news.pop(0)
            excluded.pop(0)
        
    return j_news


#init_vals_b = X[init_idxs[i],:].astype(np.float64)
#init_vals_bt = torch.tensor(init_vals_b, requires_grad = False).float()

#init_vals_b.shape


## Initializing variables for AA

In [40]:
# Initializing Z through furthest sum
#Z = X[:,furthestSum(X)]
import random
Z = X[:,random.sample(range(1, M), K)]
print(Z.shape)
R = X.T @ X
Rt = torch.tensor(X.T @ X,requires_grad=False).float()

RSS_values = list()
A = np.zeros((K,N))#.tolist()
B = np.zeros((N,K))#.tolist()

torch.manual_seed(42)


(17, 5)


<torch._C.Generator at 0x1eb8ce3a5d0>

In [45]:
start = timer()
torch.manual_seed(42)

RSS_values = list()
A = np.zeros((K,N))#.tolist()
B = np.zeros((N,K))#.tolist()

# LOOP UNTIL RSS IS LOW
for n in range(2): #N
    Q = Z.T @ Z
    Qt = torch.tensor(Q,requires_grad=False).float()

    # LOOP THROUGH ENTIRE A
    for i in range(N):
        q = Z.T @ X[:,i]
        qt = torch.tensor(q,requires_grad=False).float()
        
        if n == 0:
            a_i = torch.autograd.Variable(torch.rand(K, 1), requires_grad=True) # eller er det Kx1 ?
            optimizer_a = optim.Adam([a_i], lr=0.01)
        
        stop_loss = 1e-5
        step_size = 0.001 # stop_loss / 3.0

        err = error(a_i,Qt,qt)
        print('Loss before: %s' % (torch.norm( err, p=2)))

        # TRAINING LOOP
        for k in range(100): # 100000
            #optimizer_a.zero_grad()
            Delta = error(a_i,Qt,qt) 
            L = torch.norm(Delta, p=2)
            L.backward()
            optimizer_a.step()
            #a_i.data -= step_size * a_i.grad.data # step
            #a_i.grad.data.zero_()
            if k % 10000 == 0: print('Loss for a is %s at iteration %i' % (L, k))
            if abs(L) < stop_loss:
                print('It took %s iterations to achieve %s loss.' % (k, step_size))
                break
        
        A[:,i] = np.array(a_i.tolist()).flatten() 
        
        print('Loss after: %s' % (torch.norm( error(a_i,Qt,qt) )))
    
    A = applyConstrains(A)
    Z = X @ A.T @ np.linalg.inv(A@A.T)
    
    
    # LOOP THROUGH ENTIRE B
    for i in range(K): #K
        r = X.T @ Z[:,i]
        rt = torch.tensor(r,requires_grad=False).float()
        
        if n == 0:
            b_i = torch.autograd.Variable(torch.randn(N,1), requires_grad=True)
            optimizer_b = optim.Adam([b_i], lr=0.01)
        
        
        stop_loss = 1e-5
        step_size = 0.001 # stop_loss / 3.0

        err = error(b_i,Rt,rt)
        #print('Loss before: %s' % (torch.norm( err, p=2)))

        # TRAINING LOOP
        for k in range(10000): # 100000
            optimizer_b.zero_grad()
            Delta = error(b_i,Rt,rt)
            L = torch.norm(Delta, p=2)
            L.backward()
            optimizer_b.step()
            
            # b_i.data -= step_size * b_i.grad.data # step
            # b_i.grad.data.zero_()
            if k % 10000 == 0: print('Loss for b is %s at iteration %i' % (L, k))
            if abs(L) < stop_loss:
                print('It took %s iterations to achieve %s loss.' % (k, step_size))
                break

        B[:,i] = np.array(b_i.tolist()).flatten() 
        #print('Loss after: %s' % (torch.norm( error(b_i,Rt,rt) )))    
    
    # apply softmax here
    B = applyConstrains(B)
    Z = X @ B
    
    print(n)
    Zt = torch.tensor(Z, requires_grad=False).float()
    At = torch.tensor(A, requires_grad=False).float()
    Xt = torch.tensor(X,requires_grad=False).float()
    print("RSS at n=%s" % n, torch.norm(Xt-Zt@At,p='fro'))
    RSS_values.append( torch.norm(Xt-Zt@At,p='fro'))
    
end = timer()

print("It took: {0} seconds to finish running".format(end - start))
print("The best RSS value was", min(RSS_values))
print(RSS_values)

Loss before: tensor(300.8392, grad_fn=<NormBackward1>)
Loss for a is tensor(300.8392, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(323.7737, grad_fn=<CopyBackwards>)
Loss before: tensor(2.2918, grad_fn=<NormBackward1>)
Loss for a is tensor(2.2918, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(126.4243, grad_fn=<CopyBackwards>)
Loss before: tensor(317.4768, grad_fn=<NormBackward1>)
Loss for a is tensor(317.4768, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(327.1076, grad_fn=<CopyBackwards>)
Loss before: tensor(207.6703, grad_fn=<NormBackward1>)
Loss for a is tensor(207.6703, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(58.7195, grad_fn=<CopyBackwards>)
Loss before: tensor(12.1112, grad_fn=<NormBackward1>)
Loss for a is tensor(12.1112, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(204.7800, grad_fn=<CopyBackwards>)
Loss before: tensor(51.8335, grad_fn=<NormBackward1>)
Loss for a is tensor(51.8335, grad_fn=<NormBackward1>

Loss after: tensor(42.0948, grad_fn=<CopyBackwards>)
Loss before: tensor(33.9076, grad_fn=<NormBackward1>)
Loss for a is tensor(33.9076, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(137.4123, grad_fn=<CopyBackwards>)
Loss before: tensor(51.3154, grad_fn=<NormBackward1>)
Loss for a is tensor(51.3154, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(51.7916, grad_fn=<CopyBackwards>)
Loss before: tensor(108.6993, grad_fn=<NormBackward1>)
Loss for a is tensor(108.6993, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(167.6154, grad_fn=<CopyBackwards>)
Loss before: tensor(114.2388, grad_fn=<NormBackward1>)
Loss for a is tensor(114.2388, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(51.9505, grad_fn=<CopyBackwards>)
Loss before: tensor(142.5853, grad_fn=<NormBackward1>)
Loss for a is tensor(142.5853, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(32.1600, grad_fn=<CopyBackwards>)
Loss before: tensor(90.1635, grad_fn=<NormBackward1>)


Loss after: tensor(205.7693, grad_fn=<CopyBackwards>)
Loss before: tensor(21.1095, grad_fn=<NormBackward1>)
Loss for a is tensor(21.1095, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(257.5295, grad_fn=<CopyBackwards>)
Loss before: tensor(238.2631, grad_fn=<NormBackward1>)
Loss for a is tensor(238.2631, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(210.7515, grad_fn=<CopyBackwards>)
Loss before: tensor(153.2475, grad_fn=<NormBackward1>)
Loss for a is tensor(153.2475, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(15.1951, grad_fn=<CopyBackwards>)
Loss before: tensor(110.0352, grad_fn=<NormBackward1>)
Loss for a is tensor(110.0352, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(3.3438, grad_fn=<CopyBackwards>)
Loss before: tensor(327.3164, grad_fn=<NormBackward1>)
Loss for a is tensor(327.3164, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(274.4658, grad_fn=<CopyBackwards>)
Loss before: tensor(52.6183, grad_fn=<NormBackward1

Loss before: tensor(120.1029, grad_fn=<NormBackward1>)
Loss for a is tensor(120.1029, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(38.9199, grad_fn=<CopyBackwards>)
Loss before: tensor(171.7059, grad_fn=<NormBackward1>)
Loss for a is tensor(171.7059, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(71.4929, grad_fn=<CopyBackwards>)
Loss before: tensor(60.2455, grad_fn=<NormBackward1>)
Loss for a is tensor(60.2455, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(209.2016, grad_fn=<CopyBackwards>)
Loss before: tensor(4.5679, grad_fn=<NormBackward1>)
Loss for a is tensor(4.5679, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(75.6931, grad_fn=<CopyBackwards>)
Loss before: tensor(182.9128, grad_fn=<NormBackward1>)
Loss for a is tensor(182.9128, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(34.0566, grad_fn=<CopyBackwards>)
Loss before: tensor(252.8237, grad_fn=<NormBackward1>)
Loss for a is tensor(252.8237, grad_fn=<NormBackward1>)

Loss before: tensor(83.7215, grad_fn=<NormBackward1>)
Loss for a is tensor(83.7215, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(104.5795, grad_fn=<CopyBackwards>)
Loss before: tensor(44.5207, grad_fn=<NormBackward1>)
Loss for a is tensor(44.5207, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(112.8283, grad_fn=<CopyBackwards>)
Loss before: tensor(42.0614, grad_fn=<NormBackward1>)
Loss for a is tensor(42.0614, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(13.0506, grad_fn=<CopyBackwards>)
Loss before: tensor(377.9245, grad_fn=<NormBackward1>)
Loss for a is tensor(377.9245, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(330.8713, grad_fn=<CopyBackwards>)
Loss before: tensor(153.6602, grad_fn=<NormBackward1>)
Loss for a is tensor(153.6602, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(235.8551, grad_fn=<CopyBackwards>)
Loss before: tensor(328.1022, grad_fn=<NormBackward1>)
Loss for a is tensor(328.1022, grad_fn=<NormBackward

Loss before: tensor(94.2581, grad_fn=<NormBackward1>)
Loss for a is tensor(94.2581, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(38.7576, grad_fn=<CopyBackwards>)
Loss before: tensor(13.8201, grad_fn=<NormBackward1>)
Loss for a is tensor(13.8201, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(13.9906, grad_fn=<CopyBackwards>)
Loss before: tensor(19.2500, grad_fn=<NormBackward1>)
Loss for a is tensor(19.2500, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(218.9970, grad_fn=<CopyBackwards>)
Loss before: tensor(193.6577, grad_fn=<NormBackward1>)
Loss for a is tensor(193.6577, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(99.7156, grad_fn=<CopyBackwards>)
Loss before: tensor(376.7183, grad_fn=<NormBackward1>)
Loss for a is tensor(376.7183, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(333.4858, grad_fn=<CopyBackwards>)
Loss before: tensor(420.7881, grad_fn=<NormBackward1>)
Loss for a is tensor(420.7881, grad_fn=<NormBackward1>

Loss before: tensor(113.1466, grad_fn=<NormBackward1>)
Loss for a is tensor(113.1466, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(8.8352, grad_fn=<CopyBackwards>)
Loss before: tensor(20.7363, grad_fn=<NormBackward1>)
Loss for a is tensor(20.7363, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(46.4425, grad_fn=<CopyBackwards>)
Loss before: tensor(24.0256, grad_fn=<NormBackward1>)
Loss for a is tensor(24.0256, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(15.2036, grad_fn=<CopyBackwards>)
Loss before: tensor(231.4613, grad_fn=<NormBackward1>)
Loss for a is tensor(231.4613, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(144.2032, grad_fn=<CopyBackwards>)
Loss before: tensor(26.3767, grad_fn=<NormBackward1>)
Loss for a is tensor(26.3767, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23.5412, grad_fn=<CopyBackwards>)
Loss before: tensor(36.4454, grad_fn=<NormBackward1>)
Loss for a is tensor(36.4454, grad_fn=<NormBackward1>) at

Loss after: tensor(68.8001, grad_fn=<CopyBackwards>)
Loss before: tensor(21.8276, grad_fn=<NormBackward1>)
Loss for a is tensor(21.8276, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(170.1996, grad_fn=<CopyBackwards>)
Loss before: tensor(78.6160, grad_fn=<NormBackward1>)
Loss for a is tensor(78.6160, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(67.0841, grad_fn=<CopyBackwards>)
Loss before: tensor(219.0133, grad_fn=<NormBackward1>)
Loss for a is tensor(219.0133, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(45.8931, grad_fn=<CopyBackwards>)
Loss before: tensor(126.2834, grad_fn=<NormBackward1>)
Loss for a is tensor(126.2834, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(53.9636, grad_fn=<CopyBackwards>)
Loss before: tensor(186.4688, grad_fn=<NormBackward1>)
Loss for a is tensor(186.4688, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(22.6221, grad_fn=<CopyBackwards>)
Loss before: tensor(306.0192, grad_fn=<NormBackward1>)


Loss before: tensor(177.0859, grad_fn=<NormBackward1>)
Loss for a is tensor(177.0859, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.1487, grad_fn=<CopyBackwards>)
Loss before: tensor(115.8933, grad_fn=<NormBackward1>)
Loss for a is tensor(115.8933, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(57.7049, grad_fn=<CopyBackwards>)
Loss before: tensor(1.5952, grad_fn=<NormBackward1>)
Loss for a is tensor(1.5952, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(240.6065, grad_fn=<CopyBackwards>)
Loss before: tensor(86.3269, grad_fn=<NormBackward1>)
Loss for a is tensor(86.3269, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(228.1895, grad_fn=<CopyBackwards>)
Loss before: tensor(115.3024, grad_fn=<NormBackward1>)
Loss for a is tensor(115.3024, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(107.4028, grad_fn=<CopyBackwards>)
Loss before: tensor(143.1650, grad_fn=<NormBackward1>)
Loss for a is tensor(143.1650, grad_fn=<NormBackward1>

Loss before: tensor(75.8499, grad_fn=<NormBackward1>)
Loss for a is tensor(75.8499, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(35.0896, grad_fn=<CopyBackwards>)
Loss before: tensor(94.2439, grad_fn=<NormBackward1>)
Loss for a is tensor(94.2439, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(28.4578, grad_fn=<CopyBackwards>)
Loss before: tensor(401.7304, grad_fn=<NormBackward1>)
Loss for a is tensor(401.7304, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(377.5697, grad_fn=<CopyBackwards>)
Loss before: tensor(181.7325, grad_fn=<NormBackward1>)
Loss for a is tensor(181.7325, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(66.5087, grad_fn=<CopyBackwards>)
Loss before: tensor(33.0934, grad_fn=<NormBackward1>)
Loss for a is tensor(33.0934, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(217.7743, grad_fn=<CopyBackwards>)
Loss before: tensor(204.6051, grad_fn=<NormBackward1>)
Loss for a is tensor(204.6051, grad_fn=<NormBackward1>

Loss before: tensor(175.3185, grad_fn=<NormBackward1>)
Loss for a is tensor(175.3185, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(17.7622, grad_fn=<CopyBackwards>)
Loss before: tensor(533.0519, grad_fn=<NormBackward1>)
Loss for a is tensor(533.0519, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(404.7673, grad_fn=<CopyBackwards>)
Loss before: tensor(34.5114, grad_fn=<NormBackward1>)
Loss for a is tensor(34.5114, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(208.1956, grad_fn=<CopyBackwards>)
Loss before: tensor(28.8638, grad_fn=<NormBackward1>)
Loss for a is tensor(28.8638, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2.1045, grad_fn=<CopyBackwards>)
Loss before: tensor(62.6278, grad_fn=<NormBackward1>)
Loss for a is tensor(62.6278, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(202.0222, grad_fn=<CopyBackwards>)
Loss before: tensor(276.6953, grad_fn=<NormBackward1>)
Loss for a is tensor(276.6953, grad_fn=<NormBackward1>

Loss before: tensor(163.0724, grad_fn=<NormBackward1>)
Loss for a is tensor(163.0724, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(31.6468, grad_fn=<CopyBackwards>)
Loss before: tensor(106.0963, grad_fn=<NormBackward1>)
Loss for a is tensor(106.0963, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(58.8816, grad_fn=<CopyBackwards>)
Loss before: tensor(126.9662, grad_fn=<NormBackward1>)
Loss for a is tensor(126.9662, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(31.2840, grad_fn=<CopyBackwards>)
Loss before: tensor(40.4983, grad_fn=<NormBackward1>)
Loss for a is tensor(40.4983, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(166.9915, grad_fn=<CopyBackwards>)
Loss before: tensor(224.3091, grad_fn=<NormBackward1>)
Loss for a is tensor(224.3091, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(91.5694, grad_fn=<CopyBackwards>)
Loss before: tensor(319.4965, grad_fn=<NormBackward1>)
Loss for a is tensor(319.4965, grad_fn=<NormBackwar

Loss before: tensor(281.7203, grad_fn=<NormBackward1>)
Loss for a is tensor(281.7203, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(308.6913, grad_fn=<CopyBackwards>)
Loss before: tensor(324.5795, grad_fn=<NormBackward1>)
Loss for a is tensor(324.5795, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(305.8875, grad_fn=<CopyBackwards>)
Loss before: tensor(80.9658, grad_fn=<NormBackward1>)
Loss for a is tensor(80.9658, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(248.7290, grad_fn=<CopyBackwards>)
Loss before: tensor(72.0463, grad_fn=<NormBackward1>)
Loss for a is tensor(72.0463, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(29.4204, grad_fn=<CopyBackwards>)
Loss before: tensor(110.7896, grad_fn=<NormBackward1>)
Loss for a is tensor(110.7896, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(143.1915, grad_fn=<CopyBackwards>)
Loss before: tensor(256.7381, grad_fn=<NormBackward1>)
Loss for a is tensor(256.7381, grad_fn=<NormBackwa

Loss before: tensor(123.9382, grad_fn=<NormBackward1>)
Loss for a is tensor(123.9382, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24.7075, grad_fn=<CopyBackwards>)
Loss before: tensor(131.0953, grad_fn=<NormBackward1>)
Loss for a is tensor(131.0953, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(54.6070, grad_fn=<CopyBackwards>)
Loss before: tensor(17.9258, grad_fn=<NormBackward1>)
Loss for a is tensor(17.9258, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.1273, grad_fn=<CopyBackwards>)
Loss before: tensor(292.2140, grad_fn=<NormBackward1>)
Loss for a is tensor(292.2140, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(257.5831, grad_fn=<CopyBackwards>)
Loss before: tensor(158.5626, grad_fn=<NormBackward1>)
Loss for a is tensor(158.5626, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(92.9814, grad_fn=<CopyBackwards>)
Loss before: tensor(167.3985, grad_fn=<NormBackward1>)
Loss for a is tensor(167.3985, grad_fn=<NormBackward

Loss before: tensor(36.6210, grad_fn=<NormBackward1>)
Loss for a is tensor(36.6210, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(200.9942, grad_fn=<CopyBackwards>)
Loss before: tensor(49.0254, grad_fn=<NormBackward1>)
Loss for a is tensor(49.0254, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24.0403, grad_fn=<CopyBackwards>)
Loss before: tensor(59.0648, grad_fn=<NormBackward1>)
Loss for a is tensor(59.0648, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(203.9792, grad_fn=<CopyBackwards>)
Loss before: tensor(4.6042, grad_fn=<NormBackward1>)
Loss for a is tensor(4.6042, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(8.0677, grad_fn=<CopyBackwards>)
Loss before: tensor(163.2708, grad_fn=<NormBackward1>)
Loss for a is tensor(163.2708, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(16.2920, grad_fn=<CopyBackwards>)
Loss before: tensor(6.7136, grad_fn=<NormBackward1>)
Loss for a is tensor(6.7136, grad_fn=<NormBackward1>) at iter

Loss after: tensor(186.0126, grad_fn=<CopyBackwards>)
Loss before: tensor(462.2733, grad_fn=<NormBackward1>)
Loss for a is tensor(462.2733, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(387.1731, grad_fn=<CopyBackwards>)
Loss before: tensor(260.3030, grad_fn=<NormBackward1>)
Loss for a is tensor(260.3030, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(171.1438, grad_fn=<CopyBackwards>)
Loss before: tensor(32.2180, grad_fn=<NormBackward1>)
Loss for a is tensor(32.2180, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(55.0761, grad_fn=<CopyBackwards>)
Loss before: tensor(171.2400, grad_fn=<NormBackward1>)
Loss for a is tensor(171.2400, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(8.8711, grad_fn=<CopyBackwards>)
Loss before: tensor(46.9658, grad_fn=<NormBackward1>)
Loss for a is tensor(46.9658, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(79.9692, grad_fn=<CopyBackwards>)
Loss before: tensor(65.2477, grad_fn=<NormBackward1>)


Loss after: tensor(74.3556, grad_fn=<CopyBackwards>)
Loss before: tensor(103.3574, grad_fn=<NormBackward1>)
Loss for a is tensor(103.3574, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(31.8461, grad_fn=<CopyBackwards>)
Loss before: tensor(37.5512, grad_fn=<NormBackward1>)
Loss for a is tensor(37.5512, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(161.9050, grad_fn=<CopyBackwards>)
Loss before: tensor(11.6143, grad_fn=<NormBackward1>)
Loss for a is tensor(11.6143, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(228.7979, grad_fn=<CopyBackwards>)
Loss before: tensor(159.7957, grad_fn=<NormBackward1>)
Loss for a is tensor(159.7957, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24.5569, grad_fn=<CopyBackwards>)
Loss before: tensor(44.4802, grad_fn=<NormBackward1>)
Loss for a is tensor(44.4802, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(84.3807, grad_fn=<CopyBackwards>)
Loss before: tensor(416.1224, grad_fn=<NormBackward1>)
L

Loss after: tensor(91.5585, grad_fn=<CopyBackwards>)
Loss before: tensor(48.5112, grad_fn=<NormBackward1>)
Loss for a is tensor(48.5112, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(27.3940, grad_fn=<CopyBackwards>)
Loss before: tensor(236.9676, grad_fn=<NormBackward1>)
Loss for a is tensor(236.9676, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(144.3464, grad_fn=<CopyBackwards>)
Loss before: tensor(420.5465, grad_fn=<NormBackward1>)
Loss for a is tensor(420.5465, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(383.1764, grad_fn=<CopyBackwards>)
Loss before: tensor(212.7490, grad_fn=<NormBackward1>)
Loss for a is tensor(212.7490, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(85.5733, grad_fn=<CopyBackwards>)
Loss before: tensor(39.9891, grad_fn=<NormBackward1>)
Loss for a is tensor(39.9891, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(85.3260, grad_fn=<CopyBackwards>)
Loss before: tensor(178.2234, grad_fn=<NormBackward1>)

Loss after: tensor(184.7472, grad_fn=<CopyBackwards>)
Loss before: tensor(134.2220, grad_fn=<NormBackward1>)
Loss for a is tensor(134.2220, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(54.9825, grad_fn=<CopyBackwards>)
Loss before: tensor(33.8224, grad_fn=<NormBackward1>)
Loss for a is tensor(33.8224, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(208.4920, grad_fn=<CopyBackwards>)
Loss before: tensor(5.7505, grad_fn=<NormBackward1>)
Loss for a is tensor(5.7505, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(240.7773, grad_fn=<CopyBackwards>)
Loss before: tensor(270.1059, grad_fn=<NormBackward1>)
Loss for a is tensor(270.1059, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(202.3095, grad_fn=<CopyBackwards>)
Loss before: tensor(34.2169, grad_fn=<NormBackward1>)
Loss for a is tensor(34.2169, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(173.3436, grad_fn=<CopyBackwards>)
Loss before: tensor(267.9919, grad_fn=<NormBackward1>)


Loss after: tensor(12.1018, grad_fn=<CopyBackwards>)
Loss before: tensor(107.0731, grad_fn=<NormBackward1>)
Loss for a is tensor(107.0731, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(33.6232, grad_fn=<CopyBackwards>)
Loss before: tensor(14.2149, grad_fn=<NormBackward1>)
Loss for a is tensor(14.2149, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(101.6754, grad_fn=<CopyBackwards>)
Loss before: tensor(1.9567, grad_fn=<NormBackward1>)
Loss for a is tensor(1.9567, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(154.9419, grad_fn=<CopyBackwards>)
Loss before: tensor(74.4352, grad_fn=<NormBackward1>)
Loss for a is tensor(74.4352, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24.0986, grad_fn=<CopyBackwards>)
Loss before: tensor(74.2021, grad_fn=<NormBackward1>)
Loss for a is tensor(74.2021, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(257.6781, grad_fn=<CopyBackwards>)
Loss before: tensor(103.9747, grad_fn=<NormBackward1>)
Loss

Loss after: tensor(6.4988, grad_fn=<CopyBackwards>)
Loss before: tensor(35.7133, grad_fn=<NormBackward1>)
Loss for a is tensor(35.7133, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(168.1167, grad_fn=<CopyBackwards>)
Loss before: tensor(38.1447, grad_fn=<NormBackward1>)
Loss for a is tensor(38.1447, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(125.1247, grad_fn=<CopyBackwards>)
Loss before: tensor(32.6998, grad_fn=<NormBackward1>)
Loss for a is tensor(32.6998, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(106.6620, grad_fn=<CopyBackwards>)
Loss before: tensor(217.4122, grad_fn=<NormBackward1>)
Loss for a is tensor(217.4122, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(224.5769, grad_fn=<CopyBackwards>)
Loss before: tensor(55.9161, grad_fn=<NormBackward1>)
Loss for a is tensor(55.9161, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(38.0556, grad_fn=<CopyBackwards>)
Loss before: tensor(83.5261, grad_fn=<NormBackward1>)
Los

Loss after: tensor(295.7697, grad_fn=<CopyBackwards>)
Loss before: tensor(30.4817, grad_fn=<NormBackward1>)
Loss for a is tensor(30.4817, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(79.2947, grad_fn=<CopyBackwards>)
Loss before: tensor(47.0681, grad_fn=<NormBackward1>)
Loss for a is tensor(47.0681, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(30.6035, grad_fn=<CopyBackwards>)
Loss before: tensor(198.0891, grad_fn=<NormBackward1>)
Loss for a is tensor(198.0891, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(82.5894, grad_fn=<CopyBackwards>)
Loss before: tensor(26.0636, grad_fn=<NormBackward1>)
Loss for a is tensor(26.0636, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2.2021, grad_fn=<CopyBackwards>)
Loss before: tensor(259.0878, grad_fn=<NormBackward1>)
Loss for a is tensor(259.0878, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(192.2755, grad_fn=<CopyBackwards>)
Loss before: tensor(3.4306, grad_fn=<NormBackward1>)
Loss

Loss before: tensor(1662.5623, grad_fn=<NormBackward1>)
Loss for a is tensor(1662.5623, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(8064.9673, grad_fn=<CopyBackwards>)
Loss before: tensor(8481.1064, grad_fn=<NormBackward1>)
Loss for a is tensor(8481.1064, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(10704.3027, grad_fn=<CopyBackwards>)
Loss before: tensor(10909.2988, grad_fn=<NormBackward1>)
Loss for a is tensor(10909.2988, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(4873.1021, grad_fn=<CopyBackwards>)
Loss before: tensor(4335.2690, grad_fn=<NormBackward1>)
Loss for a is tensor(4335.2690, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(92.7822, grad_fn=<CopyBackwards>)
Loss before: tensor(31.9035, grad_fn=<NormBackward1>)
Loss for a is tensor(31.9035, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(3265.7217, grad_fn=<CopyBackwards>)
Loss before: tensor(3351.3818, grad_fn=<NormBackward1>)
Loss for a is tensor(3351.3818, 

Loss after: tensor(14085.3672, grad_fn=<CopyBackwards>)
Loss before: tensor(14511.5332, grad_fn=<NormBackward1>)
Loss for a is tensor(14511.5332, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(9412.7764, grad_fn=<CopyBackwards>)
Loss before: tensor(8485.3584, grad_fn=<NormBackward1>)
Loss for a is tensor(8485.3584, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1733.7052, grad_fn=<CopyBackwards>)
Loss before: tensor(2010.5321, grad_fn=<NormBackward1>)
Loss for a is tensor(2010.5321, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(142.8842, grad_fn=<CopyBackwards>)
Loss before: tensor(137.3105, grad_fn=<NormBackward1>)
Loss for a is tensor(137.3105, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(4689.0786, grad_fn=<CopyBackwards>)
Loss before: tensor(4866.1895, grad_fn=<NormBackward1>)
Loss for a is tensor(4866.1895, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(11750.8516, grad_fn=<CopyBackwards>)
Loss before: tensor(11973.352

Loss after: tensor(17392.9902, grad_fn=<CopyBackwards>)
Loss before: tensor(16634.5234, grad_fn=<NormBackward1>)
Loss for a is tensor(16634.5234, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(13541.9648, grad_fn=<CopyBackwards>)
Loss before: tensor(13680.8008, grad_fn=<NormBackward1>)
Loss for a is tensor(13680.8008, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(6009.8511, grad_fn=<CopyBackwards>)
Loss before: tensor(5723.8340, grad_fn=<NormBackward1>)
Loss for a is tensor(5723.8340, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(347.4109, grad_fn=<CopyBackwards>)
Loss before: tensor(485.0126, grad_fn=<NormBackward1>)
Loss for a is tensor(485.0126, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(883.3464, grad_fn=<CopyBackwards>)
Loss before: tensor(1423.2986, grad_fn=<NormBackward1>)
Loss for a is tensor(1423.2986, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(7865.0776, grad_fn=<CopyBackwards>)
Loss before: tensor(7610.177

Loss before: tensor(5306.8555, grad_fn=<NormBackward1>)
Loss for a is tensor(5306.8555, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(210.6825, grad_fn=<CopyBackwards>)
Loss before: tensor(222.7522, grad_fn=<NormBackward1>)
Loss for a is tensor(222.7522, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1565.7910, grad_fn=<CopyBackwards>)
Loss before: tensor(1620.6101, grad_fn=<NormBackward1>)
Loss for a is tensor(1620.6101, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(8177.0420, grad_fn=<CopyBackwards>)
Loss before: tensor(8199.7949, grad_fn=<NormBackward1>)
Loss for a is tensor(8199.7949, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(15810.2607, grad_fn=<CopyBackwards>)
Loss before: tensor(15816.4912, grad_fn=<NormBackward1>)
Loss for a is tensor(15816.4912, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(19794.6816, grad_fn=<CopyBackwards>)
Loss before: tensor(19486.5645, grad_fn=<NormBackward1>)
Loss for a is tensor(19486.

Loss after: tensor(12585.6865, grad_fn=<CopyBackwards>)
Loss before: tensor(12931.7666, grad_fn=<NormBackward1>)
Loss for a is tensor(12931.7666, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(4412.9971, grad_fn=<CopyBackwards>)
Loss before: tensor(4816.4639, grad_fn=<NormBackward1>)
Loss for a is tensor(4816.4639, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(45.2115, grad_fn=<CopyBackwards>)
Loss before: tensor(30.8165, grad_fn=<NormBackward1>)
Loss for a is tensor(30.8165, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2252.6968, grad_fn=<CopyBackwards>)
Loss before: tensor(2352.6311, grad_fn=<NormBackward1>)
Loss for a is tensor(2352.6311, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(9485.3633, grad_fn=<CopyBackwards>)
Loss before: tensor(8880.1006, grad_fn=<NormBackward1>)
Loss for a is tensor(8880.1006, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(16648.4316, grad_fn=<CopyBackwards>)
Loss before: tensor(16595.1094, 

Loss before: tensor(19384.2402, grad_fn=<NormBackward1>)
Loss for a is tensor(19384.2402, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(10842.7881, grad_fn=<CopyBackwards>)
Loss before: tensor(10358.4014, grad_fn=<NormBackward1>)
Loss for a is tensor(10358.4014, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2503.5981, grad_fn=<CopyBackwards>)
Loss before: tensor(2610.8252, grad_fn=<NormBackward1>)
Loss for a is tensor(2610.8252, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(30.1508, grad_fn=<CopyBackwards>)
Loss before: tensor(3.5160, grad_fn=<NormBackward1>)
Loss for a is tensor(3.5160, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(3779.8877, grad_fn=<CopyBackwards>)
Loss before: tensor(3672.3621, grad_fn=<NormBackward1>)
Loss for a is tensor(3672.3621, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(11316.2539, grad_fn=<CopyBackwards>)
Loss before: tensor(11562.1992, grad_fn=<NormBackward1>)
Loss for a is tensor(11562.199

Loss after: tensor(20689.4688, grad_fn=<CopyBackwards>)
Loss before: tensor(20895.6406, grad_fn=<NormBackward1>)
Loss for a is tensor(20895.6406, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23999.4141, grad_fn=<CopyBackwards>)
Loss before: tensor(23632.1367, grad_fn=<NormBackward1>)
Loss for a is tensor(23632.1367, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(20046.2188, grad_fn=<CopyBackwards>)
Loss before: tensor(20391.7598, grad_fn=<NormBackward1>)
Loss for a is tensor(20391.7598, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(11718.2607, grad_fn=<CopyBackwards>)
Loss before: tensor(10686.4492, grad_fn=<NormBackward1>)
Loss for a is tensor(10686.4492, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2768.6550, grad_fn=<CopyBackwards>)
Loss before: tensor(3135.5273, grad_fn=<NormBackward1>)
Loss for a is tensor(3135.5273, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(74.2200, grad_fn=<CopyBackwards>)
Loss before: tensor(

Loss after: tensor(1052.5076, grad_fn=<CopyBackwards>)
Loss before: tensor(869.6763, grad_fn=<NormBackward1>)
Loss for a is tensor(869.6763, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(680.5939, grad_fn=<CopyBackwards>)
Loss before: tensor(702.9707, grad_fn=<NormBackward1>)
Loss for a is tensor(702.9707, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(6345.9126, grad_fn=<CopyBackwards>)
Loss before: tensor(6731.3340, grad_fn=<NormBackward1>)
Loss for a is tensor(6731.3340, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(15273.6475, grad_fn=<CopyBackwards>)
Loss before: tensor(14632.1709, grad_fn=<NormBackward1>)
Loss for a is tensor(14632.1709, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(21531.7480, grad_fn=<CopyBackwards>)
Loss before: tensor(21456.3926, grad_fn=<NormBackward1>)
Loss for a is tensor(21456.3926, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23319.0859, grad_fn=<CopyBackwards>)
Loss before: tensor(23039.97

Loss after: tensor(20275.4531, grad_fn=<CopyBackwards>)
Loss before: tensor(22199.5176, grad_fn=<NormBackward1>)
Loss for a is tensor(22199.5176, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(14668.6934, grad_fn=<CopyBackwards>)
Loss before: tensor(14306.0098, grad_fn=<NormBackward1>)
Loss for a is tensor(14306.0098, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(5303.8799, grad_fn=<CopyBackwards>)
Loss before: tensor(5193.4492, grad_fn=<NormBackward1>)
Loss for a is tensor(5193.4492, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(193.5675, grad_fn=<CopyBackwards>)
Loss before: tensor(258.2149, grad_fn=<NormBackward1>)
Loss for a is tensor(258.2149, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1498.3866, grad_fn=<CopyBackwards>)
Loss before: tensor(1692.3738, grad_fn=<NormBackward1>)
Loss for a is tensor(1692.3738, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(8430.9453, grad_fn=<CopyBackwards>)
Loss before: tensor(8210.32

Loss before: tensor(20.8603, grad_fn=<NormBackward1>)
Loss for a is tensor(20.8603, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(3498.4819, grad_fn=<CopyBackwards>)
Loss before: tensor(2954.4856, grad_fn=<NormBackward1>)
Loss for a is tensor(2954.4856, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(10266.7930, grad_fn=<CopyBackwards>)
Loss before: tensor(10782.8662, grad_fn=<NormBackward1>)
Loss for a is tensor(10782.8662, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(18880.5840, grad_fn=<CopyBackwards>)
Loss before: tensor(18791.8457, grad_fn=<NormBackward1>)
Loss for a is tensor(18791.8457, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23456.8477, grad_fn=<CopyBackwards>)
Loss before: tensor(24393.7441, grad_fn=<NormBackward1>)
Loss for a is tensor(24393.7441, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(22777.4316, grad_fn=<CopyBackwards>)
Loss before: tensor(22215.5957, grad_fn=<NormBackward1>)
Loss for a is tensor(2

Loss after: tensor(19635.0059, grad_fn=<CopyBackwards>)
Loss before: tensor(19217.2305, grad_fn=<NormBackward1>)
Loss for a is tensor(19217.2305, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23514.3750, grad_fn=<CopyBackwards>)
Loss before: tensor(23938.0605, grad_fn=<NormBackward1>)
Loss for a is tensor(23938.0605, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(21947.3086, grad_fn=<CopyBackwards>)
Loss before: tensor(21719.3535, grad_fn=<NormBackward1>)
Loss for a is tensor(21719.3535, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(14028.3584, grad_fn=<CopyBackwards>)
Loss before: tensor(13639.3604, grad_fn=<NormBackward1>)
Loss for a is tensor(13639.3604, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(4881.9604, grad_fn=<CopyBackwards>)
Loss before: tensor(5139.9907, grad_fn=<NormBackward1>)
Loss for a is tensor(5139.9907, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(187.1116, grad_fn=<CopyBackwards>)
Loss before: tensor

Loss before: tensor(3687.4636, grad_fn=<NormBackward1>)
Loss for a is tensor(3687.4636, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(38.6044, grad_fn=<CopyBackwards>)
Loss before: tensor(163.7390, grad_fn=<NormBackward1>)
Loss for a is tensor(163.7390, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(3120.5161, grad_fn=<CopyBackwards>)
Loss before: tensor(2563.5850, grad_fn=<NormBackward1>)
Loss for a is tensor(2563.5850, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(9696.5840, grad_fn=<CopyBackwards>)
Loss before: tensor(9886.0254, grad_fn=<NormBackward1>)
Loss for a is tensor(9886.0254, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(18092.1895, grad_fn=<CopyBackwards>)
Loss before: tensor(18911.2285, grad_fn=<NormBackward1>)
Loss for a is tensor(18911.2285, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24143.9922, grad_fn=<CopyBackwards>)
Loss before: tensor(23734.0879, grad_fn=<NormBackward1>)
Loss for a is tensor(23734.0

Loss before: tensor(17874.0352, grad_fn=<NormBackward1>)
Loss for a is tensor(17874.0352, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23378.1523, grad_fn=<CopyBackwards>)
Loss before: tensor(23723.0078, grad_fn=<NormBackward1>)
Loss for a is tensor(23723.0078, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23397.4844, grad_fn=<CopyBackwards>)
Loss before: tensor(23343.6914, grad_fn=<NormBackward1>)
Loss for a is tensor(23343.6914, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(16770.1855, grad_fn=<CopyBackwards>)
Loss before: tensor(17060.8672, grad_fn=<NormBackward1>)
Loss for a is tensor(17060.8672, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(7693.7739, grad_fn=<CopyBackwards>)
Loss before: tensor(7604.2515, grad_fn=<NormBackward1>)
Loss for a is tensor(7604.2515, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1018.4114, grad_fn=<CopyBackwards>)
Loss before: tensor(1030.4460, grad_fn=<NormBackward1>)
Loss for a is tens

Loss after: tensor(15784.1113, grad_fn=<CopyBackwards>)
Loss before: tensor(15563.3838, grad_fn=<NormBackward1>)
Loss for a is tensor(15563.3838, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(6344.4312, grad_fn=<CopyBackwards>)
Loss before: tensor(6506.6299, grad_fn=<NormBackward1>)
Loss for a is tensor(6506.6299, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(613.0547, grad_fn=<CopyBackwards>)
Loss before: tensor(595.8535, grad_fn=<NormBackward1>)
Loss for a is tensor(595.8535, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(929.2587, grad_fn=<CopyBackwards>)
Loss before: tensor(689.9109, grad_fn=<NormBackward1>)
Loss for a is tensor(689.9109, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(6236.5278, grad_fn=<CopyBackwards>)
Loss before: tensor(6516.3242, grad_fn=<NormBackward1>)
Loss for a is tensor(6516.3242, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(14956.5684, grad_fn=<CopyBackwards>)
Loss before: tensor(15382.3418, 

Loss after: tensor(129.1431, grad_fn=<CopyBackwards>)
Loss before: tensor(135.3759, grad_fn=<NormBackward1>)
Loss for a is tensor(135.3759, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(4519.6621, grad_fn=<CopyBackwards>)
Loss before: tensor(4490.6938, grad_fn=<NormBackward1>)
Loss for a is tensor(4490.6938, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(12512.8252, grad_fn=<CopyBackwards>)
Loss before: tensor(12332.2051, grad_fn=<NormBackward1>)
Loss for a is tensor(12332.2051, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(20346.4805, grad_fn=<CopyBackwards>)
Loss before: tensor(20823.8906, grad_fn=<NormBackward1>)
Loss for a is tensor(20823.8906, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(25206.0996, grad_fn=<CopyBackwards>)
Loss before: tensor(24854.4883, grad_fn=<NormBackward1>)
Loss for a is tensor(24854.4883, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(22950.5430, grad_fn=<CopyBackwards>)
Loss before: tensor(230

Loss after: tensor(12425.6562, grad_fn=<CopyBackwards>)
Loss before: tensor(11748.2617, grad_fn=<NormBackward1>)
Loss for a is tensor(11748.2617, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(19663.7188, grad_fn=<CopyBackwards>)
Loss before: tensor(20734.7617, grad_fn=<NormBackward1>)
Loss for a is tensor(20734.7617, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(25134.4180, grad_fn=<CopyBackwards>)
Loss before: tensor(25155.1309, grad_fn=<NormBackward1>)
Loss for a is tensor(25155.1309, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23164.9590, grad_fn=<CopyBackwards>)
Loss before: tensor(22804.6660, grad_fn=<NormBackward1>)
Loss for a is tensor(22804.6660, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(15047.3633, grad_fn=<CopyBackwards>)
Loss before: tensor(15225.5771, grad_fn=<NormBackward1>)
Loss for a is tensor(15225.5771, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(5931.3447, grad_fn=<CopyBackwards>)
Loss before: te

Loss before: tensor(24929.1680, grad_fn=<NormBackward1>)
Loss for a is tensor(24929.1680, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(22412.7559, grad_fn=<CopyBackwards>)
Loss before: tensor(22354.5977, grad_fn=<NormBackward1>)
Loss for a is tensor(22354.5977, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(14301.9170, grad_fn=<CopyBackwards>)
Loss before: tensor(13385.2891, grad_fn=<NormBackward1>)
Loss for a is tensor(13385.2891, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(4710.6694, grad_fn=<CopyBackwards>)
Loss before: tensor(5411.7471, grad_fn=<NormBackward1>)
Loss for a is tensor(5411.7471, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(234.7338, grad_fn=<CopyBackwards>)
Loss before: tensor(179.1242, grad_fn=<NormBackward1>)
Loss for a is tensor(179.1242, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1640.0271, grad_fn=<CopyBackwards>)
Loss before: tensor(1569.2778, grad_fn=<NormBackward1>)
Loss for a is tensor(156

Loss before: tensor(25.3825, grad_fn=<NormBackward1>)
Loss for a is tensor(25.3825, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2242.6528, grad_fn=<CopyBackwards>)
Loss before: tensor(2160.9797, grad_fn=<NormBackward1>)
Loss for a is tensor(2160.9797, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(9109.0098, grad_fn=<CopyBackwards>)
Loss before: tensor(8725.2012, grad_fn=<NormBackward1>)
Loss for a is tensor(8725.2012, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(17040.6660, grad_fn=<CopyBackwards>)
Loss before: tensor(17788.1641, grad_fn=<NormBackward1>)
Loss for a is tensor(17788.1641, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23986.4121, grad_fn=<CopyBackwards>)
Loss before: tensor(23845.2578, grad_fn=<NormBackward1>)
Loss for a is tensor(23845.2578, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24430.0605, grad_fn=<CopyBackwards>)
Loss before: tensor(24238.6543, grad_fn=<NormBackward1>)
Loss for a is tensor(2423

Loss before: tensor(6310.1538, grad_fn=<NormBackward1>)
Loss for a is tensor(6310.1538, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(14722.1162, grad_fn=<CopyBackwards>)
Loss before: tensor(12788.2793, grad_fn=<NormBackward1>)
Loss for a is tensor(12788.2793, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(19966.7480, grad_fn=<CopyBackwards>)
Loss before: tensor(21547.2266, grad_fn=<NormBackward1>)
Loss for a is tensor(21547.2266, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24796.8965, grad_fn=<CopyBackwards>)
Loss before: tensor(24276.0117, grad_fn=<NormBackward1>)
Loss for a is tensor(24276.0117, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(21216.7969, grad_fn=<CopyBackwards>)
Loss before: tensor(22254.8848, grad_fn=<NormBackward1>)
Loss for a is tensor(22254.8848, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(13881.5869, grad_fn=<CopyBackwards>)
Loss before: tensor(13179.8789, grad_fn=<NormBackward1>)
Loss for a is t

Loss before: tensor(20146.1328, grad_fn=<NormBackward1>)
Loss for a is tensor(20146.1328, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(24541.6602, grad_fn=<CopyBackwards>)
Loss before: tensor(24912.1641, grad_fn=<NormBackward1>)
Loss for a is tensor(24912.1641, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(23139.1387, grad_fn=<CopyBackwards>)
Loss before: tensor(23919.5801, grad_fn=<NormBackward1>)
Loss for a is tensor(23919.5801, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(16146.3105, grad_fn=<CopyBackwards>)
Loss before: tensor(14736.0293, grad_fn=<NormBackward1>)
Loss for a is tensor(14736.0293, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(5749.1792, grad_fn=<CopyBackwards>)
Loss before: tensor(6218.5518, grad_fn=<NormBackward1>)
Loss for a is tensor(6218.5518, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(516.4135, grad_fn=<CopyBackwards>)
Loss before: tensor(538.1691, grad_fn=<NormBackward1>)
Loss for a is tensor

Loss after: tensor(6417.1348, grad_fn=<CopyBackwards>)
Loss before: tensor(6366.5483, grad_fn=<NormBackward1>)
Loss for a is tensor(6366.5483, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(537.1934, grad_fn=<CopyBackwards>)
Loss before: tensor(484.9053, grad_fn=<NormBackward1>)
Loss for a is tensor(484.9053, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1051.1650, grad_fn=<CopyBackwards>)
Loss before: tensor(971.9641, grad_fn=<NormBackward1>)
Loss for a is tensor(971.9641, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(6895.1709, grad_fn=<CopyBackwards>)
Loss before: tensor(7315.9624, grad_fn=<NormBackward1>)
Loss for a is tensor(7315.9624, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(16014.8369, grad_fn=<CopyBackwards>)
Loss before: tensor(15552.1367, grad_fn=<NormBackward1>)
Loss for a is tensor(15552.1367, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(22746.6328, grad_fn=<CopyBackwards>)
Loss before: tensor(22655.9746,

Loss before: tensor(72.5284, grad_fn=<NormBackward1>)
Loss for a is tensor(72.5284, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(3781.4727, grad_fn=<CopyBackwards>)
Loss before: tensor(4054.5830, grad_fn=<NormBackward1>)
Loss for a is tensor(4054.5830, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(12035.8457, grad_fn=<CopyBackwards>)
Loss for b is tensor(13.9404, grad_fn=<NormBackward1>) at iteration 0
Loss for b is tensor(4820.3184, grad_fn=<NormBackward1>) at iteration 0
Loss for b is tensor(743.0152, grad_fn=<NormBackward1>) at iteration 0
Loss for b is tensor(653.1400, grad_fn=<NormBackward1>) at iteration 0
Loss for b is tensor(3202.8928, grad_fn=<NormBackward1>) at iteration 0
1
RSS at n=1 tensor(160.4064)
It took: 288.2289836 seconds to finish running
The best RSS value was tensor(160.3343)
[tensor(160.3343), tensor(160.4064)]


## Time at datasize 1000 x 17 for:
### Adam
$\textbf{Time}$ in seconds: 351.60987780000005 

$\textbf{RSS:}$ 160.3371 

$\textbf{Step size:}$ 0.00001

$\textbf{Stop loss:}$ $1\cdot 10^{-6}$

$\textbf{Learning rate:}$ 0.05

____________________________________________
$\textbf{Time}$ in seconds: 288.2289836 

$\textbf{RSS:}$ 160.4064

$\textbf{Step size:}$ 0.001

$\textbf{Stop loss:}$ $1\cdot 10^{-5}$

$\textbf{Learning rate:}$ 0.01

_____________________________________________

### Ordinal GD(?)
$\textbf{Time}$ in seconds: 236.4697258000001

$\textbf{RSS:}$ 160.3474

$\textbf{Step size:}$ 0.00001

$\textbf{Stop loss:}$ $1\cdot 10^{-6}$
______________________________________________

$\textbf{Time}$ in seconds: 220.46896449999986

$\textbf{RSS:}$ 160.3430

$\textbf{Step size:}$ 0.001

$\textbf{Stop loss:}$ $1\cdot 10^{-5}$

____________________________________________
### SGD
$\textbf{Time}$ in seconds: Never converged

$\textbf{RSS:}$ never converged $\rightarrow$ loss after for $\textbf{a}_i$ was consistently inf


In [61]:
class AA:
    """
    Class for applying conventional archetypal analysis.
    
    Input: 
    X = M x N matrix of data (features x samples)
    
    K = number of archetypes to be computed
    """
    
    def __init__(self, X, K):
        
        self.M, self.N = X.shape
        self.X = X
        self.Rt = torch.tensor(X.T @ X,requires_grad=False).float()
        self.K = K
    
    
    def applyConstraints(self, A):
        return softmax(A, axis = 0)
    
    def error(self, a_i, Qt, qt):
        return (0.5 * a_i.T @ Qt @ a_i) - (qt.T @ a_i)
        
    
    def furthestSum(self):
        # Choose a random point for initialization
        idx = int(np.random.choice(range(0,N)))
        x_j = self.X[:,idx]

        j_news = list()
        j_news.append(idx)

        excluded = [idx]

        # Loop over the K archetypes
        for n in range(self.K):
            best_val = 0
            best_idx = 0
            # Loop over all unseen samples
            for i in range(N-len(excluded)):
                if i not in excluded:
                    val = 0
                    # sum over each element for each point
                    for ele in j_news:
                        for j in range(M):

                            val += np.abs(self.X[j,i] - self.X[j , ele])
                    if val > best_val:
                        best_val = val
                        best_idx = i

            j_news.append(int(best_idx))
            excluded.append(best_idx)
            # Remove the random initialization
            if n == 0:
                j_news.pop(0)
                excluded.pop(0)
        return j_news

    """
    Function for applying conventional archetypal analysis:
    -----------------------------------------------------------------------
        Inputs: 
            self
            Z: Matrix that will contain the archetypes, initialize the values using furthestSum
            time: Boolean, if True the time of convergence will be measured
            lr: Learning rate for the optimizer
            stop_loss: stop loss value for the inner loop
            amsgrad: Boolean, if True uses the "amsgrad" method for Adam optimizer
            n_outer: Number of iterations to run the outermost loop
            n_inner: Number of iterations to run the inner loop
    """
    def applyAA(self, Z, time = True, lr = 0.01, stop_loss=1e-05, amsgrad=True, n_outer=2, n_inner=1000):
        torch.manual_seed(42)
        
        if time == True:
            start = timer()

        RSS_values = list()
        A = np.zeros((self.K, self.N))#.tolist()
        B = np.zeros((self.N, self.K))#.tolist()

        # LOOP UNTIL RSS IS LOW
        for n in range(n_outer):
            Q = Z.T @ Z
            Qt = torch.tensor(Q,requires_grad=False).float()

            # LOOP THROUGH ENTIRE A
            for i in range(self.N):
                q = Z.T @ self.X[:,i]
                qt = torch.tensor(q,requires_grad=False).float()

                if n == 0:
                    a_i = torch.autograd.Variable(torch.rand(self.K, 1), requires_grad=True) # eller er det Kx1 ?
                    optimizer_a = optim.Adam([a_i], lr=0.01, amsgrad = amsgrad)

                

                err = error(a_i,Qt,qt)
                print('Loss before: %s' % (torch.norm( err, p=2)))

                # TRAINING LOOP
                for k in range(n_inner): # 100000
                    optimizer_a.zero_grad()
                    Delta = error(a_i,Qt,qt) 
                    L = torch.norm(Delta, p=2)
                    L.backward()
                    optimizer_a.step()
                    
                    if k % 10000 == 0: print('Loss for a is %s at iteration %i' % (L, k))
                    if abs(L) < stop_loss:
                        print('It took %s iterations to achieve %s loss.' % (k, step_size))
                        break

                A[:,i] = np.array(a_i.tolist()).flatten() 

                print('Loss after: %s' % (torch.norm( error(a_i,Qt,qt) )))

            A = applyConstrains(A)
            Z = self.X @ A.T @ np.linalg.inv(A@A.T)


            # LOOP THROUGH ENTIRE B
            for i in range(self.K): #K
                r = self.X.T @ Z[:,i]
                rt = torch.tensor(r,requires_grad=False).float()

                if n == 0:
                    b_i = torch.autograd.Variable(torch.randn(self.N,1), requires_grad=True)
                    optimizer_b = optim.Adam([b_i], lr= lr, amsgrad = amsgrad)


                

                err = error(b_i, self.Rt, rt)
                # print('Loss before: %s' % (torch.norm( err, p=2)))

                # TRAINING LOOP
                for k in range(n_inner):
                    optimizer_b.zero_grad()
                    Delta = error(b_i, self.Rt,rt)
                    L = torch.norm(Delta, p=2)
                    L.backward()
                    optimizer_b.step()

                    # b_i.data -= step_size * b_i.grad.data # step
                    # b_i.grad.data.zero_()
                    if k % 10000 == 0: print('Loss for b is %s at iteration %i' % (L, k))
                    if abs(L) < stop_loss:
                        print('It took %s iterations to achieve %s loss.' % (k, step_size))
                        break

                B[:,i] = np.array(b_i.tolist()).flatten() 
                #print('Loss after: %s' % (torch.norm( error(b_i,Rt,rt) )))    

            # apply softmax here
            B = applyConstrains(B)
            Z = self.X @ B

            Zt = torch.tensor(Z, requires_grad=False).float()
            At = torch.tensor(A, requires_grad=False).float()
            Xt = torch.tensor(self.X,requires_grad=False).float()
            print("RSS at n=%s" % n, torch.norm(Xt-Zt@At, p='fro'))
            RSS_values.append( torch.norm(Xt-Zt@At, p='fro'))
        if time == True:
            end = timer()
            print("It took: {0} seconds to finish running".format(end - start))
            
        print("The best RSS value was", min(RSS_values))
        
        return RSS_values, Z, B, A

        

    
        
        

### Testing the class
##### Defining variables:

In [62]:
X = df[['SD1', 'PO1', 'UN1', 'AC1', 'SC1',
       'ST1', 'CO1', 'UN2', 'TR1', 'HD1', 'SD2','BE1','AC2', 'SC2', 'ST2',
       'CO2', 'PO2', 'BE2', 'UN3', 'TR2','HD2']].iloc[range(100),:]

X = X.to_numpy()
X = X.T
print(X.shape)
K = 5

lr = 0.01
stop_loss = 1e-05
armsgrad = True
time = True




(17, 1000)


In [66]:
AA_model = AA(X, K)
Z = AA_model.X[:,AA_model.furthestSum()]

RSS, Z, A, B = AA_model.applyAA(Z = Z, time = True, lr = 0.01, stop_loss=1e-05, amsgrad=True, n_outer=2, n_inner=1000)

Loss before: tensor(789.0945, grad_fn=<NormBackward1>)
Loss for a is tensor(789.0945, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.3415, grad_fn=<CopyBackwards>)
Loss before: tensor(416.0961, grad_fn=<NormBackward1>)
Loss for a is tensor(416.0961, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2.5127, grad_fn=<CopyBackwards>)
Loss before: tensor(694.3499, grad_fn=<NormBackward1>)
Loss for a is tensor(694.3499, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.6056, grad_fn=<CopyBackwards>)
Loss before: tensor(200.2817, grad_fn=<NormBackward1>)
Loss for a is tensor(200.2817, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.3930, grad_fn=<CopyBackwards>)
Loss before: tensor(148.3585, grad_fn=<NormBackward1>)
Loss for a is tensor(148.3585, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.8758, grad_fn=<CopyBackwards>)
Loss before: tensor(17.6309, grad_fn=<NormBackward1>)
Loss for a is tensor(17.6309, grad_fn=<NormBackward1>) a

Loss after: tensor(0.8954, grad_fn=<CopyBackwards>)
Loss before: tensor(106.7076, grad_fn=<NormBackward1>)
Loss for a is tensor(106.7076, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.5074, grad_fn=<CopyBackwards>)
Loss before: tensor(151.4555, grad_fn=<NormBackward1>)
Loss for a is tensor(151.4555, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.8972, grad_fn=<CopyBackwards>)
Loss before: tensor(81.0436, grad_fn=<NormBackward1>)
Loss for a is tensor(81.0436, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.0302, grad_fn=<CopyBackwards>)
Loss before: tensor(86.0173, grad_fn=<NormBackward1>)
Loss for a is tensor(86.0173, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.9207, grad_fn=<CopyBackwards>)
Loss before: tensor(128.8573, grad_fn=<NormBackward1>)
Loss for a is tensor(128.8573, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.8256, grad_fn=<CopyBackwards>)
Loss before: tensor(392.5201, grad_fn=<NormBackward1>)
Loss fo

KeyboardInterrupt: 