In [34]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from timeit import default_timer as timer
from scipy.special import softmax



In [35]:
df = pd.read_csv('ESS8_data.csv')

In [36]:
X = df[['SD1', 'PO1', 'UN1', 'AC1', 'SC1',
       'ST1', 'CO1', 'UN2', 'TR1', 'HD1', 'SD2','BE1','AC2', 'SC2', 'ST2',
       'CO2', 'PO2', 'BE2', 'UN3', 'TR2','HD2']].iloc[range(100),:]
X = X.to_numpy().T
N, M = X.T.shape
K = 5

In [37]:
# applies both for eq. 7 and 8
def error(a_i,Qt,qt):
    return (0.5 * a_i.T @ Qt @ a_i) - (qt.T @ a_i)

## Defining the helper functions applyConstraints and furthestSum

applyConstrants ensures that the constraints of the problem are upheld i.e. $e_{ij} \geq 0$ and $1^T \textbf{e}_j = 1$


furthestSum is an effective way to initialize the values of $\textbf{b}_i$

In [38]:
def applyConstrainsArray(M):
    return softmax(M, axis = 0)



def applyConstrainsTensor(M):
    return #..

In [39]:
def furthestSum(X):
    # Choose a random point for initialization
    idx = int(np.random.choice(range(0,N)))
    x_j = X[:,idx]

    j_news = list()
    j_news.append(idx)

    excluded = [idx]

    # Loop over the K archetypes
    for n in range(K):
        best_val = 0
        best_idx = 0
        # Loop over all unseen samples
        for i in range(N-len(excluded)):
            if i not in excluded:
                val = 0
                # sum over each element for each point
                for ele in j_news:
                    for j in range(M):
                        
                        val += np.abs(X[j,i] - X[j , ele])
                if val > best_val:
                    best_val = val
                    best_idx = i
                    
        # 
        j_news.append(int(best_idx))
        excluded.append(best_idx)
        # Remove the random initialization
        if n == 0:
            j_news.pop(0)
            excluded.pop(0)
        
    return j_news


#init_vals_b = X[init_idxs[i],:].astype(np.float64)
#init_vals_bt = torch.tensor(init_vals_b, requires_grad = False).float()

#init_vals_b.shape


## Initializing variables for AA

In [40]:
# Initializing Z through furthest sum
#Z = X[:,furthestSum(X)]
import random
Z = X[:,random.sample(range(1, M), K)]
print(Z.shape)
R = X.T @ X
Rt = torch.tensor(X.T @ X,requires_grad=False).float()

RSS_values = list()
A = np.zeros((K,N))#.tolist()
B = np.zeros((N,K))#.tolist()

torch.manual_seed(42)


(17, 5)


<torch._C.Generator at 0x1eb8ce3a5d0>

In [67]:
start = timer()
torch.manual_seed(42)

RSS_values = list()
A = np.zeros((K,N))#.tolist()
B = np.zeros((N,K))#.tolist()

# LOOP UNTIL RSS IS LOW
for n in range(2): #N
    Q = Z.T @ Z
    Qt = torch.tensor(Q,requires_grad=False).float()

    # LOOP THROUGH ENTIRE A
    for i in range(N):
        q = Z.T @ X[:,i]
        qt = torch.tensor(q,requires_grad=False).float()
        
        if n == 0:
            a_i = torch.autograd.Variable(torch.rand(K, 1), requires_grad=True) # eller er det Kx1 ?
            optimizer_a = optim.SGD([a_i], lr=0.01)
        
        stop_loss = 1e-5
        step_size = 0.001 # stop_loss / 3.0

        err = error(a_i,Qt,qt) 
        #(0.5 * a_i.T @ Qt @ a_i) - (qt.T @ a_i)
        print('Loss before: %s' % (torch.norm( err, p=2)))

        # TRAINING LOOP
        for k in range(100): # 100000
            optimizer_a.zero_grad()
            #Delta = error(a_i,Qt,qt) 
            L = error(a_i,Qt,qt)      # error needs norm(X_i^2)
            L.backward()
            optimizer_a.step()
            #a_i.data -= step_size * a_i.grad.data # step
            #a_i.grad.data.zero_()
            
            #### --> Look at gradient instead - draw it from L.backward()
            
            #if k % 10000 == 0: print('Loss for a is %s at iteration %i' % (L, k))
            #if abs(L) < stop_loss:
                #print('It took %s iterations to achieve %s loss.' % (k, step_size))
                #break
        
        A[:,i] = np.array(a_i.tolist()).flatten() 
        
        print('Loss after: %s' % (torch.norm( error(a_i,Qt,qt) )))
    
    A = applyConstrains(A)
    Z = X @ A.T @ np.linalg.inv(A@A.T)
    
    
    # LOOP THROUGH ENTIRE B
    for i in range(K): #K
        r = X.T @ Z[:,i]
        rt = torch.tensor(r,requires_grad=False).float()
        
        if n == 0:
            b_i = torch.autograd.Variable(torch.randn(N,1), requires_grad=True)
            optimizer_b = optim.Adam([b_i], lr=0.01)
        
        
        stop_loss = 1e-5
        step_size = 0.001 # stop_loss / 3.0

        err = error(b_i,Rt,rt)
        #print('Loss before: %s' % (torch.norm( err, p=2)))

        # TRAINING LOOP
        for k in range(10000): # 100000
            optimizer_b.zero_grad()
            Delta = error(b_i,Rt,rt)
            L = torch.norm(Delta, p=2)
            L.backward()
            optimizer_b.step()
            
            # b_i.data -= step_size * b_i.grad.data # step
            # b_i.grad.data.zero_()
            if k % 10000 == 0: print('Loss for b is %s at iteration %i' % (L, k))
            if abs(L) < stop_loss:
                print('It took %s iterations to achieve %s loss.' % (k, step_size))
                break

        B[:,i] = np.array(b_i.tolist()).flatten() 
        #print('Loss after: %s' % (torch.norm( error(b_i,Rt,rt) )))    
    
    # apply softmax here
    B = applyConstrains(B)
    Z = X @ B
    
    print(n)
    Zt = torch.tensor(Z, requires_grad=False).float()
    At = torch.tensor(A, requires_grad=False).float()
    Xt = torch.tensor(X,requires_grad=False).float()
    print("RSS at n=%s" % n, torch.norm(Xt-Zt@At,p='fro')**2)  #does 'fro' take squared? 
    RSS_values.append( torch.norm(Xt-Zt@At,p='fro')**2)
    
end = timer()

print("It took: {0} seconds to finish running".format(end - start))
print("The best RSS value was", min(RSS_values))
print(RSS_values)

Loss before: tensor(789.0945, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(416.0961, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(694.3499, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(200.2817, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(148.3585, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(17.6309, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(538.1461, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(580.1072, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(684.3636, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(274.5775, grad_fn=<NormBackward1>)
Loss after

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(10.6570, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(490.1133, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(35.8851, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(158.9080, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(70.1424, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(425.3495, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(299.9368, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(402.7873, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(243.0638, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tenso

Loss before: tensor(291.9218, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(284.3823, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(378.9233, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(196.2063, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(402.2027, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(110.5634, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(138.6390, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(13.1124, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(210.0560, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(373.4656, grad_fn=<NormBackward1>)
Loss after

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(286.9678, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(1173.0708, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(894.7227, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(0.8871, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(290.7559, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(483.9376, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(109.9842, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(253.1187, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(90.0246, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tens

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(774.4136, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(42.9957, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(301.8317, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(205.8557, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(201.2971, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(221.7495, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(148.3817, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(249.5917, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(36.2250, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tens

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(102.2608, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(36.1099, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(48.7669, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(95.2936, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(51.9862, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(305.0663, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(295.4261, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(80.1008, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(106.8136, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(175.7291, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(484.1749, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(212.4673, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(1016.7271, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(0.9455, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(205.2320, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(483.9873, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(370.9055, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(1004.2947, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: te

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(11.0999, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(405.4825, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(608.8716, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(362.9965, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(55.8563, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(756.8684, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(313.3364, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(85.2434, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(154.0103, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tenso

Loss before: tensor(410.6060, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(429.5453, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(122.5778, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(86.2906, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(459.4369, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(327.7678, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(98.9881, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(448.7747, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(332.4149, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(280.5437, grad_fn=<NormBackward1>)
Loss after:

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(41.5622, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(50.9622, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(411.2273, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(549.8679, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(164.0605, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(991.1404, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(308.6964, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(135.1363, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(116.5377, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tens

Loss before: tensor(48.1219, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(622.2551, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(703.8230, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(371.9572, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(180.0228, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(349.3275, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(723.1590, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(153.2195, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(369.2832, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(58.6179, grad_fn=<NormBackward1>)
Loss after:

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(80.4441, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(494.2890, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(92.7306, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(130.2069, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(48.5208, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(418.6490, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(12.9195, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(636.8829, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(38.3815, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(

Loss before: tensor(546.9777, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(190.1937, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(519.9162, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(519.1151, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(322.1840, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(51.1734, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(487.5696, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(9.8103, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(81.1957, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(233.9048, grad_fn=<NormBackward1>)
Loss after: t

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss before: tensor(nan, grad_fn=<NormBackward1>)
Loss after: tensor(nan, grad_fn=<CopyBackwards>)
Loss befor

## Time at datasize 1000 x 17 for:
### Adam
$\textbf{Time}$ in seconds: 351.60987780000005 

$\textbf{RSS:}$ 160.3371 

$\textbf{Step size:}$ 0.00001

$\textbf{Stop loss:}$ $1\cdot 10^{-6}$

$\textbf{Learning rate:}$ 0.05

____________________________________________
$\textbf{Time}$ in seconds: 288.2289836 

$\textbf{RSS:}$ 160.4064

$\textbf{Step size:}$ 0.001

$\textbf{Stop loss:}$ $1\cdot 10^{-5}$

$\textbf{Learning rate:}$ 0.01

_____________________________________________

### Ordinal GD(?)
$\textbf{Time}$ in seconds: 236.4697258000001

$\textbf{RSS:}$ 160.3474

$\textbf{Step size:}$ 0.00001

$\textbf{Stop loss:}$ $1\cdot 10^{-6}$
______________________________________________

$\textbf{Time}$ in seconds: 220.46896449999986

$\textbf{RSS:}$ 160.3430

$\textbf{Step size:}$ 0.001

$\textbf{Stop loss:}$ $1\cdot 10^{-5}$

____________________________________________
### SGD
$\textbf{Time}$ in seconds: Never converged

$\textbf{RSS:}$ never converged $\rightarrow$ loss after for $\textbf{a}_i$ was consistently inf


In [61]:
class AA:
    """
    Class for applying conventional archetypal analysis.
    
    Input: 
    X = M x N matrix of data (features x samples)
    
    K = number of archetypes to be computed
    """
    
    def __init__(self, X, K):
        
        self.M, self.N = X.shape
        self.X = X
        self.Rt = torch.tensor(X.T @ X,requires_grad=False).float()
        self.K = K
    
    
    def applyConstraints(self, A):
        return softmax(A, axis = 0)
    
    def error(self, a_i, Qt, qt):
        return (0.5 * a_i.T @ Qt @ a_i) - (qt.T @ a_i)
        
    
    def furthestSum(self):
        # Choose a random point for initialization
        idx = int(np.random.choice(range(0,N)))
        x_j = self.X[:,idx]

        j_news = list()
        j_news.append(idx)

        excluded = [idx]

        # Loop over the K archetypes
        for n in range(self.K):
            best_val = 0
            best_idx = 0
            # Loop over all unseen samples
            for i in range(N-len(excluded)):
                if i not in excluded:
                    val = 0
                    # sum over each element for each point
                    for ele in j_news:
                        for j in range(M):

                            val += np.abs(self.X[j,i] - self.X[j , ele])
                    if val > best_val:
                        best_val = val
                        best_idx = i

            j_news.append(int(best_idx))
            excluded.append(best_idx)
            # Remove the random initialization
            if n == 0:
                j_news.pop(0)
                excluded.pop(0)
        return j_news

    """
    Function for applying conventional archetypal analysis:
    -----------------------------------------------------------------------
        Inputs: 
            self
            Z: Matrix that will contain the archetypes, initialize the values using furthestSum
            time: Boolean, if True the time of convergence will be measured
            lr: Learning rate for the optimizer
            stop_loss: stop loss value for the inner loop
            amsgrad: Boolean, if True uses the "amsgrad" method for Adam optimizer
            n_outer: Number of iterations to run the outermost loop
            n_inner: Number of iterations to run the inner loop
    """
    def applyAA(self, Z, time = True, lr = 0.01, stop_loss=1e-05, amsgrad=True, n_outer=2, n_inner=1000):
        torch.manual_seed(42)
        
        if time == True:
            start = timer()

        RSS_values = list()
        A = np.zeros((self.K, self.N))#.tolist()
        B = np.zeros((self.N, self.K))#.tolist()

        # LOOP UNTIL RSS IS LOW
        for n in range(n_outer):
            Q = Z.T @ Z
            Qt = torch.tensor(Q,requires_grad=False).float()

            # LOOP THROUGH ENTIRE A
            for i in range(self.N):
                q = Z.T @ self.X[:,i]
                qt = torch.tensor(q,requires_grad=False).float()

                if n == 0:
                    a_i = torch.autograd.Variable(torch.rand(self.K, 1), requires_grad=True) # eller er det Kx1 ?
                    optimizer_a = optim.Adam([a_i], lr=0.01, amsgrad = amsgrad)

                

                err = error(a_i,Qt,qt)
                print('Loss before: %s' % (torch.norm( err, p=2)))

                # TRAINING LOOP
                for k in range(n_inner): # 100000
                    optimizer_a.zero_grad()
                    Delta = error(a_i,Qt,qt) 
                    L = torch.norm(Delta, p=2)
                    L.backward()
                    optimizer_a.step()
                    
                    if k % 10000 == 0: print('Loss for a is %s at iteration %i' % (L, k))
                    if abs(L) < stop_loss:
                        print('It took %s iterations to achieve %s loss.' % (k, step_size))
                        break

                A[:,i] = np.array(a_i.tolist()).flatten() 

                print('Loss after: %s' % (torch.norm( error(a_i,Qt,qt) )))

            A = applyConstrains(A)
            Z = self.X @ A.T @ np.linalg.inv(A@A.T)


            # LOOP THROUGH ENTIRE B
            for i in range(self.K): #K
                r = self.X.T @ Z[:,i]
                rt = torch.tensor(r,requires_grad=False).float()

                if n == 0:
                    b_i = torch.autograd.Variable(torch.randn(self.N,1), requires_grad=True)
                    optimizer_b = optim.Adam([b_i], lr= lr, amsgrad = amsgrad)


                

                err = error(b_i, self.Rt, rt)
                # print('Loss before: %s' % (torch.norm( err, p=2)))

                # TRAINING LOOP
                for k in range(n_inner):
                    optimizer_b.zero_grad()
                    Delta = error(b_i, self.Rt,rt)
                    L = torch.norm(Delta, p=2)
                    L.backward()
                    optimizer_b.step()

                    # b_i.data -= step_size * b_i.grad.data # step
                    # b_i.grad.data.zero_()
                    if k % 10000 == 0: print('Loss for b is %s at iteration %i' % (L, k))
                    if abs(L) < stop_loss:
                        print('It took %s iterations to achieve %s loss.' % (k, step_size))
                        break

                B[:,i] = np.array(b_i.tolist()).flatten() 
                #print('Loss after: %s' % (torch.norm( error(b_i,Rt,rt) )))    

            # apply softmax here
            B = applyConstrains(B)
            Z = self.X @ B

            Zt = torch.tensor(Z, requires_grad=False).float()
            At = torch.tensor(A, requires_grad=False).float()
            Xt = torch.tensor(self.X,requires_grad=False).float()
            print("RSS at n=%s" % n, torch.norm(Xt-Zt@At, p='fro'))
            RSS_values.append( torch.norm(Xt-Zt@At, p='fro'))
        if time == True:
            end = timer()
            print("It took: {0} seconds to finish running".format(end - start))
            
        print("The best RSS value was", min(RSS_values))
        
        return RSS_values, Z, B, A

        

    
        
        

### Testing the class
##### Defining variables:

In [62]:
X = df[['SD1', 'PO1', 'UN1', 'AC1', 'SC1',
       'ST1', 'CO1', 'UN2', 'TR1', 'HD1', 'SD2','BE1','AC2', 'SC2', 'ST2',
       'CO2', 'PO2', 'BE2', 'UN3', 'TR2','HD2']].iloc[range(100),:]

X = X.to_numpy()
X = X.T
print(X.shape)
K = 5

lr = 0.01
stop_loss = 1e-05
armsgrad = True
time = True




(17, 1000)


In [66]:
AA_model = AA(X, K)
Z = AA_model.X[:,AA_model.furthestSum()]

RSS, Z, A, B = AA_model.applyAA(Z = Z, time = True, lr = 0.01, stop_loss=1e-05, amsgrad=True, n_outer=2, n_inner=1000)

Loss before: tensor(789.0945, grad_fn=<NormBackward1>)
Loss for a is tensor(789.0945, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.3415, grad_fn=<CopyBackwards>)
Loss before: tensor(416.0961, grad_fn=<NormBackward1>)
Loss for a is tensor(416.0961, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(2.5127, grad_fn=<CopyBackwards>)
Loss before: tensor(694.3499, grad_fn=<NormBackward1>)
Loss for a is tensor(694.3499, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.6056, grad_fn=<CopyBackwards>)
Loss before: tensor(200.2817, grad_fn=<NormBackward1>)
Loss for a is tensor(200.2817, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.3930, grad_fn=<CopyBackwards>)
Loss before: tensor(148.3585, grad_fn=<NormBackward1>)
Loss for a is tensor(148.3585, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.8758, grad_fn=<CopyBackwards>)
Loss before: tensor(17.6309, grad_fn=<NormBackward1>)
Loss for a is tensor(17.6309, grad_fn=<NormBackward1>) a

Loss after: tensor(0.8954, grad_fn=<CopyBackwards>)
Loss before: tensor(106.7076, grad_fn=<NormBackward1>)
Loss for a is tensor(106.7076, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.5074, grad_fn=<CopyBackwards>)
Loss before: tensor(151.4555, grad_fn=<NormBackward1>)
Loss for a is tensor(151.4555, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.8972, grad_fn=<CopyBackwards>)
Loss before: tensor(81.0436, grad_fn=<NormBackward1>)
Loss for a is tensor(81.0436, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.0302, grad_fn=<CopyBackwards>)
Loss before: tensor(86.0173, grad_fn=<NormBackward1>)
Loss for a is tensor(86.0173, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(1.9207, grad_fn=<CopyBackwards>)
Loss before: tensor(128.8573, grad_fn=<NormBackward1>)
Loss for a is tensor(128.8573, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(0.8256, grad_fn=<CopyBackwards>)
Loss before: tensor(392.5201, grad_fn=<NormBackward1>)
Loss fo

KeyboardInterrupt: 