In [8]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from timeit import default_timer as timer
from scipy.special import softmax



In [9]:
df = pd.read_csv('ESS8_data.csv')

In [20]:
X = df[['SD1', 'PO1', 'UN1', 'AC1', 'SC1',
       'ST1', 'CO1', 'TR1', 'HD1', 'AC2', 'SC2', 'ST2',
       'CO2', 'PO2', 'BE2', 'TR2', 'HD2']].iloc[range(1000),:]
X = X.to_numpy().T
N, M = X.T.shape
K = 5

In [21]:
# applies both for eq. 7 and 8
def error(a_i,Qt,qt):
    return (0.5 * a_i.T @ Qt @ a_i) - (qt.T @ a_i)

## Defining the helper functions applyConstraints and furthestSum

applyConstrants ensures that the constraints of the problem are upheld i.e. $e_{ij} \geq 0$ and $1^T \textbf{e}_j = 1$


furthestSum is an effective way to initialize the values of $\textbf{b}_i$

In [22]:
def applyConstrains(M):
    return softmax(M, axis = 0)

In [23]:
def furthestSum(X):
    # Choose a random point for initialization
    idx = int(np.random.choice(range(0,N)))
    x_j = X[:,idx]

    j_news = list()
    j_news.append(idx)

    excluded = [idx]

    # Loop over the K archetypes
    for n in range(K):
        best_val = 0
        best_idx = 0
        # Loop over all unseen samples
        for i in range(N-len(excluded)):
            if i not in excluded:
                val = 0
                # sum over each element for each point
                for ele in j_news:
                    for j in range(M):
                        
                        val += np.abs(X[j,i] - X[j , ele])
                if val > best_val:
                    best_val = val
                    best_idx = i
                    
        # 
        j_news.append(int(best_idx))
        excluded.append(best_idx)
        # Remove the random initialization
        if n == 0:
            j_news.pop(0)
            excluded.pop(0)
        
    return j_news


#init_vals_b = X[init_idxs[i],:].astype(np.float64)
#init_vals_bt = torch.tensor(init_vals_b, requires_grad = False).float()

#init_vals_b.shape


## Initializing variables for AA

In [28]:
# Initializing Z through furthest sum
#Z = X[:,furthestSum(X)]
import random
Z = X[:,random.sample(range(1, M), K)]
print(Z.shape)
R = X.T @ X
Rt = torch.tensor(R,requires_grad=False).float()

RSS_values = list()
A = np.zeros((K,N))#.tolist()
B = np.zeros((N,K))#.tolist()

torch.manual_seed(42)


(17, 5)


<torch._C.Generator at 0x1eb8ce3a5d0>

In [None]:
start = timer()
torch.manual_seed(42)

RSS_values = list()
A = np.zeros((K,N))#.tolist()
B = np.zeros((N,K))#.tolist()

# LOOP UNTIL RSS IS LOW
for n in range(2): #N
    Q = Z.T @ Z
    Qt = torch.tensor(Q,requires_grad=False).float()

    # LOOP THROUGH ENTIRE A
    for i in range(N):
        q = Z.T @ X[:,i]
        qt = torch.tensor(q,requires_grad=False).float()
        
        if n == 0:
            a_i = torch.autograd.Variable(torch.rand(K, 1), requires_grad=True) # eller er det Kx1 ?
            optimizer_a = optim.SGD([a_i], lr=0.05)
        
        stop_loss = 1e-6
        step_size = 0.00001 # stop_loss / 3.0

        err = error(a_i,Qt,qt)
        print('Loss before: %s' % (torch.norm( err, p=2)))

        # TRAINING LOOP
        for k in range(100): # 100000
            optimizer_a.zero_grad()
            Delta = error(a_i,Qt,qt) 
            L = torch.norm(Delta, p=2)
            L.backward()
            optimizer_a.step()
            #a_i.data -= step_size * a_i.grad.data # step
            #a_i.grad.data.zero_()
            if k % 10000 == 0: print('Loss for a is %s at iteration %i' % (L, k))
            if abs(L) < stop_loss:
                print('It took %s iterations to achieve %s loss.' % (k, step_size))
                break
        
        A[:,i] = np.array(a_i.tolist()).flatten() 
        
        print('Loss after: %s' % (torch.norm( error(a_i,Qt,qt) )))
    
    A = applyConstrains(A)
    Z = X @ A.T @ np.linalg.inv(A@A.T)
    
    
    # LOOP THROUGH ENTIRE B
    for i in range(K): #K
        r = X.T @ Z[:,i]
        rt = torch.tensor(r,requires_grad=False).float()
        
        if n == 0:
            b_i = torch.autograd.Variable(torch.randn(N,1), requires_grad=True)
            optimizer_b = optim.SGD([b_i], lr=0.05)
        
        
        stop_loss = 1e-6
        step_size = 0.00001 # stop_loss / 3.0

        err = error(b_i,Rt,rt)
        #print('Loss before: %s' % (torch.norm( err, p=2)))

        # TRAINING LOOP
        for k in range(10000): # 100000
            optimizer_b.zero_grad()
            Delta = error(b_i,Rt,rt)
            L = torch.norm(Delta, p=2)
            L.backward()
            optimizer_b.step()
            
            # b_i.data -= step_size * b_i.grad.data # step
            # b_i.grad.data.zero_()
            #if k % 10000 == 0: print('Loss for b is %s at iteration %i' % (L, k))
            if abs(L) < stop_loss:
                print('It took %s iterations to achieve %s loss.' % (k, step_size))
                break

        B[:,i] = np.array(b_i.tolist()).flatten() 
        #print('Loss after: %s' % (torch.norm( error(b_i,Rt,rt) )))    
    
    # apply softmax here
    B = applyConstrains(B)
    Z = X @ B
    
    print(n)
    Zt = torch.tensor(Z, requires_grad=False).float()
    At = torch.tensor(A, requires_grad=False).float()
    Xt = torch.tensor(X,requires_grad=False).float()
    print("RSS at n=%s" % n, torch.norm(Xt-Zt@At,p='fro'))
    RSS_values.append( torch.norm(Xt-Zt@At,p='fro'))
    
end = timer()

print("It took: {0} seconds to finish running".format(end - start))
print("The best RSS value was", min(RSS_values))
print(RSS_values)

Loss before: tensor(294.8781, grad_fn=<NormBackward1>)
Loss for a is tensor(294.8781, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(0.4361, grad_fn=<NormBackward1>)
Loss for a is tensor(0.4361, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(311.6892, grad_fn=<NormBackward1>)
Loss for a is tensor(311.6892, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(204.3376, grad_fn=<NormBackward1>)
Loss for a is tensor(204.3376, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(13.3575, grad_fn=<NormBackward1>)
Loss for a is tensor(13.3575, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(51.6479, grad_fn=<NormBackward1>)
Loss for a is tensor(51.6479, grad_fn=<NormBackward1>) at iteration 0
Loss af

Loss before: tensor(108.7547, grad_fn=<NormBackward1>)
Loss for a is tensor(108.7547, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(111.4284, grad_fn=<NormBackward1>)
Loss for a is tensor(111.4284, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(139.6869, grad_fn=<NormBackward1>)
Loss for a is tensor(139.6869, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(88.2199, grad_fn=<NormBackward1>)
Loss for a is tensor(88.2199, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(406.7508, grad_fn=<NormBackward1>)
Loss for a is tensor(406.7508, grad_fn=<NormBackward1>) at iteration 0
Loss after: tensor(inf, grad_fn=<CopyBackwards>)
Loss before: tensor(19.7043, grad_fn=<NormBackward1>)
Loss for a is tensor(19.7043, grad_fn=<NormBackward1>) at iteration 0
Los

### Time at datasize 1000 x 17 for:
#### Adam:
Time in seconds: 351.60987780000005
RSS: 160.3371
#### SGD
Time in seconds: 
RSS:
#### Ordinal GD(?)
Time in seconds: 
RSS:

