In [182]:
import numpy as np
import pandas as pd 
import random 
from scipy.special import softmax

import torch
import torch.nn as nn
import torch.optim as optim

from timeit import default_timer as timer


In [183]:
df = pd.read_csv('ESS8_data.csv')

In [184]:
X = df[['SD1', 'PO1', 'UN1', 'AC1', 'SC1',
       'ST1', 'CO1', 'UN2', 'TR1', 'HD1', 'SD2','BE1','AC2', 'SC2', 'ST2',
       'CO2', 'PO2', 'BE2', 'UN3', 'TR2','HD2']].iloc[range(100),:]
X = X.to_numpy().T

# Shapes of data:
N, M = X.T.shape

# Number of archetypes
K = 5

# Number of iterations
n_iter = 10000

In [185]:
# Combined RSS loss function, input are tensors
def error(X,B,A):
    return torch.norm(X - X@B@A, p='fro')**2


def applyConstraints(A):
    m = nn.Softmax(dim=0)
    return m(A)

## Convetional AA

#### Dimensions:

$\textbf{X}$ = $\textbf{M}$ x $\textbf{N}$

$\textbf{B}$ = $\textbf{N}$ x $\textbf{K}$

$\textbf{A}$ = $\textbf{K}$ x $\textbf{N}$


hvor: 

$\textbf{Z}$ = $\textbf{X} \textbf{B}$


In [186]:
# Z = X[:,random.sample(range(1, M), K)]
#print(Z.shape)


Xt = torch.tensor(X,requires_grad=False).float()
A = torch.autograd.Variable(torch.rand(K, N), requires_grad=True)
B = torch.autograd.Variable(torch.rand(N, K), requires_grad=True)


    
optimizer = optim.Adam([A, B], amsgrad = True) #, lr = 0.01)


for i in range(n_iter):
    optimizer.zero_grad()
    L = error(Xt, applyConstraints(B), applyConstraints(A))
    L.backward()
    optimizer.step()
    

A = applyConstraints(A)
B = applyConstraints(B)

In [187]:
print(sum(B))
print(sum(A))


print(max(B.flatten()))
#print(np.mean(np.array(B.flatten())))
print(min(B.flatten()))

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000], grad_fn=<AddBackward0>)
tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
        1.0000], grad_fn=<AddBackward0>)
tensor

In [193]:
class AA:
    """
    Class for applying conventional archetypal analysis.
    
    Input: 
    X = M x N array of data (features x samples)'
    """
    A = None
    B = None
    RSS = None
    done = False
    
    def __init__(self, X):
        self.M, self.N = X.shape
        self.X = X
    
    
    
    def _error(self, X,B,A):   
        return torch.norm(X - X@B@A, p='fro')**2

    def _applyConstraints(A):    
        m = nn.Softmax(dim=0)
        return m(A)
    
    """
    Function for applying Archetypal Analysis
    -----------------------------------------
        Input:
                K: number of archetypes
                n_iter: Number of iterations
                lr: Learning rate
                time: Boolean, if true the time until convergence is tracked.
                
    """
    def computeArchetypes(self, K, n_iter, lr = None, time = True):
        if time == True:
            start = timer()
        
        Xt = torch.tensor(self.X,requires_grad=False).float()
        A = torch.autograd.Variable(torch.rand(K, self.N), requires_grad=True)
        B = torch.autograd.Variable(torch.rand(self.N, K), requires_grad=True)
        
        optimizer = optim.Adam([A, B], amsgrad = True) #, lr = 0.01)

        for i in range(n_iter):
            optimizer.zero_grad()
            L = error(Xt, applyConstraints(B), applyConstraints(A))
            L.backward()
            optimizer.step()
            
            
        A = applyConstraints(A)
        B = applyConstraints(B)
        
        if time == True:
            end = timer()
            print("It took ", end-start, " seconds to converge :)")
        
        # Compute the final error
        RSS = error(Xt, B, A)
        print("The final RSS was: ", RSS)
        
        self.A = A
        self.B = B
        self.RSS = RSS
        self.Z = Xt @ B
        
        done = True
        print("Sucessfully computed the archetypes")
        
        
    def getA(self):
        return self.A

    def getB(self):
        return self.B
    
    def getZ(self):
        return self.Z
    
    def getRSS(self):
        return self.RSS
     

# Ordinal AA

#### See the article: "Gaussian Processes for Ordinal Regression"

##### Gaussian kernel, prior and likelihood function


In [326]:
from scipy.spatial.distance import cdist

# Mercer kernel function
# Input: the hyperparameter k and two data matrices of dimensions M x N
def gaussianKernel(k, x, y):
    sqdist = cdist(x.T, y.T, 'sqeuclidean')
    return np.exp(-(k/2)*sqdist)


Sigma = gaussianKernel(2, X, X)



def prior(f, Sigma):
    Zf = (2*np.pi)**(len(Sigma)/2) * (np.linalg.norm(Sigma)**(1/2))
    return 1/Zf * np.exp(-1/2 * f.T @ Sigma @ f)

test = prior(X.T, Sigma)
print(max(test.flatten()))
# Very small values... this is due to Zf being VERY large.
## what is exactly meant by |Sigma| in the .pdf? --> assuming norm...

1.9026754320582246e-133


In [325]:
print(np.mean(Sigma.flatten()))

print(np.percentile(Sigma.flatten(), 99))

0.010001899486276224
0.012453964655117434


In [328]:
mu = np.mean(X)
mu

2.67