In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import warnings
import random
warnings.filterwarnings('ignore')


data_df = pd.read_csv('../Data/data.csv')
labels_df = pd.read_csv('../Data/labels.csv')

data = np.array(data_df)[:,1:]#data is n x p matrix
labels = np.array(labels_df)

data = StandardScaler().fit_transform(data)

In [8]:
def penalty_1(x):
    '''
    regularization penalty function for scotlass
    
    function takes x (float)
    
    function returns penalty value (float)
    '''
    penalty = (0.5 * x) * (1 + np.tanh(1000*x))
    
    return penalty

def objective_1(sigma,v,reg_param):
    '''
    objective function for scotlass
    
    function takes
    - sigma: p x p covariance matrix
    - v: array of length p
    - reg_param: regularization parameter (positive float)
    
    function returns
    - objective function value at v
    '''
    v = np.array(v)
    obj = ((0.5*v @ sigma) @ v.T) - 1000 * penalty_1(v @ np.tanh((1000*v).T)-reg_param)
    
    return obj

def gradient_1(sigma,v,reg_param):
    '''
    gradient of objective function for scotlass
    
    function takes
    - sigma: p x p covariance matrix
    - v: array of length p
    - reg_param: regularization parameter (positive float)
    
    function returns
    - gradient of objective function at v
    '''
    v = np.array(v)
    y = (v @ np.tanh(1000*v))-reg_param
    z = np.tanh(1000*v)+(np.diag(np.cosh(y*v))@ (1000*v).T)
    grad = (sigma @ v.T) - 0.5*1000*(1+np.tanh((1000*y)+(np.cosh(1000*y)**(-2))*(1000*y)))*np.array(z)
    
    return grad

def scotlass_1(X,max_iter,reg_param):
    '''
    function takes
    - X: n x p scaled and centered dataset
    - max_iters: max number of steps (positive integer)
    - reg_param: regularization parameter (positive float); reg_param should be <= sqrt(p)
    
    function returns
    - v: first sparse principal direction (array of length p)
    - variance of data along v
    '''
    iters = 1
    delta = 0
    alpha = 1/iters # decreasing step size
    sigma = np.cov(X.T)
    v = ([1]*X.shape[1])/np.linalg.norm([1]*X.shape[1]) #initialize v with equal loadings
    while iters < max_iter:
        v_new = v + alpha*gradient_1(sigma,v,reg_param)
        v_proj = v_new/(np.linalg.norm(v_new)) # project loading vector back onto feasible set (vectors of l2 norm of 1)
        iters +=1
        updated_obj = objective_1(sigma,v_proj,reg_param)
        old_obj = objective_1(sigma,v,reg_param)
        delta = updated_obj - old_obj
        print('delta: '+str(delta))
        v = v_proj
        
    return v, v @ sigma @ v.T # return loadings array v and variance of data along v

In [9]:
v, v_EV = scotlass_1(data,20,10)

delta: -0.07352227531373501
delta: -0.09909310948569328
delta: -0.12030575214885175
delta: -0.11089282188913785
delta: 0.0013636403891723603
delta: 0.40174790724995546
delta: 1.5482796417200007
delta: 4.548873417137656
delta: 12.049193183338502
delta: 30.326767680468038
delta: 74.29606938816141
delta: 179.95247694835416
delta: 438.4892616754514
delta: 1112.2367537524551
delta: 3251.2838724718895
delta: 23692.161571119475
delta: 104372.04299168136
delta: -102317.6555582541
delta: 81119.09481600297
