In [24]:
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np 
from numpy.linalg import *
import pandas as pd

In [25]:
def initialize(d, k):
    """Return the starting weight matrix for the solver.

    Parameters
    ----------
    d : int
        Number of rows of the weight matrix.
    k : int
        Number of columns of the weight matrix.

    Returns
    -------
    numpy.ndarray
        A ``(d, k)`` array filled with zeros.
    """
    # The bound sqrt(6 / (d + k)) was only needed by a disabled
    # random-uniform initialiser. Evaluating it here was dead work and raised
    # ZeroDivisionError for d + k == 0, so it is no longer computed.
    return np.zeros((d, k))

In [26]:
def zscore(x):
    """Standardise ``x`` along axis 0.

    The mean and standard deviation are taken over axis 0 (per column for a
    2-D array). Standard deviations below 1e-4 are raised to 1e-4 so that
    constant columns do not produce a division by zero.

    Returns
    -------
    tuple
        ``(standardised_x, mu, sigma)`` where ``mu`` and ``sigma`` are the
        statistics that were applied, so they can be reused on other data.
    """
    col_mean = x.mean(axis=0)
    col_std = x.std(axis=0)
    # Floor the spread so the division below stays finite.
    col_std = np.maximum(col_std, 0.0001)
    return (x - col_mean) / col_std, col_mean, col_std

In [27]:
def normalize(x, mu, sigma):
    """Shift ``x`` by ``mu`` and scale it by ``sigma``.

    Computes ``(x - mu) / sigma`` with NumPy broadcasting; no flooring of
    ``sigma`` is performed here.
    """
    centred = x - mu
    return centred / sigma

In [28]:
def f(y, x, w):
    """Half of the residual Gram matrix for the linear model ``x . w``.

    With ``r = y - x.w`` this returns ``r^T r / 2``. For 2-D inputs with k
    target columns the result is a ``(k, k)`` array; with a single target
    column it is the 1x1 array holding half the sum of squared residuals.
    """
    residual = y - np.dot(x, w)
    return np.dot(residual.T, residual) / 2

In [29]:
def detlaf(y, x, w):
    """Gradient of the half squared-error loss with respect to ``w``.

    Returns ``x^T (x.w - y)``, which has the same shape as ``w`` for 2-D
    inputs.
    """
    fitted = np.dot(x, w)
    return np.dot(x.T, fitted - y)

In [30]:
def cost_function(y, x, w, lam):
    """Penalised objective: half squared residual plus an L1 term.

    Evaluates ``r^T r / 2 + lam * sum(|w|)`` with ``r = x.w - y``. The first
    term is a ``(k, k)`` array for 2-D inputs with k target columns, and the
    scalar penalty is broadcast onto it.
    """
    residual = np.dot(x, w) - y
    l1_penalty = lam * np.abs(w).sum()
    return np.dot(residual.T, residual) / 2 + l1_penalty

In [31]:
def PGD(f, y, x, lam=50, stepmax=300, eps=0.0001):
    """Fit an L1-penalised linear model by proximal gradient descent.

    Each outer iteration takes a gradient step on the smooth loss, applies
    soft-thresholding, and picks the step size 1/L by backtracking (L starts
    at 1 and is doubled until the quadratic upper bound holds).

    Parameters
    ----------
    f : callable
        ``f(y, x, w)`` returning the smooth part of the objective.
    y : array, 2-D
        Targets; ``y.shape[1]`` gives the number of output columns k.
    x : array, 2-D
        Design matrix. A column of ones is prepended internally.
    lam : float, optional
        Weight of the L1 penalty. The soft-threshold is applied to every row
        of ``w``, including the row that multiplies the column of ones.
    stepmax : int, optional
        Maximum number of outer iterations.
    eps : float, optional
        Stop once the objective changes by no more than this between two
        consecutive outer iterations.

    Returns
    -------
    numpy.ndarray
        Weights of shape ``(x.shape[1] + 1, k)``; row 0 belongs to the
        prepended column of ones.

    Notes
    -----
    With the ``f`` and ``cost_function`` defined in this notebook the loss
    values are k-by-k arrays, so the truth tests in both loops only reduce to
    a single boolean when there is one target column (k == 1).
    """
    # Prepend a column of ones so that the first row of w acts as intercept.
    x = np.column_stack((np.ones((x.shape[0], 1)), x))
    d = x.shape[1]
    k = y.shape[1]

    cost = 0
    # Infinite "previous" cost guarantees the first iteration always runs,
    # whatever tolerance the caller passes.
    oldcost = np.inf
    step = 0

    w = initialize(d, k)
    while np.abs(cost - oldcost) > eps and step < stepmax:
        w_old = w
        step += 1
        # Loss and gradient at the current iterate do not change while
        # backtracking, so evaluate them once per outer iteration.
        f_old = f(y, x, w_old)
        grad_old = detlaf(y, x, w_old)
        L = 1
        while True:
            # Gradient step followed by soft-thresholding with threshold lam/L.
            z = w_old - grad_old / L
            w = np.sign(z) * np.maximum(np.abs(z) - lam / L, 0)
            delta = w - w_old
            bound = f_old + np.dot(grad_old.T, delta) + L / 2 * np.dot(delta.T, delta)
            # Accept on equality as well: when the step leaves w unchanged
            # (or both sides agree to rounding) a strict "<" can never hold
            # and L would be doubled without end.
            if f(y, x, w) <= bound:
                break
            L = 2 * L
        oldcost = cost
        cost = cost_function(y, x, w, lam)
    return w

    

In [32]:
def LASSO_test(y, x, w):
    """Print error statistics of the linear model ``w`` on ``(x, y)``.

    A column of ones is prepended to ``x`` (matching what PGD does before
    fitting), predictions are formed as ``x . w``, and three figures over all
    entries of the error ``prediction - y`` are printed as a pandas Series:

    * ``MSE`` - mean squared error
    * ``MAE`` - mean absolute error
    * ``SD``  - standard deviation of the error (population form)

    Nothing is returned.
    """
    design = np.column_stack((np.ones((x.shape[0], 1)), x))
    err = np.dot(design, w) - y
    centred = err - err.mean()
    report = pd.Series(
        {
            'MSE': (err ** 2).mean(),
            'MAE': np.abs(err).mean(),
            'SD': np.sqrt((centred ** 2).mean()),
        }
    )
    print(report)

In [33]:
# Directory that holds the StellarSLOANDR7 .mat files (train and test).
data_path = "E://yuwoliang/three set/StellarSLOANDR7/"

# Read the training split; only column 1 of train_y is kept, as a 2-D slice.
train_mat = sio.loadmat(data_path + "StellarSLOANDR7Train.mat")
train_x, train_y = train_mat['train_x'], train_mat['train_y'][:, 1:2]
del train_mat

# Standardise the features and keep mu / sigma for later reuse.
train_x, mu, sigma = zscore(train_x)

In [34]:
# Fit the L1-penalised linear model with PGD's default lam and stepmax.
w = PGD(f, train_y, train_x)
# Drop the training arrays from the namespace (presumably to release memory).
del train_x, train_y

In [35]:
# Read the test split; only column 1 of test_y is kept, as a 2-D slice.
test_mat = sio.loadmat(data_path + "StellarSLOANDR7Test.mat")
test_x, test_y = test_mat['test_x'], test_mat['test_y'][:, 1:2]
del test_mat

# Scale the test features with the statistics computed on the training set,
# then print the error figures for the fitted weights.
test_x = normalize(test_x, mu, sigma)
LASSO_test(test_y, test_x, w)

MSE    0.263530
MAE    0.405574
SD     0.508825
dtype: float64
