In [None]:
# Import libraries and load the train/test splits of the dataset
from numpy import *
from sklearn import preprocessing
#from sklearn.model_selection import train_test_split
from sklearn.datasets import load_svmlight_file
import matplotlib.pyplot as plt
%matplotlib inline

# Load the svmlight-format train/test files; both are forced to 123 feature
# columns so the two feature matrices have matching widths.
train_x, train_y = load_svmlight_file("a9a.txt", n_features=123)
test_x, test_y = load_svmlight_file("a9atest.t", n_features=123)

# Turn the label vectors into column vectors of shape (num_samples, 1).
# The reshaped labels must be assigned back to the *label* variables:
# assigning them to train_x / test_x would replace the features with labels.
train_y = train_y.reshape(-1, 1)
test_y = test_y.reshape(-1, 1)

# Re-encode the negative class from -1 to 0 so labels are in {0, 1}.
train_y[train_y == -1] = 0
test_y[test_y == -1] = 0

# Sanity checks: one label per sample, and the requested feature width.
assert train_x.shape[0] == train_y.shape[0]
assert test_x.shape[0] == test_y.shape[0]
assert train_x.shape[1] == test_x.shape[1] == 123

In [None]:
# Initialize parameters of model
import numpy as np
# m = number of rows of train_x, n = number of columns of train_x.
m, n = train_x.shape
# Initial weights: an (n, 1) column vector filled with ones.
w = np.ones(shape=(n, 1))
# Step size used by the SGD and NAG update rules.
r = 0.05
# Number of update iterations each optimiser loop performs.
maxIters = 40

In [None]:
# Loss function
def cal_loss(x,y,w):
    """Mean binary cross-entropy of a logistic model with weights ``w``.

    Parameters
    ----------
    x : matrix of shape (num_samples, num_features); anything that supports
        ``x @ w`` and ``x.shape`` (dense ndarray or scipy sparse matrix).
    y : labels in {0, 1}, either shape (num_samples,) or (num_samples, 1).
    w : weight column vector of shape (num_features, 1).

    Returns
    -------
    The summed per-sample loss divided by ``x.shape[0]``.
    """
    # Flatten scores and labels to 1-D so a 1-D ``y`` can never broadcast
    # against an (m, 1) score column into an (m, m) matrix.
    z = np.asarray(x @ w).reshape(-1)
    labels = np.asarray(y).reshape(-1)
    # With z = x @ w:
    #   -(y*log(s(z)) + (1-y)*log(1-s(z))) == log(1 + exp(z)) - y*z
    # logaddexp(0, z) evaluates log(1 + exp(z)) without overflow, so a
    # saturated sigmoid no longer produces log(0) -> inf/nan.
    per_sample = np.logaddexp(0, z) - labels * z
    return per_sample.sum() / x.shape[0]
def sigmoid(a):
    """Logistic function 1 / (1 + exp(-a)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-a))) with ``np.logaddexp`` so that large
    negative inputs do not overflow inside ``exp`` (the naive form emits a
    RuntimeWarning there). Accepts scalars and array-likes; the result has
    the same shape as the input.
    """
    return np.exp(-np.logaddexp(0, np.negative(a)))
def cal_accur(x,y,w):
    """Fraction of samples whose thresholded prediction equals the label.

    Parameters
    ----------
    x : matrix of shape (num_samples, num_features); anything that supports
        ``x @ w`` and ``x.shape`` (dense ndarray or scipy sparse matrix).
    y : labels in {0, 1}, either shape (num_samples,) or (num_samples, 1).
    w : weight column vector of shape (num_features, 1).

    Returns
    -------
    Number of correct predictions divided by ``x.shape[0]``.
    """
    z = np.asarray(x @ w).reshape(-1)
    # sigmoid(z) > 0.5 exactly when z > 0, so threshold the raw score.
    predicted = z > 0
    # Flatten the labels so the comparison is element-wise even for 1-D y;
    # comparing an (m, 1) column with an (m,) vector would yield (m, m).
    actual = np.asarray(y).reshape(-1)
    return np.count_nonzero(predicted == actual) / x.shape[0]

In [None]:
def cal_gradient_sgd(k):
    """Logistic-loss gradient at weights ``k`` from one random training row.

    Reads the module-level ``train_x``, ``train_y`` and ``m``. Returns the
    transposed sample row times (sigmoid(row * k) - label).
    """
    # `random` is whatever `from numpy import *` exposes at the top of the
    # notebook - presumably numpy.random, whose randint excludes the upper
    # bound, so the index stays below m. TODO confirm.
    idx = random.randint(0, m)
    sample = train_x[idx]
    residual = sigmoid(sample * k) - train_y[idx]
    return (sample.T * residual)

# Per-iteration metric histories filled in by SGD().
train_loss = []
evaluation_loss = []
train_accr = []
evaluation_accr = []

def SGD(w):
    """Run ``maxIters`` plain stochastic-gradient steps on ``w``.

    ``w`` is updated in place (the caller's array changes). After every step
    the loss and accuracy on the train and test sets are appended to the
    module-level history lists. Nothing is returned.
    """
    for _ in range(maxIters):
        step = r * cal_gradient_sgd(w)
        w -= step
        # Same recording order as the lists are declared above.
        for history, metric, features, labels in (
            (train_loss, cal_loss, train_x, train_y),
            (evaluation_loss, cal_loss, test_x, test_y),
            (train_accr, cal_accur, train_x, train_y),
            (evaluation_accr, cal_accur, test_x, test_y),
        ):
            history.append(metric(features, labels, w))
SGD(w)

# Draw graphs
# Loss and accuracy are different quantities, so each gets its own axes.
# Drawing all four curves on one implicit figure would leave only the last
# xlabel/ylabel ("Accuracy") visible and mislabel the loss curves.
fig, (loss_ax, accr_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(train_loss, label="train_loss")
loss_ax.plot(evaluation_loss, label="evaluation_loss")
loss_ax.set_xlabel("Iterations")
loss_ax.set_ylabel("Loss value")
loss_ax.legend(loc='upper right')

accr_ax.plot(train_accr, label="train_accuracy")
accr_ax.plot(evaluation_accr, label="evaluation_accuracy")
accr_ax.set_xlabel("Iterations")
accr_ax.set_ylabel("Accuracy")
accr_ax.legend(loc="lower right")

fig.tight_layout()
plt.show()


In [None]:
# NAG 
# Per-iteration metric histories filled in by NAG().
train_loss_nag = []
evaluation_loss_nag = []
train_accr_nag = []
evaluation_accr_nag = []
# Fresh all-ones starting weights for this optimiser.
w = np.ones((n, 1))

def NAG(w):
    """Run ``maxIters`` Nesterov-accelerated gradient steps on ``w``.

    The gradient is taken at the look-ahead point ``w - gamma * velocity``.
    ``w`` is updated in place, metrics are appended to the module-level
    ``*_nag`` lists after every step, and nothing is returned.
    """
    gamma = 0.9
    velocity = 0
    for _ in range(maxIters):
        lookahead = w - gamma * velocity
        velocity = gamma * velocity + r * cal_gradient_sgd(lookahead)
        w -= velocity
        for history, metric, features, labels in (
            (train_loss_nag, cal_loss, train_x, train_y),
            (evaluation_loss_nag, cal_loss, test_x, test_y),
            (train_accr_nag, cal_accur, train_x, train_y),
            (evaluation_accr_nag, cal_accur, test_x, test_y),
        ):
            history.append(metric(features, labels, w))
        
NAG(w)

# Disabled NAG-only plots. Kept as `#` comments rather than a bare
# triple-quoted string: a string literal is an expression, and as the last
# expression of a cell Jupyter would echo it as the cell's output.
#
# plt.xlabel("Iterations")
# plt.ylabel("Loss")
# plt.plot(train_loss_nag, label="train_loss")
# plt.plot(evaluation_loss_nag,label="evaluation_loss" )
# plt.legend(loc ='upper right')
#
# plt.xlabel("Iterations")
# plt.ylabel("Accuracy")
# plt.plot(train_accr_nag, label="train_accuracy")
# plt.plot(evaluation_accr_nag, label="evaluation_accuracy")
# plt.legend(loc="lower right")

In [None]:
# Per-iteration metric histories filled in by RMSProp().
train_loss_RMSProp,evaluation_loss_RMSProp,train_accr_RMSProp,evaluation_accr_RMSProp = [],[],[],[]
# Fresh all-ones starting weights for this optimiser.
w = np.ones((n, 1))

def RMSProp(w):
    """Run ``maxIters`` RMSProp-style updates starting from ``w``.

    Each iteration draws one stochastic gradient with ``cal_gradient_sgd``,
    updates a running average of the squared gradient and divides the step
    by its square root. ``w`` is rebound locally (the caller's array is not
    modified) and nothing is returned; metrics are appended to the
    module-level ``*_RMSProp`` lists after every step.
    """
    gamma = 0.9            # decay rate of the running average
    Egt = 0                # running average of the summed squared gradient
    e=0.00000001           # keeps the denominator away from zero
    learning_rate = 0.3

    for i in range(0, maxIters):
        # The gradient helper in this notebook is `cal_gradient_sgd`; the
        # name `cal_grad_sgd` is not defined anywhere and raised NameError.
        # The former `w - gamma*vt` argument equalled `w`, since `vt` was
        # initialised to 0 and never updated.
        grad = cal_gradient_sgd(w)
        # NOTE: .sum() makes Egt a single scalar shared by all weights,
        # not a per-weight average.
        Egt = gamma * Egt + ((1-gamma)*(grad**2)).sum()
        # np.sqrt replaces math.sqrt: `math` is never imported here.
        w = w - learning_rate*grad/np.sqrt(Egt + e)

        train_loss_RMSProp.append(cal_loss(train_x,train_y,w))
        evaluation_loss_RMSProp.append(cal_loss(test_x,test_y,w))
        train_accr_RMSProp.append(cal_accur(train_x,train_y,w))
        evaluation_accr_RMSProp.append(cal_accur(test_x,test_y,w))
        
RMSProp(w)

# Disabled RMSProp-only plots. Kept as `#` comments rather than a bare
# triple-quoted string: a string literal is an expression, and as the last
# expression of a cell Jupyter would echo it as the cell's output.
#
# plt.xlabel("Iteration")
# plt.ylabel("loss")
# plt.plot(train_loss_RMSProp, label="train_loss")
# plt.plot(evaluation_loss_RMSProp,label="evaluation_loss" )
# plt.legend(loc ='upper right')
#
# plt.xlabel("Iteration")
# plt.ylabel("Accuracy")
# plt.plot(train_accr_RMSProp, label="train_accuracy")
# plt.plot(evaluation_accr_RMSProp, label="evaluation_accuracy")
# plt.legend(loc="lower right")


In [None]:
# AdaDelta
# Per-iteration metric histories filled in by adaDelta().
train_loss_adaDelta,evaluation_loss_adaDelta,train_accr_adaDelta,evaluation_accr_adaDelta = [],[],[],[]
# Fresh all-ones starting weights for this optimiser.
w = np.ones((n, 1))

def adaDelta(w):
    """Run ``maxIters`` AdaDelta-style updates starting from ``w``.

    Each iteration draws one stochastic gradient with ``cal_gradient_sgd``
    and scales it by sqrt(Edt + e) / sqrt(Egt + e), where Egt and Edt are
    running averages of the squared gradient and the squared update.
    ``w`` is rebound locally (the caller's array is not modified) and
    nothing is returned; metrics are appended to the module-level
    ``*_adaDelta`` lists after every step.
    """
    rho = 0.9              # decay rate of both running averages
    Egt=0                  # running average of the summed squared gradient
    Edt = 0                # running average of the summed squared update
    e=0.00000001           # keeps both square roots away from zero
    # Extra multiplier applied to every update.
    # NOTE(review): presumably compensates for the tiny first steps implied
    # by e = 1e-8 - confirm the value.
    learning_rate = 2000

    for i in range(0, maxIters):

        # The gradient helper in this notebook is `cal_gradient_sgd`; the
        # name `cal_grad_sgd` is not defined anywhere and raised NameError.
        grad = cal_gradient_sgd(w)
        # NOTE: .sum() makes Egt / Edt scalars shared by all weights.
        Egt = rho * Egt + ((1-rho)*(grad**2) ).sum()
        # np.sqrt replaces math.sqrt: `math` is never imported here.
        delta = - np.sqrt(Edt + e)*grad/np.sqrt(Egt + e)
        Edt =rho*Edt+( (1-rho)*(delta**2) ).sum()
        w = w + learning_rate*delta

        train_loss_adaDelta.append(cal_loss(train_x,train_y,w))
        evaluation_loss_adaDelta.append(cal_loss(test_x,test_y,w))
        train_accr_adaDelta.append(cal_accur(train_x,train_y,w))
        evaluation_accr_adaDelta.append(cal_accur(test_x,test_y,w))
        
adaDelta(w)

# Disabled AdaDelta-only plots. Kept as `#` comments rather than a bare
# triple-quoted string: a string literal is an expression, and as the last
# expression of a cell Jupyter would echo it as the cell's output.
#
# plt.xlabel("Iteration")
# plt.ylabel("loss")
# plt.plot(train_loss_adaDelta, label="train_loss")
# plt.plot(evaluation_loss_adaDelta,label="evaluation_loss" )
# plt.legend(loc ='upper right')
#
# plt.xlabel("Iteration")
# plt.ylabel("Accuracy")
# plt.plot(train_accr_adaDelta, label="train_accuracy")
# plt.plot(evaluation_accr_adaDelta, label="evaluation_accuracy")
# plt.legend(loc="lower right")




In [None]:
# Per-iteration metric histories filled in by adam().
train_loss_adam,evaluation_loss_adam,train_accr_adam,evaluation_accr_adam = [],[],[],[]
# Fresh all-ones starting weights for this optimiser.
w = np.ones((n, 1))

def adam(w):
    """Run ``maxIters`` Adam-style updates starting from ``w``.

    Each iteration draws one stochastic gradient with ``cal_gradient_sgd``,
    updates bias-corrected first- and second-moment estimates and steps
    along the corrected first moment. ``w`` is rebound locally (the caller's
    array is not modified) and nothing is returned; metrics are appended to
    the module-level ``*_adam`` lists after every step.
    """
    b1 = 0.9               # decay rate of the first moment
    b2 = 0.995             # decay rate of the second moment
    e = 0.00000001         # was undefined in this function (NameError)
    learning_rate = 0.05
    # First moment: kept per weight. Named `moment` rather than `m` so it
    # cannot be confused with the module-level sample count `m`.
    moment = 0
    v = 0                  # second moment (scalar, see note below)

    for i in range(0, maxIters):

        # The gradient helper in this notebook is `cal_gradient_sgd`; the
        # name `cal_grad_sgd` is not defined anywhere and raised NameError.
        grad = cal_gradient_sgd(w)
        t = i + 1          # 1-based step count for bias correction
        # No .sum() here: summing collapsed the gradient to one scalar, so
        # every weight was shifted by the same amount and the gradient
        # direction was lost.
        moment = b1*moment + (1-b1)*grad
        # NOTE: the second moment is still summed into one scalar shared by
        # all weights, matching RMSProp/adaDelta in this notebook.
        v = b2*v + ((1-b2)*(grad**2)).sum()
        mt = moment/(1-(b1**t))
        vt = v/(1-(b2**t))
        # np.sqrt replaces math.sqrt: `math` is never imported here.
        w = w- learning_rate * mt/(np.sqrt(vt)+e)

        train_loss_adam.append(cal_loss(train_x,train_y,w))
        evaluation_loss_adam.append(cal_loss(test_x,test_y,w))
        train_accr_adam.append(cal_accur(train_x,train_y,w))
        evaluation_accr_adam.append(cal_accur(test_x,test_y,w))
        
adam(w)

# Disabled Adam-only loss plot, kept as `#` comments instead of a bare
# triple-quoted string literal.
#
# plt.xlabel("Iterations")
# plt.ylabel("Loss")
# plt.plot(train_loss_adam, label="train_loss")
# plt.plot(evaluation_loss_adam,label="evaluation_loss" )
# plt.legend(loc ='upper right')

# Compare the training loss of every optimiser on one set of axes.
# Axis labels and a title are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(train_loss, label="train_loss")
ax.plot(train_loss_nag, label="train_loss_nag")
ax.plot(train_loss_adaDelta, label="train_loss_adaDelta")
ax.plot(train_loss_RMSProp, label ="train_loss_RMSProp")
ax.plot(train_loss_adam, label="train_loss_adam")
ax.set_xlabel("Iterations")
ax.set_ylabel("Loss")
ax.set_title("Training loss by optimiser")
ax.legend(loc="upper right")
plt.show()



