In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split


def get_data(file, n_features=None):
    """Load a dataset stored in svmlight / libsvm format.

    Args:
        file: Path (or file-like object) accepted by ``load_svmlight_file``.
        n_features: Optional number of feature columns to force. When left as
            ``None`` the width is inferred from the file, so two splits of the
            same dataset can come back with different column counts if one of
            them never uses the highest-index features. Pass the training
            width when loading a test split to keep both aligned.

    Returns:
        A ``(X, y)`` tuple: the feature matrix and the label vector, exactly
        as returned by ``load_svmlight_file``.
    """
    features, labels = load_svmlight_file(file, n_features=n_features)
    return features, labels

def compute_loss(X, W, y, C=1):
    """Return the L2-regularised mean hinge loss of a linear model.

    The value is ``0.5 * ||W||^2 + C * mean(max(0, 1 - y * (X . W)))``.

    ``W`` and ``y`` may be given either as 1-D arrays or as column vectors;
    both are flattened before the margins are computed. Without that, a
    ``(n,)`` array of ones minus a ``(n, 1)`` product broadcasts to an
    ``(n, n)`` matrix, which inflates the loss and allocates n^2 floats.

    Args:
        X: Feature matrix with one sample per row.
        W: Weight vector, shape ``(n_features,)`` or ``(n_features, 1)``.
        y: Labels, shape ``(n_samples,)`` or ``(n_samples, 1)``. The hinge
            term is only meaningful for labels encoded as -1 / +1.
        C: Weight of the hinge term relative to the L2 penalty.

    Returns:
        The loss as a Python ``float``.
    """
    n_samples = X.shape[0]
    weights = np.asarray(W, dtype=float).ravel()
    labels = np.asarray(y, dtype=float).ravel()
    scores = np.asarray(np.dot(X, W), dtype=float).ravel()

    # Samples classified with a margin of at least 1 contribute nothing.
    hinge_terms = np.maximum(0.0, 1.0 - labels * scores)

    l2_penalty = 0.5 * np.dot(weights, weights)
    hinge_loss = C * hinge_terms.sum() / n_samples
    return float(l2_penalty + hinge_loss)

def get_gradient(X, W, y, C=1):
    """Return a (sub)gradient of the L2-regularised mean hinge loss.

    The result is ``W - C * (X^T . y_active) / n_samples`` where ``y_active``
    equals ``y`` for samples whose margin term ``1 - y * (X . W)`` is
    strictly positive and 0 for every other sample.

    ``W`` and ``y`` may be 1-D arrays or column vectors. They are flattened
    for the margin computation so that the active-sample mask always has one
    entry per sample (mixing ``(n,)`` and ``(n, 1)`` operands would otherwise
    broadcast to an ``(n, n)`` mask that cannot index the labels).

    Args:
        X: Feature matrix with one sample per row.
        W: Weight vector, shape ``(n_features,)`` or ``(n_features, 1)``.
        y: Labels, shape ``(n_samples,)`` or ``(n_samples, 1)``; not modified.
        C: Weight of the hinge term relative to the L2 penalty.

    Returns:
        The gradient as an array with the same shape as ``W``.
    """
    weights = np.asarray(W, dtype=float)
    labels = np.asarray(y, dtype=float).ravel()
    scores = np.asarray(np.dot(X, W), dtype=float).ravel()

    # Only samples that violate the margin contribute to the data term.
    margin_violated = (1.0 - labels * scores) > 0
    active_labels = np.where(margin_violated, labels, 0.0)

    data_term = np.asarray(np.dot(active_labels, X), dtype=float).ravel()
    data_term = data_term / X.shape[0]
    return weights - C * data_term.reshape(weights.shape)

def output(X, W):
    """Return the raw linear scores of the model, i.e. the product ``X . W``."""
    scores = np.dot(X, W)
    return scores

def BGD(iterate_number, W, X_train, y_train, x_test, y_test, learning_rate=0.01):
    """Run full-batch gradient descent and record the loss after every step.

    Each iteration moves ``W`` against the gradient returned by
    ``get_gradient`` on the whole training set, then evaluates
    ``compute_loss`` on both the training and the held-out data.

    Args:
        iterate_number: Number of gradient steps to take.
        W: Initial weights. The caller's array is not modified; every step
            rebinds the local name to a new array.
        X_train: Training feature matrix.
        y_train: Training labels.
        x_test: Held-out feature matrix.
        y_test: Held-out labels.
        learning_rate: Step size. This used to be read from a misspelled
            module-level global; it is now an explicit keyword whose default
            (0.01) is the value the script assigned to that global, so
            existing six-argument calls behave as before.

    Returns:
        ``(loss_train, loss_valid)``: two lists with one loss value per
        iteration.
    """
    loss_train = []
    loss_valid = []

    for _ in range(iterate_number):
        gradient = get_gradient(X_train, W, y_train)
        W = W - learning_rate * gradient
        loss_train.append(compute_loss(X_train, W, y_train))
        loss_valid.append(compute_loss(x_test, W, y_test))
    return loss_train, loss_valid

def Adam(iterate_number, W, X_train, y_train, x_test, y_test, learning_rate):
    """Optimise ``W`` with Adam and record the loss after every step.

    The step direction is the mean squared-error gradient
    ``X^T . (X . W - y) / n_samples``. It is computed with a matrix product:
    on plain ndarrays the ``*`` operator is elementwise, which does not
    reduce over the samples and fails to broadcast against ``W``.

    NOTE(review): the recorded curves come from ``compute_loss`` (hinge + L2)
    while the update follows a squared-error gradient -- confirm this
    mismatch is intentional.

    Args:
        iterate_number: Number of update steps.
        W: Initial weights; the caller's array is not modified.
        X_train: Training feature matrix, one sample per row.
        y_train: Training labels. Must have the same dimensionality as the
            scores ``X_train . W`` (both 1-D or both column vectors) so that
            their difference does not broadcast.
        x_test: Held-out feature matrix.
        y_test: Held-out labels.
        learning_rate: Step size.

    Returns:
        ``(loss_train, loss_valid)``: two lists with one loss value per
        iteration.
    """
    beta_1 = 0.9       # decay rate of the first-moment (mean) estimate
    beta_2 = 0.99      # decay rate of the second-moment estimate
    ep = 0.00000001    # keeps the denominator away from zero

    m = 0  # running mean of the gradient
    v = 0  # running mean of the squared gradient

    loss_train = []
    loss_valid = []

    N = X_train.shape[0]

    # t is the 1-based step count used by the bias corrections below.
    for t in range(1, iterate_number + 1):
        error = output(X_train, W) - y_train
        gradient = np.dot(X_train.T, error) / N

        m = beta_1 * m + (1 - beta_1) * gradient
        v = beta_2 * v + (1 - beta_2) * np.power(gradient, 2)
        # Both averages start at 0; dividing by (1 - beta**t) removes the
        # resulting bias towards zero during the first steps.
        m_hat = m / (1 - beta_1 ** t)
        v_hat = v / (1 - beta_2 ** t)

        W = W - learning_rate * m_hat / (np.sqrt(v_hat) + ep)
        loss_train.append(compute_loss(X_train, W, y_train))
        loss_valid.append(compute_loss(x_test, W, y_test))
    return loss_train, loss_valid

def RMSProp(iterate_number, W, X_train, y_train, x_test, y_test, learning_rate):
    """Optimise ``W`` with RMSProp and record the loss after every step.

    The step direction is the mean squared-error gradient
    ``X^T . (X . W - y) / n_samples``, computed with a matrix product (the
    ``*`` operator is elementwise on ndarrays and does not reduce over the
    samples). Each coordinate of the step is divided by the root of a
    running average of that coordinate's squared gradient.

    NOTE(review): the recorded curves come from ``compute_loss`` (hinge + L2)
    while the update follows a squared-error gradient -- confirm this
    mismatch is intentional.

    Args:
        iterate_number: Number of update steps.
        W: Initial weights; the caller's array is not modified.
        X_train: Training feature matrix, one sample per row.
        y_train: Training labels, with the same dimensionality as the scores
            ``X_train . W`` (both 1-D or both column vectors).
        x_test: Held-out feature matrix.
        y_test: Held-out labels.
        learning_rate: Step size.

    Returns:
        ``(loss_train, loss_valid)``: two lists with one loss value per
        iteration.
    """
    N = X_train.shape[0]

    d = 0.9            # decay rate of the squared-gradient average
    ep = 0.00000001    # keeps the denominator away from zero
    Egt = 0            # running average of the squared gradient

    loss_train = []
    loss_valid = []

    for _ in range(iterate_number):
        error = output(X_train, W) - y_train
        gradient = np.dot(X_train.T, error) / N
        Egt = d * Egt + (1 - d) * np.power(gradient, 2)

        W = W - learning_rate * gradient / (np.sqrt(Egt) + ep)
        loss_train.append(compute_loss(X_train, W, y_train))
        loss_valid.append(compute_loss(x_test, W, y_test))
    return loss_train, loss_valid

def ADADELTA(iterate_number, W, X_train, y_train, x_test, y_test):
    """Optimise ``W`` with ADADELTA and record the loss after every step.

    No learning rate is needed: each step is the gradient scaled by the
    ratio of the RMS of previous steps to the RMS of recent gradients. The
    gradient is the mean squared-error gradient
    ``X^T . (X . W - y) / n_samples``, computed with a matrix product (the
    ``*`` operator is elementwise on ndarrays and does not reduce over the
    samples).

    NOTE(review): the recorded curves come from ``compute_loss`` (hinge + L2)
    while the update follows a squared-error gradient -- confirm this
    mismatch is intentional.

    Args:
        iterate_number: Number of update steps.
        W: Initial weights; the caller's array is not modified.
        X_train: Training feature matrix, one sample per row.
        y_train: Training labels, with the same dimensionality as the scores
            ``X_train . W`` (both 1-D or both column vectors).
        x_test: Held-out feature matrix.
        y_test: Held-out labels.

    Returns:
        ``(loss_train, loss_valid)``: two lists with one loss value per
        iteration.
    """
    N = X_train.shape[0]

    ep = 0.00001   # added under both square roots; also seeds the first step
    d = 0.9        # decay rate shared by both running averages

    Egt = 0        # running average of the squared gradient
    Edt = 0        # running average of the squared step

    loss_train = []
    loss_valid = []

    for _ in range(iterate_number):
        error = output(X_train, W) - y_train
        gradient = np.dot(X_train.T, error) / N

        Egt = d * Egt + (1 - d) * np.power(gradient, 2)
        # The step uses Edt from the previous iteration; Edt is refreshed
        # only after the step has been computed.
        delta = np.multiply(np.sqrt(Edt + ep), gradient) / np.sqrt(Egt + ep)
        Edt = d * Edt + (1 - d) * np.power(delta, 2)

        W = W - delta
        loss_train.append(compute_loss(X_train, W, y_train))
        loss_valid.append(compute_loss(x_test, W, y_test))
    return loss_train, loss_valid

def NAG(iterate_number, W, X_train, y_train, x_test, y_test, learning_rate):
    """Optimise ``W`` with Nesterov accelerated gradient and record the loss.

    The gradient is evaluated at the look-ahead point ``W + u * vt`` rather
    than at ``W`` itself. It is the mean squared-error gradient
    ``X^T . (X . W_ahead - y) / n_samples``, computed with a matrix product
    (the ``*`` operator is elementwise on ndarrays and does not reduce over
    the samples).

    NOTE(review): the recorded curves come from ``compute_loss`` (hinge + L2)
    while the update follows a squared-error gradient -- confirm this
    mismatch is intentional.

    Args:
        iterate_number: Number of update steps.
        W: Initial weights; the caller's array is not modified.
        X_train: Training feature matrix, one sample per row.
        y_train: Training labels, with the same dimensionality as the scores
            ``X_train . W`` (both 1-D or both column vectors).
        x_test: Held-out feature matrix.
        y_test: Held-out labels.
        learning_rate: Step size applied to the look-ahead gradient.

    Returns:
        ``(loss_train, loss_valid)``: two lists with one loss value per
        iteration.
    """
    N = X_train.shape[0]

    u = 0.9   # momentum coefficient
    vt = 0    # velocity: decayed sum of past steps

    loss_train = []
    loss_valid = []

    for _ in range(iterate_number):
        # Look ahead along the current velocity before measuring the slope.
        error = output(X_train, W + u * vt) - y_train
        gradient = np.dot(X_train.T, error) / N

        vt = u * vt - learning_rate * gradient

        W = W + vt
        loss_train.append(compute_loss(X_train, W, y_train))
        loss_valid.append(compute_loss(x_test, W, y_test))
    return loss_train, loss_valid
    
# Plot the training and validation loss curves.
def plot(loss_train, loss_valid, title):
    """Draw both loss histories on one figure and display it.

    Args:
        loss_train: Per-iteration training losses (drawn in red).
        loss_valid: Per-iteration validation losses (drawn in green).
        title: Text shown above the axes.
    """
    curves = (
        (loss_train, "r", "Loss_train"),
        (loss_valid, "g", "Loss_valid"),
    )
    for history, colour, legend_name in curves:
        plt.plot(history, color=colour, label=legend_name)

    plt.xlabel("Iteration")
    plt.ylabel("Loss")
    plt.title(title)
    plt.legend()
    plt.show()

# ---------------------------------------------------------------------------
# Experiment driver: load the a9a train/test splits, then train the linear
# model with each optimiser and plot its loss curves.
# ---------------------------------------------------------------------------
X_train, y_train = get_data("D:/Task/MachineLearning/Test/逻辑回归、线性分类和随机梯度下降/SVM_SGD/data/a9a")
x_test, y_test = get_data("D:/Task/MachineLearning/Test/逻辑回归、线性分类和随机梯度下降/SVM_SGD/data/a9a.t")
X_train = X_train.toarray()
x_test = x_test.toarray()

# The svmlight loader infers the column count of each file from the highest
# feature index it contains, so the two splits can come back with different
# widths. Zero-pad the narrower one so both share the same feature space.
feature_count = max(X_train.shape[1], x_test.shape[1])
X_train = np.pad(
    X_train, ((0, 0), (0, feature_count - X_train.shape[1])), mode="constant"
)
x_test = np.pad(
    x_test, ((0, 0), (0, feature_count - x_test.shape[1])), mode="constant"
)

# Append a constant column of ones so the last weight acts as the bias term.
column_train = np.ones((X_train.shape[0]))
X_train = np.column_stack((X_train, column_train))
column_test = np.ones((x_test.shape[0]))
x_test = np.column_stack((x_test, column_test))

# Labels stay exactly as loaded: 1-D, with negative values left negative.
# Remapping the negative class to 0 would zero out y * score for every such
# sample, so the hinge loss and its gradient would ignore that class. Keeping
# labels and weights 1-D also avoids (n,) vs (n, 1) broadcasting inside the
# loss and gradient helpers.
y_train = np.asarray(y_train, dtype=float).ravel()
y_test = np.asarray(y_test, dtype=float).ravel()

N = X_train.shape[1]
# initialize the parameters
W = np.ones(N)

# set the learning rate and iterate number
learning_rate = 0.01
# Misspelled alias kept so that any code still reading the old global name
# keeps working.
leraning_rate = learning_rate
iterate_number = 1000

pic_title = "Linear SVM"
loss_train, loss_valid = BGD(iterate_number, W, X_train, y_train, x_test, y_test)
plot(loss_train, loss_valid, pic_title)

# NOTE: an SGD run used to be sketched here, but no SGD function exists in
# this file.
pic_title = "Adam"
loss_train, loss_valid = Adam(iterate_number, W, X_train, y_train, x_test, y_test, learning_rate)
plot(loss_train, loss_valid, pic_title)

pic_title = "RMSProp"
loss_train, loss_valid = RMSProp(iterate_number, W, X_train, y_train, x_test, y_test, learning_rate)
plot(loss_train, loss_valid, pic_title)

pic_title = "ADADELTA"
loss_train, loss_valid = ADADELTA(iterate_number, W, X_train, y_train, x_test, y_test)
plot(loss_train, loss_valid, pic_title)

pic_title = "NAG"
loss_train, loss_valid = NAG(iterate_number, W, X_train, y_train, x_test, y_test, learning_rate)
plot(loss_train, loss_valid, pic_title)
