In [6]:
from sklearn.datasets import load_svmlight_file
import numpy as np
from scipy.sparse import csr_matrix, hstack 
import matplotlib.pyplot as plt
import math
import random
%matplotlib inline

def data_loader(train_file):
    X,Y = load_svmlight_file(train_file)
    X = X.toarray()
    X = np.c_[np.ones((X.shape[0],1)),X]
    return X, Y

def sigmoid(inX):
    return 1.0/(1+np.exp(-inX))

def loss(X,Y,W):
    m,n = np.shape(X)
    loss=0
    for i in range(m):
        inX = Y[i]*W.T*X[i]
        loss += log(1+sigmoid(inX))
    return loss/m  

def sto_batch_grad(X,Y,W):
    #Mini-batch gradient
    m,n=np.shape(X)
    dataIndex = range(m)
    randIndex = int (random.uniform(0,len(dataIndex)) 
    X_part = np.mat(X(randIndex,randIndex+100))
    Y_part = np.mat(Y(randIndex,randIndex+100))
    h = sigmoid(X_part*W)
    error = h - Y_part
    G = X_part.transpose() * error #G(14,1)
    return G

def sto_gradDecline(X,Y,numIter=100):
    m,n=np.shape(X)
    W = np.ones(n)
    for j in range(numIter):   
        dataIndex = range(m)
        for i in range(m):
            learn_rate = 4/(1.0+j+i)+0.01
            randIndex = int (random.uniform(0,len(dataIndex)))
            h = sigmoid(sum(X[randIndex]*W))
            error = h - Y[randIndex]
            W = W - learn_rate*error*X[randIndex]
            del(randIndex)
    return W

def NAG(X,Y,learn_rate=.05, gamma=.9):
    """
    m,n=np.shape(X)
    v = []
    next_v = [gamma * v[i] + eta * gradients[i] for i in range(para_num)]
    updates = [(v[i], next_v[i]) for i in range(para_num)]
    updates.extend([(parameters[i], parameters[i] - gamma * next_v[i] - eta * gradients[i])
                    for i in range(para_num)])
    updates.extend([(t, t + 1)])
    return updates
    """
    X_matrix = np.mat(X)
    Y_matrix = np.mat(Y)
    m,n = np.shape(X_matrix)
    maxCycle = 500
    W = np.ones((n,1))
    v = np.ones((n,1))
    for k in range(maxCycle):
        G = sto_batch_grad(X_matrix,Y_matrix,W-gamma*v)
        v = gamma * v + learn_rate * G
        W = W - v
    return W
        
        
def RMSProp(X,Y,learn_rate=0.001, gamma=0.9, epsilon=1e-8):
    X_matrix = np.mat(X)
    Y_matrix = np.mat(Y)
    m,n = np.shape(X_matrix)
    maxCycle = 500
    G = np.ones((n,1))
    W = np.ones((n,1))
    for k in range(maxCycle):
        g = sto_batch_grad(X_matrix,Y_matrix,W)
        G = gamma * G + (1 - gamma) * np.sqr(g)
        W = W - learn_rate * g / np.sqrt(next_G[i] + epsilon)
    return W


def AdaDelta(X,Y,gamma=0.95, epsilon=1e-6):
    X_matrix = np.mat(X)
    Y_matrix = np.mat(Y)
    m,n = np.shape(X_matrix)
    maxCycle = 500
    W = np.ones((n,1))
    G = np.ones((n,1))
    dw = np.ones((n,1))
    t = np.ones((n,1))
    for k in range(maxCycle):
        g = sto_batch_grad(X_matrix,Y_matrix,W)
        G = gamma * G + (1 - gamma) *np.sqrt(g)
        dw = np.sqrt(t+epsilon)/np.sqrt(G+epsilon)
        W = W + dw
        t = gamma * t + (1-gamma)*np.sqrt(dw)
    return W


def Adam(X,Y, learn_rate=0.002, gamma=0.999, beta=0.9, epsilon=1e-8):
    X_matrix = np.mat(X)
    Y_matrix = np.mat(Y)
    m,n = np.shape(X_matrix)
    maxCycle = 500
    W = np.ones((n,1))
    m = np.ones((n,1))
    t=1
    for k in range(maxCycle):
        g = sto_batch_grad(X_matrix,Y_matrix,W)
        m = beta * m + (1 - beta) * g
        G = gamma * G + (1 - gamma) * np.sqrt(g)
        W = W - learn_rate * np.sqrt(1 - gamma ** t)/(1 - beta ** t) * m / np.sqrt(G + epsilon)
    return W

def classify(X,W):
    prob = sigmoid(sum(X*W))
    if prob > 0.5 : return 1.0
    else : return 0.0
    
if __name__ =="__main__":
    X_train,Y_train = data_loader("a9a")
    X_test,Y_test = data_loader("a9a.t")
    X = array(X_train)
    
    
    
    
                     
    
    #print(X_train.shape)

SyntaxError: invalid syntax (<ipython-input-6-0ffdd2b7be5b>, line 31)