# Neural Networks

## **Arguments:**

1) **train input**: path to the training input .csv file

2) **validation input**: path to the validation input .csv file

3) **train out**: path of output .labels file to which the predictions on the training data should be written

4) **validation out**: path of the output .labels file to which the predictions on the validation data should be written
    
5) **metrics out**: path of the output .txt file to which metrics such as train and validation error should be written
    
6) **num epoch**: integer specifying the number of times backpropagation loops through all of the training data

7) **hidden units**: positive integer specifying the number of hidden units.
    
8) **init flag**: integer taking value 1 or 2 that specifies whether to use RANDOM or ZERO initialization

That is, if init_flag==1, initialize the weights randomly from a uniform distribution over the range [-0.1, 0.1] (i.e. RANDOM); if init_flag==2, initialize all weights to zero (i.e. ZERO). In both settings, always initialize the bias terms to zero.

9) **learning rate**: float value specifying the learning rate for SGD.

In [None]:
import numpy as np
import sys
import csv 
# Positional command-line arguments, read when the module is loaded.
# A missing argument raises IndexError; a non-numeric value passed to
# int()/float() raises ValueError.
# Path to the training input .csv file.
train_input = sys.argv[1] 
# Path to the validation input .csv file.
validation_input = sys.argv[2]
# Output path for the predicted labels on the training data.
train_out = sys.argv[3]
# Output path for the predicted labels on the validation data.
validation_out = sys.argv[4]
# Output path for the metrics text file.
metrics_out = sys.argv[5]
# Number of passes over the training data.
epoch = int(sys.argv[6])
# Number of hidden units.
hidden_units = int(sys.argv[7])
# Initialization mode, kept as a string: '1' = RANDOM, '2' = ZERO.
init_flag = sys.argv[8]
# Learning rate for SGD.
learning_rate = float(sys.argv[9])

In [None]:
def _forward_all(features, one_hot, labels, alpha, beta, eta, count):
    """Run forward() on examples 0..count-1.

    Returns a pair (losses, predictions): the per-example cross-entropy
    values and the per-example predicted class indices, in example order.
    """
    losses = []
    predictions = []
    for j in range(count):
        outputs = forward(features, one_hot, labels, alpha, beta, eta, j)
        # forward() returns [a, zm1, z, b, yc, y_te, J, p].
        losses.append(outputs[6])
        predictions.append(outputs[7])
    return losses, predictions


def main():
    """Train the network with per-example SGD and write labels and metrics.

    Reads the module-level settings train_input, validation_input,
    train_out, validation_out, metrics_out, epoch, hidden_units, init_flag
    and learning_rate. After every epoch the mean cross-entropy on the
    training and validation data is recorded; after training, the predicted
    labels and the error rates are written to the output files.

    Raises:
        ValueError: if init_flag is neither '1' (RANDOM) nor '2' (ZERO).
    """
    train = file_data(train_input)
    validation = file_data(validation_input)

    # int() keeps the shape arguments below valid even if dim_x is a float.
    num_features = int(train.dim_x)
    num_classes = int(train.dim_y)
    # The one-hot table built from the training labels is used for both
    # data sets: the output layer is sized from the training classes, so a
    # validation file that happens to lack a class must not shrink it.
    one_hot = train.y
    yval_train = train.labely
    yval_vali = validation.labely
    attrib_train = train.x
    attrib_vali = validation.x
    numofex_train = train.trainingex
    numofex_vali = validation.trainingex

    # Weights carry a leading bias column, which always starts at zero.
    if init_flag == '1':
        alpha = np.asmatrix(
            np.random.uniform(-0.1, 0.1, (hidden_units, num_features)))
        alpha = np.insert(alpha, 0, 0.0, axis=1)
        beta = np.asmatrix(
            np.random.uniform(-0.1, 0.1, (num_classes, hidden_units)))
        beta = np.insert(beta, 0, 0.0, axis=1)
    elif init_flag == '2':
        alpha = np.zeros((hidden_units, num_features + 1))
        beta = np.zeros((num_classes, hidden_units + 1))
    else:
        # Fail early instead of training with mis-shaped weights.
        raise ValueError(
            "init_flag must be '1' (RANDOM) or '2' (ZERO), got {!r}".format(
                init_flag))

    metric_lines = []

    for i in range(epoch):
        # One SGD step per training example, in file order.
        for j in range(numofex_train):
            gA1, gB1 = backward(attrib_train, one_hot, yval_train,
                                alpha, beta, learning_rate, j)
            beta = beta - learning_rate * gB1
            alpha = alpha - learning_rate * gA1

        train_losses, _ = _forward_all(attrib_train, one_hot, yval_train,
                                       alpha, beta, learning_rate,
                                       numofex_train)
        J_train = np.sum(train_losses) / numofex_train
        train_line = 'epoch={} crossentropy(train): {:.11f}\n'.format(
            i + 1, J_train)

        vali_losses, _ = _forward_all(attrib_vali, one_hot, yval_vali,
                                      alpha, beta, learning_rate,
                                      numofex_vali)
        J_validation = np.sum(vali_losses) / numofex_vali
        vali_line = 'epoch={} crossentropy(validation): {:.11f}\n'.format(
            i + 1, J_validation)

        metric_lines.append(train_line)
        metric_lines.append(vali_line)
        # Progress output: only this epoch's two lines.
        print(train_line.rstrip('\n'))
        print(vali_line.rstrip('\n'))

    # Final predictions with the trained weights.
    _, Pred_t = _forward_all(attrib_train, one_hot, yval_train,
                             alpha, beta, learning_rate, numofex_train)
    _, Pred_v = _forward_all(attrib_vali, one_hot, yval_vali,
                             alpha, beta, learning_rate, numofex_vali)

    # A prediction is a class index; it is compared with the label value.
    error_train = 1 - float(
        np.sum(np.asarray(Pred_t) == yval_train)) / len(yval_train)
    metric_lines.append('error(train): {:.2f}\n'.format(error_train))
    error_val = 1 - float(
        np.sum(np.asarray(Pred_v) == yval_vali)) / len(yval_vali)
    metric_lines.append('error(validation): {:.2f}\n'.format(error_val))

    labels_train = ''.join(str(pred) + '\n' for pred in Pred_t)
    labels_val = ''.join(str(pred) + '\n' for pred in Pred_v)

    with open(metrics_out, 'w') as f:
        f.write(''.join(metric_lines))
    with open(train_out, 'w') as f:
        f.write(labels_train)
    with open(validation_out, 'w') as f:
        f.write(labels_val)
        

In [None]:
def sigmoid(a):
    """Apply the logistic function element-wise and prepend a bias entry.

    Returns a two-item list: the activations themselves, and the same
    activations with a 1.0 inserted at index 0 along axis 0.
    """
    activation = 1.0 / (np.exp(-a) + 1.0)
    with_bias = np.insert(activation, 0, 1.0, axis=0)
    return [activation, with_bias]

def softmax(b):
    """Return the softmax of b, normalised over all of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps exp() from overflowing to inf (and
    the quotient from becoming NaN) for large inputs.
    """
    logits = np.asanyarray(b)  # keeps np.matrix inputs as np.matrix
    if logits.size == 0:
        # np.max rejects empty input; nothing to normalise anyway.
        return np.exp(logits)
    exps = np.exp(logits - np.max(logits))
    return exps / np.sum(exps)

def forward(x1, yunique1, y1, alpha, beta, eta, t):
    """Run the forward pass for example t and compute its cross-entropy.

    Args:
        x1: feature rows; row t is the input example.
        yunique1: table of target vectors, indexed by int(label).
        y1: label values; y1[t] is the label of example t.
        alpha: input-to-hidden weights.
        beta: hidden-to-output weights.
        eta: unused here; kept so the signature matches backward().
        t: index of the example to evaluate.

    Returns:
        The list [a, zm1, z, b, yc, y_te, J, p]: hidden pre-activation,
        hidden activation, hidden activation with the bias entry
        prepended, output pre-activation, softmax output, target vector,
        cross-entropy (float) and the argmax index of the softmax output.
    """
    # asmatrix avoids copying the whole data set on every per-example call
    # when x1 is already a matrix.
    x = np.asmatrix(x1)

    a = np.dot(alpha, np.transpose(x[t]))
    zm1, z = sigmoid(a)
    b = np.dot(beta, z)

    yc = softmax(b)
    p = np.argmax(np.array(yc))

    # The label value itself is used as the row index into yunique1.
    y_te = yunique1[int(y1[t])]

    # .item() extracts the single value; float() on a non-0-d array is
    # deprecated in recent NumPy releases.
    J = -float(np.asarray(np.dot(y_te, np.log(yc))).item())
    return [a, zm1, z, b, yc, y_te, J, p]


In [None]:
def backward(x1, yunique1, y1, alpha, beta, eta, t):
    """Compute the cross-entropy gradients for example t.

    Args:
        x1: feature rows; row t is the input example.
        yunique1: table of target vectors, indexed by int(label).
        y1: label values; y1[t] is the label of example t.
        alpha: input-to-hidden weights (not modified).
        beta: hidden-to-output weights (not modified).
        eta: passed through to forward(); not used for the gradients.
        t: index of the example.

    Returns:
        The list [gA, gB]: gradients with respect to alpha and beta.
    """
    # asmatrix avoids copying the whole data set on every call when x1 is
    # already a matrix.
    x = np.asmatrix(x1)

    a, zm1, z, b, yc, y_te, J, p = forward(x1, yunique1, y1, alpha, beta, eta, t)

    # Gradient w.r.t. the output pre-activation b, as a row vector.
    # For softmax followed by cross-entropy this is (yc - y) whenever the
    # target vector sums to 1 (true for one-hot rows). It equals
    # (-y / yc) . (diag(yc) - yc yc^T) but never divides by a softmax
    # output, so an underflowed probability cannot produce inf/NaN.
    gb = np.transpose(np.asmatrix(yc)) - np.asmatrix(y_te)

    # Outer product with the bias-augmented hidden activation.
    gB = np.transpose(gb) * np.transpose(z)

    # Drop the bias column: the bias entry of z has no dependence on a.
    beta_new = np.delete(beta, 0, 1)
    gz1 = np.dot(np.transpose(beta_new), np.transpose(gb))

    # Sigmoid derivative: zm1 * (1 - zm1), element-wise.
    ga = np.multiply(np.multiply(gz1, zm1), 1 - zm1)
    gA = ga * x[t]

    return [gA, gB]
    

In [None]:
class file_data:
    """Numeric CSV data set whose first column is the label.

    Attributes set by the constructor:
        trainingex: number of rows (examples).
        dim_x: number of feature columns, excluding the label.
        labely: 1-D float array of the label column.
        y1: sorted unique label values.
        dim_y: number of unique label values.
        y: identity matrix of size dim_y (one row per class).
        x: feature matrix with a column of 1.0 prepended as the bias input.
    """

    def __init__(self, file_path):
        """Load file_path and derive the attributes listed on the class."""
        # Text mode: csv.reader needs str rows on Python 3. The with-block
        # closes the handle, which the previous bare open() never did.
        with open(file_path, "r") as handle:
            # Skip blank lines so a trailing newline cannot create a ragged row.
            rows = [row for row in csv.reader(handle, delimiter=",") if row]

        result = np.matrix(rows).astype("float")
        r = np.delete(result, 0, 1)  # features only: label column removed

        self.trainingex = r.shape[0]
        # Read the column count from the shape so it is always an int
        # (a "/" division yields a float on Python 3).
        self.dim_x = r.shape[1]
        self.labely = np.asarray(result[:, 0]).reshape(-1)
        self.y1 = np.unique(self.labely)
        self.dim_y = len(self.y1)
        self.y = np.identity(self.dim_y)
        self.x = np.insert(r, 0, 1.0, axis=1)
    
        


In [None]:
# Call main() only when this file is executed as a script.
if __name__ == "__main__":
    main()