In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Paths of the two CSV splits, resolved relative to the notebook's working directory.
# NOTE(review): the spelling 'MulitiClassTest.csv' looks like a typo; it is kept
# unchanged here -- confirm it matches the file name on disk.
TRAIN_CSV = 'MulticlassTrain.csv'
TEST_CSV = 'MulitiClassTest.csv'

df_train = pd.read_csv(TRAIN_CSV)
df_test = pd.read_csv(TEST_CSV)

In [3]:
# Every column except the last is used as a feature; the last column is the class label.
X_train = df_train.iloc[:, :-1].values
Y_train = df_train.iloc[:, -1].values

In [4]:
# Same layout as the training frame: features first, class label in the last column.
X_test = df_test.iloc[:, :-1].values
Y_test = df_test.iloc[:, -1].values

# The number of classes is the count of distinct labels seen in the training split.
number_of_classes = np.unique(Y_train).size

In [5]:
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    The direct formula calls exp(-z), which overflows (RuntimeWarning, inf
    intermediate) for very negative z. This version only ever exponentiates
    non-positive numbers, so no intermediate can overflow:

        z >= 0:  exp(0)  / (1 + exp(-z))  ==  1 / (1 + exp(-z))
        z <  0:  exp(z)  / (1 + exp(z))   ==  1 / (1 + exp(-z))

    Parameters
    ----------
    z : scalar or array-like of numbers
        Input value(s).

    Returns
    -------
    NumPy scalar or array
        Same shape as ``z``; every value lies in [0, 1].
    """
    # min(z, 0) and -|z| are both <= 0, so both exponentials stay within (0, 1].
    numerator = np.exp(np.minimum(z, 0))
    denominator = 1 + np.exp(-np.abs(z))
    return numerator/denominator

In [6]:
def fit(X_train,Y_train,number_of_classes,learning_rate=0.0005,max_iteration=1000):
    """Train one-vs-rest logistic regression with full-batch gradient ascent.

    One independent sigmoid unit is fitted per class. A constant-1 column is
    prepended to ``X_train`` so that row 0 of the returned matrix is the bias.

    Labels are assumed to be the integers 1..number_of_classes: label ``c`` is
    mapped to column ``c - 1``. (A label of 0 therefore wraps around to the
    last column, exactly as plain negative indexing does.)

    Every 100th iteration (starting with iteration 0) the function prints the
    iteration number, the log-likelihood measured before that iteration's
    update, and the updated ``theta``.

    Parameters
    ----------
    X_train : 2-D array, shape (n_samples, n_features)
    Y_train : 1-D integer array, length n_samples
    number_of_classes : int
    learning_rate : float, step size of each update
    max_iteration : int, number of full passes over the data

    Returns
    -------
    theta : ndarray, shape (n_features + 1, number_of_classes)

    Raises
    ------
    ValueError
        If ``X_train`` and ``Y_train`` do not contain the same number of samples.
    """
    # Prepend the bias column of ones.
    features = np.insert(X_train, 0, values=1, axis=1)
    n_samples, no_attributes = features.shape

    labels = np.asarray(Y_train).ravel()
    if len(labels) != n_samples:
        raise ValueError(
            "X_train has %d rows but Y_train has %d labels" % (n_samples, len(labels))
        )

    # 1-based label -> 0-based column index.
    class_index = labels - 1
    if class_index.size == 0:
        # An empty label array defaults to float dtype, which cannot be used as an index.
        class_index = class_index.astype(np.intp)

    # One-hot targets, built once instead of once per sample per iteration.
    targets = np.zeros((n_samples, number_of_classes))
    targets[np.arange(n_samples), class_index] = 1

    theta = np.zeros((no_attributes, number_of_classes))

    for icount in range(max_iteration):
        logits = features @ theta                      # (n_samples, number_of_classes)
        predicted = sigmoid(logits)

        # Bernoulli log-likelihood computed from the logits rather than from the
        # probabilities: log(sigmoid(z)) = -logaddexp(0, -z) and
        # log(1 - sigmoid(z)) = -logaddexp(0, z). This avoids log(0) = -inf and
        # the resulting 0 * -inf = nan once the sigmoid saturates.
        log_p = -np.logaddexp(0, -logits)
        log_not_p = -np.logaddexp(0, logits)
        totalLogLikelihood = np.sum(targets*log_p + (1 - targets)*log_not_p)

        # Gradient of the log-likelihood summed over all samples: X^T (targets - predicted).
        theta = theta + learning_rate*(features.T @ (targets - predicted))

        if icount%100==0:
            print(icount)
            print(totalLogLikelihood)
            print(theta)

    return theta

In [7]:
# Train with the default learning rate and iteration count; fit() prints progress every 100 iterations.
theta = fit(X_train, Y_train, number_of_classes=number_of_classes)

0
-623.8324625039492
[[-0.025      -0.025      -0.025     ]
 [-0.19354725  0.10028855 -0.602869  ]
 [ 0.07345308 -0.32175738 -0.12694417]
 [-0.14645753  0.10352738 -0.45869877]]
100
-148.41672221769613
[[-2.12554    -0.10293206  0.42194603]
 [-0.38388263  0.29062987 -0.96481727]
 [ 0.53815812 -1.35449296  1.12754043]
 [ 0.24491806  0.31151287 -0.76695238]]
200
-83.18568787946295
[[-3.70716028 -0.12082556  0.58092965]
 [-0.26220563  0.31527789 -1.19780382]
 [ 0.69985025 -1.46523062  1.37190042]
 [ 0.36270729  0.33489137 -0.8732131 ]]
300
-60.56543487174008
[[-4.73598652 -0.13374779  0.69660809]
 [-0.11102477  0.33241813 -1.35582235]
 [ 0.72577134 -1.54202539  1.52625126]
 [ 0.34787202  0.350869   -0.92728519]]
400
-45.00761402332528
[[-5.48282872 -0.14397066  0.79078018]
 [-0.12382493  0.34563    -1.47833264]
 [ 0.74405567 -1.60108261  1.64061587]
 [ 0.24011863  0.36303494 -0.96122535]]
500
-37.4531282944459
[[-5.9867499  -0.15247889  0.87153377]
 [-0.04674852  0.35640742 -1.57934947]
 

In [8]:
def predict(X_train,Y_train,theta,number_of_classes):
    """Return the classification accuracy of ``theta`` on the given samples.

    Despite the parameter names, any split (train or test) can be passed in.
    A constant-1 bias column is prepended to ``X_train``, each row is scored
    against every class, and the highest-scoring class is the prediction.

    Labels are assumed to be the integers 1..n, so a prediction of column
    ``k`` counts as correct when the label equals ``k + 1``.

    Parameters
    ----------
    X_train : 2-D array, shape (n_samples, n_features)
    Y_train : 1-D array of labels, length n_samples
    theta : ndarray, shape (n_features + 1, n_classes)
    number_of_classes : int
        Unused; kept so existing calls keep working.

    Returns
    -------
    float
        Fraction of samples whose predicted class equals the true label.

    Raises
    ------
    ValueError
        If ``X_train`` and ``Y_train`` do not contain the same number of samples.
    ZeroDivisionError
        If there are no samples.
    """
    # Prepend the bias column so the feature rows line up with the rows of theta.
    features = np.insert(X_train, 0, values=1, axis=1)
    labels = np.asarray(Y_train).ravel()

    length = len(features)
    if len(labels) != length:
        raise ValueError(
            "X_train has %d rows but Y_train has %d labels" % (length, len(labels))
        )

    # Take the argmax over the raw linear scores. The sigmoid is monotonic, so the
    # ranking is the same, but in float64 it rounds to exactly 1.0 for scores above
    # roughly 37; two large but different scores would then tie and argmax would
    # pick the first class instead of the larger one.
    scores = features @ theta
    predicted_index = np.argmax(scores, axis=1)

    # 1-based label -> 0-based column index.
    correct = np.count_nonzero(predicted_index == labels - 1)

    return correct/length

In [9]:
# Accuracy on the data the model was fitted on (an optimistic estimate).
train_accuracy = predict(X_train, Y_train, theta, number_of_classes)
train_accuracy

1.0

In [10]:
# Accuracy on the held-out test split.
test_accuracy = predict(X_test, Y_test, theta, number_of_classes)
test_accuracy

0.9866666666666667