In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [16]:
# Training features: read the CSV into a DataFrame and display it.
dX = pd.read_csv("Logistic_X_Train.csv")
dX

Unnamed: 0,f1,f2,f3
0,-1.239375,0.749101,-0.528515
1,-1.036070,0.801436,-1.283712
2,-0.615579,1.579521,-1.391927
3,1.335978,1.348651,1.433564
4,0.658925,1.300019,0.571603
...,...,...,...
2995,-0.455628,1.302303,-1.338027
2996,-0.434551,1.597813,-1.748643
2997,0.088277,1.638789,-2.193641
2998,1.525155,0.859234,1.505308


In [17]:
# Training labels: read the CSV into a DataFrame and display it.
dY = pd.read_csv("Logistic_Y_Train.csv")
dY

Unnamed: 0,label
0,1
1,1
2,1
3,0
4,0
...,...
2995,1
2996,1
2997,1
2998,0


In [18]:
# Pull the raw NumPy arrays out of the frames; the label array is
# flattened to one dimension in the same step.
X = dX.values
Y = dY.values.reshape((-1,))

# Shapes of the feature matrix and the label vector.
X.shape, Y.shape

((3000, 3), (3000,))

# Split: hold out 20% of the labelled data for testing

In [19]:
from sklearn import model_selection

In [20]:
# Hold out 20% of the rows for testing; random_state=1 pins the shuffle.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    random_state=1,
    test_size=0.2,
)

# Shapes of the four resulting arrays.
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((2400, 3), (2400,), (600, 3), (600,))

# Normalisation

In [21]:
# Column-wise mean and standard deviation of the training split.
x_mean = X_train.mean(axis=0)
x_std = X_train.std(axis=0)

In [22]:
# Standardise both splits with the same statistics (x_mean, x_std).
# NOTE: X_train and X_test are overwritten here, so executing this cell a
# second time rescales data that has already been scaled.
X_train = (X_train - x_mean) / x_std
X_test = (X_test - x_mean) / x_std

In [24]:
# Prepend a column of ones to the training features so that theta[0]
# multiplies a constant term.
ones = np.ones((X_train.shape[0], 1))
X_train_new = np.hstack((ones, X_train))

In [25]:
# Same column of ones, this time sized for the held-out test split.
ones = np.ones((X_test.shape[0], 1))
X_test_new = np.hstack((ones, X_test))

In [26]:
def sigmoid(x):
    '''
       Numerically stable logistic function 1/(1+exp(-x)).

       x may be a scalar or an array; the result has the same shape
       (a scalar input gives a scalar result).
       exp() is only evaluated on non-positive values, so large negative
       inputs do not overflow the way a direct 1/(1+exp(-x)) does.
    '''
    x=np.asarray(x)
    # exp(-|x|) always lies in [0, 1], so it can never overflow.
    e=np.exp(-np.abs(x))
    # x >= 0 : 1/(1+exp(-x))
    # x <  0 : exp(x)/(1+exp(x))   (algebraically the same value)
    out=np.where(x>=0,1.0/(1.0+e),e/(1.0+e))
    # Indexing with () turns a 0-d result back into a scalar and
    # returns arrays of any other rank unchanged.
    return out[()]

def hypo(X, theta):
    '''
       Logistic-regression hypothesis: the sigmoid of the linear
       score np.dot(X, theta).
    '''
    linear_score = np.dot(X, theta)
    return sigmoid(linear_score)

def predict(X, theta):
    '''
       Convert the hypothesis output into hard 0/1 class labels.

       X     : feature array, documented by the author as (m, n+1)
       theta : parameter array; gradient_descent creates it with
               shape (n+1,)
       Every value of hypo(X, theta) that is >= 0.5 becomes 1 and every
       other value becomes 0. The result is an integer array with the
       same shape as hypo(X, theta).
    '''
    probabilities = hypo(X, theta)
    labels = np.where(probabilities >= 0.5, 1.0, 0.0)
    return labels.astype('int')

def accuracy(Y,Yp):
    '''
       Percentage (0-100) of entries in Yp that equal the labels in Y.

       Y  : true labels, cast to int before the comparison
       Yp : predicted labels
       Both inputs are flattened first, so a column vector (m,1) and a
       flat vector (m,) are compared element by element. Without this,
       NumPy broadcasts the comparison to an (m,m) matrix and the
       returned percentage is wrong (it can exceed 100).
       Raises ValueError when the inputs hold a different number of labels.
    '''
    Y=np.asarray(Y).astype('int').ravel()
    Yp=np.asarray(Yp).ravel()
    if Y.shape[0]!=Yp.shape[0]:
        raise ValueError("Y and Yp must contain the same number of labels")
    ans=np.sum(Y==Yp)/Y.shape[0]
    return ans*100

def error(X,y,theta):
    '''
       Mean binary cross-entropy of the logistic model on (X,y).

       With z = np.dot(X,theta) and p = sigmoid(z), the per-sample loss
           -( y*log(p) + (1-y)*log(1-p) )
       is algebraically equal to
           log(1+exp(z)) - y*z
       The second form is used because it never evaluates log(0): when
       the sigmoid saturates to exactly 0 or 1 the first form yields
       inf or nan, while this one stays finite.

       X     : feature array, one row per sample
       y     : 0/1 labels, one per row of X
       theta : parameter vector
       Returns the mean loss over all samples.
    '''
    z=np.dot(X,theta)
    # np.logaddexp(0,z) = log(exp(0)+exp(z)) = log(1+exp(z)), computed
    # without overflowing for large |z|.
    return np.mean(np.logaddexp(0.0,z)-y*z)


def gradient(X, y, theta):
    '''
       Returns -(1/M) * np.dot(y - hypo(X, theta), X), where M is the
       number of rows of X. This is the slope that gradient_descent
       subtracts (scaled by the learning rate) from theta.
    '''
    num_rows, _ = X.shape
    residual = y - hypo(X, theta)
    return (-np.dot(residual, X)) / num_rows


def gradient_descent(X,y,lr=0.5,max_itr=500,tol=None):
    '''
       Batch gradient descent on the loss returned by error().

       X       : (M,N) feature array; theta gets one entry per column
       y       : labels, passed through to error() and gradient()
       lr      : learning rate (step size of each update)
       max_itr : maximum number of iterations
       tol     : optional early-stopping threshold. When given, the loop
                 stops as soon as the loss changes by less than tol
                 between two consecutive iterations. The default None
                 always runs max_itr iterations.

       Returns (theta, error_list): theta is the final parameter vector
       of shape (N,), and error_list holds the loss measured at the
       start of every iteration that was run.
    '''
    M,N=X.shape
    theta=np.zeros((N,))
    error_list=[]

    for i in range(max_itr):
        e=error(X,y,theta)
        error_list.append(e)

        # Stop the loop when the change in e is very small
        if tol is not None and len(error_list)>1 and abs(error_list[-2]-e)<tol:
            break

        grad=gradient(X,y,theta)

        theta=theta-lr*grad

    return theta,error_list
    

In [28]:
# Fit on the training split with the default learning rate and iteration
# count, then display the learned parameters.
theta, error_list = gradient_descent(X_train_new, Y_train)
theta

array([-0.7290615 , -4.08392916,  2.53235937, -2.90836227])

In [32]:
# Hard 0/1 predictions for the held-out split and for the training split.
Y_pred = predict(X_test_new, theta)
Y_t_pred = predict(X_train_new, theta)

In [34]:
# Test accuracy is printed; training accuracy is shown as the cell output.
print(accuracy(Y_test, Y_pred))
accuracy(Y_train, Y_t_pred)

99.16666666666667


99.54166666666666

# No Split: train on all labelled data and predict Logistic_X_Test.csv

In [36]:
# Standardise the full training set with its own column statistics.
# The statistics are taken from the raw frame dX rather than from X:
# X is overwritten on the last line, so computing them from X would make
# a second execution of this cell yield x_mean ~ 0 and x_std ~ 1, and the
# test features scaled later with x_mean/x_std would then be wrong.
X_raw=dX.values
x_mean=X_raw.mean(axis=0)
x_std=X_raw.std(axis=0)
X=(X_raw-x_mean)/x_std

In [38]:
# Test features: read the CSV into a DataFrame and display it.
dXtest = pd.read_csv("Logistic_X_Test.csv")
dXtest

Unnamed: 0,f1,f2,f3
0,-0.767508,-0.261859,-2.514831
1,-1.241653,1.449001,-0.413089
2,1.925270,1.931761,1.342619
3,-1.298255,1.049761,-0.527319
4,-0.964712,0.938088,-1.186328
...,...,...,...
995,2.877461,3.043965,2.308939
996,3.128835,3.115084,1.856309
997,-1.186969,0.947347,-0.786386
998,-0.532324,1.845170,-0.993550


In [39]:
# Raw NumPy view of the test features (replaces the earlier X_test split).
X_test = dXtest.values

In [40]:
# Scale the test features with the training statistics x_mean/x_std.
# Reading from dXtest instead of X_test keeps this cell idempotent:
# re-running it no longer rescales data that was already scaled.
X_test=(dXtest.values-x_mean)/x_std

In [41]:
# Prepend a column of ones to the full, standardised training matrix.
ones = np.ones((X.shape[0], 1))
X_new = np.hstack((ones, X))

In [42]:
# Shape check: one extra column compared with X after adding the ones.
X_new.shape

(3000, 4)

In [43]:
# Prepend the same column of ones to the standardised test features.
ones = np.ones((X_test.shape[0], 1))
X_test_new = np.hstack((ones, X_test))

In [45]:
# Shape check: the column count must match X_new so theta applies to both.
X_test_new.shape

(1000, 4)

In [46]:
# Refit on every labelled row (default learning rate and iteration count).
theta, error_list = gradient_descent(X_new, Y)

In [54]:
# Training accuracy of the model fitted on the full labelled set.
Y_tr_pred = predict(X_new, theta)
accuracy(Y, Y_tr_pred)

99.46666666666667

In [55]:
# Hard 0/1 predictions for the rows loaded from the test file.
Y_pred = predict(X_test_new, theta)

In [56]:
# Display the predicted labels (this prints the whole array).
Y_pred

array([1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1,

In [58]:
# Wrap the predictions in a one-column frame named "label" and display it.
df = pd.DataFrame({"label": Y_pred})
df

Unnamed: 0,label
0,1
1,1
2,0
3,1
4,1
...,...
995,0
996,0
997,1
998,1


In [59]:
# Write the predictions without the row index. index=False is the
# documented boolean form; index=None only worked because None is falsy.
df.to_csv("ans.csv",index=False)