In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.metrics import accuracy_score

In [2]:
# Load the comma-separated dataset (',' is read_csv's default delimiter).
data = pd.read_csv('dataset.txt')

In [3]:
# Preview three random rows; the fixed seed keeps the displayed rows the same
# on every Restart & Run All.
data.sample(3, random_state=16)

Unnamed: 0,Marks_1,Marks_2,Result
86,42.075455,78.844786,0
94,89.845807,45.358284,1
42,94.443368,65.568922,1


In [4]:
# Print the column dtypes and non-null counts to check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 3 columns):
Marks_1    100 non-null float64
Marks_2    100 non-null float64
Result     100 non-null int64
dtypes: float64(2), int64(1)
memory usage: 2.5 KB


In [5]:
class LogisticRegression():
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    regularization : {None, 'L1', 'L2'}
        Penalty added to the loss and to the gradient used by ``fit``.
        Any other value means no penalty.
    cost : float
        Inverse regularization strength; the penalty is scaled by ``1/cost``.
        Only read when ``regularization`` is 'L1' or 'L2'.
    w : array-like or scalar
        Initial value stored in ``self.wgt``. ``fit`` replaces it with a
        zero vector before training.
    b : scalar
        Stored in ``self.bs``. It is not used by any method of this class,
        so the fitted model has no intercept term.
    iterations : int
        Number of gradient-descent steps performed by ``fit``.
    alpha : float
        Learning rate.
    """

    def __init__(self,regularization=None,cost=0.01,w=0,b=0,iterations=100000,alpha=0.001):
        self.regularization = regularization
        self.cost = cost
        self.iterations = iterations
        self.wgt = w
        self.bs = b
        self.alpha = alpha

    def sigmoid(self,X_train,weight):
        """Return the logistic function of ``X_train @ weight``."""
        z = np.dot(X_train,weight)
        # Clip the logits so np.exp cannot overflow; within +/-700 the result
        # is exactly the plain 1/(1+exp(-z)).
        z = np.clip(z, -700, 700)
        return 1/(1 + np.exp(-z))

    def weights(self,weight,alpha,grad):
        """Return the weights after one gradient-descent step."""
        return weight - alpha*grad

    def loss(self,h,y):
        """Return the mean cross-entropy of ``h`` against ``y`` plus the penalty.

        ``h`` holds predicted probabilities and ``y`` the 0/1 labels. The
        penalty is computed from the current ``self.wgt``.
        """
        # Keep probabilities strictly inside (0, 1) so log() stays finite.
        eps = 1e-15
        h = np.clip(h, eps, 1 - eps)
        data_loss = (-y*np.log(h) - (1-y)*np.log(1-h)).mean()

        # np.sum / np.abs also accept the scalar default of self.wgt, where
        # the builtin sum() would raise TypeError.
        if self.regularization == 'L1':
            return data_loss + ((1/self.cost)*np.sum(np.abs(self.wgt)))/(2*y.shape[0])
        elif self.regularization == 'L2':
            return data_loss + ((1/self.cost)*np.sum(np.square(self.wgt)))/(2*y.shape[0])
        else:
            return data_loss

    def gradientDescent(self,X_train,h,y):
        """Return the gradient of the mean cross-entropy w.r.t. the weights.

        The regularization term is not included here; ``fit`` adds it.
        """
        return np.dot(X_train.T,(h-y))/y.shape[0]

    def _penalty_gradient(self,weight,n_samples):
        """Return the (sub)gradient of the penalty term used in ``loss``."""
        if self.regularization == 'L1':
            # d/dw of (1/cost)*sum(|w|)/(2m); np.sign gives 0 at w == 0.
            return (1/self.cost)*np.sign(weight)/(2*n_samples)
        elif self.regularization == 'L2':
            # d/dw of (1/cost)*sum(w**2)/(2m)
            return (1/self.cost)*weight/n_samples
        return np.zeros_like(weight)

    def fit(self,X_train,y_train):
        """Fit the weights on ``X_train`` / ``y_train`` and return ``self``.

        Weights start from zeros and are updated for ``self.iterations``
        steps using the data gradient plus the penalty gradient.
        """
        self.wgt = np.zeros(X_train.shape[1])
        n_samples = y_train.shape[0]

        for _ in range(self.iterations):
            h = self.sigmoid(X_train,self.wgt)
            gd = self.gradientDescent(X_train,h,y_train)
            # Without this term the regularization setting had no effect on training.
            gd = gd + self._penalty_gradient(self.wgt,n_samples)
            self.wgt = self.weights(self.wgt,self.alpha,gd)

        return self

    def predict(self,X_test):
        """Return 1.0 where the predicted probability exceeds 0.5, else 0.0."""
        probabilities = self.sigmoid(X_test,self.wgt)
        return (probabilities > 0.5).astype(float)

## Test

In [6]:
# Feature matrix: the first two columns. Labels: the last column.
X = np.array(data.iloc[:, :2])
y = np.array(data.iloc[:, -1])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=16
)

In [7]:
# L1 Regularization

# fit() returns the estimator itself, so construction and training can be chained.
lr = LogisticRegression(
    regularization='L1', iterations=100000, alpha=0.001
).fit(X_train, y_train)

print("Accuracy =", accuracy_score(y_test, lr.predict(X_test)))

Accuracy = 0.85


In [8]:
# L2 Regularization

# fit() returns the estimator itself, so construction and training can be chained.
lr = LogisticRegression(
    regularization='L2', iterations=100000, alpha=0.001
).fit(X_train, y_train)

print("Accuracy =", accuracy_score(y_test, lr.predict(X_test)))

Accuracy = 0.85
