[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1SzyI5ACB79je-pBPlQm4Yy-thoOTP2bt?usp=sharing)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
import autograd.numpy as npy
from autograd import grad
from autograd import elementwise_grad

In [None]:
class LogisticRegression():
    '''
    Binary logistic regression trained with batch gradient descent.

    Two training routines are provided: ``fit`` uses the closed-form gradient of
    the cross-entropy loss, ``fit_autograd`` differentiates ``cost`` with autograd.
    Both optimise the same objective:

        mean cross-entropy  (+ lamda * sum|theta|   when regu == "L1")
                            (+ lamda * sum(theta^2) when regu == "L2")

    The penalty is applied to every entry of theta, including the intercept,
    because that is how ``cost`` defines it.
    '''

    def __init__(self, fit_intercept=True):
        '''
        :param fit_intercept: Whether to calculate the intercept for this model. If set to False, no intercept will be used in calculations (i.e. data is expected to be centered).
        '''
        self.fit_intercept = fit_intercept
        self.coef_ = None   # weight column of shape (m[+1], 1), set by fit / fit_autograd
        self.thetas = []    # weights after each iteration of the most recent fit
        self.costs = []     # cost before each update of the most recent fit_autograd
        self.regu = None    # "L1", "L2", or anything else for no penalty
        self.lamda = 0.1    # penalty strength used by cost() and the gradients
        self.X = None       # design matrix used by cost() (with intercept column if enabled)
        self.y = None       # targets as an (n, 1) column, used by cost()

    def _prepare(self, X, y, lr_type, regu, lamda):
        '''
        Shared setup for both fit routines: validate the schedule, store the
        regularisation settings, reset the recorded history, build the design
        matrix and draw the initial weights.

        :return: (X, y, theta) with X of shape (n, m[+1]), y and theta as columns.
        :raises ValueError: on an unknown lr_type or mismatched sample counts.
        '''
        if lr_type not in ('constant', 'inverse'):
            raise ValueError("lr_type must be 'constant' or 'inverse', got %r" % (lr_type,))

        # cost() and _penalty_grad() read these from the instance, so they
        # must be recorded here for the regularisation to take effect.
        self.regu = regu
        self.lamda = lamda

        # Start a fresh history so repeated fits do not mix their traces.
        self.thetas = []
        self.costs = []

        if isinstance(X, pd.DataFrame):
            X = X.to_numpy()
        if isinstance(y, pd.Series):
            y = y.to_numpy()
        X = np.asarray(X)
        y = np.asarray(y).reshape(-1, 1)

        n = X.shape[0]
        if len(y) != n:
            raise ValueError("X has %d samples but y has %d" % (n, len(y)))

        if self.fit_intercept:
            # Column of ones in front: theta[0] becomes the intercept.
            X = np.append(np.ones((n, 1)), X, axis=1)
        theta = np.random.randn(X.shape[1], 1)

        self.X = X
        self.y = y
        return X, y, theta

    @staticmethod
    def _lr_scale(lr_type, it):
        '''Multiplier applied to lr at (1-based) iteration ``it``.'''
        if lr_type == 'constant':
            return 1
        return 1/(it+1)

    def _penalty_grad(self, theta):
        '''(Sub)gradient of the penalty term that cost() adds for self.regu.'''
        if self.regu == "L1":
            return self.lamda*np.sign(theta)
        if self.regu == "L2":
            return 2*self.lamda*theta
        return 0

    def fit(self, X, y, n_iter=100000, lr=0.01, lr_type='constant',regu = None,lamda = 0.1):
        '''
        Train with the analytic gradient of the (optionally penalised) cross-entropy.

        :param X: (n, m) feature matrix, numpy array or DataFrame.
        :param y: n binary targets, numpy array or Series.
        :param n_iter: number of gradient steps.
        :param lr: base learning rate.
        :param lr_type: 'constant' (step = lr) or 'inverse' (step = lr/(it+1)).
        :param regu: "L1", "L2", or None for no penalty.
        :param lamda: penalty strength.
        :raises ValueError: on an unknown lr_type or mismatched sample counts.
        '''
        X, y, theta = self._prepare(X, y, lr_type, regu, lamda)
        n = X.shape[0]

        for it in range(1,n_iter+1):
            c = self._lr_scale(lr_type, it)

            y_hat = self.sigmoid(np.dot(X,theta))
            dtheta = (1/n)*np.dot(X.T,y_hat-y) + self._penalty_grad(theta)
            # Rebinding (not in-place) keeps each recorded theta distinct.
            theta = theta - c*lr*dtheta

            # Keeping track of our weights
            self.thetas.append(theta)

        self.coef_ = theta
        return

    def sigmoid(self,Z):
        '''Element-wise logistic function; uses autograd's numpy so cost() stays differentiable.'''
        return 1/(1+npy.exp(-Z))

    def cost(self,theta):
        '''
        Objective evaluated on the data stored by the last fit call:
        mean cross-entropy plus the penalty selected by self.regu / self.lamda.

        :param theta: weight column matching the stored design matrix.
        :return: scalar cost.
        '''
        y = self.y
        y_hat = self.sigmoid(npy.dot(self.X,theta))

        log_lik = y*npy.log(y_hat) + (1-y)*npy.log(1-y_hat)
        cost = -(1/len(y))*npy.sum(log_lik)

        if self.regu == "L1":
            cost = cost + self.lamda*(npy.sum(npy.absolute(theta)))
        elif self.regu == "L2":
            cost = cost + self.lamda*(npy.sum(npy.square(theta)))
        return cost

    def fit_autograd(self, X, y, n_iter=10000, lr=0.01, lr_type='constant',regu = None,lamda = 0.1):
        '''
        Train by differentiating cost() with autograd.

        Parameters are the same as for fit(). In addition to the weight history,
        self.costs receives the cost evaluated before each update.

        :raises ValueError: on an unknown lr_type or mismatched sample counts.
        '''
        # Local import: gives the cost value and its gradient from one pass.
        from autograd import value_and_grad

        X, y, theta = self._prepare(X, y, lr_type, regu, lamda)

        cost_and_grad = value_and_grad(self.cost)
        for it in range(1,n_iter+1):
            c = self._lr_scale(lr_type, it)

            cost, dtheta = cost_and_grad(theta)
            # Rebinding (not `-=`) so the history does not alias one array.
            theta = theta - c*lr*dtheta

            # Keeping track of our weights
            self.thetas.append(theta)

            # Keeping track of our cost function value
            self.costs.append(cost)

        self.coef_ = theta
        return

    def predict(self, X):
        '''
        Predict class labels for X.

        :param X: (k, m) feature matrix, numpy array or DataFrame.
        :return: pd.Series of 0.0 / 1.0; a probability of exactly 0.5 maps to 0.
        :raises RuntimeError: if the model has not been fitted.
        '''
        if self.coef_ is None:
            raise RuntimeError("predict() called before fit()/fit_autograd()")

        if isinstance(X,pd.DataFrame):
            X = X.to_numpy()

        if self.fit_intercept:
            X = np.append(np.ones((X.shape[0],1)),X,axis = 1)

        y_hat = self.sigmoid(X.dot(self.coef_)).reshape(-1)
        return pd.Series(np.where(y_hat <= 0.5, 0.0, 1.0))

    def accuracy(self,y_hat,y):
        '''
        Percentage of positions where y_hat equals y.

        Inputs are compared by position, so pandas objects with differing
        indexes are handled the same way as plain arrays.

        :return: accuracy in percent (0-100).
        '''
        y_hat = np.asarray(y_hat).reshape(-1)
        y = np.asarray(y).reshape(-1)
        assert(y_hat.size == y.size)

        true_pred = int(np.count_nonzero(y_hat == y))
        accu = true_pred/y.size
        return accu*100


### Dataset Preprocessing

In [None]:
# Load the breast-cancer data as a feature frame and a target series.
cancer = load_breast_cancer()
features = cancer.feature_names

X = pd.DataFrame(cancer.data, columns=cancer.feature_names)
y = pd.Series(cancer['target'])

# Min-max normalisation of X: every column is rescaled by its own min and
# range (column-wise broadcasting replaces the explicit per-column loop).
col_min = X.min()
col_span = X.max() - col_min
X = (X - col_min) / col_span

### Part - a

In [None]:
regus = ["L1","L2"]

# For each penalty, train once with the analytic gradient and once with
# autograd, reporting the accuracy on the training data for both.
for regu in regus:
    print("\nUsing Regularisation",regu)
    for label, trainer in (
        ("Accuracy: ", LogisticRegression.fit),
        ("Accuracy(Autograd): ", LogisticRegression.fit_autograd),
    ):
        clf = LogisticRegression()
        trainer(clf, X, y, regu=regu, lamda=0.1)
        pred = clf.predict(X)
        print(label, clf.accuracy(pred, y))


Using Regularisation L1
Accuracy:  97.18804920913884
Accuracy(Autograd):  94.37609841827768

Using Regularisation L2
Accuracy:  97.89103690685414
Accuracy(Autograd):  94.02460456942003


### Part - b

In [None]:
def q_b(regu,lamdas,X,y,random_state=None):
    """
    Nested 3x3 cross-validation for choosing the penalty strength.

    For each of 3 outer folds, every candidate lamda is scored by 3-fold
    cross-validation on the outer-training part only; the lamda with the
    highest summed validation accuracy is then refit on the whole
    outer-training part and evaluated on the outer-test part. Results are
    printed. For regu == "L1" the (up to) three features with the largest
    absolute weights are printed as well, using the module-level `features`
    array for their names.

    :param regu: "L1" or "L2", forwarded to LogisticRegression.fit_autograd.
    :param lamdas: iterable of candidate penalty strengths.
    :param X: (n, m) numpy feature matrix.
    :param y: length-n numpy target array.
    :param random_state: optional seed forwarded to both KFold splitters so
        the folds are reproducible; None keeps the unseeded shuffling.
    """
    print("For Regularisation",regu,":")
    lamdas = list(lamdas)

    outer_cv = KFold(n_splits=3,shuffle=True,random_state=random_state)
    for cnt, (train_index, test_index) in enumerate(outer_cv.split(X), start=1):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Summed validation accuracy per candidate lamda over the inner folds.
        accu = {lamda: 0 for lamda in lamdas}

        inner_cv = KFold(n_splits=3,shuffle=True,random_state=random_state)
        for train_ind, val_ind in inner_cv.split(X_train):
            # The inner indices are positions within X_train, so they must
            # index X_train / y_train; indexing the full X would pick
            # unrelated rows and leak outer-test samples into the selection.
            X_tn, X_tt = X_train[train_ind], X_train[val_ind]
            y_tn, y_tt = y_train[train_ind], y_train[val_ind]
            for lamda in lamdas:
                LR = LogisticRegression()
                LR.fit_autograd(X_tn,y_tn,regu=regu,lamda=lamda)
                yh = LR.predict(X_tt)
                accu[lamda] += LR.accuracy(yh, y_tt)

        # First candidate (in the given order) with the highest score.
        best_lamda = max(accu, key=accu.get)

        LR = LogisticRegression()
        LR.fit_autograd(X_train,y_train,regu=regu,lamda=best_lamda)

        y_hat = LR.predict(X_test)
        ac = LR.accuracy(y_hat,y_test)
        print(cnt,". Value of optimum penalty term lambda",best_lamda,"giving accuracy",ac)

        if regu == "L1":
            theta = np.array(LR.coef_).reshape(-1)
            # theta[0] is the intercept (default fit_intercept=True), so
            # feature j corresponds to theta[j+1].
            ranked = sorted(((abs(w), j) for j, w in enumerate(theta[1:])), reverse=True)
            print("\t Following are the Three Most Important features :")
            for _, j in ranked[:3]:
                print("\t\t >",features[j])


In [None]:
# Nested CV for the L1 penalty over lamda = 0.01, 0.03, ..., 0.09.
q_b("L1", list(np.arange(0.01, 0.1, 0.02)), X.to_numpy(), y.to_numpy())

For Regularisation L1 :
1 . Value of optimum penalty term lambda 0.01 giving accuracy 92.63157894736842
	 Following are the Three Most Important features :
		 > mean concavity
		 > concavity error
		 > worst concave points
2 . Value of optimum penalty term lambda 0.03 giving accuracy 93.6842105263158
	 Following are the Three Most Important features :
		 > mean area
		 > worst perimeter
		 > texture error
3 . Value of optimum penalty term lambda 0.06999999999999999 giving accuracy 93.65079365079364
	 Following are the Three Most Important features :
		 > worst perimeter
		 > mean concave points
		 > worst concave points


In [None]:
# Nested CV for the L2 penalty over lamda = 0.01, 0.03, ..., 0.09.
q_b("L2", list(np.arange(0.01, 0.1, 0.02)), X.to_numpy(), y.to_numpy())

For Regularisation L2 :
1 . Value of optimum penalty term lambda 0.01 giving accuracy 90.52631578947368
2 . Value of optimum penalty term lambda 0.08999999999999998 giving accuracy 94.21052631578948
3 . Value of optimum penalty term lambda 0.08999999999999998 giving accuracy 94.17989417989418
