# Logistic Regression

### Importing Libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn import linear_model
from numpy.linalg import inv,det,multi_dot,norm
from sklearn.metrics import confusion_matrix
from itertools import combinations
import warnings
warnings.filterwarnings('ignore')

### Activation Function Used - Sigmoid

In [6]:
# Notebook-only cell: renders a remote image inline via IPython's rich display.
# Presumably the picture illustrates the sigmoid curve (inferred from the URL
# and the heading above) -- requires network access when the cell is executed.
from IPython.display import Image
# NOTE(review): width/height of 10000 look far larger than any screen -- confirm
# the intended display size.
Image(url= "https://static.javatpoint.com/tutorial/machine-learning/images/logistic-regression-in-machine-learning.png", width=10000, height=10000)

### Creating Class

In [None]:
class Logistic:
    """Binary logistic-regression classifier trained with batch gradient descent.

    After training, ``weights`` and ``bias`` are dictionaries keyed by a
    hyperplane index (0, 1, ...).  Every entry holds the same fitted
    parameters; the dictionary layout is kept so existing callers that print
    or iterate over these attributes keep working.  ``cost`` holds the
    log-loss recorded at each training iteration.
    """

    def __init__(self):
        self.x = None
        self.y = None
        self.weights = None
        self.bias = None
        self.cost = []

    def _require_trained(self):
        """Raise ``RuntimeError`` when no fitted hyperplane is available."""
        if not self.weights or not self.bias:
            raise RuntimeError(
                "Model is not trained; call Logistic_Train first.")

    def _predict_labels(self, x):
        """Return the 0/1 class predictions for ``x`` as a flat int array."""
        self._require_trained()
        x = np.asarray(x, dtype=float)
        # All stored hyperplanes are identical; use the last one, which is
        # the one the original per-index loop ended up using.
        key = len(self.bias) - 1
        scores = np.dot(x, self.weights[key]) + self.bias[key]
        probabilities = np.ravel(self.sigmoid_func(scores))
        return (probabilities > 0.5).astype(int)

    def Error_logit(self, x, y):
        """Return the mean squared error between predicted and true labels.

        For 0/1 labels this equals the misclassification rate.

        Args:
            x: feature matrix of shape (n_samples, n_features).
            y: true labels, any shape holding n_samples values.

        Raises:
            RuntimeError: if the model has not been trained.
        """
        predicted = self._predict_labels(x)
        actual = np.ravel(np.asarray(y))
        return float(np.mean(np.square(predicted - actual)))

    def kfold_logistic(self, x, y):
        """Pick the learning rate with the lowest 5-fold validation error.

        Learning rates 1e-6 ... 10 (powers of ten) are tried.  For every
        fold the model is fitted on the training part only and scored on the
        held-out part.  The model is left fitted on the last fold tried, so
        callers should retrain with the returned rate.

        Returns:
            The learning rate with the smallest mean validation error.
        """
        kf = KFold(n_splits=5)
        x = np.array(x)
        y = np.array(y)

        mean_errors = []
        alpha_range = [10**i for i in range(-6, 2)]
        for alpha in alpha_range:
            fold_errors = []
            for train_index, test_index in kf.split(x, y):
                # Fit on the training fold only; fitting on all of x, y would
                # leak the validation samples into the model.
                self.Logistic_Train(
                    x[train_index], y[train_index], alpha, 1000, 10**-10)
                fold_errors.append(
                    self.Error_logit(x[test_index], y[test_index]))

            mean_error = sum(fold_errors) / len(fold_errors)
            mean_errors.append(mean_error)
            print(f"Learning rate: {alpha} mean error is : {mean_error}")

        optimal_alpha = alpha_range[int(np.argmin(mean_errors))]
        print("optimal Learning rate is-->{}".format(optimal_alpha))
        return optimal_alpha

    def sigmoid_func(self, z):
        """Return the logistic sigmoid 1 / (1 + exp(-z)) of ``z``."""
        return 1/(1+np.exp(-(z)))

    def Logistic_Train(self, x, y, alpha, itr, eps):
        """Fit weights and bias by batch gradient descent on the log-loss.

        Args:
            x: feature matrix of shape (n_samples, n_features).
            y: labels holding n_samples values (expected to be 0/1).
            alpha: learning rate.
            itr: maximum number of gradient steps.
            eps: training stops early once the summed log-loss drops below
                this value.

        Raises:
            ValueError: if ``x`` contains no samples.
        """
        x = np.asarray(x, dtype=float)
        r, c = x.shape
        if r == 0:
            raise ValueError("Cannot train on an empty data set.")
        y = np.asarray(y, dtype=float).reshape(r, 1)

        w = np.zeros((c, 1))
        w0 = 0.0
        cost_list = []
        tiny = 1e-15  # keeps log() finite when the sigmoid saturates
        step = alpha / r
        for _ in range(itr):
            h0 = self.sigmoid_func(np.dot(x, w) + w0)
            err = h0 - y
            w = w - step * np.dot(x.T, err)
            # Bias uses the same 1/r scaling as the weights so both follow
            # the gradient of the same objective.
            w0 = w0 - step * np.sum(err)
            h_safe = np.clip(h0, tiny, 1 - tiny)
            cost = -np.sum(y * np.log(h_safe) + (1 - y) * np.log(1 - h_safe))
            cost_list.append(cost)
            if cost < eps:
                break

        # One entry per pair of classes, as before, but never fewer than one
        # so prediction still works when y contains a single class.
        n_classes = len(np.unique(y))
        n_planes = max(1, n_classes * (n_classes - 1) // 2)
        self.weights = {i: w for i in range(n_planes)}
        self.bias = {i: w0 for i in range(n_planes)}
        self.cost = cost_list

    def confusion_mat(self, y_test, y_pred):
        """Return the confusion matrix (rows: true, columns: predicted)."""
        cm = confusion_matrix(y_test, y_pred)
        return cm

    def Logistic_Test(self, x, y):
        """Print the test error, confusion matrix and performance metrics.

        Args:
            x: feature matrix of shape (n_samples, n_features).
            y: true labels holding n_samples values.

        Raises:
            RuntimeError: if the model has not been trained.
        """
        predicted = self._predict_labels(x)
        actual = np.ravel(np.asarray(y))

        MSE = float(np.mean(np.square(predicted - actual)))
        print("")
        print('Mean Square Error is-->{}'.format(MSE))
        cm = self.confusion_mat(actual, predicted)
        print('Confusion Matrix is')
        print(cm)
        self.performance(cm)

    def plot_decison_boundary(self, x_test, y_test):
        """Plot the test points and the fitted decision boundary.

        Two-feature data gets a 2D line plot and three-feature data a 3D
        surface plot; any other dimensionality is silently skipped.

        Args:
            x_test: feature matrix (DataFrame or array) of shape
                (n_samples, n_features).
            y_test: labels; points labelled 1 are red, all others blue.

        Raises:
            RuntimeError: if the model has not been trained.
        """
        self._require_trained()
        points = np.asarray(x_test, dtype=float)
        labels = np.ravel(np.asarray(y_test))
        n_features = points.shape[1]
        colours = ['red' if label == 1 else 'blue' for label in labels]

        if n_features == 2:
            # 2D hyperplane: w0*x1 + w1*x2 + b = 0, solved for x2.
            x1 = points[:, 0]
            for key in self.bias:
                w = np.ravel(self.weights[key])
                b = float(self.bias[key])
                plt.plot(x1, -(b + x1 * w[0]) / w[1])
            plt.scatter(points[:, 0], points[:, 1], color=colours,
                        label="Actual data")
            plt.xlabel('x_1', fontsize=10)
            plt.ylabel('x_2', fontsize=10)
            plt.legend(loc='best')
            plt.show()

        elif n_features == 3:
            # 3D hyperplane: solved for x3 over a fixed [-40, 40] grid.
            fig = plt.figure(figsize=(8, 6))
            ax = fig.add_subplot(111, projection='3d')
            grid = np.linspace(-40, 40, 3)
            g1, g2 = np.meshgrid(grid, grid)
            for key in self.bias:
                w = np.ravel(self.weights[key])
                b = float(self.bias[key])
                ax.plot_surface(g1, g2, -(b + g1 * w[0] + g2 * w[1]) / w[2])

            ax.scatter3D(points[:, 0], points[:, 1], points[:, 2],
                         color=colours, label="Actual data")
            ax.set_xlabel('X1', fontsize=10)
            ax.set_ylabel('X2', fontsize=10)
            ax.set_zlabel('X3', fontsize=10)
            plt.legend(loc='best')
            plt.show()

        # Other dimensionalities cannot be plotted; nothing to do.

    def performance(self, cm):
        """Print and return classification metrics from a confusion matrix.

        The matrix is read with rows as true labels and columns as predicted
        labels, ordered by label value.  The first label (index 0) is the
        negative class and every other label counts as positive, so for 0/1
        labels ``cm[0][0]`` is TN and ``cm[1][1]`` is TP.

        Args:
            cm: square confusion matrix with at least two classes.

        Returns:
            dict with keys ``accuracy``, ``sensitivity``, ``specificity``,
            ``precision`` and ``f_measure``.  A metric whose denominator is
            zero is reported as NaN.

        Raises:
            ValueError: if ``cm`` is not a square matrix of size >= 2.
        """
        cm = np.asarray(cm, dtype=float)
        if cm.ndim != 2 or cm.shape[0] != cm.shape[1] or cm.shape[0] < 2:
            raise ValueError(
                "Confusion matrix must be square with at least two classes.")

        tn = float(cm[0, 0])
        fp = float(cm[0, 1:].sum())
        fn = float(cm[1:, 0].sum())
        tp = float(cm[1:, 1:].sum())

        def ratio(numerator, denominator):
            # Undefined metrics are NaN rather than an exception.
            return numerator / denominator if denominator else float('nan')

        accuracy = ratio(tp + tn, tp + tn + fp + fn)
        sn = ratio(tp, tp + fn)
        sp = ratio(tn, tn + fp)
        pr = ratio(tp, tp + fp)
        fm = ratio(2 * (pr * sn), pr + sn)
        print(" ")
        print("Performance of model")
        print(f"Accuracy is {accuracy}")
        print(f"Sensitivity is {sn}")
        print(f"Specificity is {sp}")
        print(f"Precission is {pr}")
        print(f"F_measur is {fm}")
        return {
            "accuracy": accuracy,
            "sensitivity": sn,
            "specificity": sp,
            "precision": pr,
            "f_measure": fm,
        }

    def test_model_std(self, data):
        """Run the full pipeline on data whose last column is the target.

        Call this when the data is in standard format: every column but the
        last is a feature and the last column holds the labels.
        """
        # Normalization, if required, would be applied to `data` here.
        x = data.iloc[:, :-1]
        y = data.iloc[:, -1]
        self.test_model_xy(x, y)

    def test_model_xy(self, x, y):
        """Split, tune, train, evaluate and plot for separate ``x`` and ``y``.

        Call this when the target column is stored apart from the features.
        A fresh model is created for the run; ``self`` is not modified.
        """
        r, c = x.shape
        # Non-positive labels become 0; positive labels are kept unchanged.
        # NOTE(review): labels above 1 are not collapsed to 1 here, although
        # the sigmoid model only fits 0/1 targets -- confirm the intended
        # encoding.
        y = np.where(y > 0, y, y*0)
        y = np.array(y).reshape(r, 1)
        X_train, X_test, y_train, y_test = train_test_split(
            x, y, test_size=0.3)
        model = Logistic()
        print("Train points Shape")
        print(np.shape(X_train), np.shape(y_train))
        print(" ")
        alpha1 = model.kfold_logistic(X_train, y_train)
        model.Logistic_Train(X_train, y_train, alpha1, 1000, 10**-10)
        print(" ")
        print('Parameters for hyperplane are-->')
        print(model.weights)
        print(" ")
        print("bais ")
        print(model.bias)
        model.Logistic_Test(X_test, y_test)
        model.plot_decison_boundary(X_test, y_test)
