# Logistic regression

## Step 1: Import All required Libraries



In [38]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

## Step 2: Defining class and functions for model training (update is used for gradient decent)

In [39]:
class learning() :
    # Constructor
    def __init__( self, learningrate, iterations ) :        
        self.learningrate = learningrate        
        self.iterations = iterations 
    # fit_model used for gradient descent 
    def fit_model( self, X, Y ) :               
        self.m, self.n = X.shape              
        self.W = np.zeros( self.n )        
        self.b = 0        
        self.X = X        
        self.Y = Y
        # Loop for gradient Descent iterations
        for i in range( self.iterations ) :            
            self.update()            
        return self
    # updating gradients
    def update( self ) :           
        T = 1 / ( 1 + np.exp( - ( self.X.dot( self.W ) + self.b ) ) )      
        t = ( T - self.Y.T )        
        t = np.reshape( t, self.m )        
        dW = np.dot( self.X.T, t ) / self.m         
        db = np.sum( t ) / self.m   
        self.W = self.W - self.learningrate * dW    
        self.b = self.b - self.learningrate * db
        return self
    # predict the values 
    def predict( self, X ) :    
        Z = 1 / ( 1 + np.exp( - ( X.dot( self.W ) + self.b ) ) )        
        Y = np.where( Z > 0.5, 1, 0 )        
        return Y

## Step 3: Reading CSV File for dataset (For this i have uploded the file on google colab)

In [40]:
df = pd.read_csv('diabetes.csv')
values = df.iloc[0:].values
print(df)
print(values)

     Pregnancies  Glucose  ...  Age  Outcome
0              6      148  ...   50        1
1              1       85  ...   31        0
2              8      183  ...   32        1
3              1       89  ...   21        0
4              0      137  ...   33        1
..           ...      ...  ...  ...      ...
763           10      101  ...   63        0
764            2      122  ...   27        0
765            5      121  ...   30        0
766            1      126  ...   47        1
767            1       93  ...   23        0

[768 rows x 9 columns]
[[  6.    148.     72.    ...   0.627  50.      1.   ]
 [  1.     85.     66.    ...   0.351  31.      0.   ]
 [  8.    183.     64.    ...   0.672  32.      1.   ]
 ...
 [  5.    121.     72.    ...   0.245  30.      0.   ]
 [  1.    126.     60.    ...   0.349  47.      1.   ]
 [  1.     93.     70.    ...   0.315  23.      0.   ]]


## Step 4: Store the features and target in X , Y separately

In [41]:
X = df.iloc[:,:-1].values
Y = df.iloc[:,-1:].values
print(X.shape)
print(Y.shape)

(768, 8)
(768, 1)


## Step 5: Splitting the Data in different ratios
A. Split the data in 70,30 for training and testing the model 

In [42]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


(537, 8) (231, 8) (537, 1) (231, 1)


B. Split the data in 80,20 for training and testing the model 

In [43]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(614, 8) (154, 8) (614, 1) (154, 1)


C. Split the data in 90,10 for training and testing the model

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=1)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)


(691, 8) (77, 8) (691, 1) (77, 1)


## Step 6: Creating model object and passing values.

In [45]:
model = learning( learningrate = 0.01, iterations = 10000 )

## Step 7: Fitting model.

In [46]:
model.fit_model( X_train, y_train )

<__main__.learning at 0x7fb9cb0835d0>

## Step 8: Predicting the values by using defined model

In [47]:
target = model.predict( X_test )
t = model.predict( X_train )
print(target,t,y_test)

[1 1 1 0 0 0 0 1 0 0 0 0 1 1 1 1 0 0 1 0 1 0 1 1 0 0 0 1 0 1 0 1 0 1 0 0 0
 0 0 1 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 0
 1 1 1] [1 1 1 0 1 0 1 0 0 1 1 0 1 1 0 0 0 1 1 1 0 1 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0
 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1 0 1 0 0 1 1 1 1 0 0 0 1 0 1 1 1 1 0 0 0 1 1
 1 0 1 1 1 0 0 0 1 1 1 0 0 1 0 0 1 1 1 1 1 0 0 1 0 1 0 0 0 1 1 0 1 0 0 0 1
 1 1 1 0 0 0 0 1 1 0 1 1 1 0 0 0 1 1 0 0 1 1 0 1 0 1 1 1 1 0 0 1 1 1 0 1 1
 1 0 1 0 1 1 1 0 0 0 0 1 1 1 1 0 1 0 0 1 0 1 1 0 0 1 1 1 1 1 1 0 0 0 1 1 1
 0 0 0 0 0 0 1 1 1 0 0 0 0 1 0 1 0 1 0 1 0 1 1 1 1 0 1 0 1 0 0 0 1 1 0 1 0
 1 0 1 1 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 1 0 0 1 1 1 1 0 1 0 0 1 1 0 1
 1 0 1 0 1 1 1 0 0 0 0 1 1 0 1 0 1 0 1 1 0 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0
 0 0 0 0 1 1 1 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 1 1 0 0 1
 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1 1 0 1 1 0 1 0 0 1 1 0 1 0 0
 1 1 1 1 0 1 0 0 

## Step 9: Checking for Accuracy

In [52]:
v1 = 0  
v2 = 0
c = 0
i=0    
for c in range( np.size( target ) ) :  
  if y_test[c] == target[c] :            
    v1 = v1 + 1
for i in range( np.size( t ) ) :
  if y_train[i] == t[i] :            
    v2 = v2 + 1
  i = i + 1
print("Accuracy of Defined Model on test data :",(v1/c)*100)     
print("Accuracy of Defined Model on train data:",(v2/i)*100)   

Accuracy of Defined Model on test data : 65.78947368421053
Accuracy of Defined Model on train data: 64.97829232995659
