# Half Space Classifier using Linear Programming

## Step 1: Import All required Libraries

In [39]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

## Step 2: Import scikit-learn's Logistic Regression to compare the accuracy of the defined model against the original (library) model

In [2]:
from sklearn.linear_model import LogisticRegression

## Step 3: Defining the class and functions for model training (`update` performs one gradient descent step)

In [14]:
class learning:
    """Binary logistic-regression classifier trained by batch gradient descent.

    The model scores a sample as ``sigmoid(X . W + b)`` and labels it 1 when
    that value exceeds 0.5, otherwise 0.

    Parameters
    ----------
    learningrate : float
        Step size applied to every gradient update.
    iterations : int
        Number of full-batch gradient steps performed by ``fit_model``.
    """

    def __init__(self, learningrate, iterations):
        self.learning_rate = learningrate
        self.iterations = iterations

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function of ``z`` without overflowing.

        ``exp`` is only ever evaluated on non-positive values, so scores of
        large magnitude (typical with unscaled features) saturate to 0 or 1
        instead of raising an overflow ``RuntimeWarning``.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))  # always within [0, 1]
        # z >= 0 uses 1 / (1 + exp(-z)); z < 0 uses the algebraically equal
        # exp(z) / (1 + exp(z)).
        return np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))

    def fit_model(self, X, Y):
        """Fit the weights and bias on ``X`` / ``Y`` and return ``self``.

        Parameters
        ----------
        X : 2-D array of shape (m, n)
            Feature matrix, one row per sample.
        Y : array with m elements
            0/1 labels, either flat ``(m,)`` or a column vector ``(m, 1)``.

        Raises
        ------
        ValueError
            If ``Y`` does not hold exactly one label per row of ``X``.
        """
        self.m, self.n = X.shape
        if np.size(Y) != self.m:
            raise ValueError(
                "Y must contain one label per row of X: got %d labels for %d rows"
                % (np.size(Y), self.m)
            )
        # Training always restarts from all-zero parameters.
        self.W = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y
        for _ in range(self.iterations):
            self.update()
        return self

    def update(self):
        """Apply one full-batch gradient-descent step to ``W`` and ``b``."""
        A = self._sigmoid(self.X.dot(self.W) + self.b)
        # Flatten the labels explicitly so (m,) and (m, 1) inputs behave alike.
        residual = A - np.asarray(self.Y).reshape(self.m)
        dW = np.dot(self.X.T, residual) / self.m
        db = np.sum(residual) / self.m
        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db
        return self

    def predict(self, X):
        """Return an array of 0/1 labels for the rows of ``X``.

        A sample is labelled 1 only when its sigmoid score is strictly
        greater than 0.5.
        """
        Z = self._sigmoid(X.dot(self.W) + self.b)
        return np.where(Z > 0.5, 1, 0)

## Step 4: Reading the CSV file for the dataset (for this, I uploaded the file to Google Colab)

In [15]:
# Load the dataset; the CSV is expected in the current working directory.
df = pd.read_csv('diabetes.csv')
print(df)

# NumPy array holding every row and column of the frame.
values = df.values
print(values)

     Pregnancies  Glucose  ...  Age  Outcome
0              6      148  ...   50        1
1              1       85  ...   31        0
2              8      183  ...   32        1
3              1       89  ...   21        0
4              0      137  ...   33        1
..           ...      ...  ...  ...      ...
763           10      101  ...   63        0
764            2      122  ...   27        0
765            5      121  ...   30        0
766            1      126  ...   47        1
767            1       93  ...   23        0

[768 rows x 9 columns]
[[  6.    148.     72.    ...   0.627  50.      1.   ]
 [  1.     85.     66.    ...   0.351  31.      0.   ]
 [  8.    183.     64.    ...   0.672  32.      1.   ]
 ...
 [  5.    121.     72.    ...   0.245  30.      0.   ]
 [  1.    126.     60.    ...   0.349  47.      1.   ]
 [  1.     93.     70.    ...   0.315  23.      0.   ]]


## Step 5: Store the features and the target separately in X and Y

In [24]:
# All columns but the last are features; the last column is the target.
# Slicing with -1: (rather than indexing with -1) keeps Y two-dimensional.
X, Y = df.iloc[:, :-1].values, df.iloc[:, -1:].values
print(X.shape, Y.shape, sep="\n")

(768, 8)
(768, 1)


## Step 6: 
A. Split the data in 70,30 for training and testing the model 

In [25]:
# 70/30 train/test split with a fixed random_state.
# NOTE: later cells reassign these same four names, so when the notebook is
# run top to bottom only the last split executed feeds the models.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))


(537, 8) (231, 8) (537, 1) (231, 1)


B. Split the data in 80,20 for training and testing the model 

In [27]:
# 80/20 train/test split with a fixed random_state; this reassigns the
# X_train / X_test / y_train / y_test names.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=1,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

(614, 8) (154, 8) (614, 1) (154, 1)


C. Split the data in 90,10 for training and testing the model

In [33]:
# 90/10 train/test split with a fixed random_state; this reassigns the
# X_train / X_test / y_train / y_test names.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=1,
)
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))


(691, 8) (77, 8) (691, 1) (77, 1)


## Step 7: Creating model object and passing values.

In [34]:
# Custom gradient-descent classifier: step size 0.01, 1000 update iterations.
model = learning(learningrate=0.01, iterations=1000)

## Step 8: Fitting model.

In [35]:
# Train on the current training split. fit_model returns the model itself,
# which is why this cell echoes the object's repr.
model.fit_model(X_train, y_train)

<__main__.learning at 0x7fcd072fb490>

## Step 9: Creating the model provided by scikit-learn and fitting that model

In [36]:
# Reference model: scikit-learn's logistic regression with default settings.
model1 = LogisticRegression()
# y_train is a column vector of shape (n, 1); scikit-learn expects a 1-D label
# array and emits a DataConversionWarning otherwise, so flatten it here.
model1.fit(X_train, np.ravel(y_train))

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

## Step 10: Predicting the values of both models

In [37]:
# Predict the test-set labels with the custom model (target) and with the
# scikit-learn model (target1), then show both arrays.
target, target1 = model.predict(X_test), model1.predict(X_test)
print(target, target1)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0] [0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 1 0
 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 0 1 1 1 1 1 0
 1 0 1]


## Step 11: Checking for Accuracy

In [38]:
# Flatten everything to 1-D first: y_test is a column vector, and comparing
# an (n, 1) array with an (n,) array would broadcast to an (n, n) matrix.
y_true = np.ravel(y_test)
pred_defined = np.ravel(target)
pred_sklearn = np.ravel(target1)

# c is the number of test samples. It is taken from the data directly rather
# than from a hand-incremented loop variable.
c = y_true.size
if c == 0:
    raise ValueError("Cannot compute accuracy: the test set is empty.")

# v1 / v2 count the correct predictions of the defined and sklearn models.
v1 = int(np.count_nonzero(y_true == pred_defined))
v2 = int(np.count_nonzero(y_true == pred_sklearn))

print("Accuracy of Defined Model :",(v1/c)*100)
print("Accuracy of sklearn Model :",(v2/c)*100)

Accuracy of Defined Model : 58.44155844155844
Accuracy of sklearn Model : 77.92207792207793
