In [4]:
import numpy as np

In [5]:
class Logistic_Regression():
  #declaring learning rate and number of iterations
  def __init__(self, learning_rate, no_of_iterations):
    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations

  #fit function to train the model with dataset
  def fit(self, X, Y):
    #number of data points in the dataset (number of rows) ->  m
    #number of input features in the dataset (number of columns) -> n
    self.m, self.n = X.shape

    #initiating weight & bias value
    self.w = np.zeros(self.n)
    self.b = 0

    self.X = X
    self.Y = Y


    #implementing Gradient Descent for Optimization
    for i in range(self.no_of_iterations):
      self.update_weights()

  def update_weights(self):
    #Y_hat formula (sigmoid function)
    Y_hat = 1 / (1 + np.exp( - (self.X.dot(self.w) + self.b ) ))

    #derivaties
    dw = (1/self.m)*np.dot(self.X.T, (Y_hat - self.Y))
    db = (1/self.m)*np.sum(Y_hat - self.Y)

    #updating the weights & bias using gradient descent
    self.w = self.w - self.learning_rate * dw
    self.b = self.b - self.learning_rate * db


  #prediction
  def predict(self, X):
    Y_pred = 1 / (1 + np.exp( - (X.dot(self.w) + self.b ) ))
    Y_pred = np.where( Y_pred > 0.5, 1, 0)
    return Y_pred

In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [7]:
pumpkin_dataset = pd.read_csv('/content/Pumpkin_Seeds_Dataset.csv')

In [8]:
pumpkin_dataset.head()

Unnamed: 0,Area,Perimeter,Major_Axis_Length,Minor_Axis_Length,Convex_Area,Equiv_Diameter,Eccentricity,Solidity,Extent,Roundness,Aspect_Ration,Compactness,Class
0,56276,888.242,326.1485,220.2388,56831,267.6805,0.7376,0.9902,0.7453,0.8963,1.4809,0.8207,1
1,76631,1068.146,417.1932,234.2289,77280,312.3614,0.8275,0.9916,0.7151,0.844,1.7811,0.7487,1
2,71623,1082.987,435.8328,211.0457,72663,301.9822,0.8749,0.9857,0.74,0.7674,2.0651,0.6929,1
3,66458,992.051,381.5638,222.5322,67118,290.8899,0.8123,0.9902,0.7396,0.8486,1.7146,0.7624,1
4,66107,998.146,383.8883,220.4545,67117,290.1207,0.8187,0.985,0.6752,0.8338,1.7413,0.7557,1


In [9]:
#number of rows and Columns in this dataset
pumpkin_dataset.shape

(2500, 13)

In [11]:
pumpkin_dataset['Class'].value_counts()

1    1300
0    1200
Name: Class, dtype: int64

In [14]:
#separating the features and target
features = pumpkin_dataset.drop(columns = 'Class', axis=1)
target = pumpkin_dataset['Class']

In [15]:
print(features)

       Area  Perimeter  Major_Axis_Length  Minor_Axis_Length  Convex_Area  \
0     56276    888.242           326.1485           220.2388        56831   
1     76631   1068.146           417.1932           234.2289        77280   
2     71623   1082.987           435.8328           211.0457        72663   
3     66458    992.051           381.5638           222.5322        67118   
4     66107    998.146           383.8883           220.4545        67117   
...     ...        ...                ...                ...          ...   
2495  79637   1224.710           533.1513           190.4367        80381   
2496  69647   1084.318           462.9416           191.8210        70216   
2497  87994   1210.314           507.2200           222.1872        88702   
2498  80011   1182.947           501.9065           204.7531        80902   
2499  84934   1159.933           462.8951           234.5597        85781   

      Equiv_Diameter  Eccentricity  Solidity  Extent  Roundness  \
0       

In [16]:
print(target)

0       1
1       1
2       1
3       1
4       1
       ..
2495    0
2496    0
2497    0
2498    0
2499    0
Name: Class, Length: 2500, dtype: int64


In [17]:
features

Unnamed: 0,Area,Perimeter,Major_Axis_Length,Minor_Axis_Length,Convex_Area,Equiv_Diameter,Eccentricity,Solidity,Extent,Roundness,Aspect_Ration,Compactness
0,56276,888.242,326.1485,220.2388,56831,267.6805,0.7376,0.9902,0.7453,0.8963,1.4809,0.8207
1,76631,1068.146,417.1932,234.2289,77280,312.3614,0.8275,0.9916,0.7151,0.8440,1.7811,0.7487
2,71623,1082.987,435.8328,211.0457,72663,301.9822,0.8749,0.9857,0.7400,0.7674,2.0651,0.6929
3,66458,992.051,381.5638,222.5322,67118,290.8899,0.8123,0.9902,0.7396,0.8486,1.7146,0.7624
4,66107,998.146,383.8883,220.4545,67117,290.1207,0.8187,0.9850,0.6752,0.8338,1.7413,0.7557
...,...,...,...,...,...,...,...,...,...,...,...,...
2495,79637,1224.710,533.1513,190.4367,80381,318.4289,0.9340,0.9907,0.4888,0.6672,2.7996,0.5973
2496,69647,1084.318,462.9416,191.8210,70216,297.7874,0.9101,0.9919,0.6002,0.7444,2.4134,0.6433
2497,87994,1210.314,507.2200,222.1872,88702,334.7199,0.8990,0.9920,0.7643,0.7549,2.2828,0.6599
2498,80011,1182.947,501.9065,204.7531,80902,319.1758,0.9130,0.9890,0.7374,0.7185,2.4513,0.6359


Data Pre Processing

In [18]:
scaler = StandardScaler()

In [19]:
scaler.fit(features)

In [20]:
standardized_data = scaler.transform(features)

In [21]:
print(standardized_data)

[[-1.78470346 -2.21575484 -2.32022415 ...  1.8737626  -1.77506344
   2.19727996]
 [-0.29478016 -0.56880361 -0.70091635 ...  0.93837685 -0.82486401
   0.84023019]
 [-0.66135033 -0.43294002 -0.36939513 ... -0.43161453  0.07405883
  -0.21148339]
 ...
 [ 0.53695644  0.7326892   0.90028724 ... -0.65517709  0.76312751
  -0.83346454]
 [-0.04737457  0.48215494  0.8057821  ... -1.30619127  1.29646729
  -1.28581446]
 [ 0.31297387  0.27147071  0.11193101 ...  0.0316071  -0.21587543
   0.1183551 ]]


In [22]:
features = standardized_data
target = pumpkin_dataset['Class']

In [23]:
print(features)
print(target)

[[-1.78470346 -2.21575484 -2.32022415 ...  1.8737626  -1.77506344
   2.19727996]
 [-0.29478016 -0.56880361 -0.70091635 ...  0.93837685 -0.82486401
   0.84023019]
 [-0.66135033 -0.43294002 -0.36939513 ... -0.43161453  0.07405883
  -0.21148339]
 ...
 [ 0.53695644  0.7326892   0.90028724 ... -0.65517709  0.76312751
  -0.83346454]
 [-0.04737457  0.48215494  0.8057821  ... -1.30619127  1.29646729
  -1.28581446]
 [ 0.31297387  0.27147071  0.11193101 ...  0.0316071  -0.21587543
   0.1183551 ]]
0       1
1       1
2       1
3       1
4       1
       ..
2495    0
2496    0
2497    0
2498    0
2499    0
Name: Class, Length: 2500, dtype: int64


In [24]:
X_train,X_testeval,Y_train,Y_testeval = train_test_split(features,target,test_size=0.5,random_state=2)
X_test,X_eval,Y_test,Y_eval = train_test_split(X_testeval,Y_testeval,test_size=0.4,random_state=2)

In [25]:
print(features.shape, X_train.shape, X_test.shape)

(2500, 12) (1250, 12) (750, 12)


In [28]:
classifier = Logistic_Regression(learning_rate=0.001, no_of_iterations=50000)

In [29]:
classifier.fit(X_train, Y_train)

In [31]:
#Accuracy score on the training data
X_eval_prediction = classifier.predict(X_eval)
evaluation_data_accuracy = accuracy_score( Y_eval, X_eval_prediction)

In [32]:
X_eval_prediction.shape

(500,)

In [33]:
print('Accuracy score of the training data : ', evaluation_data_accuracy)

Accuracy score of the training data :  0.882


In [35]:
# accuracy score on the test data
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score( Y_test, X_test_prediction)

In [36]:
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.8706666666666667


In [37]:
from sklearn.metrics import accuracy_score,  precision_score, recall_score
precision_score(Y_eval, X_eval_prediction, average="macro")


0.8820760758196722

In [38]:
recall_score(Y_eval, X_eval_prediction, average="macro")


0.8819109951833064