<a href="https://colab.research.google.com/github/ProtikBose/Machine-Learning-Algorithms/blob/master/Logistic%20Regression/irisCode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
# Import all the tools we need
from sklearn.preprocessing import LabelEncoder
# Regular EDA (exploratory data analysis) and plotting libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# we want our plots to appear inside the notebook
%matplotlib inline 

# Models from Scikit-Learn
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression

# Model Evaluations
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error

**CSV reading**

In [45]:
# Location of the iris CSV (a Colab "/content/drive" path).
DATA_PATH = "/content/drive/My Drive/Machine Learning Algorithm/Logistic Regression/iris.csv"

# Read the file into a DataFrame and preview the first rows.
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Output
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [46]:
# Show the column labels of the loaded frame.
df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species',
       'Output'],
      dtype='object')

**Preprocessing**

In [47]:
# Show the dtype of each column, to spot columns that are not yet numeric.
df.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
species          object
Output            int64
dtype: object

In [48]:
# Build a LabelEncoder and learn the distinct species labels.
labelencoder = LabelEncoder()
labelencoder.fit(df['species'])

# Overwrite the text labels with their integer codes, then show the encoded column.
df['species'] = labelencoder.transform(df['species'])
df['species'].to_numpy()

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [49]:
# Divide every measurement column by that column's own maximum value.
measurement_cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
df[measurement_cols] = df[measurement_cols] / df[measurement_cols].max()
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,Output
0,0.728571,0.795455,0.27451,0.111111,0,1
1,0.7,0.681818,0.27451,0.111111,0,1
2,0.671429,0.727273,0.254902,0.111111,0,1
3,0.657143,0.704545,0.294118,0.111111,0,1
4,0.714286,0.818182,0.27451,0.111111,0,1


**Train-Test Split**

In [50]:
# Features: the four measurement columns only.
# 'species' is dropped together with the target: in the data preview the setosa
# rows carry Output = 1, so the encoded species appears to determine Output
# directly. Keeping it in X would leak the label into the features.
X = df.drop(columns=["species", "Output"])
Y = df['Output']

# Random seed for reproducibility (kept for any code that uses the global NumPy RNG)
np.random.seed(42)

# Split into train & test set; random_state pins the shuffle explicitly so the
# split does not depend on how much of the global RNG was consumed beforehand.
X_train, X_test, y_train, y_test = train_test_split(X, # independent variables
                                                    Y, # dependent variable
                                                    test_size = 0.2, # fraction of data to use for test set
                                                    random_state = 42)
                                                    

**Model Training**

In [51]:
#Define the required Sigmoid function
def sigmoid(z):
    """Numerically stable logistic function 1 / (1 + exp(-z)).

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); converted to a float array.

    Returns
    -------
    numpy scalar for scalar input, otherwise an ndarray with the shape of ``z``.
    """
    z = np.asarray(z, dtype=float)
    # exp(-|z|) is always in (0, 1], so it can never overflow; the naive
    # exp(-z) overflows (RuntimeWarning) for large negative z.
    e = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    result = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # Indexing with () turns a 0-d array into a NumPy scalar and leaves n-d arrays as-is.
    return result[()]

In [52]:
def fit_implementation_Raw(X_train,Y_train, learning_rate=0.0005, max_iteration=1000, verbose=False):
  """Fit logistic regression by batch gradient ascent, looping over samples one by one.

  Parameters
  ----------
  X_train : 2-D array-like, one row per sample (without the bias column).
  Y_train : array-like of 0/1 labels, one per row of X_train.
  learning_rate : step size applied to each sample's gradient contribution.
  max_iteration : number of full passes over the training data.
  verbose : if True, print the iteration number, the total log-likelihood and
      theta every 100 iterations.

  Returns
  -------
  theta : ndarray of shape (n_features + 1, 1); theta[0] is the bias term.

  Raises
  ------
  ValueError : if X_train and Y_train have a different number of samples.
  """
  #Adding a column of 1's so that the first element of each input is always 1
  #It would be multiplied with theta_0 later
  X_train = np.insert(np.asarray(X_train, dtype=float), 0, values=1, axis=1)
  Y_train = np.asarray(Y_train, dtype=float).ravel()
  if X_train.shape[0] != Y_train.shape[0]:
    # zip() below would otherwise silently ignore the surplus rows/labels
    raise ValueError("X_train and Y_train must contain the same number of samples")
  no_attributes = X_train.shape[1]

  #Initialize model parameters theta
  theta = np.zeros((no_attributes,1))

  #Smallest offset from 0 and 1 used when taking logs, so log(0) never occurs
  eps = np.finfo(float).eps

  #Run number of iterations
  for icount in range(max_iteration):
    report = verbose and icount%100==0
    #delta is the quantity that will be added with theta during updating theta
    delta = np.zeros((no_attributes,1))
    totalLogLikelihood = 0.0

    #Check each data point
    for instance, actualOutput in zip(X_train,Y_train):
      instance = instance.reshape(no_attributes,1)
      predictedValue = float(np.squeeze(sigmoid(np.dot(theta.T, instance))))

      #Gradient of the log-likelihood for this data point, scaled by the step size
      delta += learning_rate*(instance*(actualOutput-predictedValue))

      #The log-likelihood is only needed for the progress report
      if report:
        clipped = min(max(predictedValue, eps), 1-eps)
        totalLogLikelihood += actualOutput*np.log(clipped)+(1-actualOutput)*np.log(1-clipped)

    theta = theta + delta

    #After each 100 iteration, print the status
    if report:
      print(icount)
      print(totalLogLikelihood)
      print(theta)

  return theta

In [53]:
def fit_implementation_Matrix(X_train, Y_train, learning_rate=0.0005, max_iteration=1000, verbose=False):
  """Fit logistic regression by batch gradient ascent using matrix operations.

  Parameters
  ----------
  X_train : 2-D array-like, one row per sample (without the bias column).
  Y_train : array-like of 0/1 labels, one per row of X_train.
  learning_rate : step size applied to the summed gradient.
  max_iteration : number of gradient steps.
  verbose : if True, print the iteration number, the total log-likelihood and
      theta every 100 iterations.

  Returns
  -------
  theta : ndarray of shape (n_features + 1, 1); theta[0] is the bias term.

  Raises
  ------
  ValueError : if X_train and Y_train have a different number of samples.
  """
  #Adding a column of 1's so that the first element of each input is always 1
  #It would be multiplied with theta_0 later
  X_train = np.insert(np.asarray(X_train, dtype=float), 0, values=1, axis=1)
  #Column vector of labels so it lines up with the (n_samples, 1) predictions
  Y_train = np.asarray(Y_train, dtype=float).reshape(-1,1)
  if X_train.shape[0] != Y_train.shape[0]:
    raise ValueError("X_train and Y_train must contain the same number of samples")
  no_attributes = X_train.shape[1]

  #Initialize model parameters theta
  theta = np.zeros((no_attributes,1))

  #Smallest offset from 0 and 1 used when taking logs, so log(0) never occurs
  eps = np.finfo(float).eps

  #Run number of iterations
  for icount in range(max_iteration):
    predictedValue = sigmoid(np.dot(X_train,theta))
    diff = Y_train - predictedValue

    #X^T (y - p) is the gradient of the log-likelihood summed over all samples;
    #delta is the quantity that will be added with theta
    delta = learning_rate*np.dot(X_train.T, diff)
    theta = theta + delta

    #After each 100 iteration, print the status
    if verbose and icount%100==0:
      clipped = np.clip(predictedValue, eps, 1-eps)
      totalLogLikelihood = np.sum(Y_train*np.log(clipped) + (1-Y_train)*np.log(1-clipped))
      print(icount)
      print(totalLogLikelihood)
      print(theta)

  return theta


In [54]:
def predictionMatrix(X_test, Y_test, theta):
  """Classify X_test with the fitted theta (vectorised) and report the accuracy.

  Parameters
  ----------
  X_test : 2-D array-like, one row per sample (without the bias column).
  Y_test : array-like of 0/1 labels, one per row of X_test.
  theta : fitted parameters, bias first; any shape holding n_features + 1 values.

  Returns
  -------
  Accuracy in percent (correct predictions * 100 / number of samples).
  Also prints the correct count, wrong count and accuracy.
  """
  #Adding a column of 1's so that the first element of each input is always 1
  #It would be multiplied with theta_0 later
  X_test = np.insert(np.asarray(X_test), 0, values=1, axis=1)

  #Force labels and theta into column vectors. With a 1-D theta the predictions
  #would be 1-D and the comparison below would broadcast to an (n, n) matrix,
  #giving a wrong count.
  Y_test = np.asarray(Y_test).reshape(-1,1)
  theta = np.asarray(theta).reshape(-1,1)

  #Probability of belonging to class 1, thresholded at 0.5
  predictedValue = sigmoid(np.dot(X_test,theta))
  predictedOutput = (predictedValue >= 0.5).astype(int)

  correctCount = np.sum(Y_test == predictedOutput)
  totalCount = len(Y_test)
  print("Total Correct Count: ",correctCount," Total Wrong Count: ",totalCount-correctCount," Accuracy: ",(correctCount*100)/(totalCount))
  return correctCount*100/(totalCount)

In [55]:
def predictionRaw(X_test, Y_test, theta):
  """Classify X_test one sample at a time with the fitted theta and report the accuracy.

  A sample is assigned class 1 when its sigmoid score is at least 0.5, else class 0.
  Prints the correct count, wrong count and accuracy, and returns the accuracy
  in percent.
  """
  # Prepend the constant-1 bias feature that pairs with theta_0
  augmented = np.insert(X_test, 0, values=1, axis=1)
  n_params = augmented.shape[1]

  hits = 0
  seen = 0

  # Score every sample individually
  for row, label in zip(augmented, Y_test):
    column = row.reshape(n_params, 1)
    # Probability of belonging to class 1
    probability = sigmoid(np.dot(theta.T, column)).squeeze()
    guess = 1 if probability >= 0.5 else 0
    if guess == label:
      hits += 1
    seen += 1

  print("Total Correct Count: ",hits," Total Wrong Count: ",seen-hits," Accuracy: ",(hits*100)/(seen))
  return hits*100/(seen)


In [56]:
# Train with the per-sample loop implementation, then score it on the held-out split.
parameters = fit_implementation_Raw(
    X_train.to_numpy(),
    y_train.to_numpy(),
    learning_rate=0.01,
    max_iteration=1000,
)
accuracy = predictionRaw(X_test.to_numpy(), y_test.to_numpy(), theta=parameters)
print(accuracy)

Total Correct Count:  20  Total Wrong Count:  0  Accuracy:  100.0
100.0


In [57]:
# Train with the vectorised implementation, then score it on the held-out split.
parameters = fit_implementation_Matrix(
    X_train.to_numpy(),
    y_train.to_numpy(),
    learning_rate=0.01,
    max_iteration=1000,
)
accuracy = predictionMatrix(X_test.to_numpy(), y_test.to_numpy(), theta=parameters)
print(accuracy)

Total Correct Count:  20  Total Wrong Count:  0  Accuracy:  100.0
100.0
