In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split

In [15]:
# Read the iris measurements; the path is resolved relative to the working directory.
irisDf = pd.read_csv(filepath_or_buffer='../datasets/Data_Q2/iris.csv')

# Preview the first five rows.
irisDf.head(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [16]:
# Number of rows per species label in the full data set.
irisDf.species.value_counts()

versicolor    50
setosa        50
virginica     50
Name: species, dtype: int64

In [17]:
# Binary classification needs exactly two classes, so keep every row whose species is not 'setosa'.
irisDf2 = irisDf.loc[irisDf['species'] != 'setosa']

In [18]:
# (rows, columns) remaining after the 'setosa' rows were filtered out.
irisDf2.shape

(100, 5)

In [19]:
# Number of rows per species label in the filtered frame.
irisDf2.species.value_counts()

versicolor    50
virginica     50
Name: species, dtype: int64

In [20]:
# Print the index range, column dtypes and non-null counts of the filtered frame.
irisDf2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 50 to 149
Data columns (total 5 columns):
sepal_length    100 non-null float64
sepal_width     100 non-null float64
petal_length    100 non-null float64
petal_width     100 non-null float64
species         100 non-null object
dtypes: float64(4), object(1)
memory usage: 4.7+ KB


In [21]:
# Encode the two remaining species as integer class labels.
irisDf3 = irisDf2.replace('versicolor', 0)        # versicolor -> class 0
# replace() on a string (object) column is not guaranteed to produce an integer
# column -- newer pandas versions keep it as object -- so pin the dtype explicitly.
irisDf4 = irisDf3.replace('virginica', 1).astype({'species': 'int64'})   # virginica -> class 1
irisDf4['species'].value_counts()

1    50
0    50
Name: species, dtype: int64

In [22]:

# Columns 0-3 are the four measurements; column 4 (species) is the class label.
# Cast explicitly to float64: .values on a frame whose columns have mixed dtypes
# may return an object array, which NumPy ufuncs such as np.exp cannot process.
features = irisDf4.values[:, :4].astype(np.float64)
target = irisDf4.values[:, 4].astype(np.float64)

# Hold out 33% of the rows for testing; random_state makes the shuffle reproducible.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=0.33, random_state=10)

In [23]:
from math import exp

# Sigmoid (logistic) function used to turn linear scores into probabilities.
def sigmoid(scores):
    """Return the logistic function 1 / (1 + exp(-scores)), element-wise.

    The value is computed in a numerically stable way: only exp(-|scores|) is
    ever evaluated, so large-magnitude inputs cannot overflow.

    Parameters
    ----------
    scores : scalar or array-like
        Real-valued inputs. Converted to float64, so object-dtype arrays
        holding numbers are accepted as well.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1] with the same shape as ``scores`` (a NumPy scalar
        for scalar input).
    """
    scores = np.asarray(scores, dtype=np.float64)
    decay = np.exp(-np.abs(scores))          # always in (0, 1]; never overflows
    # For x >= 0: 1 / (1 + e^-x).  For x < 0 the same value is e^x / (1 + e^x).
    result = np.where(scores >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array into a NumPy scalar and is a no-op otherwise.
    return result[()]



# Log-likelihood L(w): w is the vector of parameters we want to learn, chosen so that
# the probability of the observed class labels is maximized.

# In the functions below, `weights` are the parameters being learned, `features` and `target` are the
# inputs and their labels, and `scores` is the linear hypothesis (w^T x) before the sigmoid is applied.

def log_likelihood(features, target, weights):
    """Return the logistic-regression log-likelihood for the given weights.

    Computes sum_i( y_i * s_i - log(1 + exp(s_i)) ) with s = features @ weights.
    The log term is evaluated with np.logaddexp so that large scores do not
    overflow to inf.

    Parameters
    ----------
    features : array-like
        Feature matrix; its dot product with ``weights`` gives one score per row.
    target : array-like
        Labels y_i, one per row of ``features`` (the formula assumes 0/1 labels).
    weights : array-like
        Parameter vector w.

    Returns
    -------
    numpy.float64
        The summed log-likelihood.
    """
    features = np.asarray(features, dtype=np.float64)
    target = np.asarray(target, dtype=np.float64)
    scores = np.dot(features, weights)      # s_i = w^T x_i
    # log(1 + exp(s)) == logaddexp(0, s), which stays finite for large s.
    return np.sum(target * scores - np.logaddexp(0.0, scores))

# Differentiating the log-likelihood with respect to the weights gives the gradient used below.

def logistic_regression(features, target, num_steps, learning_rate, add_intercept = False):
    """Fit logistic-regression weights by batch gradient ascent on the log-likelihood.

    Each step computes predictions = sigmoid(features @ weights) and moves the
    weights along features.T @ (target - predictions), the gradient of the
    log-likelihood. The gradient is summed over rows, not averaged.

    Parameters
    ----------
    features : array-like, 2-D
        One row per sample, one column per feature.
    target : array-like
        One label per row of ``features`` (presumably 0/1 -- confirm with caller).
    num_steps : int
        Number of gradient-ascent iterations.
    learning_rate : float
        Step size applied to the gradient.
    add_intercept : bool, optional
        When True, a column of ones is prepended to ``features`` so that
        the first returned weight acts as the intercept.

    Returns
    -------
    numpy.ndarray
        Learned weights, one per column of ``features`` (plus the leading
        intercept weight when ``add_intercept`` is True).
    """
    # Coerce once to float64: arrays taken from DataFrame.values can be object-dtype,
    # and working in float64 avoids truncating the scores on every iteration.
    features = np.asarray(features, dtype=np.float64)
    target = np.asarray(target, dtype=np.float64)

    if add_intercept:
        intercept = np.ones((features.shape[0], 1))
        features = np.hstack((intercept, features))

    weights = np.zeros(features.shape[1])     # start from all-zero parameters

    for _ in range(num_steps):
        scores = np.dot(features, weights)    # linear hypothesis: w^T x
        predictions = sigmoid(scores)         # predicted probabilities

        # target - predictions is the per-sample error; projecting it onto the
        # features gives the gradient of the log-likelihood.
        output_error_signal = target - predictions
        gradient = np.dot(features.T, output_error_signal)

        # Gradient *ascent*: add the gradient because the log-likelihood is maximized.
        weights += learning_rate * gradient

    return weights

In [24]:
#call to the function we have defined
weights = logistic_regression(features, target,num_steps = 300000, learning_rate = 5e-5, add_intercept=True)

# these are the parameter values we have learned for optimum ( maximum ) confidence for given class label : frm training data

In [25]:
# accuracy of logistic regression with gradient descent 

data_with_intercept = np.hstack((np.ones((features.shape[0], 1)),features))
final_scores = np.dot(data_with_intercept, weights)
preds = np.round(sigmoid(final_scores))         # we are rounding final predicted prob. to neareset class label 0 or 1

print("Accuracy from scratch: {0}".format((preds == target).sum().astype(float) / len(preds)))

Accuracy from scratch: 0.97
