In [245]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Load the iris dataset; the path is relative to the notebook's working directory.
irisDf = pd.read_csv('../datasets/Data_Q2/iris.csv')

# Preview the first rows to confirm the columns were read as expected.
irisDf.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [246]:
# Count how many rows each species has in the full dataset.
irisDf['species'].value_counts()

versicolor    50
setosa        50
virginica     50
Name: species, dtype: int64

In [247]:
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn (< 0.18)
    from sklearn.cross_validation import train_test_split

# Splitting a DataFrame returns DataFrames, so the column names are preserved.
# A fixed random_state makes the split reproducible across kernel restarts.
traindf, testdf = train_test_split(irisDf, test_size=0.33, random_state=42)
traindf['species'].value_counts()

virginica     37
setosa        34
versicolor    29
Name: species, dtype: int64

In [248]:
# One-vs-rest encodings of the training frame: in each frame the species of
# interest is relabelled 0 and the two remaining species are relabelled 1.
irisSet = traindf.replace({'setosa': 0, 'versicolor': 1, 'virginica': 1})
irisVe = traindf.replace({'versicolor': 0, 'virginica': 1, 'setosa': 1})
irisVi = traindf.replace({'virginica': 0, 'versicolor': 1, 'setosa': 1})

In [249]:
# (rows, columns) of the setosa-vs-rest training frame.
irisSet.shape

(100, 5)

In [250]:
# Label counts in the virginica-vs-rest frame (virginica was encoded as 0, the rest as 1).
irisVi['species'].value_counts()

1    63
0    37
Name: species, dtype: int64

In [251]:
# Column dtypes and non-null counts of the setosa-vs-rest training frame.
irisSet.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100 entries, 94 to 92
Data columns (total 5 columns):
sepal_length    100 non-null float64
sepal_width     100 non-null float64
petal_length    100 non-null float64
petal_width     100 non-null float64
species         100 non-null int64
dtypes: float64(4), int64(1)
memory usage: 4.7 KB


In [252]:

# In every frame, columns 0-3 hold the four measurements and column 4 holds the label.

# setosa-vs-rest training arrays
features_train_set, target_train_set = irisSet.values[:, :4], irisSet.values[:, 4]

# virginica-vs-rest training arrays
features_train_vi, target_train_vi = irisVi.values[:, :4], irisVi.values[:, 4]

# versicolor-vs-rest training arrays
features_train_ve, target_train_ve = irisVe.values[:, :4], irisVe.values[:, 4]

# test arrays, shared by all three classifiers (labels are still the species names)
features_test, target_test = testdf.values[:, :4], testdf.values[:, 4]

In [289]:
from math import exp

# sigmoid or logistic function we are gonna use 
def sigmoid(scores):
    """Logistic function 1 / (1 + exp(-scores)), evaluated without overflow.

    Parameters
    ----------
    scores : scalar or array-like
        Raw linear scores. Non-floating input is converted to float64;
        floating input keeps its dtype.

    Returns
    -------
    Same shape as ``scores`` (a NumPy scalar for scalar input), values in [0, 1].
    """
    scores = np.asarray(scores)
    if not np.issubdtype(scores.dtype, np.floating):
        scores = scores.astype(np.float64)

    # exp() is only ever taken of a non-positive number, so it cannot overflow
    # (the naive form overflows and warns once -scores exceeds ~709 in float64).
    decay = np.exp(-np.abs(scores))
    non_negative_case = 1.0 / (1.0 + decay)     # identical to the textbook form for scores >= 0
    negative_case = decay / (1.0 + decay)       # algebraically equal form for scores < 0

    # [()] turns a 0-d result back into a scalar and leaves real arrays untouched.
    return np.where(scores >= 0, non_negative_case, negative_case)[()]



# Log-likelihood L(w), where w is the vector of parameters we want to learn:
# we look for the w that maximizes the probability of the observed class labels.

# Below, `weights` are the parameters being learned, `features` and `target` are the inputs and their labels,
# and `scores` is the linear hypothesis (wT.x) before the sigmoid function is applied.

def log_likelihood(features, target, weights):
    """Bernoulli log-likelihood of ``target`` under a logistic model.

    Computes ``sum_i( y_i * s_i - log(1 + exp(s_i)) )`` with ``s = features . weights``.

    Parameters
    ----------
    features : array-like, one row per sample.
    target : array-like of 0/1 labels, either 1-D or a column vector.
    weights : array-like of parameters, either 1-D or a column vector.

    Returns
    -------
    float
        The (non-positive) log-likelihood.
    """
    # Flatten both operands so a 1-D target combined with column-vector weights
    # cannot broadcast into an (m, m) matrix and silently inflate the sum.
    scores = np.asarray(np.dot(features, weights), dtype=np.float64).ravel()   # wT.x per sample
    labels = np.asarray(target, dtype=np.float64).ravel()                      # y_i

    # logaddexp(0, s) == log(1 + exp(s)) but stays finite for large s,
    # where the naive expression overflows to inf.
    return np.sum(labels * scores - np.logaddexp(0.0, scores))

# Differentiating the log-likelihood gives the gradient; its second derivative gives the Hessian. Both are used in the update below.

def logistic_regression(features, target, num_steps, learning_rate, add_intercept = False):
    """Fit binary logistic regression with damped Newton iterations.

    Each iteration maximizes the log-likelihood with the update
    ``w <- w + H^-1 . g``, where ``g = X^T (y - p) / m`` is the gradient averaged
    over the m samples and ``H = X^T diag(p (1 - p)) X`` is the un-averaged
    Hessian. Because only the gradient is averaged, every step is a Newton step
    scaled by 1/m.

    Parameters
    ----------
    features : array-like, shape (m, n)
        Sample matrix; converted to float64 (object-dtype arrays such as
        ``DataFrame.values`` on a mixed frame are accepted).
    target : array-like, m values
        0/1 labels, 1-D or a column vector.
    num_steps : int
        Number of iterations to run.
    learning_rate : float
        Accepted for interface compatibility only; the Newton update does not
        use it (the effective step scale is the 1/m described above).
    add_intercept : bool, default False
        When True, a leading column of ones is prepended to ``features``.

    Returns
    -------
    numpy.ndarray, shape (n, 1) or (n + 1, 1) with the intercept
        The learned weights, intercept first when ``add_intercept`` is True.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian becomes singular (for example when the predicted
        probabilities saturate at exactly 0 or 1).
    """
    # Coerce up front so the linear algebra below always runs in float64.
    features = np.asarray(features, dtype=np.float64)
    target = np.asarray(target, dtype=np.float64).reshape(-1, 1)

    if add_intercept:
        intercept = np.ones((features.shape[0], 1))
        features = np.hstack((intercept, features))

    m = features.shape[0]
    weights = np.zeros((features.shape[1], 1))     # start from all-zero parameters

    for _ in range(num_steps):
        predictions = sigmoid(np.dot(features, weights))      # p = sigmoid(wT.x), shape (m, 1)

        # Row-wise scaling by p(1 - p) equals diag(p(1 - p)) @ X without
        # materializing an m x m matrix on every iteration.
        curvature = predictions * (1.0 - predictions)
        hessian = np.dot(features.T, curvature * features)

        output_error_signal = target - predictions            # y - p
        gradient = np.dot(features.T, output_error_signal) / m

        # Solve H . step = g rather than forming the explicit inverse.
        # The sign is + because the log-likelihood is being maximized.
        weights += np.linalg.solve(hessian, gradient)

    return weights

In [295]:
import time as time_module   # aliased so the `time` result below does not shadow the module

# Fit one one-vs-rest classifier per species and time the three fits together.
# NOTE: learning_rate is passed to satisfy the function signature; the
# logistic_regression defined in this notebook does not read it.
t0 = time_module.perf_counter()

weights_set = logistic_regression(features_train_set, target_train_set, num_steps = 1000, learning_rate = 5e-5, add_intercept=True)
weights_vi = logistic_regression(features_train_vi, target_train_vi, num_steps = 1000, learning_rate = 5e-5, add_intercept=True)
weights_ve = logistic_regression(features_train_ve, target_train_ve, num_steps = 1000, learning_rate = 5e-5, add_intercept=True)

training_time = time_module.perf_counter() - t0
time = training_time   # legacy name, still read by the reporting cell further down

# weights_set / weights_vi / weights_ve are the parameters learned from the
# training data for the setosa, virginica and versicolor classifiers respectively.


In [296]:
# Score the test set with each one-vs-rest model (raw linear scores; no sigmoid is applied).

# All three models share the same design matrix: a leading column of ones for
# the intercept followed by the test features.
data_with_intercept = np.hstack((np.ones((features_test.shape[0], 1)), features_test))

final_scores_set = np.dot(data_with_intercept, weights_set)   # setosa-vs-rest scores
final_scores_vi = np.dot(data_with_intercept, weights_vi)     # virginica-vs-rest scores
final_scores_ve = np.dot(data_with_intercept, weights_ve)     # versicolor-vs-rest scores

In [297]:
#final_scores_set

In [298]:
# Number of test rows that received a score.
len(final_scores_set)

50

In [299]:
# Each one-vs-rest model was trained with its own species encoded as 0, so for
# every test row the model with the LOWEST score claims the row.
# Ties resolve in the order setosa, virginica, versicolor.
final_pred = list()

for score_set, score_vi, score_ve in zip(final_scores_set, final_scores_vi, final_scores_ve):
    lowest = min(score_set, score_vi, score_ve)
    if lowest == score_set:
        label = "setosa"
    elif lowest == score_vi:
        label = "virginica"
    else:
        label = "versicolor"
    final_pred.append(label)

# Accuracy: number of test rows whose predicted species matches the true one.
count = sum(1 for predicted, actual in zip(final_pred, target_test) if predicted == actual)

print("accruacy is : ",100 * (count/len(final_scores_set)))
print("training time is : ", round( time, 3), "seconds")

accruacy is :  96.0
training time is :  0.256 seconds
