In [5]:
# Iris flower classification with one-vs-all logistic regression in Python

In [2]:
# Loading libraries 
import numpy as np
import pandas as pd
import warnings 
warnings.filterwarnings('ignore')
from math import ceil
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.metrics import confusion_matrix #Confusion matrix
from sklearn.metrics import accuracy_score  
from sklearn.cross_validation import train_test_split
from pandas.tools.plotting import parallel_coordinates
from scipy import optimize as op



In [3]:
# Load Dataset
# 'Iris.csv' is a relative path, so it is resolved against the current working directory.
iris = pd.read_csv(filepath_or_buffer='Iris.csv')

In [4]:
# Remove the 'Id' column; the later cells only read the four measurement
# columns and 'Species'.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# keeps this cell idempotent: running it again after 'Id' is already gone
# does not raise KeyError.
iris = iris.drop('Id', axis=1, errors='ignore')

In [5]:
# Preview the first five rows of the table.
iris.head(n=5)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [41]:
# Data formation

# Variables
Species = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
# Feature columns, listed in the order they occupy columns 1..n of X.
feature_columns = ['PetalLengthCm', 'PetalWidthCm', 'SepalLengthCm', 'SepalWidthCm']
n = len(feature_columns)  # features
m = iris.shape[0]  # number of entries
k = len(Species)  # number of classes

# Inputs
# Column 0 keeps the value 1.0 (intercept term); columns 1..n hold the features.
X = np.ones((m, n + 1))
X[:, 1:] = iris[feature_columns].values

# Labels
# Reshape the underlying array rather than the Series: Series.reshape is
# deprecated/removed in newer pandas, while .values.reshape works everywhere.
y = iris['Species'].values.reshape((m, 1))

In [37]:
# Mean normalization
# Centre each feature column (1..n) on zero. Column 0 is the intercept column
# of ones and is deliberately skipped: subtracting its mean would turn it into
# all zeros and silently remove the intercept from the model.
X[:, 1:] = X[:, 1:] - X[:, 1:].mean(axis=0)

# NOTE(review): the column means are computed on the full data set, i.e. before
# the split below, so the test rows contribute to the centring.
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=11)


In [38]:
# Display the dimensions of the design matrix and of the label column.
np.shape(X), np.shape(y)

((150, 5), (150, 1))

In [52]:
# Logistic regression 

# sigmoid activation function 
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)).

    `z` may be a scalar or a NumPy array; for arrays the function is applied
    element-wise and the result has the same shape as `z`.
    """
    denominator = 1 + np.exp(-z)
    return 1.0 / denominator

# Regularized cost function 
def regCostFunc(theta, X, y, _lambda=0.001):
    """Regularized logistic-regression (cross-entropy) cost.

    Parameters
    ----------
    theta : ndarray
        Model parameters, one per column of `X`.
    X : ndarray
        Design matrix with one row per sample.
    y : ndarray
        Target values, one per row of `X` (the training cell passes 0/1
        integers).
    _lambda : float, optional
        L2 regularization strength. Every entry of `theta` is penalized,
        including the first one.

    Returns
    -------
    ndarray
        Mean cross-entropy plus ``_lambda / (2 * m) * sum(theta ** 2)`` with
        ``m = len(y)``. The value keeps the array shape produced by the dot
        products (a one-element array when `y` is an (m, 1) column).
    """
    m = len(y)
    z = X.dot(theta)

    # With h = 1 / (1 + exp(-z)):
    #   -log(h)     == log(1 + exp(-z)) == logaddexp(0, -z)
    #   -log(1 - h) == log(1 + exp(z))  == logaddexp(0,  z)
    # Evaluating these in log space keeps the cost finite when h would round
    # to exactly 0 or 1, where log(h) / log(1 - h) yield -inf and the dot
    # products turn into inf or nan.
    neg_log_h = np.logaddexp(0, -z)
    neg_log_one_minus_h = np.logaddexp(0, z)

    reg = (_lambda / (2 * m)) * np.sum(theta ** 2)

    return (y.T.dot(neg_log_h) + (1 - y).T.dot(neg_log_one_minus_h)) / m + reg

# Gradient of the regularized cost function (passed to the optimizer as `jac`; no descent steps are performed here)
def regGradientDescent(theta, X, y, _lambda=0.001):
    """Gradient of the regularized logistic-regression cost.

    `theta` and `y` are reshaped to columns, so either flat or column inputs
    are accepted. The result is an (n, 1) column, where n is the number of
    columns of `X`. Every entry of `theta` contributes to the penalty term.
    """
    n_rows, n_cols = X.shape
    theta_col = theta.reshape((n_cols, 1))
    target_col = y.reshape((n_rows, 1))

    # Difference between predicted probabilities and targets, one row per sample.
    residual = sigmoid(X.dot(theta_col)) - target_col
    penalty = _lambda * theta_col / n_rows
    data_term = (1 / n_rows) * X.T.dot(residual)

    return data_term + penalty

#Optimal theta 
def logisticRegression(X, y, theta):
    """Fit one binary regularized logistic-regression model with the TNC solver.

    Parameters
    ----------
    X, y
        Forwarded unchanged to `regCostFunc` and `regGradientDescent`.
    theta : array_like
        Initial parameter guess. Any shape with the right number of elements
        is accepted; it is flattened before being handed to the solver.

    Returns
    -------
    ndarray
        `result.x`, the 1-D parameter vector reported by
        `scipy.optimize.minimize`.
    """
    # scipy.optimize.minimize documents x0 as a 1-D array of shape (n,), the
    # objective as returning a float and jac as returning shape (n,). The cost
    # and gradient functions of this notebook return a one-element array and an
    # (n, 1) column respectively, so adapt the shapes at the solver boundary.
    x0 = np.asarray(theta, dtype=float).ravel()

    def scalar_cost(t, *args):
        return float(np.squeeze(regCostFunc(t, *args)))

    def flat_gradient(t, *args):
        return np.ravel(regGradientDescent(t, *args))

    result = op.minimize(fun=scalar_cost, x0=x0, args=(X, y),
                         method='TNC', jac=flat_gradient)

    return result.x

In [53]:
# Training time
# One row of fitted parameters per class, in the order of `Species`.
all_theta = np.zeros((k, n + 1))

#One vs all
for i, flower in enumerate(Species):
    #set the labels in 0 and 1
    tmp_y = np.array(y_train == flower, dtype = int)
    # Start from a flat zero vector: scipy.optimize.minimize documents x0 as a
    # 1-D array of shape (n,).
    optTheta = logisticRegression(X_train, tmp_y, np.zeros(n + 1))
    all_theta[i] = optTheta

In [54]:
#Predictions
# Score every test row against each one-vs-all classifier, then label the row
# with the species whose classifier reports the highest probability.
P = sigmoid(X_test.dot(all_theta.T)) #probability for each flower
p = [Species[idx] for idx in np.argmax(P, axis=1)]

print("Test Accuracy ", accuracy_score(y_test, p) * 100 , '%')

Test Accuracy  96.6666666667 %
