#  Implementation the logistic discrimination algorithm for multiple classes 

# Data Processing

## Iris Dataset 

Popular classification dataset with 3 labels/classes which refer to three species of Iris (Iris setosa, Iris virginica and Iris versicolor). There are four features were measured from each sample.
https://archive.ics.uci.edu/ml/datasets/iris

In [1]:
import numpy as np
np.random.seed(42)
import pandas as pd
data= pd.read_csv("iris.data",names=["SepalLengthCm","SepalWidthCm", "PetalLengthCm","PetalWidthCm","Species"])
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [2]:
data['Class']=data['Species']
data['Class'] = data['Class'].map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})
print("Total classes: ", data['Class'].unique())
#data.shape

Total classes:  [0 1 2]


In [3]:
M = []
X = []
import numpy as np


for i in range(0, len(data)):
    if(data['Class'][i] == 0): M.append([1,0,0])
    elif(data['Class'][i] == 1): M.append([0,1,0])
    elif(data['Class'][i] == 2): M.append([0,0,1])
    
    
    X.append([data['SepalLengthCm'][i], data['SepalWidthCm'][i], data['PetalLengthCm'][i], data['PetalWidthCm'][i]])

y_train = np.asarray(M)    

N = len(X)
#print(N)
K = len(data['Class'].unique())  #number of classes
#print(K)
D = len(X[0]) #dimension or number of independent variables

x= np.asarray(X)
#print(x)
np.random.shuffle(x)
N=x.shape[0]
print(N)

150


# Trainand test splitting for performance measure

In [4]:
#Data splitting for training and testing set
training_idx = np.random.randint(x.shape[0], size=120)
test_idx = np.random.randint(x.shape[0], size=30)
X_train, X_test= x[training_idx,:], x[test_idx,:]
print("Shape of X_train: ", X_train.shape)
print("Shape of X_test: ", X_test.shape)


Shape of X_train:  (120, 4)
Shape of X_test:  (30, 4)


# Implementation of the logistic discrimination algorithm for multiple classes 

In [5]:
import math

def log_regression_multi_class(x, r, d, step_size,accuracy_stop,iterations):    
    x0 = np.repeat(1, len(x))
    new_x = np.c_[x0, x]
    
    #initialize w with zeros
    w = np.zeros( shape = (K, d + 1) )
    #print("first", w)
    
    # book mentions (1 .. K) 1 to K inclusive K. exactly K numbers
    for i in range (0, K): # will work as 0 .... K-1 = K numbers         
        #book mentions: 0 ... d => book uses inclusive 0 to d i.e total d + 1 
        for j in range (0, d + 1): # will work as 0 ..... d = d + 1 counts
            w[i, j] = np.random.uniform (-0.01, 0.01)

    # run iterations time (i.e. 0 .... (iterations - 1) )
    for test in range(0, iterations):
        
        # accuracy score for current iterations
        accuracy_score = 0
        
        # initialize delta_w i.e derivative_w i.e deriv_w
        deriv_w = np.zeros( shape = (K, d + 1 ) )
        
        # iterate for all rows of x where each row represent x0, x1, .... xd i.e. d + 1 elements in a single row
        
        # book: 1 .... N i.e. total N count
        N = len(new_x)
        for t in range(0, N ): # i.e. 0 .... N - 1
            
            # calculate o. o will help to calculate sigmoid/softmax i.e. to calculate y
            o = []
            for i in range(0, K):
                o.append(0)
                for j in range(0, d + 1):
                    o[i] += w[i, j] * new_x[t, j]
            
            # o2 also helps in calculating sigmoid/softmax i.e. to calculate y
            # see the equation y = exp(Oi)/ ( sum of  exp(Ok) for all K 1....k )
            # o2 helps to calculate the denominator
            y = []            
            o2 = 0
            for i in range(0, K):
                o2 += np.exp(o[i])
            
            # calculate y 
            for i in range(0, K):
                y.append(0)                
                y[i] = np.exp( o[i] )/o2
                #print(y)
              
              # check accuracy score
            if r[t , i].argmax() == y.index (max(y)):
                accuracy_score += 1
                          
            # calculate deriv w
            for i in range (0, K):
                for j in range(0, d + 1 ): # book mentions: 0 to d i.e. d + 1 count
                    deriv_w[i, j] += deriv_w[i, j] + (r[t, i] - y[i] ) * new_x[t, j]            
            
            # the book shows w updates after each iteration. 
            # however, if i print w at that point, w might have nans    i.e. overfit (train accuracy >= 100%)
            # hence, even during the iteration checking for accuracy
            # it might need further thoughts if this can be here
            if  (  accuracy_score/ len(x) >= accuracy_stop ):
                print ('Accuracy', accuracy_score * 100 / len(x) )
                #print(w)
                return (w)
            

        # after one pass of all samples calculate/update w
        for i in range (0, K):
            for j in range(0, d + 1):
                w[i, j] = w[i, j] + step_size * deriv_w[i, j]
                
        #print('\nw at iteration', test)
        #print(w)
        
        # exit from the method if we have reached the accuracy target
        if  (  accuracy_score/ len(x) >= accuracy_stop ):
            print ('\nTraining accuracy', accuracy_score * 100 / len(x) )
            print(w)
            return (w)
    
    return w

In [6]:
w= log_regression_multi_class(X_train,y_train, D, 0.5,0.9,100)
print ('\n Logistic Discrimination Weights Multi_class ')
w

Accuracy 90.0

 Logistic Discrimination Weights Multi_class 




array([[ 4.47113426e+35,  2.44981317e+36,  1.27900326e+36,
         1.67261539e+36,  6.29151120e+35],
       [-2.32175096e+35, -1.27160538e+36, -6.64496694e+35,
        -8.65711078e+35, -3.25314290e+35],
       [-2.14938330e+35, -1.17820780e+36, -6.14506566e+35,
        -8.06904313e+35, -3.03836830e+35]])

# Comparison of sklearn libraries logistic regression function 

## Importing sklearn libraries for logistic regression function

In [7]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
import numpy as np

# Trainand test splitting for performance measure


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y_train, test_size = 0.2, random_state =4)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
#print(y_train)

#LogisticRegression in sklearn doesn't support multi-label directly
Y_train = np.argmax(y_train, axis=1)
Y_test = np.argmax(y_test, axis=1)
print('Train Dataset: ', X_train.shape)
print('Test Dataset: ', X_test.shape)

Train Dataset:  (120, 4)
Test Dataset:  (30, 4)


# Fitting Multiclass Logistic Classification to the Training set From sklearn libraries

In [9]:
# Fitting Multiclass Logistic Classification to the Training set
from sklearn.linear_model import LogisticRegression
logisticregression = LogisticRegression()
logisticregression.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [10]:
# Predicting the Test set results
y_pred = logisticregression.predict(X_test)
print(y_pred)

[2 0 2 2 2 1 2 0 0 1 0 0 0 1 2 0 1 0 0 2 0 2 1 0 0 0 0 0 0 2]


In [11]:
#accuracy_score(Y_test, pred) # this gives accuracy
print('Accuracy: {}'.format(round(accuracy_score(np.array(Y_test), np.array(y_pred)) * 100, 2)))

Accuracy: 93.33


In [12]:
#lets see the actual and predicted value side by side
y_compare = np.vstack((Y_test,y_pred)).T
#actual value on the left side and predicted value on the right hand side
#printing the top 5 values
#y_compare[:5,:]
#print(y_compare)