**Importing the required libraries**

In [None]:
import numpy as np
import pandas as pd

import math
from sklearn.model_selection import train_test_split

**Linking google drive to google colab**

We upload the Iris.csv dataset file to google colab

In [None]:
# Mount Google Drive into the runtime's filesystem at /content/drive.
# NOTE(review): google.colab is presumably only importable inside Colab, so
# this cell will fail in a plain Jupyter environment — confirm the target runtime.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


**Load data from Iris dataset**

In [None]:
# Read the dataset from a path relative to the current working directory.
# NOTE(review): the path does not point into the /content/drive mount —
# confirm where Iris.csv is expected to live.
iris = pd.read_csv("Iris.csv")
iris

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...,...
145,146,6.7,3.0,5.2,2.3,Iris-virginica
146,147,6.3,2.5,5.0,1.9,Iris-virginica
147,148,6.5,3.0,5.2,2.0,Iris-virginica
148,149,6.2,3.4,5.4,2.3,Iris-virginica


**Rearranging the data**

We get rid of the Id column, further we divide the data into the classes

In [None]:
# Remove the 'Id' column so only the four measurements and the label remain.
# errors='ignore' keeps the cell idempotent: re-running it after 'Id' has
# already been dropped no longer raises KeyError.
iris = iris.drop(columns=['Id'], errors='ignore')
iris.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


**Separating training data**

We separate the training and test data using an 80:20 ratio

In [None]:
# Training split: rows 0-39, 50-89 and 100-139 of the frame.
# NOTE(review): these fixed slices assume the CSV is ordered by species with
# 50 rows per class — confirm if the input file ever changes.
# One concat over all three slices replaces the repeated concat through a
# throwaway `temp` variable.
train = pd.concat([
    iris.iloc[0:40, :],
    iris.iloc[50:90, :],
    iris.iloc[100:140, :],
])

In [None]:
# Renumber the rows from 0 and discard the original row labels in one step.
train = train.reset_index(drop=True)
train

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
115,7.7,3.0,6.1,2.3,Iris-virginica
116,6.3,3.4,5.6,2.4,Iris-virginica
117,6.4,3.1,5.5,1.8,Iris-virginica
118,6.0,3.0,4.8,1.8,Iris-virginica


**Separating test data**

The data remaining after removing the training data is the test data

In [None]:
# Test split: rows 40-49, 90-99 and 140-149 of the frame, i.e. the rows the
# training slices (0-39, 50-89, 100-139) leave out.
# NOTE(review): these fixed slices assume the CSV is ordered by species with
# 50 rows per class — confirm if the input file ever changes.
# One concat over all three slices replaces the repeated concat through a
# throwaway `temp1` variable.
test = pd.concat([
    iris.iloc[40:50, :],
    iris.iloc[90:100, :],
    iris.iloc[140:150, :],
])

In [None]:
# Renumber the rows from 0 and discard the original row labels in one step.
test = test.reset_index(drop=True)
test.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.0,3.5,1.3,0.3,Iris-setosa
1,4.5,2.3,1.3,0.3,Iris-setosa
2,4.4,3.2,1.3,0.2,Iris-setosa
3,5.0,3.5,1.6,0.6,Iris-setosa
4,5.1,3.8,1.9,0.4,Iris-setosa
5,4.8,3.0,1.4,0.3,Iris-setosa
6,5.1,3.8,1.6,0.2,Iris-setosa
7,4.6,3.2,1.4,0.2,Iris-setosa
8,5.3,3.7,1.5,0.2,Iris-setosa
9,5.0,3.3,1.4,0.2,Iris-setosa


**Training with the given data**

Now we use the given train data and find the x and y parameters

In [None]:
# Training feature matrix. The explicit .copy() makes x an independent frame,
# so adding a column to it later does not trigger pandas'
# SettingWithCopyWarning (x would otherwise be flagged as a slice of train).
x = train[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']].copy()
n = x.shape[1]  # number of feature columns
m = x.shape[0]  # number of training rows

In [None]:
# Training labels: count the distinct species, then encode them as 0/1/2.
y = train['Species']
k = len(y.unique())  # number of distinct classes in the training labels
y = y.map({
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
})
y.value_counts()

0    40
1    40
2    40
Name: Species, dtype: int64

**Calculations for softmax**

In [None]:
# Append a constant column of ones under the integer label 5; it pairs with
# the extra (n+1)-th parameter of each class.
x[5] = np.ones(len(x))
x.shape

(120, 5)

In [None]:
# One parameter row per class, one column per feature plus the ones column.
# Start from zeros: np.empty returns uninitialised memory, which made the
# starting point (and therefore the fitted parameters) arbitrary and
# potentially different on every run.
theta = np.zeros((k, n + 1))

**Function definition for softmax**

In [None]:
def phi(i,theta,x):
    """Return the softmax probability of class ``i`` for a single sample.

    Parameters
    ----------
    i : int
        Row index into ``theta`` of the class whose probability is wanted.
    theta : array-like, one row of parameters per class
        The normalisation runs over every row of ``theta`` rather than over a
        module-level class count, so the function no longer depends on
        notebook state that later cells rebind.
    x : array-like
        Feature values of one sample, one per column of ``theta``.

    Returns
    -------
    float
        ``exp(theta[i] . x) / sum_j exp(theta[j] . x)``.
    """
    weights = np.asarray(theta, dtype=float)
    features = np.asarray(x, dtype=float).ravel()
    logits = weights @ features
    # Subtracting the largest logit leaves the ratio unchanged but keeps
    # exp() from overflowing when a score is large.
    shifted = np.exp(logits - logits.max())
    return float(shifted[i] / shifted.sum())

In [None]:
def indicator(a,b):
    """Return 1 when ``a`` equals ``b`` and 0 otherwise."""
    return 1 if a == b else 0

In [None]:
def get__der_grad(j,theta):
    """Return the gradient term for class ``j``'s parameter row.

    Reads the module-level training data: ``x`` (feature frame including the
    ones column) and ``y`` (integer class labels), both looked up by row
    label ``0 .. len(x)-1``.

    Parameters
    ----------
    j : int
        Class whose parameter row the gradient is taken for.
    theta : array-like, one row of parameters per class
        Current parameters, passed through to ``phi``.

    Returns
    -------
    pandas.Series
        ``-(1/len(x)) * sum_i (indicator(y[i], j) - phi(j, theta, x_i)) * x_i``,
        indexed by the columns of ``x``.
    """
    # Sizes are taken from x itself rather than from the globals m and n,
    # which later cells rebind to the test-set dimensions.
    n_samples = len(x)
    # Named `total` so the builtin sum() is not shadowed.
    total = np.zeros(x.shape[1])
    for i in range(n_samples):
        sample = x.loc[i]
        residual = indicator(y[i], j) - phi(j, theta, sample)
        total = total + sample * residual
    return -total / n_samples

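We run 100 iterations per class. Note that the default learning rate is written as `1/(10^5)`, but `^` is bitwise XOR in Python, so `10^5` evaluates to 15 and the effective learning rate is 1/15 ≈ 0.067 (not $10^{-5}$).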

In [None]:
def gradient_descent(theta, alpha=1/15, iters=100):
    """Fit the parameters by repeated gradient steps, one class row at a time.

    Parameters
    ----------
    theta : numpy.ndarray, one row of parameters per class
        Starting parameters. Updated IN PLACE and also returned.
    alpha : float, default 1/15
        Step size. The default used to be spelled ``1/(10^5)``; ``^`` is
        bitwise XOR in Python, so that expression evaluates to ``1/15``, not
        ``1e-5``. The value is kept so existing results are unchanged, but is
        now written explicitly. Pass ``alpha=1e-5`` if that was the intent.
    iters : int, default 100
        Number of gradient steps applied to each class row.

    Returns
    -------
    numpy.ndarray
        The same ``theta`` object, after the updates.

    Notes
    -----
    Each class row receives all of its ``iters`` steps before the next row is
    touched, so later rows see the already-updated earlier rows.
    """
    # The class count comes from theta itself, not from the global k that
    # later cells rebind.
    for j in range(len(theta)):
        for _ in range(iters):
            theta[j] = theta[j] - alpha * get__der_grad(j, theta)
    return theta

**Apply gradient descent**

In [None]:
# Train on a copy of theta. gradient_descent updates its argument in place,
# so passing theta itself made theta_dash an alias of theta, and every re-run
# of this cell continued from the previous result instead of starting over.
theta_dash = gradient_descent(theta.copy())

In [None]:
# Display the fitted parameter matrix returned by gradient_descent.
theta_dash

array([[ 0.30736854,  0.84391433, -1.25261847, -0.57674578,  0.16999306],
       [ 0.19966804, -0.01691111, -0.22403197, -0.23900584,  0.09517276],
       [-0.34463017, -0.30603885,  0.52547442,  0.36404748, -0.15203357]])

In [None]:
def h_theta(x):
    """Return the softmax class probabilities for one sample.

    Uses the module-level fitted parameters ``theta_dash`` (one row per class).

    Parameters
    ----------
    x : array-like
        Feature values of one sample, one per column of ``theta_dash``.

    Returns
    -------
    numpy.ndarray of shape (number_of_classes, 1)
        Entry ``i`` is ``exp(theta_dash[i] . x) / sum_j exp(theta_dash[j] . x)``.
    """
    features = np.asarray(x, dtype=float).ravel()
    # The class count comes from theta_dash itself, not from the global k,
    # which the test-set cell rebinds.
    logits = np.asarray(theta_dash, dtype=float) @ features
    # Subtracting the largest logit leaves the ratios unchanged but keeps
    # exp() from overflowing. The exponentials are computed once and reused
    # for both numerator and denominator.
    shifted = np.exp(logits - logits.max())
    return (shifted / shifted.sum()).reshape(-1, 1)

**Test the test dataset**

We use the test data set to test the given hypothesis

In [None]:
# Test feature matrix. The explicit .copy() makes x_u an independent frame,
# so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning (x_u would otherwise be flagged as a slice of test).
x_u = test[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']].copy()
# NOTE: this rebinds the module-level n and m that were set from the training
# data; anything that reads them afterwards sees the test-set dimensions.
n = x_u.shape[1]
m = x_u.shape[0]

In [None]:
# Test labels: count the distinct species, then encode them as 0/1/2.
y_true = test['Species']
# Note: this rebinds the module-level k that was set from the training labels.
k = len(y_true.unique())
y_true = y_true.map({
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
})
y_true.value_counts()

0    10
1    10
2    10
Name: Species, dtype: int64

In [None]:
# Append a constant column of ones under the integer label 5, mirroring the
# column added to the training features.
x_u[5] = np.ones(len(x_u))
x_u.shape

(30, 5)

**Finding a prediction using the hypothesis**

In [None]:
# Iterate over the model inputs only. Once 'prediction' (or any other column)
# has been added to x_u, iterating the whole frame would hand h_theta rows of
# the wrong length, so re-running this cell used to fail.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 5]
for index, row in x_u[feature_columns].iterrows():
    h_matrix = h_theta(row)
    # argmax returns the index of the largest probability directly. The old
    # int(np.where(h == h.max())[0]) raised on ties and relied on converting
    # a one-element array to int.
    prediction = int(np.argmax(h_matrix))
    x_u.loc[index, 'prediction'] = prediction

In [None]:
# Build the results table as a NEW frame. `results = x_u` only created an
# alias, so adding the 'actual' column silently modified x_u as well.
# assign() aligns y_true to x_u by row label, exactly as column assignment did.
results = x_u.assign(actual=y_true)

**Final result table**

In [None]:
# Display the test features together with the predicted and actual class.
results

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,5,prediction,actual
0,5.0,3.5,1.3,0.3,1.0,0.0,0
1,4.5,2.3,1.3,0.3,1.0,0.0,0
2,4.4,3.2,1.3,0.2,1.0,0.0,0
3,5.0,3.5,1.6,0.6,1.0,0.0,0
4,5.1,3.8,1.9,0.4,1.0,0.0,0
5,4.8,3.0,1.4,0.3,1.0,0.0,0
6,5.1,3.8,1.6,0.2,1.0,0.0,0
7,4.6,3.2,1.4,0.2,1.0,0.0,0
8,5.3,3.7,1.5,0.2,1.0,0.0,0
9,5.0,3.3,1.4,0.2,1.0,0.0,0


**We compare the actual values to the predicted values**

We use this comparison to find the accuracy of our model

In [None]:
# Row-wise match between predicted and actual class (boolean Series).
compare = results['prediction'] == results['actual']
# Count the True entries directly. value_counts()[1] indexed a boolean-labelled
# Series with the integer 1, which is ambiguous between "label True" and
# "second position", and raised when every prediction was right or every
# prediction was wrong (only one distinct value present).
correct = int(compare.sum())
accuracy = correct / len(results)

An accuracy of 96.67% on the 30 test samples means that exactly one test sample was predicted incorrectly.

In [None]:
# Express the accuracy fraction as a percentage.
accuracy * 100

96.66666666666667