In [1]:
import numpy as np
import pandas as pd
import math

In [2]:
np.random.seed(42)

In [3]:
df = pd.read_csv('iris.csv')

In [4]:
df.head(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
df.shape

(150, 6)

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [7]:
df = df.drop(['Id'], axis=1)
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
pd.unique(df[['Species']].values.ravel())

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [9]:
target_encoded_values = {"Species": {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}}
df = df.replace(target_encoded_values)
# df.head()
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [10]:
df.describe()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667,1.0
std,0.828066,0.433594,1.76442,0.763161,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


## Splitting data into training and testing part

In [11]:
df.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


In [12]:
unorganized = df.sample(frac=1, random_state=1)
unorganized.shape

(150, 5)

In [13]:
unorganized.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
14,5.8,4.0,1.2,0.2,0
98,5.1,2.5,3.0,1.1,1
75,6.6,3.0,4.4,1.4,1
16,5.4,3.9,1.3,0.4,0
131,7.9,3.8,6.4,2.0,2
56,6.3,3.3,4.7,1.6,1
141,6.9,3.1,5.1,2.3,2
44,5.1,3.8,1.9,0.4,0
29,4.7,3.2,1.6,0.2,0
120,6.9,3.2,5.7,2.3,2


In [14]:
train = unorganized.iloc[:int(0.7*len(unorganized)), :]
train.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
14,5.8,4.0,1.2,0.2,0
98,5.1,2.5,3.0,1.1,1
75,6.6,3.0,4.4,1.4,1
16,5.4,3.9,1.3,0.4,0
131,7.9,3.8,6.4,2.0,2
56,6.3,3.3,4.7,1.6,1
141,6.9,3.1,5.1,2.3,2
44,5.1,3.8,1.9,0.4,0
29,4.7,3.2,1.6,0.2,0
120,6.9,3.2,5.7,2.3,2


In [15]:
print(train.shape)

(105, 5)


In [16]:
test = unorganized.iloc[int(0.7*len(unorganized)): , :]
test.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
26,5.0,3.4,1.6,0.4,0
76,6.8,2.8,4.8,1.4,1
43,5.0,3.5,1.6,0.6,0
24,4.8,3.4,1.9,0.2,0
136,6.3,3.4,5.6,2.4,2
121,5.6,2.8,4.9,2.0,2
143,6.8,3.2,5.9,2.3,2
49,5.0,3.3,1.4,0.2,0
21,5.1,3.7,1.5,0.4,0
70,5.9,3.2,4.8,1.8,1


In [17]:
print(test.shape)

(45, 5)


## Splitting the dataset into feature and target variables, for both training and testing part.

In [18]:
#for feature variables in training and testing dataset
X_train = train.iloc[:, :4]
X_test = test.iloc[:, :4]

#for target variables in training and testing dataset
y_train = train.iloc[:, 4:]
y_test = test.iloc[:, 4:]

In [19]:
X_train.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
14,5.8,4.0,1.2,0.2
98,5.1,2.5,3.0,1.1
75,6.6,3.0,4.4,1.4
16,5.4,3.9,1.3,0.4
131,7.9,3.8,6.4,2.0


Looking at the X, we see that there are 4 main features in it. Now we create a First layer with weight matrix which includes 4 feautures and 3 nodes 

In [20]:
def initialize_weights_and_bias(mode):
    if mode == 'random':
        W1 = np.random.random((3, 4)).astype(np.float32)
        b1 = np.random.random((1, 3)).astype(np.float32)
        W2 = np.random.random((3, 3)).astype(np.float32)
        b2 = np.random.random((1, 3)).astype(np.float32)
    elif mode == 'xavier':
        W1 = np.random.normal(loc=0.0, scale=(2/(4+3)), size=(3, 4)).astype(np.float32)
        b1 = np.random.random((1, 3)).astype(np.float32)
        W2 = np.random.normal(loc=0.0, scale=(2/(3+3)), size=(3, 3)).astype(np.float32)
        b2 = np.random.random((1, 3)).astype(np.float32)
    elif mode == 'constant':
        W1 = np.ones((3, 4))
        b1 = np.ones((1, 3))
        W2 = np.ones((3, 3))
        b2 = np.ones((1, 3))
    return W1, b1, W2, b2

In [21]:
#Activation Functions
def sigmoid(x):
    return 1. / (1. + np.exp(-x))

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=1)

def tanh(x):
    return math.tanh(x)



In [22]:
def forward_pass(x, W1, b1, W2, b2):
    
    #First layer
    x = np.array(x, dtype=np.float32)
    res1 = np.matmul(x, W1.T) + b1
    layer1_out = sigmoid(res1)
    # print(layer1_out)
    
    #Second layer
    res2 = np.matmul(layer1_out, W2.T) + b2
    layer2_out = softmax(res2)
    #print(layer2_out)
    return layer2_out.flatten(), layer1_out.flatten()

In [23]:
def backward_pass(x, W1, b1, W2, b2, y_hat, y, z1, lr = 0.01):
    
    x = np.array(x, dtype= np.float32).reshape(1,4)
    y = np.array(y, dtype = np.float32).reshape(1,3)
    y_hat = np.array(y_hat, dtype=np.float32).reshape(1,3)
    z1 = np.array(z1, dtype=np.float32).reshape(1,3)
    
    res = y_hat - y                    
    dw2 = np.matmul(res.T, z1)                                                # 3x1 * 1x3 = 3x3
    db2 = res                                                                 
    grad_w1 = np.matmul(res, W2)          
    grad_w1 = np.matmul(grad_w1, z1.T)                                        # 1x3 * 3x1 = 1x1
    grad_w2 = 1 - z1                                                          # 1x3
    db1 = np.matmul(grad_w1, grad_w2)                                  
    dw1 = np.matmul(db1.T, x)                             
    
    up_W2 = W2 - lr*dw2
    up_b2 = b2 - lr*db2
    up_b1 = b1 - lr*db1
    up_W1 = W1 - lr*dw1
    
    return up_W2, up_b2, up_W1, up_b1
    

In [None]:
# print("Predicted Class | Actual Class")
import matplotlib.pyplot as plt


W1, b1, W2, b2 = initialize_weights_and_bias('xavier')
cost = 0
losses = []
epochs = 1000
for j in range(epochs):
    losss = 0
    tp = 0
    for i in range(len(X_train)):
        
        x = np.array(X_train.iloc[i:i+1, :], dtype=np.float32).reshape(1,4)
        target = np.array(y_train)
        y = []
        if target[i] == 0:
            y = [1, 0, 0]
        elif target[i] == 1:
            y = [0, 1, 0]
        elif target[i] == 2:
            y = [0, 0, 1]


        #network forward pass
        y_hat, z1 = forward_pass(x, W1, b1, W2, b2)
        
        
        #we use categorical cross entropy loss
        loss = 0
        for i in range(len(y)):
            if y[i] != 0:
                loss +=  -y[i] * np.log(y_hat[i])
        losss += loss
        
        #the true positives for this case, to calculate accuracy
        if np.argmax(y_hat) == np.argmax(y):
            tp += 1
        
        #network backward pass
        W2, b2, W1, b1 = backward_pass(x, W1, b1, W2, b2, y_hat, y, z1)
    
    #we append total loss of each epoch to the losses array
    losses.append(losss)
    print(f"Epoch = {j}, Accuracy = {tp/len(X_train)}, loss= {losss}")



Epoch = 0, Accuracy = 0.3142857142857143, loss= 118.5898574590683
Epoch = 1, Accuracy = 0.3333333333333333, loss= 115.93233048915863
Epoch = 2, Accuracy = 0.3333333333333333, loss= 115.23180335760117
Epoch = 3, Accuracy = 0.34285714285714286, loss= 114.23383688926697
Epoch = 4, Accuracy = 0.3523809523809524, loss= 112.44661331176758
Epoch = 5, Accuracy = 0.3619047619047619, loss= 110.35484504699707
Epoch = 6, Accuracy = 0.38095238095238093, loss= 107.94732767343521
Epoch = 7, Accuracy = 0.3904761904761905, loss= 105.29424905776978
Epoch = 8, Accuracy = 0.41904761904761906, loss= 102.46417725086212
Epoch = 9, Accuracy = 0.49523809523809526, loss= 99.52309602499008
Epoch = 10, Accuracy = 0.6857142857142857, loss= 96.54364615678787
Epoch = 11, Accuracy = 0.7047619047619048, loss= 93.59491622447968
Epoch = 12, Accuracy = 0.7047619047619048, loss= 90.7312935590744
Epoch = 13, Accuracy = 0.7142857142857143, loss= 87.98875683546066
Epoch = 14, Accuracy = 0.7142857142857143, loss= 85.386918365

Epoch = 130, Accuracy = 0.9523809523809523, loss= 29.389614168554544
Epoch = 131, Accuracy = 0.9523809523809523, loss= 29.266047444194555
Epoch = 132, Accuracy = 0.9523809523809523, loss= 29.143690574914217
Epoch = 133, Accuracy = 0.9523809523809523, loss= 29.022495318204165
Epoch = 134, Accuracy = 0.9523809523809523, loss= 28.902480624616146
Epoch = 135, Accuracy = 0.9523809523809523, loss= 28.783622350543737
Epoch = 136, Accuracy = 0.9523809523809523, loss= 28.6658826880157
Epoch = 137, Accuracy = 0.9523809523809523, loss= 28.549230620265007
Epoch = 138, Accuracy = 0.9619047619047619, loss= 28.43367349728942
Epoch = 139, Accuracy = 0.9619047619047619, loss= 28.31918651610613
Epoch = 140, Accuracy = 0.9619047619047619, loss= 28.20575973391533
Epoch = 141, Accuracy = 0.9619047619047619, loss= 28.0933429710567
Epoch = 142, Accuracy = 0.9619047619047619, loss= 27.981952007859945
Epoch = 143, Accuracy = 0.9619047619047619, loss= 27.87158005312085
Epoch = 144, Accuracy = 0.9619047619047619

Epoch = 252, Accuracy = 0.9809523809523809, loss= 19.727136639878154
Epoch = 253, Accuracy = 0.9809523809523809, loss= 19.67630261555314
Epoch = 254, Accuracy = 0.9809523809523809, loss= 19.62577943317592
Epoch = 255, Accuracy = 0.9809523809523809, loss= 19.575540190562606
Epoch = 256, Accuracy = 0.9809523809523809, loss= 19.52560668066144
Epoch = 257, Accuracy = 0.9809523809523809, loss= 19.475951014086604
Epoch = 258, Accuracy = 0.9904761904761905, loss= 19.426592035219073
Epoch = 259, Accuracy = 0.9904761904761905, loss= 19.37752212025225
Epoch = 260, Accuracy = 0.9904761904761905, loss= 19.328734254464507
Epoch = 261, Accuracy = 0.9904761904761905, loss= 19.28022459335625
Epoch = 262, Accuracy = 0.9904761904761905, loss= 19.231993230991066
Epoch = 263, Accuracy = 0.9904761904761905, loss= 19.184034990146756
Epoch = 264, Accuracy = 0.9904761904761905, loss= 19.13636760879308
Epoch = 265, Accuracy = 0.9904761904761905, loss= 19.088959357701242
Epoch = 266, Accuracy = 0.99047619047619

Epoch = 372, Accuracy = 0.9904761904761905, loss= 15.23523045796901
Epoch = 373, Accuracy = 0.9904761904761905, loss= 15.207997266203165
Epoch = 374, Accuracy = 0.9904761904761905, loss= 15.180874140933156
Epoch = 375, Accuracy = 0.9904761904761905, loss= 15.153900016099215
Epoch = 376, Accuracy = 0.9904761904761905, loss= 15.127031196840107
Epoch = 377, Accuracy = 0.9904761904761905, loss= 15.100287114270031
Epoch = 378, Accuracy = 0.9904761904761905, loss= 15.073644513264298
Epoch = 379, Accuracy = 0.9904761904761905, loss= 15.047143219970167
Epoch = 380, Accuracy = 0.9904761904761905, loss= 15.020772717893124
Epoch = 381, Accuracy = 0.9904761904761905, loss= 14.994504863396287
Epoch = 382, Accuracy = 0.9904761904761905, loss= 14.968354860320687
Epoch = 383, Accuracy = 0.9904761904761905, loss= 14.94233678188175
Epoch = 384, Accuracy = 0.9904761904761905, loss= 14.91642774362117
Epoch = 385, Accuracy = 0.9904761904761905, loss= 14.890625841915607
Epoch = 386, Accuracy = 0.99047619047

Epoch = 501, Accuracy = 0.9904761904761905, loss= 12.5271575297229
Epoch = 502, Accuracy = 0.9904761904761905, loss= 12.511093073058873
Epoch = 503, Accuracy = 0.9904761904761905, loss= 12.495089539792389
Epoch = 504, Accuracy = 0.9904761904761905, loss= 12.479149446357042
Epoch = 505, Accuracy = 0.9904761904761905, loss= 12.46326003363356
Epoch = 506, Accuracy = 0.9904761904761905, loss= 12.44742705207318
Epoch = 507, Accuracy = 0.9904761904761905, loss= 12.431655706837773
Epoch = 508, Accuracy = 0.9904761904761905, loss= 12.415934395976365
Epoch = 509, Accuracy = 0.9904761904761905, loss= 12.400277940090746
Epoch = 510, Accuracy = 0.9904761904761905, loss= 12.384649417828768
Epoch = 511, Accuracy = 0.9904761904761905, loss= 12.36908580781892
Epoch = 512, Accuracy = 0.9904761904761905, loss= 12.35359377041459
Epoch = 513, Accuracy = 0.9904761904761905, loss= 12.338152548763901
Epoch = 514, Accuracy = 0.9904761904761905, loss= 12.322768199723214
Epoch = 515, Accuracy = 0.99047619047619

Epoch = 623, Accuracy = 0.9904761904761905, loss= 10.92408586293459
Epoch = 624, Accuracy = 0.9904761904761905, loss= 10.91339482460171
Epoch = 625, Accuracy = 0.9904761904761905, loss= 10.902736814692616
Epoch = 626, Accuracy = 0.9904761904761905, loss= 10.89211770053953
Epoch = 627, Accuracy = 0.9904761904761905, loss= 10.881542562507093
Epoch = 628, Accuracy = 0.9904761904761905, loss= 10.870994185563177
Epoch = 629, Accuracy = 0.9904761904761905, loss= 10.860452082008123
Epoch = 630, Accuracy = 0.9904761904761905, loss= 10.849951828364283
Epoch = 631, Accuracy = 0.9904761904761905, loss= 10.839500366244465
Epoch = 632, Accuracy = 0.9904761904761905, loss= 10.829082915559411
Epoch = 633, Accuracy = 0.9904761904761905, loss= 10.818690439220518
Epoch = 634, Accuracy = 0.9904761904761905, loss= 10.808328916784376
Epoch = 635, Accuracy = 0.9904761904761905, loss= 10.79798923060298
Epoch = 636, Accuracy = 0.9904761904761905, loss= 10.787675280123949
Epoch = 637, Accuracy = 0.990476190476

In [None]:
# print(losses)
plt.plot(losses)
plt.show()

In [None]:
print(f"Accuracy for training data: {round(tp/len(X_train), 2)*100}%")

## For test data

In [None]:
tp_test = 0

for i in range(len(X_test)):
    x = X_test.iloc[i:i+1, :].to_numpy()
    target = y_test.to_numpy()
    y = []
    if target[i] == 0:
        y = [1, 0, 0]
    elif target[i] == 1:
        y = [0, 1, 0]
    elif target[i] == 2:
        y = [0, 0, 1]


    #network forward pas
    y_hat, z1 = forward_pass(x, W1, b1, W2, b2)
    
    print(f"{np.argmax(y_hat)} \ {np.argmax(y)}")
    
    if np.argmax(y_hat) == np.argmax(y):
        tp_test += 1
        
#     print(W2)

In [None]:
print(f"Accuracy for test data: {round(tp_test/len(X_test), 2)*100}%")