In [266]:
import numpy as np
import pandas as pd
import math

In [267]:
np.random.seed(42)

In [268]:
df = pd.read_csv('iris.csv')

In [269]:
df.head(5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [270]:
df.shape

(150, 6)

In [271]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [272]:
df = df.drop(['Id'], axis=1)
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [273]:
pd.unique(df[['Species']].values.ravel())

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [274]:
target_encoded_values = {"Species": {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}}
df = df.replace(target_encoded_values)
# df.head()
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [275]:
df.describe()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
count,150.0,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667,1.0
std,0.828066,0.433594,1.76442,0.763161,0.819232
min,4.3,2.0,1.0,0.1,0.0
25%,5.1,2.8,1.6,0.3,0.0
50%,5.8,3.0,4.35,1.3,1.0
75%,6.4,3.3,5.1,1.8,2.0
max,7.9,4.4,6.9,2.5,2.0


## Splitting data into training and testing part

In [276]:
df.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


In [277]:
unorganized = df.sample(frac=1, random_state=1)
unorganized.shape

(150, 5)

In [278]:
unorganized.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
14,5.8,4.0,1.2,0.2,0
98,5.1,2.5,3.0,1.1,1
75,6.6,3.0,4.4,1.4,1
16,5.4,3.9,1.3,0.4,0
131,7.9,3.8,6.4,2.0,2
56,6.3,3.3,4.7,1.6,1
141,6.9,3.1,5.1,2.3,2
44,5.1,3.8,1.9,0.4,0
29,4.7,3.2,1.6,0.2,0
120,6.9,3.2,5.7,2.3,2


In [279]:
train = unorganized.iloc[:int(0.7*len(unorganized)), :]
train.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
14,5.8,4.0,1.2,0.2,0
98,5.1,2.5,3.0,1.1,1
75,6.6,3.0,4.4,1.4,1
16,5.4,3.9,1.3,0.4,0
131,7.9,3.8,6.4,2.0,2
56,6.3,3.3,4.7,1.6,1
141,6.9,3.1,5.1,2.3,2
44,5.1,3.8,1.9,0.4,0
29,4.7,3.2,1.6,0.2,0
120,6.9,3.2,5.7,2.3,2


In [280]:
print(train.shape)

(105, 5)


In [281]:
test = unorganized.iloc[int(0.7*len(unorganized)): , :]
test.head(10)

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
26,5.0,3.4,1.6,0.4,0
76,6.8,2.8,4.8,1.4,1
43,5.0,3.5,1.6,0.6,0
24,4.8,3.4,1.9,0.2,0
136,6.3,3.4,5.6,2.4,2
121,5.6,2.8,4.9,2.0,2
143,6.8,3.2,5.9,2.3,2
49,5.0,3.3,1.4,0.2,0
21,5.1,3.7,1.5,0.4,0
70,5.9,3.2,4.8,1.8,1


In [282]:
print(test.shape)

(45, 5)


## Splitting the dataset into feature and target variables, for both training and testing part.

In [283]:
#for feature variables in training and testing dataset
X_train = train.iloc[:, :4]
X_test = test.iloc[:, :4]

#for target variables in training and testing dataset
y_train = train.iloc[:, 4:]
y_test = test.iloc[:, 4:]

In [284]:
X_train.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
14,5.8,4.0,1.2,0.2
98,5.1,2.5,3.0,1.1
75,6.6,3.0,4.4,1.4
16,5.4,3.9,1.3,0.4
131,7.9,3.8,6.4,2.0


Looking at the X, we see that there are 4 main features in it. Now we create a First layer with weight matrix which includes 4 feautures and 3 nodes 

In [285]:
def initialize_weights_and_bias(mode, distribution='normal'):
    if mode == 'random':
        W1 = np.random.random((3, 4)).astype(np.float32)
        b1 = np.random.random((1, 3)).astype(np.float32)
        W2 = np.random.random((3, 3)).astype(np.float32)
        b2 = np.random.random((1, 3)).astype(np.float32)
    elif mode == 'xavier':
        if distribution == 'normal':
            W1 = np.random.normal(loc=0.0, scale=np.sqrt(2/(4+3)), size=(3, 4)).astype(np.float32)
            b1 = np.random.normal(loc=0.0, scale=np.sqrt(2/(3+3)), size=(1, 3)).astype(np.float32)
            W2 = np.random.normal(loc=0.0, scale=np.sqrt(2/(3+3)), size=(3, 3)).astype(np.float32)
            b2 = np.random.normal(loc=0.0, scale=np.sqrt(2/(3+3)), size=(1, 3)).astype(np.float32)
        elif distribution == 'uniform':
            W1 = np.random.uniform(low = -(6/np.sqrt(4+3)), high = (6/np.sqrt(4+3)), size=(3, 4)).astype(np.float32)
            b1 = np.random.uniform(low = -(6/np.sqrt(3+3)), high = (6/np.sqrt(3+3)), size=(1, 3)).astype(np.float32)
            W2 = np.random.uniform(low = -(6/np.sqrt(3+3)), high = (6/np.sqrt(3+3)), size=(3, 3)).astype(np.float32)
            b2 = np.random.uniform(low = -(6/np.sqrt(3+3)), high = (6/np.sqrt(3+3)), size=(1, 3)).astype(np.float32)
    elif mode == 'constant':
        W1 = np.ones((3, 4))
        b1 = np.ones((1, 3))
        W2 = np.ones((3, 3))
        b2 = np.ones((1, 3))
    return W1, b1, W2, b2

In [286]:
#Activation Functions
def sigmoid(x):
    return 1. / (1. + np.exp(-x))

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=1)

def tanh(x):
    return np.tanh(x)

In [287]:
def forward_pass(x, W1, b1, W2, b2):
    
    #First layer
    x = np.array(x, dtype=np.float32)
    res1 = np.matmul(x, W1.T) + b1
    layer1_out = sigmoid(res1)
#     print(f"Layer 1 = {layer1_out}")
    
    #Second layer
    res2 = np.matmul(layer1_out, W2.T) + b2
    layer2_out = softmax(res2)
#     print(f"Layer 2 = {layer2_out}")
    
    
    return layer1_out.flatten(), layer2_out.flatten()

In [288]:
def backward_pass(x, W1, b1, W2, b2, y_hat, y, z1, lr = 0.01):
    
    x = np.array(x, dtype= np.float32).reshape(1,4)
    y = np.array(y, dtype = np.float32).reshape(1,3)
    y_hat = np.array(y_hat, dtype=np.float32).reshape(1,3)
    z1 = np.array(z1, dtype=np.float32).reshape(1,3)
    
    res = y_hat - y                    
    dw2 = np.matmul(res.T, z1)                                                # 3x1 * 1x3 = 3x3
    db2 = res                                                                 
    grad_w1 = np.matmul(res, W2)          
    grad_w1 = np.matmul(grad_w1, z1.T)                                        # 1x3 * 3x1 = 1x1
    grad_w2 = 1 - z1                                                          # 1x3
    db1 = np.matmul(grad_w1, grad_w2)                                  
    dw1 = np.matmul(db1.T, x)                             
    
    up_W2 = W2 - lr*dw2
    up_b2 = b2 - lr*db2
    up_b1 = b1 - lr*db1
    up_W1 = W1 - lr*dw1
    
    return up_W2, up_b2, up_W1, up_b1
    

In [289]:
# print("Predicted Class | Actual Class")
import matplotlib.pyplot as plt


W1, b1, W2, b2 = initialize_weights_and_bias('xavier', distribution='normal')
cost = 0
losses = []
epochs = 1000
for j in range(epochs):
    losss = 0
    tp = 0
    for i in range(len(X_train)):
        
        x = np.array(X_train.iloc[i:i+1, :], dtype=np.float32).reshape(1,4)
        target = np.array(y_train)
        y = []
        if target[i] == 0:
            y = [1, 0, 0]
        elif target[i] == 1:
            y = [0, 1, 0]
        elif target[i] == 2:
            y = [0, 0, 1]


        #network forward pass
        z1, y_hat = forward_pass(x, W1, b1, W2, b2)
        
        
        #we use categorical cross entropy loss
        loss = 0
        for i in range(len(y)):
            if y[i] != 0:
                loss +=  -y[i] * np.log(y_hat[i])
        losss += loss
        
        #the true positives for this case, to calculate accuracy
        if np.argmax(y_hat) == np.argmax(y):
            tp += 1
        
        #network backward pass
        W2, b2, W1, b1 = backward_pass(x, W1, b1, W2, b2, y_hat, y, z1)
    
    #we append total loss of each epoch to the losses array
    losses.append(losss)
    print(f"Epoch = {j}, Accuracy = {tp/len(X_train)}, loss= {losss}")

Epoch = 0, Accuracy = 0.3523809523809524, loss= 112.00765174627304
Epoch = 1, Accuracy = 0.34285714285714286, loss= 110.93877041339874
Epoch = 2, Accuracy = 0.3904761904761905, loss= 106.03584402799606
Epoch = 3, Accuracy = 0.4, loss= 103.13199353218079
Epoch = 4, Accuracy = 0.38095238095238093, loss= 101.00243002176285
Epoch = 5, Accuracy = 0.47619047619047616, loss= 99.2242830991745
Epoch = 6, Accuracy = 0.5714285714285714, loss= 97.69938260316849
Epoch = 7, Accuracy = 0.6666666666666666, loss= 96.35699343681335
Epoch = 8, Accuracy = 0.6761904761904762, loss= 95.13521075248718
Epoch = 9, Accuracy = 0.6761904761904762, loss= 93.98959159851074
Epoch = 10, Accuracy = 0.6761904761904762, loss= 92.89386147260666
Epoch = 11, Accuracy = 0.6761904761904762, loss= 91.83492434024811
Epoch = 12, Accuracy = 0.6761904761904762, loss= 90.80732858181
Epoch = 13, Accuracy = 0.6666666666666666, loss= 89.80946689844131
Epoch = 14, Accuracy = 0.6761904761904762, loss= 88.84127926826477
Epoch = 15, Accu

Epoch = 128, Accuracy = 0.6666666666666666, loss= 58.447803162038326
Epoch = 129, Accuracy = 0.6666666666666666, loss= 58.384982489049435
Epoch = 130, Accuracy = 0.6666666666666666, loss= 58.3230220079422
Epoch = 131, Accuracy = 0.6666666666666666, loss= 58.261922132223845
Epoch = 132, Accuracy = 0.6666666666666666, loss= 58.2016540504992
Epoch = 133, Accuracy = 0.6666666666666666, loss= 58.14220741018653
Epoch = 134, Accuracy = 0.6666666666666666, loss= 58.083564300090075
Epoch = 135, Accuracy = 0.6666666666666666, loss= 58.025710597634315
Epoch = 136, Accuracy = 0.6666666666666666, loss= 57.96862507984042
Epoch = 137, Accuracy = 0.6666666666666666, loss= 57.91228459775448
Epoch = 138, Accuracy = 0.6666666666666666, loss= 57.85670503973961
Epoch = 139, Accuracy = 0.6666666666666666, loss= 57.80184359103441
Epoch = 140, Accuracy = 0.6666666666666666, loss= 57.74769390746951
Epoch = 141, Accuracy = 0.6666666666666666, loss= 57.69424643367529
Epoch = 142, Accuracy = 0.6666666666666666, l

Epoch = 252, Accuracy = 0.6761904761904762, loss= 54.20908353663981
Epoch = 253, Accuracy = 0.6761904761904762, loss= 54.18964529596269
Epoch = 254, Accuracy = 0.6761904761904762, loss= 54.17031721957028
Epoch = 255, Accuracy = 0.6761904761904762, loss= 54.151090022176504
Epoch = 256, Accuracy = 0.6761904761904762, loss= 54.131951346993446
Epoch = 257, Accuracy = 0.6761904761904762, loss= 54.11290266737342
Epoch = 258, Accuracy = 0.6761904761904762, loss= 54.09395742136985
Epoch = 259, Accuracy = 0.6761904761904762, loss= 54.07509976066649
Epoch = 260, Accuracy = 0.6761904761904762, loss= 54.056334437802434
Epoch = 261, Accuracy = 0.6761904761904762, loss= 54.03766401484609
Epoch = 262, Accuracy = 0.6761904761904762, loss= 54.019073355942965
Epoch = 263, Accuracy = 0.6761904761904762, loss= 54.00056375656277
Epoch = 264, Accuracy = 0.6761904761904762, loss= 53.98214054014534
Epoch = 265, Accuracy = 0.6761904761904762, loss= 53.9637939715758
Epoch = 266, Accuracy = 0.6761904761904762, l

Epoch = 375, Accuracy = 0.7142857142857143, loss= 51.92629808653146
Epoch = 376, Accuracy = 0.7142857142857143, loss= 51.90149110555649
Epoch = 377, Accuracy = 0.7142857142857143, loss= 51.87643127981573
Epoch = 378, Accuracy = 0.7142857142857143, loss= 51.85112832672894
Epoch = 379, Accuracy = 0.7142857142857143, loss= 51.8255574638024
Epoch = 380, Accuracy = 0.7238095238095238, loss= 51.79973507672548
Epoch = 381, Accuracy = 0.7238095238095238, loss= 51.77365853730589
Epoch = 382, Accuracy = 0.7238095238095238, loss= 51.74730606842786
Epoch = 383, Accuracy = 0.7238095238095238, loss= 51.720701546408236
Epoch = 384, Accuracy = 0.7238095238095238, loss= 51.69383232947439
Epoch = 385, Accuracy = 0.7238095238095238, loss= 51.66669619921595
Epoch = 386, Accuracy = 0.7238095238095238, loss= 51.63929188530892
Epoch = 387, Accuracy = 0.7238095238095238, loss= 51.61161496117711
Epoch = 388, Accuracy = 0.7238095238095238, loss= 51.58365990687162
Epoch = 389, Accuracy = 0.7238095238095238, loss

Epoch = 505, Accuracy = 0.8666666666666667, loss= 46.236137514933944
Epoch = 506, Accuracy = 0.8666666666666667, loss= 46.170852003619075
Epoch = 507, Accuracy = 0.8666666666666667, loss= 46.10528963431716
Epoch = 508, Accuracy = 0.8761904761904762, loss= 46.03943648561835
Epoch = 509, Accuracy = 0.8761904761904762, loss= 45.973316992633045
Epoch = 510, Accuracy = 0.8761904761904762, loss= 45.906927944161
Epoch = 511, Accuracy = 0.8761904761904762, loss= 45.84027101751417
Epoch = 512, Accuracy = 0.8761904761904762, loss= 45.77335906308144
Epoch = 513, Accuracy = 0.8761904761904762, loss= 45.7061892291531
Epoch = 514, Accuracy = 0.8857142857142857, loss= 45.638761607930064
Epoch = 515, Accuracy = 0.8857142857142857, loss= 45.57110261172056
Epoch = 516, Accuracy = 0.8857142857142857, loss= 45.503193313255906
Epoch = 517, Accuracy = 0.8857142857142857, loss= 45.4350593239069
Epoch = 518, Accuracy = 0.8857142857142857, loss= 45.36667547840625
Epoch = 519, Accuracy = 0.8857142857142857, los

Epoch = 629, Accuracy = 0.9238095238095239, loss= 37.59433775022626
Epoch = 630, Accuracy = 0.9238095238095239, loss= 37.53237992525101
Epoch = 631, Accuracy = 0.9238095238095239, loss= 37.47066308930516
Epoch = 632, Accuracy = 0.9238095238095239, loss= 37.409204663708806
Epoch = 633, Accuracy = 0.9238095238095239, loss= 37.348002849146724
Epoch = 634, Accuracy = 0.9238095238095239, loss= 37.28703963384032
Epoch = 635, Accuracy = 0.9238095238095239, loss= 37.22632414288819
Epoch = 636, Accuracy = 0.9238095238095239, loss= 37.16583390533924
Epoch = 637, Accuracy = 0.9238095238095239, loss= 37.1055975202471
Epoch = 638, Accuracy = 0.9238095238095239, loss= 37.04561484977603
Epoch = 639, Accuracy = 0.9238095238095239, loss= 36.98586008325219
Epoch = 640, Accuracy = 0.9238095238095239, loss= 36.92636630497873
Epoch = 641, Accuracy = 0.9238095238095239, loss= 36.86710147000849
Epoch = 642, Accuracy = 0.9238095238095239, loss= 36.80811549723148
Epoch = 643, Accuracy = 0.9238095238095239, los

Epoch = 753, Accuracy = 0.9428571428571428, loss= 31.564027313143015
Epoch = 754, Accuracy = 0.9428571428571428, loss= 31.526077456772327
Epoch = 755, Accuracy = 0.9428571428571428, loss= 31.488275844603777
Epoch = 756, Accuracy = 0.9428571428571428, loss= 31.450602570548654
Epoch = 757, Accuracy = 0.9428571428571428, loss= 31.413059195503592
Epoch = 758, Accuracy = 0.9428571428571428, loss= 31.375623071566224
Epoch = 759, Accuracy = 0.9428571428571428, loss= 31.338306749239564
Epoch = 760, Accuracy = 0.9428571428571428, loss= 31.301108131185174
Epoch = 761, Accuracy = 0.9428571428571428, loss= 31.26404164545238
Epoch = 762, Accuracy = 0.9428571428571428, loss= 31.227098777890205
Epoch = 763, Accuracy = 0.9428571428571428, loss= 31.19025749899447
Epoch = 764, Accuracy = 0.9428571428571428, loss= 31.153538677841425
Epoch = 765, Accuracy = 0.9428571428571428, loss= 31.116940731182694
Epoch = 766, Accuracy = 0.9428571428571428, loss= 31.080463582649827
Epoch = 767, Accuracy = 0.9428571428

Epoch = 881, Accuracy = 0.9523809523809523, loss= 27.508134869858623
Epoch = 882, Accuracy = 0.9523809523809523, loss= 27.481488700956106
Epoch = 883, Accuracy = 0.9523809523809523, loss= 27.454906253144145
Epoch = 884, Accuracy = 0.9523809523809523, loss= 27.4283834323287
Epoch = 885, Accuracy = 0.9523809523809523, loss= 27.401919351890683
Epoch = 886, Accuracy = 0.9523809523809523, loss= 27.37551449239254
Epoch = 887, Accuracy = 0.9523809523809523, loss= 27.349171455949545
Epoch = 888, Accuracy = 0.9523809523809523, loss= 27.32289405539632
Epoch = 889, Accuracy = 0.9523809523809523, loss= 27.296674018725753
Epoch = 890, Accuracy = 0.9523809523809523, loss= 27.27052271552384
Epoch = 891, Accuracy = 0.9523809523809523, loss= 27.244425924494863
Epoch = 892, Accuracy = 0.9523809523809523, loss= 27.218368761241436
Epoch = 893, Accuracy = 0.9523809523809523, loss= 27.19238266721368
Epoch = 894, Accuracy = 0.9523809523809523, loss= 27.16644360870123
Epoch = 895, Accuracy = 0.952380952380952

In [290]:
# print(losses)
# plt.plot(losses)
# plt.show()

In [291]:
print(f"Accuracy for training data: {round(tp/len(X_train), 2)*100}%")

Accuracy for training data: 96.0%


## For test data

In [292]:
tp_test = 0

for i in range(len(X_test)):
    x = X_test.iloc[i:i+1, :].to_numpy()
    target = y_test.to_numpy()
    y = []
    if target[i] == 0:
        y = [1, 0, 0]
    elif target[i] == 1:
        y = [0, 1, 0]
    elif target[i] == 2:
        y = [0, 0, 1]


    #network forward pas
    layer1_out, layer2_out, layer3_out, layer4_out, y_hat = forward_pass(x, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5)
    
    print(f"{np.argmax(y_hat)} \ {np.argmax(y)}")
    
    if np.argmax(y_hat) == np.argmax(y):
        tp_test += 1
        

TypeError: forward_pass() takes 5 positional arguments but 11 were given

In [None]:
print(f"Accuracy for test data: {round(tp_test/len(X_test), 2)*100}%")