In [31]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder,StandardScaler
import plotly.express as px

In [32]:
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + e^-x), evaluated without floating-point overflow.

    Parameters
    ----------
    x : scalar or array-like of numbers
        Pre-activation value(s).

    Returns
    -------
    NumPy float scalar for scalar input, otherwise an ndarray shaped like `x`,
    with every value in [0, 1].
    """
    x = np.asarray(x, dtype=float)
    # exp() only ever sees a non-positive argument here, so it cannot overflow
    # (the naive form evaluates exp(-x), which blows up for large negative x).
    decay = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Hand scalars back as scalars rather than as 0-d arrays
    return out[()] if out.ndim == 0 else out

In [None]:
# --- Network configuration -------------------------------------------------
noOfInputs=5 # number of input features fed to the network
layerSizes=[2,3] # neurons per hidden layer (the output layer is NOT listed here)
noOfOutputs=3 # number of output neurons (one per class index)
noOfLayers= len(layerSizes)+1 # hidden layers plus the output layer (the input layer is not counted)
initRandWeightMin=-0.1 # lower bound of the uniform range used by gRandomNumbers
initRandWeightMax= 0.1 # upper bound of the uniform range used by gRandomNumbers
labels=np.zeros(10) # not referenced by any other cell visible in this notebook
activationFunction = sigmoid # applied to every layer in feedForward
lr = 0.01 # learning rate
useBias = True

# --- Mutable network state, filled in by the init* functions below ---------
weights, bias = [], []
neuronsOutput, errors = [], []

# --- Reproducibility ---------------------------------------------------------
# Seed NumPy's global generator so weight initialisation (np.random.uniform /
# np.random.random) gives the same values on every Restart & Run All.
randomSeed = 42
np.random.seed(randomSeed)

In [34]:
def gRandomNumbers(size):
    """Draw `size` samples uniformly between initRandWeightMin and initRandWeightMax.

    `size` is forwarded unchanged to np.random.uniform, so it may be an int
    or a shape tuple.
    """
    lowerBound, upperBound = initRandWeightMin, initRandWeightMax
    return np.random.uniform(lowerBound, upperBound, size)

In [35]:
def initWeights(layerSizes):
    """Create one random weight matrix per connection between consecutive layers.

    Parameters
    ----------
    layerSizes : sequence of int
        Neuron count of each hidden layer, in order. May be empty, in which
        case the inputs are connected directly to the outputs.

    Returns
    -------
    list of ndarray
        Matrix k has shape (size of layer k, size of layer k+1), where the
        layer sequence is: noOfInputs, each hidden layer, noOfOutputs.
        Values come from gRandomNumbers.
    """
    # Full chain of layer widths, input and output included. Pairing each
    # width with its successor yields every connection in order, and an empty
    # hidden-layer list no longer fails on layerSizes[0].
    widths = [noOfInputs, *layerSizes, noOfOutputs]
    return [gRandomNumbers((fanIn, fanOut)) for fanIn, fanOut in zip(widths[:-1], widths[1:])]

In [36]:
def initBias():
    """Create one random bias vector per non-input layer.

    Returns a list holding a vector for every hidden layer in `layerSizes`
    (in order), followed by a vector of length `noOfOutputs` for the output
    layer. Values come from gRandomNumbers.
    """
    hiddenBiases = [gRandomNumbers(width) for width in layerSizes]
    outputBias = gRandomNumbers(noOfOutputs)
    return hiddenBiases + [outputBias]

In [37]:
def initNeuronsOutput():
    """Allocate one activation vector per layer, input and output included.

    The vectors are filled with np.random.random values and ordered as:
    input layer, each hidden layer, output layer.
    """
    widths = [noOfInputs] + list(layerSizes) + [noOfOutputs]
    return [np.random.random(width) for width in widths]

In [38]:
def initErrors():
    """Allocate one error vector per non-input layer.

    The vectors are filled with np.random.random values and ordered as:
    each hidden layer, then the output layer.
    """
    widths = list(layerSizes) + [noOfOutputs]
    return [np.random.random(width) for width in widths]

- A 'layer' is a layer of neurons
- A 'connection' is the set of weights between two layers

In [39]:
def feedForward():
    """Propagate neuronsOutput[0] through every connection, in place.

    Reads the global `weights`, `bias`, `useBias` and `activationFunction`,
    and overwrites neuronsOutput[1:], one entry per connection.
    """
    # A whole layer is computed in one product instead of neuron by neuron:
    # weights[k].T @ neuronsOutput[k] gives the pre-activations of layer k+1,
    # e.g. (5, 2).T @ (5,) = (2,) -> activation((2,)) -> (2,)
    for layerIndex in range(len(weights)):
        preActivation = weights[layerIndex].T @ neuronsOutput[layerIndex]
        if useBias:
            preActivation = preActivation + bias[layerIndex]
        neuronsOutput[layerIndex + 1] = activationFunction(preActivation)

In [40]:
def backPropagation(label):
    """Compute the error term (delta) of every non-input layer for one sample.

    Must be called after feedForward(), because it reads the activations
    stored in the global `neuronsOutput`. Results are written into the global
    `errors` list (errors[-1] is the output layer).

    Parameters
    ----------
    label : int
        Index of the correct output neuron; the target vector is its one-hot
        encoding over `noOfOutputs` entries.

    Notes
    -----
    The derivative factor out * (1 - out) is the derivative of the sigmoid,
    so this is only correct while `activationFunction` is the sigmoid.
    """
    # Output layer: delta = (target - out) * out * (1 - out)
    target = (np.arange(noOfOutputs) == label).astype(float)
    output = neuronsOutput[-1]
    errors[-1] = (target - output) * output * (1 - output)

    # Hidden layers, walking from the last hidden layer back to the first.
    # weights[j] connects hidden layer j-1 (activations in neuronsOutput[j])
    # to the layer whose deltas are errors[j].
    for j in range(len(weights) - 1, 0, -1):
        hiddenOutput = neuronsOutput[j]
        # Sum of downstream deltas weighted by the outgoing connections.
        # The bias is deliberately NOT added here: it is not part of the
        # back-propagated error, and bias[j] is sized for the downstream
        # layer, which is why indexing it per hidden neuron could go out of
        # range.
        backpropagated = weights[j] @ errors[j]
        errors[j - 1] = backpropagated * hiddenOutput * (1 - hiddenOutput)

I LOVE obfuscated code

In [41]:
def updateWeights():
    """Apply one gradient step to every weight matrix (and bias, if enabled).

    Reads the activations in `neuronsOutput` and the deltas in `errors`, and
    modifies the global `weights` and `bias` in place using learning rate `lr`.
    """
    for layerIndex, layerWeights in enumerate(weights):
        delta = errors[layerIndex]
        # Column of inputs times row of deltas: entry (a, b) is input a * delta b
        inputColumn = neuronsOutput[layerIndex].reshape(-1, 1)
        layerWeights += lr * (inputColumn * delta.reshape(1, -1))

        if useBias:
            bias[layerIndex] += lr * delta

In [42]:
# Build the random connection matrices for the configured architecture and
# display them (one matrix per pair of consecutive layers).
weights = initWeights(layerSizes)
weights

[array([[-0.09147867,  0.00929264],
        [-0.09561539, -0.02848619],
        [-0.02388567, -0.09053165],
        [-0.07925672, -0.09187971],
        [ 0.09301473,  0.00070257]]),
 array([[-0.0361643 , -0.0586946 , -0.09209322],
        [ 0.03397781,  0.07208023, -0.06354969]]),
 array([[-0.03972539, -0.08521502, -0.08288775],
        [-0.03587659, -0.02217294,  0.07600444],
        [-0.01269991,  0.04432014, -0.02858496]])]

In [43]:
# Build the random bias vectors (one per hidden layer plus the output layer)
# and display them.
bias = initBias()
bias

[array([-0.00539918,  0.05658374]),
 array([ 0.07326758,  0.07906774, -0.03503366]),
 array([0.00455219, 0.04370299, 0.05741109])]

In [44]:
# Allocate the per-layer activation buffers. The random values are only
# placeholders: entry 0 is replaced by each input sample and the remaining
# entries are overwritten by feedForward().
neuronsOutput = initNeuronsOutput()
neuronsOutput

[array([0.93228246, 0.82473936, 0.5190848 , 0.60908985, 0.35867155]),
 array([0.10685106, 0.28822481]),
 array([0.24489082, 0.48194531, 0.69616572]),
 array([0.79927983, 0.62967954, 0.74097581])]

In [45]:
# Allocate the per-layer error buffers. The random values are only
# placeholders that backPropagation() overwrites.
errors = initErrors()
errors

[array([0.41736557, 0.01424903]),
 array([0.79940175, 0.41772262, 0.14026951]),
 array([0.05955191, 0.26942464, 0.08877387])]

Don't take any function for granted: running without errors doesn't necessarily mean it works correctly.

Reading the data

In [46]:
# Load the dataset from penguins.csv (path is relative to the working directory)
data=pd.read_csv("penguins.csv")

In [47]:
# Class balance check: number of rows per species
data['Species'].value_counts()

Species
Adelie       50
Chinstrap    50
Gentoo       50
Name: count, dtype: int64

In [48]:
# Total number of NON-missing cells in the frame (notna, not isna).
# The author reports that 8 values are missing; they are imputed with the
# column mean in the next cell.
data.notna().sum().sum()

np.int64(892)

In [49]:
# Replace missing values with the mean of their column. Only numeric columns
# have a mean (numeric_only=True), so other columns are left untouched.
columnMeans = data.mean(numeric_only=True)
data = data.fillna(columnMeans)

In [50]:
# Encode the two categorical columns as integer codes. Each column gets its
# own encoder so both mappings stay available afterwards (e.g. for
# inverse_transform); refitting one shared encoder would discard the
# OriginLocation mapping.
locationEncoder=LabelEncoder()
data['OriginLocation']=locationEncoder.fit_transform(data['OriginLocation'])

# `le` stays bound to the Species encoder, as it was after the original cell.
le=LabelEncoder()
data['Species']=le.fit_transform(data['Species'])

In [51]:
# Standardise every column except the two label-encoded ones.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split made in a later cell, so test rows contribute to the
# scaling statistics. Consider fitting on the training rows only.
numeric_cols = data.columns.drop(['OriginLocation','Species'])
scaler = StandardScaler()
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

The first 30 samples of each species are taken as training data, and the rest are used for testing.

In [52]:
# Training set: the first 30 rows of each encoded species (codes 0, 1, 2),
# stacked in species order. Every remaining row goes to the test set.
perSpeciesHead = [data[data['Species'] == speciesCode][:30] for speciesCode in (0, 1, 2)]
trainingData = pd.concat(perSpeciesHead)
testData = data.drop(index=trainingData.index)

Shuffle the data and reset its index

In [53]:
# Shuffle both sets and renumber their rows 0..n-1. A fixed random_state makes
# the sample order, and therefore the per-sample training updates, identical
# on every run.
shuffleSeed = 42
trainingData=trainingData.sample(frac=1, random_state=shuffleSeed).reset_index(drop=True)
testData=testData.sample(frac=1, random_state=shuffleSeed).reset_index(drop=True)

In [54]:
# Separate the features (every column except Species) from the target column
xTrain = trainingData.drop(columns='Species')
yTrain = trainingData['Species']
xTest = testData.drop(columns='Species')
yTest = testData['Species']

In [55]:
# Running count of correct predictions (the training loop also resets it at
# the start of every epoch)
acc=0

In [56]:
accurecies = []  # training accuracy of each epoch, in order
epochs = 200

# Pull the data out of pandas once. Iterating raw arrays avoids building a
# Series per row on every epoch (as iterrows does) and pairs each sample with
# its label by position instead of by index label.
trainFeatures = xTrain.values
trainLabels = yTrain.values

for epoch in range(epochs):
    acc = 0
    for sample, target in zip(trainFeatures, trainLabels):
        neuronsOutput[0] = sample
        feedForward()

        # The prediction is scored before this sample updates the weights
        if neuronsOutput[-1].argmax() == target:
            acc += 1

        backPropagation(target)
        updateWeights()

    accurecies.append(acc/len(xTrain))

In [57]:
# Training accuracy per epoch. Title and axis labels are set so the figure can
# be read on its own; the single-series legend is hidden.
fig = px.line(accurecies, title="Training accuracy per epoch")
fig.update_layout(xaxis_title="Epoch", yaxis_title="Accuracy", showlegend=False)
fig.show()

In [58]:
# Reset the correct-prediction counter before evaluating on the test set
acc=0

In [59]:
# Forward pass only (no weight updates): count test samples whose strongest
# output neuron matches the label. Raw arrays are iterated instead of
# iterrows(), so samples and labels are paired by position and no Series is
# built per row.
for sample, target in zip(xTest.values, yTest.values):
    neuronsOutput[0] = sample
    feedForward()

    if neuronsOutput[-1].argmax() == target:
        acc += 1


In [60]:
# Test accuracy: fraction of test samples classified correctly.
# NOTE(review): the recorded value of 1/3 equals chance level for three
# equally sized classes, which suggests the network is not learning yet.
acc/len(yTest)

0.3333333333333333