In [301]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder,StandardScaler
import plotly.express as px

In [302]:
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise to a scalar or array."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

def sigmoid_d(x):
    """Derivative of the logistic function, expressed through its output.

    `x` is expected to be an already-activated value s = sigmoid(z) (the
    callers in this notebook pass stored neuron outputs), for which the
    derivative with respect to z is s * (1 - s).
    """
    complement = 1 - x
    return x * complement

def tanh(x):
    """Hyperbolic tangent, applied element-wise; thin wrapper around np.tanh."""
    squashed = np.tanh(x)
    return squashed

def tanh_d(x):
    """Derivative of tanh, expressed through its output.

    `x` is expected to be an already-activated value t = tanh(z) (the callers
    in this notebook pass stored neuron outputs); the derivative with respect
    to z is 1 - t^2, computed here in the factored form (1 - t)(1 + t).
    """
    below = 1 - x
    above = 1 + x
    return below * above

In [303]:
# --- Network architecture ---
noOfInputs=5 # number of input features fed to the network
layerSizes=[10] # neurons in each hidden layer (the output layer is NOT listed here)
noOfOutputs=3 # number of output neurons; backPropagation builds a one-hot target of this length
noOfLayers= len(layerSizes)+1 # hidden layers plus the output layer (the input layer is not counted)

# --- Weight / bias initialisation range (uniform) ---
initRandWeightMin=-0.1
initRandWeightMax= 0.1

labels=np.zeros(10) # NOTE(review): not referenced by any cell visible in this notebook

# --- Activation function and its matching derivative ---
activationFunction = tanh
activationFunction_d = sigmoid_d if activationFunction == sigmoid else tanh_d

lr = 0.01 # learning rate
useBias = True

# --- Reproducibility ---
# Every random draw in this notebook (np.random.uniform / np.random.random for
# the initial parameters, and pandas' unseeded .sample() used for shuffling)
# goes through NumPy's global RNG, so seeding it once here makes a
# "Restart Kernel -> Run All" produce the same result every time.
randomSeed = 42
np.random.seed(randomSeed)

# --- Mutable network state; filled in by the init* functions further down ---
weights, bias = [], []
neuronsOutput, errors = [], []

In [304]:
def gRandomNumbers(size):
    """Return an array of shape `size` drawn uniformly from
    [initRandWeightMin, initRandWeightMax) using NumPy's global RNG."""
    return np.random.uniform(initRandWeightMin, initRandWeightMax, size)

In [305]:
def initWeights(layerSizes):
    """Create one random weight matrix per pair of consecutive layers.

    Parameters
    ----------
    layerSizes : sequence of int
        Number of neurons in each hidden layer. May be empty, in which case
        the inputs are connected directly to the outputs.

    Returns
    -------
    list of ndarray
        Matrices of shape (fan_in, fan_out), ordered from the input side to
        the output side: input -> first hidden, hidden -> hidden, ...,
        last hidden -> output. The input and output widths come from the
        globals `noOfInputs` and `noOfOutputs`; values are drawn by
        `gRandomNumbers`.
    """
    # Full list of layer widths, input and output included; each adjacent
    # pair (fan_in, fan_out) defines one connection.
    widths = [noOfInputs, *layerSizes, noOfOutputs]
    return [gRandomNumbers((fanIn, fanOut))
            for fanIn, fanOut in zip(widths[:-1], widths[1:])]

In [306]:
def initBias():
    """Create one random bias vector per non-input layer.

    Returns a list with one 1-D array per hidden layer (lengths taken from the
    global `layerSizes`) followed by one array of length `noOfOutputs` for the
    output layer. Values are drawn by `gRandomNumbers`.
    """
    layerWidths = list(layerSizes) + [noOfOutputs]
    return [gRandomNumbers(width) for width in layerWidths]

In [307]:
def initNeuronsOutput():
    """Allocate the per-layer activation buffers.

    Returns a list of 1-D arrays: one of length `noOfInputs`, one per entry of
    `layerSizes`, and one of length `noOfOutputs`. They are filled with
    np.random.random values, which act only as placeholders.
    """
    widths = [noOfInputs, *layerSizes, noOfOutputs]
    return [np.random.random(width) for width in widths]

In [308]:
def initErrors():
    """Allocate the per-layer error (delta) buffers.

    Returns a list of 1-D arrays: one per entry of `layerSizes` followed by one
    of length `noOfOutputs`. There is no buffer for the input layer. They are
    filled with np.random.random values, which act only as placeholders.
    """
    widths = [*layerSizes, noOfOutputs]
    return [np.random.random(width) for width in widths]

- A 'layer' is a layer of neurons
- A 'connection' is the set of weights between two layers

In [309]:
def feedForward():
    """Propagate neuronsOutput[0] through the network, layer by layer.

    Reads the globals `weights`, `bias`, `useBias` and `activationFunction`,
    and overwrites neuronsOutput[1:] with the freshly computed activations.
    Returns nothing.
    """
    # A whole layer is computed with one matrix-vector product rather than
    # neuron by neuron: e.g. a (5, 2) connection transposed, times a (5,)
    # input vector, gives the (2,) pre-activations of the next layer.
    for layerIdx in range(len(weights)):
        offset = bias[layerIdx] if useBias else 0
        preActivation = (weights[layerIdx].T @ neuronsOutput[layerIdx]) + offset
        neuronsOutput[layerIdx + 1] = activationFunction(preActivation)

In [310]:
def backPropagation(label):
    """Compute the error term (delta) of every non-input layer for one sample.

    Must be called after feedForward(), because it reads the activations
    stored in the global `neuronsOutput`. Results are written into the global
    `errors` list (errors[-1] is the output layer); nothing is returned.

    Parameters
    ----------
    label : int
        Index of the correct class, in range(noOfOutputs). It is expanded
        into a one-hot target vector.
    """
    # Output layer: (target - prediction) scaled by the activation derivative.
    # activationFunction_d takes the activation *output*, hence neuronsOutput.
    target = (np.arange(noOfOutputs) == label).astype(float)
    errors[-1] = (target - neuronsOutput[-1]) * activationFunction_d(neuronsOutput[-1])

    # Hidden layers, from the last one back to the first. weights[j] has shape
    # (neurons in this layer, neurons in the next layer), so weights[j] @ errors[j]
    # gives, for every neuron of this layer at once, the sum of the next layer's
    # deltas weighted by the outgoing connections.
    #
    # The bias is deliberately NOT added to that sum: it is an input to the
    # neuron's pre-activation, not a path along which downstream error flows.
    # Adding it (as an earlier version did) made each hidden bias feed its own
    # update in updateWeights (bias += lr * errors), distorting the gradient.
    for j in range(len(weights) - 1, 0, -1):
        backpropagated = weights[j] @ errors[j]
        errors[j - 1] = backpropagated * activationFunction_d(neuronsOutput[j])

I LOVE obfuscated code

In [311]:
def updateWeights():
    """Apply one learning step to every connection (and bias, if enabled).

    Uses the activations in `neuronsOutput` and the deltas in `errors` left by
    the preceding feedForward() / backPropagation() calls. The arrays in the
    globals `weights` and `bias` are modified in place; nothing is returned.
    """
    for layerIdx, connection in enumerate(weights):
        # (n_in, 1) @ (1, n_out) is the outer product of the layer's input
        # with the next layer's deltas: one adjustment per individual weight.
        asColumn = neuronsOutput[layerIdx].reshape(-1, 1)
        asRow = errors[layerIdx].reshape(1, -1)
        connection += lr * (asColumn @ asRow)

        if useBias:
            bias[layerIdx] += lr * errors[layerIdx]

In [312]:
# Build the random initial weight matrices (one per pair of consecutive
# layers) and display them as a sanity check on their shapes and value range.
weights = initWeights(layerSizes)
weights

[array([[-0.09927228, -0.08395082,  0.0516142 ,  0.03768122, -0.00899754,
          0.02831802,  0.06296995,  0.00306482,  0.09130575,  0.08329036],
        [ 0.08248962,  0.06376008, -0.09847013, -0.06415975,  0.01894343,
          0.09221047,  0.03663779, -0.08369704, -0.05055615, -0.04988335],
        [-0.02794306, -0.09169467,  0.09334259, -0.09678102,  0.0287709 ,
         -0.0959741 , -0.02199448,  0.00349848, -0.08431505, -0.07139648],
        [-0.01816958,  0.01667784, -0.08095977,  0.02867722, -0.01838199,
         -0.04855497,  0.01087766,  0.05684325, -0.00055539,  0.02028006],
        [-0.061434  ,  0.02894805,  0.04310476,  0.09404511,  0.03072368,
          0.08256787,  0.07492725,  0.06237204, -0.01620888, -0.05664773]]),
 array([[-0.06460088,  0.05608309,  0.04401163],
        [ 0.00595157,  0.0313552 ,  0.00943339],
        [ 0.00692764, -0.0996131 ,  0.05630369],
        [ 0.09611857, -0.02907393, -0.00028544],
        [-0.04131074,  0.04738483,  0.04237793],
        

In [313]:
# Build the random initial bias vectors (one per hidden layer, then one for
# the output layer) and display them.
bias = initBias()
bias

[array([ 0.0325317 , -0.01222616, -0.01858975, -0.08259808,  0.0391952 ,
         0.07755143,  0.02205311, -0.02595567, -0.00339846,  0.03473832]),
 array([-0.05589654,  0.01873463, -0.08212386])]

In [314]:
# Allocate the activation buffers. The random values are placeholders only:
# entry 0 is replaced by each sample's features before a forward pass, and
# feedForward() overwrites all the others.
neuronsOutput = initNeuronsOutput()
neuronsOutput

[array([0.3618236 , 0.95497353, 0.25968587, 0.91955064, 0.22940377]),
 array([0.69700824, 0.87162884, 0.30604222, 0.41027716, 0.60517728,
        0.13598089, 0.61958161, 0.90645304, 0.71071923, 0.99297814]),
 array([0.75030706, 0.39618467, 0.45494846])]

In [315]:
# Allocate the error (delta) buffers. The random values are placeholders that
# backPropagation() overwrites for every sample.
errors = initErrors()
errors

[array([0.76183036, 0.28629942, 0.25636699, 0.91601858, 0.99629733,
        0.30071065, 0.86526378, 0.86449291, 0.12340956, 0.69260653]),
 array([0.98383624, 0.22629105, 0.42461543])]

Don't take any function for granted: running without errors doesn't necessarily mean it works correctly.

Reading the data

In [316]:
# Load the penguin dataset; the path is relative to the notebook's working directory.
data=pd.read_csv("penguins.csv")

In [317]:
# Class balance check: number of rows per species.
data['Species'].value_counts()

Species
Adelie       50
Chinstrap    50
Gentoo       50
Name: count, dtype: int64

In [318]:
# Counts the NON-missing cells of the whole frame (notna), not the missing ones.
# Per the original note, 8 values are missing; they are imputed with their
# column mean in the next cell.
data.notna().sum().sum()

np.int64(892)

In [319]:
# Impute missing values with the mean of their column. Only numeric columns
# have a mean, so gaps in non-numeric columns are left untouched.
columnMeans = data.mean(numeric_only=True)
data = data.fillna(columnMeans)

In [320]:
# Replace the two categorical columns with integer codes. The same encoder
# object is re-fitted for each column, so afterwards `le` only remembers the
# mapping of the last one ('Species').
le = LabelEncoder()
for categoricalCol in ('OriginLocation', 'Species'):
    data[categoricalCol] = le.fit_transform(data[categoricalCol])

In [321]:
# Standardise every column except the two label-encoded ones.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed in the following cells, so the test rows
# influence the scaling statistics. Consider fitting on the training rows only.
numeric_cols = data.columns.drop(['OriginLocation','Species'])
scaler = StandardScaler()
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

The first 30 samples of every species are taken as training data; the rest are used for testing.

In [322]:
# Training set: the first 30 rows of each encoded species (codes 0, 1, 2),
# stacked in that order. Test set: every row not picked for training.
perSpeciesHead = [data[data['Species'] == code][:30] for code in (0, 1, 2)]
trainingData = pd.concat(perSpeciesHead)
testData = data.drop(index=trainingData.index)

Shuffle the data and reset its index

In [323]:
# sample(frac=1) returns all rows in random order; reset_index(drop=True) then
# relabels them 0..n-1, which the loops below rely on when they look up
# yTrain[i] / yTest[i] by the row label coming from iterrows().
# No random_state is passed, so the order differs between runs unless NumPy's
# global RNG has been seeded beforehand.
trainingData=trainingData.sample(frac=1).reset_index(drop=True)
testData=testData.sample(frac=1).reset_index(drop=True)

In [324]:
# Separate the features (every column except 'Species') from the target.
xTrain = trainingData.drop(columns='Species')
yTrain = trainingData['Species']
xTest = testData.drop(columns='Species')
yTest = testData['Species']

In [325]:
# Correct-prediction counter. The training loop below resets it at the start
# of every epoch, so this initial value is never read by that loop.
acc=0

In [326]:
# Online (sample-by-sample) training. For each epoch, every training row is
# pushed through the network, scored, and immediately used for one weight
# update. The accuracy recorded per epoch is therefore measured while the
# weights are still changing.
epochs = 200
accurecies = []  # training accuracy of each epoch, in order
for epoch in range(epochs):
    acc = 0  # correct predictions in this epoch
    for i, row in xTrain.iterrows():
        target = yTrain[i]

        neuronsOutput[0] = row.values
        feedForward()

        # The predicted class is the index of the largest output neuron.
        acc += int(neuronsOutput[-1].argmax() == target)

        backPropagation(target)
        updateWeights()

    accurecies.append(acc / len(xTrain))

In [327]:
# Training accuracy per epoch: the x axis is the epoch index (position in the
# list), the y axis is the fraction of training samples classified correctly.
fig = px.line(accurecies)
fig.show()

In [328]:
# Reset the correct-prediction counter before evaluating on the test set.
acc=0

In [329]:
# Evaluate on the held-out rows: forward pass only, no weight updates.
# The counter is reset inside this cell so that re-running the cell on its own
# never adds to a previous count (which would inflate the reported accuracy).
acc = 0
for i, row in xTest.iterrows():
    neuronsOutput[0] = row.values
    feedForward()

    # The predicted class is the index of the largest output neuron.
    if neuronsOutput[-1].argmax() == yTest[i]:
        acc += 1


In [330]:
# Test accuracy: fraction of test samples classified correctly.
# NOTE(review): with three equally represented classes, a value of 1/3 is
# chance level, i.e. what a model that always predicts the same class scores.
acc/len(yTest)

0.3333333333333333