In [234]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder,StandardScaler

In [235]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [236]:
noOfInputs=5 #constant number of inputs
noOfLayers=2 #don't count the output layer as a layer
layerSizes=[2] #don't count the output layer
noOfOutputs=3 #constant number of outputs
initRandWeightMin=-1
initRandWeightMax=1
labels=np.zeros(10)
activationFunction= sigmoid #or np.tanh
lr = 0.001 # learning rate
useBias = True
weights, bias = [], []
neuronsOutput, errors = [], []

In [237]:
def gRandomNumbers(size):
    return np.random.uniform(low=initRandWeightMin,high=initRandWeightMax,size=size)

In [238]:
def initWeights(layerSizes):
    weights=[]
    weights.append(gRandomNumbers((noOfInputs,layerSizes[0])))
    for i, l in enumerate(layerSizes[:-1]):
        weights.append(gRandomNumbers((l))) #weights between last layer and the final output neurons
    weights.append(gRandomNumbers((layerSizes[-1],noOfOutputs))) #weights between last layer and the final output neurons
    return weights

In [239]:
def initBias():
    bias = []
    for i, l in enumerate(layerSizes):
        bias.append(gRandomNumbers((l))) #bias between last layer and the final output neurons
    bias.append(gRandomNumbers((noOfOutputs))) #bias between last layer and the final output neurons
    return bias

In [240]:
def initNeuronsOutput():
    neuronsOutput=[]
    neuronsOutput.append(np.random.random(noOfInputs))
    for l in layerSizes:
        neuronsOutput.append(np.random.random(l))
    neuronsOutput.append(np.random.random(noOfOutputs))
    return neuronsOutput

In [241]:
def initErrors():
    errors=[]
    for l in layerSizes:
        errors.append(np.random.random(l))
    errors.append(np.random.random(noOfOutputs))
    return errors

- A 'layer' is a layer of neurons
- A 'connection' is the set of weights between two layers

In [242]:
def feedForward():
    # instead of multiplying each neuron weights and inputs 
    # we can just multiply weights[i].T x inputs[j] 
    # and produce the next neuronsOutput directly 
    # for example (5, 2).T @ (5,) = (2,) -> activation((2,)) -> (2,)
    for j, connection in enumerate(weights):
        z = (connection.T @ neuronsOutput[j]) + bias[j] if useBias else 0
        neuronsOutput[j+1] = activationFunction(z)

In [243]:
def finalError(label):
    return (label-neuronsOutput[-1])*neuronsOutput[-1]*(1-neuronsOutput[-1])

In [244]:
def backPropagation(label):
    k = [i == label for i in range(noOfOutputs)]
    errors[-1]= (k-neuronsOutput[-1])*neuronsOutput[-1]*(1-neuronsOutput[-1])

    for j in range(len(weights)-1,-1,-1):
        for i in range(errors[j-1].shape[0]):
            s = (errors[j].dot(weights[j][i,:]))+ bias[j][i] if (useBias and j > 0) else 0
            errors[j-1][i]=s*(1-neuronsOutput[j-1][i])

I LOVE obfuscated code

In [245]:
def updateWeights():
    for i,connection in enumerate(weights): #Loop over every connection between every two layers
        for j in range(connection.shape[1]): # loop over every layer's neurons
            for k in range(connection.shape[0]): # loop over all the incoming weights to this neuron
                weights[i][k,j]=weights[i][k,j]+lr*neuronsOutput[i][k]*errors[i][j]
                # Rule: weight=weight+lr*error*input
                if useBias:
                    t = bias[i][j]
                    bias[i][j]=bias[i][j]+lr*errors[i][j]
                    print(f"before: {t}, after: {bias[i][j]}")

In [246]:
weights = initWeights(layerSizes)
weights

[array([[ 0.63634913, -0.79231357],
        [ 0.43147367,  0.1053607 ],
        [-0.33542372, -0.39878964],
        [-0.9433187 ,  0.0161709 ],
        [-0.234961  ,  0.42511795]]),
 array([[ 0.4647161 , -0.55256512,  0.77916392],
        [ 0.93234459, -0.93734816, -0.7867201 ]])]

In [247]:
bias = initBias()
bias

[array([-0.188549  , -0.27238637]),
 array([-0.67547084, -0.03536864,  0.37696664])]

In [248]:
neuronsOutput = initNeuronsOutput()
neuronsOutput

[array([0.6386911 , 0.0901573 , 0.68025799, 0.27951628, 0.70167794]),
 array([0.73323216, 0.61546757]),
 array([0.29048495, 0.00214237, 0.7338682 ])]

In [249]:
errors = initErrors()
errors

[array([0.48755038, 0.97704914]), array([0.80498837, 0.72956532, 0.93011798])]

In [250]:
feedForward()

In [251]:
backPropagation(1)

In [252]:
weights

[array([[ 0.63634913, -0.79231357],
        [ 0.43147367,  0.1053607 ],
        [-0.33542372, -0.39878964],
        [-0.9433187 ,  0.0161709 ],
        [-0.234961  ,  0.42511795]]),
 array([[ 0.4647161 , -0.55256512,  0.77916392],
        [ 0.93234459, -0.93734816, -0.7867201 ]])]

In [253]:
updateWeights() # it works but the error is very small so the difference is small

before: -0.18854899816376225, after: -0.1888820830510483
before: -0.1888820830510483, after: -0.18921516793833437
before: -0.18921516793833437, after: -0.18954825282562043
before: -0.18954825282562043, after: -0.1898813377129065
before: -0.1898813377129065, after: -0.19021442260019256
before: -0.27238637355829565, after: -0.2725357930554375
before: -0.2725357930554375, after: -0.2726852125525794
before: -0.2726852125525794, after: -0.27283463204972125
before: -0.27283463204972125, after: -0.2729840515468631
before: -0.2729840515468631, after: -0.273133471044005
before: -0.6754708404244372, after: -0.6754708404244372
before: -0.6754708404244372, after: -0.6754708404244372
before: -0.03536864029837239, after: -0.03536864029837239
before: -0.03536864029837239, after: -0.03536864029837239
before: 0.3769666416399746, after: 0.3769666416399746
before: 0.3769666416399746, after: 0.3769666416399746


In [254]:
weights

[array([[ 0.6361364 , -0.792409  ],
        [ 0.43144364,  0.10534723],
        [-0.33565031, -0.39889128],
        [-0.9434118 ,  0.01612913],
        [-0.23519472,  0.42501311]]),
 array([[ 0.4647161 , -0.55256512,  0.77916392],
        [ 0.93234459, -0.93734816, -0.7867201 ]])]

Don't take any function for granted, not showing errors doesn't neccessarly mean working correctly

In [255]:
#TODO: loop over some samples

In [256]:
a = np.matrix([[1,2],[2,3]])
b = np.matrix([[3,4],[4,5]])
(a @ b) == (a.T  @ b)

matrix([[ True,  True],
        [ True,  True]])

Reading the data

In [257]:
data=pd.read_csv("penguins.csv")

In [258]:
data['Species'].value_counts()

Species
Adelie       50
Chinstrap    50
Gentoo       50
Name: count, dtype: int64

In [None]:
data.notna().sum().sum() # There's 8 values missing , we will impute them by the column mean

np.int64(892)

In [260]:
data.fillna(data.mean(numeric_only=True),inplace=True)

In [261]:
le=LabelEncoder()
data['OriginLocation']=le.fit_transform(data['OriginLocation'])
data['Species']=le.fit_transform(data['Species'])

In [262]:
numeric_cols = data.columns.drop(['OriginLocation','Species'])
scaler = StandardScaler()
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

First 30 samples of every species is taken as training data and the rest is for testing

In [263]:
trainingData=pd.concat((data[data['Species']==0][:30],data[data['Species']==1][:30],data[data['Species']==2][:30]))
testData = data.drop(trainingData.index)

Shuffle the data and reset its index

In [265]:
trainingData=trainingData.sample(frac=1).reset_index(drop=True)
testData=testData.sample(frac=1).reset_index(drop=True)

In [269]:
xTrain,yTrain=trainingData[trainingData.columns.drop('Species')],trainingData['Species']
xTest,yTest=testData[testData.columns.drop('Species')],testData['Species']