In [81]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [82]:
class Layer:
    def __init__(self,features,neurons,activationfunc):
        self.features            = features
        self.neurons             = neurons
        self.activationfunc      = activationfunc
        self.weight              = np.random.randn(self.neurons, self.features)
        self.bias                = np.random.randn(neurons,1)
        self.input               = np.zeros([self.features,1])
        self.output              = np.zeros([self.features,1])

    def activationReLu(self,inputs):
        return np.maximum(inputs,0)
        
    def derivativeReLu(self,inputs):
        return inputs > 0
    
    def activationSoftMax(self, inputs):
        exp_values = np.exp(inputs - np.max(inputs))
        return exp_values / np.sum(exp_values)
        
    def derivativeSofMax(self,input):
        return self.activationSoftMax(input)*(1-self.activationSoftMax(input))
    
    def derivativeFunction(self,input):
        if self.activationfunc   == "relu":
            return self.derivativeReLu(input)
        elif self.activationfunc == "softmax":
            return self.derivativeSofMax(input)
        else :
            return 1
    
    def activationFunction(self,inputs):
        if self.activationfunc   == "relu":
            return self.activationReLu(inputs)
        elif self.activationfunc == "softmax":
            return self.activationSoftMax(inputs)
        else:
            return inputs

    def feedforward(self,input):
        self.input = input
        self.output      = np.dot(self.weight, input)
        self.output      += self.bias
        activation  = self.activationFunction(self.output)  
        return activation
        
    def backpropagation(self,dZ):
        m    = dZ.shape[1]
        dEdZ = dZ*self.derivativeFunction(self.output)
        dW   = (1/m) * np.dot(dEdZ, self.input.transpose())
        dB   = (1/m) * np.sum(dEdZ, axis=1, keepdims=True)
        dE   = np.dot(self.weight.transpose(), dEdZ)
 
        return dW, dB, dE

    def learn(self, dW, dB, learning_rate):
        self.weight -= dW*learning_rate
        self.bias   -= dB*learning_rate

class MLP:
    def __init__(self, input_dim:int, layer_dims:list[int], activationfuncs:list[int], learning_rate=0.001):
        self.learning_rate = learning_rate
        self.network = []
        self.network.append(Layer(input_dim, layer_dims[0], activationfuncs[0]))
        for i in range(len(layer_dims)-1):
            self.network.append(Layer(layer_dims[i], layer_dims[i+1], activationfuncs[i]))

    def feedforward(self, input):
        out = input
        for i in range(len(self.network)):
            out = self.network[i].feedforward(out)        
        return out

    def backpropagation(self, ground_truth, output):
        error = output - ground_truth

        for i in reversed(range(len(self.network))):
            dW, dB, error = self.network[i].backpropagation(error)
            self.network[i].learn(dW, dB, self.learning_rate)

    def mse_loss(self,ground_truth, output):
        squared_diff = (ground_truth - output) ** 2
        overall_mse = squared_diff.mean()
        return overall_mse

In [83]:
# Load the dataset
data = load_iris()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.3, random_state=42)

# Convert the training and testing sets into input and labels
train_input = X_train
test_input = X_test

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training data and transform both the training and testing data
train_input = scaler.fit_transform(train_input)
test_input = scaler.transform(test_input)


# One-hot encode the target labels
encoder = OneHotEncoder()
train_labels = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
test_labels = encoder.transform(y_test.reshape(-1, 1)).toarray()


In [84]:
mlp = MLP(4, layer_dims = [64, 64, 3], activationfuncs=[ "softmax", "softmax"], learning_rate=0.01)

In [85]:
epoch=500
batch_size = 1
for i in range(epoch):
    outs = []
    for j in range(0,len(train_input), batch_size):
        x = train_input[j:j+batch_size]
        y = train_labels[j:j+batch_size]
        out = mlp.feedforward(x.T)
        mlp.backpropagation(y.T, out)
        outs.append(out)
    if i%10 == 0:
        print("mse loss : ", mlp.mse_loss(train_labels, np.array(outs).reshape([105,3])))

mse loss :  0.2737785298333683
mse loss :  0.25843454473583155
mse loss :  0.24153953360680444
mse loss :  0.22583371001174188
mse loss :  0.21667582322389256
mse loss :  0.2124172241836926
mse loss :  0.20961988925437794
mse loss :  0.20682482082715545
mse loss :  0.203577974051321
mse loss :  0.19966964966375753
mse loss :  0.19492743436363616
mse loss :  0.18918505334559926
mse loss :  0.18231918604493358
mse loss :  0.1743724696036565
mse loss :  0.16572059857786983
mse loss :  0.15705270809562877
mse loss :  0.14903843035331785
mse loss :  0.14201306252573426
mse loss :  0.13597731431065535
mse loss :  0.13076613793462977
mse loss :  0.12618120610996927
mse loss :  0.12204774149049234
mse loss :  0.11822794060761921
mse loss :  0.11461854159534138
mse loss :  0.11114513126229106
mse loss :  0.10775736061141529
mse loss :  0.10442573138277379
mse loss :  0.10113926436427385
mse loss :  0.09790288397839032
mse loss :  0.0947334730302451
mse loss :  0.09165430141099834
mse loss :  0.

In [86]:
outs= []
for input, label in zip(train_input, y_train):
    out = mlp.feedforward(input.reshape(4,1))
    outs.append(np.argmax(out))

In [87]:
np.array(outs).transpose()

array([1, 2, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1,
       2, 0, 1, 2, 0, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 2, 0, 2, 0, 0,
       2, 1, 2, 2, 1, 2, 1, 0, 0, 1, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 2,
       2, 2, 2, 0, 2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0,
       2, 0, 2, 2, 2, 1, 2, 1, 1, 2, 2, 0, 1, 1, 0, 1, 2], dtype=int64)

In [88]:
y_train-outs

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0, -1,  0,  0,  0,  0, -1,  0,  0, -1,  1,  0,  0,
        0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0, -1,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,
        0,  0,  0], dtype=int64)

In [89]:
outs= []
for input, label in zip(test_input, y_test):
    out = mlp.feedforward(input.reshape(4,1))
    # print(out)
    outs.append(np.argmax(out))


y_test-outs

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0, -1,  0,  0,  0,  0,  0, -1,  0,  0], dtype=int64)

In [101]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the wine dataset
data = load_wine()
X = data.data   # Features (inputs)
y = data.target # Target (outputs)

# Split the dataset into a training set and a test set
# test_size=0.2 means 20% of data is used for testing and 80% for training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Now, X_train and y_train are the training inputs and outputs,
# and X_test and y_test are the testing inputs and outputs.
train_input = X_train
test_input = X_test

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler on the training data and transform both the training and testing data
train_input = scaler.fit_transform(train_input)
test_input = scaler.transform(test_input)

# One-hot encode the target labels
encoder = OneHotEncoder()
train_labels = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
test_labels = encoder.transform(y_test.reshape(-1, 1)).toarray()

print(train_labels.shape)

epoch=100

mlp = MLP(13, layer_dims = [128, 64, 3], activationfuncs=["softmax", "softmax"], learning_rate=0.05)

for i in range(epoch):
    outs = []
    for x, y in zip(train_input, train_labels):
        out = mlp.feedforward(x.reshape(13,1))
        mlp.backpropagation(y.reshape(3,1), out)
        outs.append(out)
    if i%10 == 0:
        print("mse loss : ", mlp.mse_loss(train_labels, np.array(outs).reshape([142,3])))

outs= []
for input, label in zip(train_input, y_train):
    out = mlp.feedforward(input.reshape(13,1))
    # print(out)
    outs.append(np.argmax(out))

(142, 3)
mse loss :  0.24753575179230217
mse loss :  0.21404413511529988
mse loss :  0.1993759058576027
mse loss :  0.1557965388489795
mse loss :  0.09660991836559063
mse loss :  0.054919626288035255
mse loss :  0.03678352989475689
mse loss :  0.028899243251749515
mse loss :  0.024714640582122378
mse loss :  0.02188802930072155


In [102]:
np.array(outs).transpose()

array([2, 2, 1, 2, 0, 1, 1, 1, 2, 0, 1, 1, 2, 0, 1, 0, 0, 2, 2, 1, 1, 0,
       1, 0, 2, 1, 1, 2, 0, 0, 0, 2, 0, 0, 1, 2, 1, 0, 2, 1, 0, 2, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 2, 1, 1, 1, 0, 1, 1, 1, 2, 2, 0, 1, 2, 2, 1,
       1, 0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 0, 2, 0, 2, 0, 0, 1, 1, 0, 0, 0,
       1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 0, 0, 1, 2, 2, 0, 1, 2, 2, 2, 2, 1,
       0, 1, 1, 2, 0, 0, 1, 1, 0, 2, 1, 0, 2, 2, 0, 0, 2, 2, 2, 1, 1, 1,
       1, 1, 1, 2, 0, 1, 1, 0, 1, 1], dtype=int64)

In [103]:
y_train-outs

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0, -1,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0], dtype=int64)

In [104]:
outs= []
for input, label in zip(test_input, y_test):
    out = mlp.feedforward(input.reshape(13,1))
    # print(out)
    outs.append(np.argmax(out))


y_test-outs

array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, -1,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0], dtype=int64)

In [105]:
def batch_normalization(batch, gamma, beta, epsilon=1e-5):
    # Calculate mean and variance
    mean = np.mean(batch, axis=0)
    variance = np.var(batch, axis=0)
    
    # Normalize
    batch_normalized = (batch - mean) / np.sqrt(variance + epsilon)
    
    # Scale and shift
    batch_scaled_and_shifted = gamma * batch_normalized + beta
    
    return batch_scaled_and_shifted