# Libraries

In [101]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Dataset Loading

In [102]:
iris = load_iris()
X = iris.data  #features
y = iris.target  #target labels 

inputSize = 4 # 4features 
outputSize = len(np.unique(y))  #number of unique classes

# Basic functions from scratch

In [103]:
def addForward(x, y):
    return x + y, 1, 1

def addBackward(usd, lx, ly):
    return usd * lx, usd * ly, None

def multiplyForward(x, y):
    return x * y  

def multiplyBackward(usd, lx, ly):
    return usd * ly, usd * lx, None

def subtractForward(x, y):
    return x - y, 1, -1

def subtractBackward(usd, lx, ly):
    return usd * lx, usd * (-ly), None

# Affine Layer Class

In [104]:
class AffineLayer:
    def __init__(self, inputSize, outputSize):
        outputSize = len(np.unique(y))
        self.weights = np.random.randn(outputSize, inputSize) * np.sqrt(1/inputSize)
        self.bias = np.zeros((outputSize, 1))

    def forward(self, x):
        weightedSum = np.dot(x, self.weights.T)
        biasReshaped = self.bias.reshape(1, -1)
        output, derivativeBias, derivative_y = addForward(weightedSum, biasReshaped)
        return output

    def backward(self, X, derivative):
        #for derivatives of loss wrt weights
        derivativeWeights = np.zeros_like(self.weights)  
        for sampleIndex, x_i in enumerate(X):
            #per-sample derivative for each weight
            derivativeWeightsSample = np.dot(derivative[sampleIndex].reshape(-1, 1), x_i.reshape(1, -1))
            derivativeWeights += derivativeWeightsSample

        #average derivatives across samples (for stochastic gradient descent)
        #derivativeWeights /= len(X)

        #calculate derivative w.r.t input (for next layer)
        derivativeInput = np.dot(derivative, self.weights)
        return derivativeWeights, derivativeInput 

# Model Architecture

In [105]:
class SimpleANN:
    def __init__(self, inputSize, outputSize):
        self.layer1 = AffineLayer(inputSize, outputSize)

    def forward(self, x):
        output = self.layer1.forward(x)
        return output

    def backward(self, x, upstreamDerivative):
        derivativeWeights1, derivativeInput = self.layer1.backward(x, upstreamDerivative)
        return derivativeWeights1, None

# Loss Function

In [106]:
def SVMloss(logits, y):
    numSamples = logits.shape[0]
    #convert target labels to one-hot encoded format
    oneHotEncoding = np.zeros_like(logits)
    oneHotEncoding[np.arange(numSamples), y] = 1
    #calculate differences
    differences = logits - oneHotEncoding
    differences[differences < 0] = 0
    loss = np.sum(differences)
    differences[differences > 0] = 1
    differences[np.arange(numSamples), y] = -1 * np.sum(differences, axis=1)
    return loss, differences

# Load Model

In [107]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = SimpleANN(inputSize, outputSize)
learningRate = 0.01  

# Training Loop

In [108]:
for epoch in range(100):  
    #forward pass on the training data
    output_train = model.forward(X_train)
    loss_train, derivative = SVMloss(output_train, y_train)
    
    #backward pass and weight updation on training data
    derivativeWeights1, derivativeBias1 = model.backward(X_train, derivative)
    derivativeWeights1_Transposed = derivativeWeights1.T
    derivativeWeights1Reshaped = derivativeWeights1_Transposed.reshape(model.layer1.weights.shape)  
    model.layer1.weights -= learningRate * derivativeWeights1Reshaped
    
    if derivativeBias1 is not None:
        model.layer1.bias -= learningRate * derivativeBias1
        
    #forward pass on the testing data
    output_test = model.forward(X_test)
    loss_test, _ = SVMloss(output_test, y_test)
    
    #training accuracy
    predictions_train = np.argmax(output_train, axis=1)
    accuracy_train = np.mean(predictions_train == y_train)
    
    #testing accuracy
    predictions_test = np.argmax(output_test, axis=1)
    accuracy_test = np.mean(predictions_test == y_test)

    print(f"Epoch {epoch+1}, Training Loss: {loss_train:.3f}, Training Accuracy: {accuracy_train:.2f}, Testing Loss: {loss_test:.3f}, Testing Accuracy: {accuracy_test:.2f}")




Epoch 1, Training Loss: 970.386, Training Accuracy: 0.33, Testing Loss: 1211.723, Testing Accuracy: 0.40
Epoch 2, Training Loss: 4733.012, Training Accuracy: 0.53, Testing Loss: 2975.483, Testing Accuracy: 0.50
Epoch 3, Training Loss: 11579.422, Training Accuracy: 0.58, Testing Loss: 4739.244, Testing Accuracy: 0.47
Epoch 4, Training Loss: 18425.832, Training Accuracy: 0.47, Testing Loss: 6503.005, Testing Accuracy: 0.30
Epoch 5, Training Loss: 25272.426, Training Accuracy: 0.39, Testing Loss: 8260.692, Testing Accuracy: 0.23
Epoch 6, Training Loss: 32096.344, Training Accuracy: 0.33, Testing Loss: 10018.380, Testing Accuracy: 0.20
Epoch 7, Training Loss: 38920.262, Training Accuracy: 0.33, Testing Loss: 11776.067, Testing Accuracy: 0.20
Epoch 8, Training Loss: 45744.181, Training Accuracy: 0.31, Testing Loss: 13533.754, Testing Accuracy: 0.20
Epoch 9, Training Loss: 52568.099, Training Accuracy: 0.30, Testing Loss: 15291.441, Testing Accuracy: 0.20
Epoch 10, Training Loss: 59392.017, 

Epoch 98, Training Loss: 659896.828, Training Accuracy: 0.30, Testing Loss: 171725.602, Testing Accuracy: 0.30
Epoch 99, Training Loss: 666720.746, Training Accuracy: 0.30, Testing Loss: 173483.289, Testing Accuracy: 0.30
Epoch 100, Training Loss: 673544.664, Training Accuracy: 0.30, Testing Loss: 175240.976, Testing Accuracy: 0.30


---

**Note:** I have tried my best to provide accurate results in this notebook. However, these results may not be entirely accurate, and contributions or corrections are encouraged. Thank you!
