In [280]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import os
import numpy as np

In [281]:
# Set random seeds for reproducibility
torch.manual_seed(42)

<torch._C.Generator at 0x12b4dd670>

In [282]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print("Using Device: ", device )

Using Device:  mps


In [283]:
df = pd.read_csv('fmnist_small.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


In [284]:
# Train and Test Splits
X = df.iloc[:, 1:].values # all row , starting from the index 1
y = df.iloc[:,0].values # lable data hi liya means first column chayies tha

In [285]:
# Convert ONCE with correct dtype
X = torch.from_numpy(X).float()   # ✅ float for inputs
y = torch.from_numpy(y).long()  

In [286]:
X.dtype

torch.float32

In [287]:
X.shape

torch.Size([6000, 784])

In [288]:
X = X / 255.0

In [289]:
class Linear:
    def __init__(self, n_input, n_neurons):
        self.weights =  torch.rand(n_input, n_neurons, dtype=torch.float32) * 0.01 
        self.weights.requires_grad=True
        self.bias = torch.zeros((1, n_neurons), dtype=torch.float32)
        self.bias.requires_grad=True

    def forward(self, input):
        self.output = torch.matmul(input, self.weights) + self.bias

In [290]:
class Activation_ReLU:
    def forward(self, input):
        self.output = torch.maximum(input, torch.tensor(0.0, device=input.device))

In [291]:
class Activation_Softmax:
     def forward(self, inputs):
        # inputs shape: (batch_size, num_classes)

        # Step 1: numerical stability
        exp_values = torch.exp(inputs - torch.max(inputs, dim=1, keepdim=True).values)

        # Step 2: normalize
        probabilities = exp_values / torch.sum(exp_values, dim=1, keepdim=True)

        self.output = probabilities

In [292]:
class Loss_CategoricalCrossEntropy:
    def forward(self, y_pred, y_true):
        # y_pred shape: (batch_size, num_classes)
        # y_true shape: (batch_size,)

        batch_size = y_pred.shape[0]

        # Prevent log(0)
        y_pred_clipped = torch.clamp(y_pred, 1e-7, 1 - 1e-7)

        # Get probability of correct class
        correct_confidences = y_pred_clipped[range(batch_size), y_true]

        # Compute loss
        negative_log_likelihoods = -torch.log(correct_confidences)

        # Mean loss
        loss = torch.mean(negative_log_likelihoods)

        return loss


In [293]:
linear1 = Linear(784, 64)
activation1 = Activation_ReLU()
# linear2 = Linear(64,32)
# activation2 = Activation_ReLU()
linear3 = Linear(64, 16)
activation3 = Activation_ReLU()
output_layer = Linear(16, 10)
softmax = Activation_Softmax()
loss_fn = Loss_CategoricalCrossEntropy()

# # 1st layer
# linear1.forward(X)
# activation1.forward(linear1.output)
# #print(activation1.output[:1])

# # 2nd layer
# linear2.forward(activation1.output)
# activation2.forward(linear2.output)
# #print(activation2.output[:1])

# # 3rd layer 
# linear3.forward(activation2.output)
# activation3.forward(linear3.output)
# #print(activation3.output[:1])

# # output layer
# output_layer.forward(activation3.output)
# softmax.forward(output_layer.output)
#print(softmax.output[:1])

# loss = loss_fn.forward(softmax.output, y)
# print("Loss", loss)

# -------------------------
# NEW: collect parameters + optimizer + epochs
# -------------------------
params = [
    linear1.weights , linear1.bias, 
    # linear2.weights, linear2.bias,
    linear3.weights, linear3.bias,
    output_layer.weights, output_layer.bias
]

optimizer = torch.optim.SGD(params, lr=0.1)

epochs = 1000
for epoch in range(epochs):
       # 1) clear old gradients
    optimizer.zero_grad()

    # ---- forward pass (same as yours) ----
    linear1.forward(X)
    activation1.forward(linear1.output)

    # linear2.forward(activation1.output)
    # activation2.forward(linear2.output)

    linear3.forward(activation1.output)
    activation3.forward(linear3.output)

    output_layer.forward(activation3.output)
    softmax.forward(output_layer.output)

    loss = loss_fn.forward(softmax.output, y)

    # 2) backprop (PyTorch autograd)
    loss.backward()

    # 3) update weights
    optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}  Loss: {loss.item():.4f}")


Epoch 1/1000  Loss: 2.3020
Epoch 2/1000  Loss: 2.3003
Epoch 3/1000  Loss: 2.2987
Epoch 4/1000  Loss: 2.2970
Epoch 5/1000  Loss: 2.2953
Epoch 6/1000  Loss: 2.2936
Epoch 7/1000  Loss: 2.2918
Epoch 8/1000  Loss: 2.2899
Epoch 9/1000  Loss: 2.2880
Epoch 10/1000  Loss: 2.2859
Epoch 11/1000  Loss: 2.2838
Epoch 12/1000  Loss: 2.2817
Epoch 13/1000  Loss: 2.2795
Epoch 14/1000  Loss: 2.2773
Epoch 15/1000  Loss: 2.2751
Epoch 16/1000  Loss: 2.2730
Epoch 17/1000  Loss: 2.2709
Epoch 18/1000  Loss: 2.2690
Epoch 19/1000  Loss: 2.2671
Epoch 20/1000  Loss: 2.2653
Epoch 21/1000  Loss: 2.2637
Epoch 22/1000  Loss: 2.2622
Epoch 23/1000  Loss: 2.2607
Epoch 24/1000  Loss: 2.2594
Epoch 25/1000  Loss: 2.2581
Epoch 26/1000  Loss: 2.2568
Epoch 27/1000  Loss: 2.2556
Epoch 28/1000  Loss: 2.2545
Epoch 29/1000  Loss: 2.2533
Epoch 30/1000  Loss: 2.2522
Epoch 31/1000  Loss: 2.2511
Epoch 32/1000  Loss: 2.2500
Epoch 33/1000  Loss: 2.2489
Epoch 34/1000  Loss: 2.2478
Epoch 35/1000  Loss: 2.2466
Epoch 36/1000  Loss: 2.2455
E