# Importing Required Modules

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Loading and Analyzing Data

In [14]:
# Read the labelled data: the "label" column is the target, every other
# column is used as a feature.
df = pd.read_csv("train.csv")
features = df.drop("label", axis=1).values
targets = df["label"].values

# Shuffle the row positions with a fixed seed so the split is reproducible.
np.random.seed(42)
indices = np.arange(len(features))
np.random.shuffle(indices)

# The first 80% of the shuffled rows are used for training, the remainder is
# held out for validation.
split_idx = int(len(features) * 0.8)
train_idx = indices[:split_idx]
val_idx = indices[split_idx:]

X_train, y_train = features[train_idx], targets[train_idx]
X_val, y_val = features[val_idx], targets[val_idx]

# Number of distinct classes, used as the width of the one-hot targets.
num_labels = 10

# The unlabelled test set ("test.csv") is loaded in the prediction section.

# One-Hot Encoding

In [15]:
def one_hot_encode(y, labels):
    """Convert integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int
        Class index of each sample, normally of shape ``(n_samples,)``.
        A 2-D input of shape ``(n_samples, k)`` sets ``k`` columns per row.
        Every value must lie in ``[0, labels)``.
    labels : int
        Number of classes, i.e. the number of columns of the result.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, labels)``; row ``i`` is zero
        everywhere except for a 1 in column ``y[i]``.

    Raises
    ------
    IndexError
        If a label is negative or not smaller than ``labels``. Negative
        labels are rejected explicitly because NumPy indexing would
        otherwise wrap them around to the last columns without any error.
    """
    y = np.asarray(y)
    n_samples = y.shape[0]
    one_hot = np.zeros((n_samples, labels))
    if y.size == 0:
        # Nothing to mark; also avoids min()/max() on an empty array.
        return one_hot

    if y.min() < 0 or y.max() >= labels:
        raise IndexError(
            f"labels must be in [0, {labels}), got values in [{y.min()}, {y.max()}]"
        )

    # A single fancy-index assignment replaces the per-row Python loop:
    # row i is paired with the column(s) stored in y[i].
    rows = np.arange(n_samples)[:, None]
    cols = y.reshape(n_samples, -1)
    one_hot[rows, cols] = 1
    return one_hot

# One-hot targets for the loss; the integer labels y_train / y_val are left
# untouched so they can still be compared against argmax predictions.
y_train_encoded, y_val_encoded = (
    one_hot_encode(split_labels, num_labels) for split_labels in (y_train, y_val)
)

# Feature Scaling

In [16]:
# Divide every feature by 255 (presumably 8-bit pixel intensities, giving
# values in [0, 1] -- confirm against the data).
# NOTE(review): the result is stored under the same names, so running this
# cell a second time without re-running the loading cell scales the data again.
X_train, X_val = X_train / 255, X_val / 255

# Defining Activation and Loss Functions

In [17]:
def softmax(x):
    """Softmax along the last axis.

    Parameters
    ----------
    x : array-like
        Scores; the normalisation runs over the last axis.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``x`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    # Subtracting the maximum of each slice does not change the result but
    # keeps the exponentials from overflowing for large scores.
    peak = np.max(x, axis=-1, keepdims=True)
    unnormalised = np.exp(x - peak)
    normaliser = np.sum(unnormalised, axis=-1, keepdims=True)
    return unnormalised / normaliser

def categorical_cross_entropy(y_true, y_pred, epsilon=1e-12):
    """Mean categorical cross-entropy over a batch.

    Parameters
    ----------
    y_true : numpy.ndarray
        Target distribution per sample (one-hot in this notebook), shape
        ``(batch, classes)``.
    y_pred : numpy.ndarray
        Predicted probabilities with the same shape as ``y_true``.
    epsilon : float, optional
        Predictions are clipped to ``[epsilon, 1 - epsilon]`` so that
        ``log(0)`` can never occur.

    Returns
    -------
    float
        Average over the batch of ``-sum(y_true * log(y_pred))`` per sample.
    """
    clipped = np.clip(y_pred, epsilon, 1. - epsilon)
    log_probs = np.log(clipped)
    per_sample = -np.sum(y_true * log_probs, axis=1)
    return np.mean(per_sample)
    
def cross_entropy(predictions, labels):
    """Cross-entropy summed over all entries and divided by the batch size.

    Parameters
    ----------
    predictions : numpy.ndarray
        Predicted probabilities; the first axis is the batch axis.
    labels : numpy.ndarray
        Targets with the same shape as ``predictions``.

    Returns
    -------
    float
        ``-sum(labels * log(predictions + 1e-9)) / predictions.shape[0]``.
        The constant ``1e-9`` guards against ``log(0)``.
    """
    n_samples = predictions.shape[0]
    log_probs = np.log(predictions + 1e-9)
    total = np.sum(labels * log_probs)
    return -total / n_samples

def cross_entropy_derivative(predictions, labels):
    """Batch-averaged difference between predictions and targets.

    ``Model.backward`` feeds this value directly into the last ``Dense``
    layer, i.e. it is used as the gradient of the batch-mean cross-entropy
    with respect to the pre-softmax scores.

    Parameters
    ----------
    predictions : numpy.ndarray
        Softmax outputs; the first axis is the batch axis.
    labels : numpy.ndarray
        One-hot targets with the same shape as ``predictions``.

    Returns
    -------
    numpy.ndarray
        ``(predictions - labels) / predictions.shape[0]``.
    """
    n_samples = predictions.shape[0]
    residual = predictions - labels
    return residual / n_samples

# Creating Custom Layer and Model Classes

In [18]:
class Dense:
    """Fully connected layer: ``x @ weights + biases``, trained by plain SGD."""

    def __init__(self, input_size, neurons):
        """Create the layer parameters.

        Weights are standard-normal draws from NumPy's global random state,
        scaled by ``sqrt(2 / input_size)`` (He-style initialisation); biases
        start at zero.

        Parameters
        ----------
        input_size : int
            Number of input features.
        neurons : int
            Number of output units.
        """
        scale = np.sqrt(2.0 / input_size)
        self.weights = np.random.randn(input_size, neurons) * scale
        self.biases = np.zeros((1, neurons))

    def forward(self, x):
        """Return ``x @ weights + biases`` and remember ``x`` for ``backward``."""
        self.input = x
        return x @ self.weights + self.biases

    def backward(self, grad_output, learning_rate):
        """Back-propagate through the layer and apply one SGD step in place.

        Parameters
        ----------
        grad_output : numpy.ndarray
            Gradient of the loss with respect to this layer's output.
        learning_rate : float
            Step size of the parameter update.

        Returns
        -------
        numpy.ndarray
            Gradient of the loss with respect to the layer input that was
            stored by the last ``forward`` call.
        """
        # The input gradient has to be taken with the weights used in the
        # forward pass, so it is computed before the weights are modified.
        to_previous = np.dot(grad_output, self.weights.T)

        weight_grad = np.dot(self.input.T, grad_output)
        bias_grad = np.sum(grad_output, axis=0, keepdims=True)

        self.weights -= learning_rate * weight_grad
        self.biases -= learning_rate * bias_grad
        return to_previous

class ReLU:
    """Element-wise rectified linear activation, ``max(0, x)``."""

    def forward(self, x):
        """Return ``max(0, x)`` element-wise.

        The input and the result are kept on the instance as ``input`` and
        ``output``; ``backward`` reads ``input``.
        """
        activated = np.maximum(0, x)
        self.input = x
        self.output = activated
        return activated

    def backward(self, grad_output, learning_rate=None):
        """Pass the gradient through where the stored input was positive.

        Parameters
        ----------
        grad_output : numpy.ndarray
            Gradient of the loss with respect to this layer's output.
        learning_rate : optional
            Ignored; accepted so that every layer shares the same
            ``backward(grad, learning_rate)`` call signature.

        Returns
        -------
        numpy.ndarray
            ``grad_output`` multiplied by the mask ``input > 0``.
        """
        positive_mask = self.input > 0
        return grad_output * positive_mask
        

In [19]:
class Model:
    """Multi-layer perceptron made of ``Dense`` layers separated by ``ReLU``.

    The output of the last ``Dense`` layer is turned into class
    probabilities with ``softmax`` in ``forward``.
    """

    def __init__(self, layer_sizes=(784, 512, 256, 10)):
        """Build the layer stack.

        Parameters
        ----------
        layer_sizes : sequence of int, optional
            Width of the input followed by the width of every ``Dense``
            layer. The default ``(784, 512, 256, 10)`` yields
            ``Dense(784,512), ReLU, Dense(512,256), ReLU, Dense(256,10)``.

        Raises
        ------
        ValueError
            If fewer than two sizes are given, since at least one ``Dense``
            layer (input size and output size) is required.
        """
        if len(layer_sizes) < 2:
            raise ValueError(
                "layer_sizes must contain at least an input size and an output size"
            )

        self.layers = []
        last_dense = len(layer_sizes) - 2
        for position, (fan_in, fan_out) in enumerate(
            zip(layer_sizes[:-1], layer_sizes[1:])
        ):
            self.layers.append(Dense(fan_in, fan_out))
            # No activation after the final Dense layer: its raw scores go
            # straight into softmax.
            if position < last_dense:
                self.layers.append(ReLU())

    def forward(self, x):
        """Run ``x`` through every layer and return softmax probabilities."""
        for layer in self.layers:
            x = layer.forward(x)
        return softmax(x)

    def backward(self, predictions, labels, learning_rate):
        """Back-propagate one batch and update every layer in place.

        Parameters
        ----------
        predictions : numpy.ndarray
            Output of ``forward`` for the batch.
        labels : numpy.ndarray
            One-hot targets for the batch.
        learning_rate : float
            Step size handed to every layer's ``backward``.

        Returns
        -------
        numpy.ndarray
            Gradient returned by the first layer, i.e. with respect to the
            network input.
        """
        # cross_entropy_derivative is used as the gradient at the input of
        # softmax, so the layers are traversed without a separate softmax step.
        dx = cross_entropy_derivative(predictions, labels)
        for layer in reversed(self.layers):
            dx = layer.backward(dx, learning_rate)
        return dx
        

# Defining the Validation Function

In [20]:
def test_model(model, X, y):
    preds = np.argmax(model.forward(X), axis=1)
    accuracy = np.mean(preds == y)
    print(f"✅ Validation Accuracy: {accuracy:.4f}")
    return preds, accuracy

# Building a Network and Training It

In [29]:
# Hyper-parameters of the mini-batch SGD run.
epochs = 100
alpha = 0.1        # learning rate passed to every layer update
batch_size = 128

model = Model()

# Loop invariants: the data set size does not change between epochs.
n_samples = X_train.shape[0]
num_batches = int(np.ceil(n_samples / batch_size))

for epoch in range(epochs):
    # Visit the samples in a fresh random order every epoch.
    indices = np.arange(n_samples)
    np.random.shuffle(indices)
    X_train_shuffled = X_train[indices]
    y_train_shuffled = y_train_encoded[indices]

    total_loss = 0.0
    for i in range(num_batches):
        start = i * batch_size
        end = min(start + batch_size, n_samples)

        X_batch = X_train_shuffled[start:end]
        y_batch = y_train_shuffled[start:end]

        # Forward
        output = model.forward(X_batch)
        loss = categorical_cross_entropy(y_batch, output)
        # `loss` is a per-batch mean; weight it by the batch size so that the
        # shorter final batch does not count as much as a full one.
        total_loss += loss * (end - start)

        # Backward (updates the model parameters in place)
        model.backward(output, y_batch, alpha)

    # Mean loss per training sample over the whole epoch.
    avg_loss = total_loss / n_samples
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}")

    # Check generalisation on the held-out split every 10th epoch.
    if (epoch + 1) % 10 == 0:
        print("🧪 Evaluating on validation set...")
        test_model(model, X_val, y_val)


Epoch 1, Loss: 0.4635
Epoch 2, Loss: 0.2258
Epoch 3, Loss: 0.1698
Epoch 4, Loss: 0.1371
Epoch 5, Loss: 0.1125
Epoch 6, Loss: 0.0956
Epoch 7, Loss: 0.0811
Epoch 8, Loss: 0.0703
Epoch 9, Loss: 0.0601
Epoch 10, Loss: 0.0523
🧪 Evaluating on validation set...
✅ Validation Accuracy: 0.9714
Epoch 11, Loss: 0.0453
Epoch 12, Loss: 0.0395
Epoch 13, Loss: 0.0350
Epoch 14, Loss: 0.0304
Epoch 15, Loss: 0.0266
Epoch 16, Loss: 0.0236
Epoch 17, Loss: 0.0204
Epoch 18, Loss: 0.0182
Epoch 19, Loss: 0.0160
Epoch 20, Loss: 0.0142
🧪 Evaluating on validation set...
✅ Validation Accuracy: 0.9740
Epoch 21, Loss: 0.0124
Epoch 22, Loss: 0.0109
Epoch 23, Loss: 0.0099
Epoch 24, Loss: 0.0090
Epoch 25, Loss: 0.0080
Epoch 26, Loss: 0.0072
Epoch 27, Loss: 0.0065
Epoch 28, Loss: 0.0060
Epoch 29, Loss: 0.0054
Epoch 30, Loss: 0.0051
🧪 Evaluating on validation set...
✅ Validation Accuracy: 0.9764
Epoch 31, Loss: 0.0047
Epoch 32, Loss: 0.0043
Epoch 33, Loss: 0.0041
Epoch 34, Loss: 0.0038
Epoch 35, Loss: 0.0036
Epoch 36, Lo

# Predicting Outputs for Test Dataset

In [32]:
# Load the unlabelled test features and apply the same /255 scaling that was
# applied to X_train and X_val.
X_test = pd.read_csv("test.csv").values/255

def predict(model, X):
    """Return the predicted class index for every row of ``X``.

    Parameters
    ----------
    model : object
        Anything with a ``forward(X)`` method returning per-class scores of
        shape ``(n_samples, n_classes)``.
    X : numpy.ndarray
        Input features.

    Returns
    -------
    numpy.ndarray
        Index of the highest score along axis 1, one entry per sample.
    """
    class_scores = model.forward(X)
    return np.argmax(class_scores, axis=1)

# Predicted class for every row of the test set.
predictions = predict(model, X_test)

# Submission table: a 1-based ImageId next to the predicted Label.
image_ids = np.arange(1, len(predictions) + 1)
submission = pd.DataFrame({"ImageId": image_ids, "Label": predictions})

# index=False keeps the DataFrame index out of the file, so the CSV holds
# exactly the two columns above.
submission.to_csv("submission.csv", index=False)
print("📁 Predictions saved to 'submission.csv'")


📁 Predictions saved to 'submission.csv'
