In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import struct

def load_images(file_path):
    """Load an uncompressed IDX3 (MNIST-style) image file as a normalized matrix.

    The file must start with a 16-byte big-endian header (magic number,
    image count, rows, columns) followed by one unsigned byte per pixel.

    Args:
        file_path: Path to the IDX3 image file.

    Returns:
        Float array of shape ``(num_images, rows * cols)`` with pixel values
        scaled from [0, 255] to [0, 1].

    Raises:
        ValueError: If the magic number is not 2051 (the IDX code for a
            3-dimensional unsigned-byte array) or if the number of pixel
            bytes does not match the dimensions declared in the header.
    """
    with open(file_path, 'rb') as f:
        # Read header information
        magic, num, rows, cols = struct.unpack('>IIII', f.read(16))
        if magic != 2051:
            raise ValueError(
                f"{file_path}: not an IDX3 image file (magic number {magic}, expected 2051)"
            )
        # Read image data
        images = np.frombuffer(f.read(), dtype=np.uint8)

    # Fail with a clear message instead of an opaque reshape error on truncated files.
    expected_size = num * rows * cols
    if images.size != expected_size:
        raise ValueError(
            f"{file_path}: header declares {num} images of {rows}x{cols} "
            f"({expected_size} bytes) but {images.size} bytes of pixel data were found"
        )

    images = images.reshape(num, rows * cols)  # Flatten each image to one row
    return images / 255.0  # Normalize pixel values to [0, 1]

def load_labels(file_path):
    """Load an uncompressed IDX1 (MNIST-style) label file.

    The file must start with an 8-byte big-endian header (magic number,
    label count) followed by one unsigned byte per label.

    Args:
        file_path: Path to the IDX1 label file.

    Returns:
        1-D ``uint8`` array containing one label per item.

    Raises:
        ValueError: If the magic number is not 2049 (the IDX code for a
            1-dimensional unsigned-byte array) or if the number of label
            bytes differs from the count declared in the header.
    """
    with open(file_path, 'rb') as f:
        # Read header information
        magic, num = struct.unpack('>II', f.read(8))
        if magic != 2049:
            raise ValueError(
                f"{file_path}: not an IDX1 label file (magic number {magic}, expected 2049)"
            )
        # Read label data
        labels = np.frombuffer(f.read(), dtype=np.uint8)

    # A mismatch means the file is truncated or has trailing bytes.
    if labels.size != num:
        raise ValueError(
            f"{file_path}: header declares {num} labels but {labels.size} were found"
        )
    return labels
# Read the MNIST training and test sets from the Kaggle input directory.
# Images come back as normalized, flattened rows; labels as raw uint8 class ids.
X_train, y_train = (
    load_images('/kaggle/input/mnist-dataset/train-images-idx3-ubyte/train-images-idx3-ubyte'),
    load_labels('/kaggle/input/mnist-dataset/train-labels-idx1-ubyte/train-labels-idx1-ubyte'),
)
X_test, y_test = (
    load_images('/kaggle/input/mnist-dataset/t10k-images-idx3-ubyte/t10k-images-idx3-ubyte'),
    load_labels('/kaggle/input/mnist-dataset/t10k-labels-idx1-ubyte/t10k-labels-idx1-ubyte'),
)

In [None]:
# One-hot encode the labels
def one_hot_encode(labels, num_classes=10):
    """Convert a vector of integer class ids into one-hot rows.

    Args:
        labels: 1-D integer array of class ids, each in ``[0, num_classes)``.
        num_classes: Number of columns in the encoded output.

    Returns:
        Float array of shape ``(labels.size, num_classes)`` that is zero
        everywhere except for a 1 in each row at that row's class id.
    """
    sample_count = labels.size
    encoded = np.zeros((sample_count, num_classes))
    row_ids = np.arange(sample_count)
    # Fancy indexing sets exactly one entry per row.
    encoded[row_ids, labels] = 1
    return encoded

# Replace the integer label vectors with their one-hot encodings.
# The names are rebound, so this cell expects the raw labels from the loading cell.
y_train, y_test = one_hot_encode(y_train), one_hot_encode(y_test)

# Split training set into training and validation
def train_val_split(X, y, val_size=0.2):
    """Split features and targets into a leading training part and a trailing validation part.

    No shuffling is done: the first ``int((1 - val_size) * n)`` rows form the
    training portion and the remaining rows form the validation portion.

    Args:
        X: Feature array with samples along the first axis.
        y: Target array aligned with ``X`` along the first axis.
        val_size: Fraction of rows reserved for validation.

    Returns:
        Tuple ``(X_train, X_val, y_train, y_val)``.
    """
    n_train = int((1 - val_size) * X.shape[0])
    train_rows = slice(None, n_train)
    val_rows = slice(n_train, None)
    return X[train_rows], X[val_rows], y[train_rows], y[val_rows]

# Rebinds X_train / y_train to the leading training portion and introduces X_val / y_val.
# With the default val_size=0.2 the trailing 20% of rows become the validation set.
# Re-running this cell without reloading the data splits the already-reduced arrays again.
X_train, X_val, y_train, y_val = train_val_split(X_train, y_train)


In [None]:
def initialize_parameters(input_size, hidden_size, output_size):
    """Create the weights and biases of a two-layer network.

    Reseeds NumPy's global random generator with 42 on every call, so the
    returned weights are the same each time for a given set of sizes.

    Args:
        input_size: Number of input features.
        hidden_size: Number of hidden units.
        output_size: Number of output units.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` where the weights are standard-normal draws
        scaled by 0.01 with shapes ``(input_size, hidden_size)`` and
        ``(hidden_size, output_size)``, and the biases are zero rows of shape
        ``(1, hidden_size)`` and ``(1, output_size)``.
    """
    np.random.seed(42)
    init_scale = 0.01
    layer_shapes = ((input_size, hidden_size), (hidden_size, output_size))
    # Weights are drawn in layer order so the random stream is consumed as W1 then W2.
    weights = [np.random.randn(*shape) * init_scale for shape in layer_shapes]
    biases = [np.zeros((1, shape[1])) for shape in layer_shapes]
    return weights[0], biases[0], weights[1], biases[1]

## Forward propagation

In [None]:
def relu(Z):
    """Rectified linear unit: the element-wise maximum of 0 and ``Z``."""
    floor = 0
    return np.maximum(floor, Z)

def relu_derivative(Z):
    """Return a boolean mask that is True wherever ``Z`` is strictly positive.

    The mask acts as the ReLU gradient (1 for positive inputs, 0 otherwise)
    when multiplied with a numeric array.
    """
    is_active = Z > 0
    return is_active

def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating so that large
    scores do not overflow; this does not change the result.

    Args:
        Z: Array of scores with one sample per row.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    row_max = np.max(Z, axis=1, keepdims=True)
    unnormalized = np.exp(Z - row_max)
    normalizer = np.sum(unnormalized, axis=1, keepdims=True)
    return unnormalized / normalizer

def forward_propagation(X, W1, b1, W2, b2):
    """Run one forward pass through the two-layer network.

    Args:
        X: Input samples, one per row.
        W1, b1: Hidden-layer weights and bias.
        W2, b2: Output-layer weights and bias.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)``: the hidden pre-activation, the hidden ReLU
        activation, the output pre-activation and the softmax output.
    """
    hidden_pre = np.dot(X, W1) + b1
    hidden_act = relu(hidden_pre)
    logits = np.dot(hidden_act, W2) + b2
    return hidden_pre, hidden_act, logits, softmax(logits)

## Backpropagation

In [None]:
def backward_propagation(X, y, Z1, A1, Z2, A2, W1, W2):
    """Compute batch-averaged gradients for the two-layer network.

    The output error is taken as ``A2 - y``, which is the loss gradient with
    respect to ``Z2`` when ``A2`` is a softmax output trained with
    cross-entropy (as the forward pass and training loop in this notebook do).

    Args:
        X: Input batch, one sample per row.
        y: Targets aligned with ``A2``.
        Z1: Hidden pre-activation from the forward pass.
        A1: Hidden activation from the forward pass.
        Z2: Output pre-activation (accepted for symmetry; not used here).
        A2: Network output from the forward pass.
        W1: Hidden-layer weights (not used here).
        W2: Output-layer weights.

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` of gradients averaged over the batch.
    """
    n_samples = X.shape[0]

    # Output layer
    output_error = A2 - y
    grad_W2 = np.dot(A1.T, output_error) / n_samples
    grad_b2 = np.sum(output_error, axis=0, keepdims=True) / n_samples

    # Hidden layer: push the error back through W2, then gate it by the ReLU mask.
    hidden_error = np.dot(output_error, W2.T) * relu_derivative(Z1)
    grad_W1 = np.dot(X.T, hidden_error) / n_samples
    grad_b1 = np.sum(hidden_error, axis=0, keepdims=True) / n_samples

    return grad_W1, grad_b1, grad_W2, grad_b2

## Training function

In [None]:
import matplotlib.pyplot as plt

def train(X_train, y_train, X_val, y_val, input_size, hidden_size, output_size, epochs, lr):
    """Train the two-layer network with full-batch gradient descent.

    Every epoch runs one forward and backward pass over the whole training
    set. The training loss is recorded each epoch, measured before that
    epoch's parameter update. The validation loss is recorded, and a progress
    line printed, on every 10th epoch (0, 10, 20, ...), measured after the
    update.

    Args:
        X_train, y_train: Training inputs and one-hot targets.
        X_val, y_val: Validation inputs and one-hot targets.
        input_size, hidden_size, output_size: Layer sizes for initialization.
        epochs: Number of full passes over the training set.
        lr: Gradient-descent step size.

    Returns:
        Tuple ``(W1, b1, W2, b2, train_losses, val_losses)``.
    """
    def cross_entropy(targets, probabilities):
        # The small constant keeps log() finite when a probability is exactly 0.
        return -np.mean(np.sum(targets * np.log(probabilities + 1e-9), axis=1))

    W1, b1, W2, b2 = initialize_parameters(input_size, hidden_size, output_size)
    train_losses, val_losses = [], []

    for epoch in range(epochs):
        Z1, A1, Z2, A2 = forward_propagation(X_train, W1, b1, W2, b2)
        train_loss = cross_entropy(y_train, A2)
        train_losses.append(train_loss)

        gradients = backward_propagation(X_train, y_train, Z1, A1, Z2, A2, W1, W2)
        for parameter, gradient in zip((W1, b1, W2, b2), gradients):
            # In-place subtraction updates the arrays that W1, b1, W2, b2 refer to.
            parameter -= lr * gradient

        if epoch % 10 != 0:
            continue

        val_probabilities = forward_propagation(X_val, W1, b1, W2, b2)[3]
        val_loss = cross_entropy(y_val, val_probabilities)
        val_losses.append(val_loss)
        print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")

    return W1, b1, W2, b2, train_losses, val_losses


## Rotation augmentation function

In [None]:
from scipy.ndimage import rotate

def augment_with_rotations(X, y, angles=(90, 180, 270), image_shape=(28, 28)):
    """Extend a dataset with rotated copies of every image.

    The original samples come first in the output, followed by one full
    rotated copy of the dataset per entry in ``angles``, in the given order.

    Args:
        X: Array of shape ``(n, height * width)`` holding flattened images.
        y: Targets aligned with ``X`` along the first axis; either a 1-D label
            vector or 2-D one-hot rows.
        angles: Rotation angles in degrees passed to ``scipy.ndimage.rotate``.
        image_shape: ``(height, width)`` used to unflatten each row of ``X``.

    Returns:
        Tuple ``(X_aug, y_aug)`` with ``n * (1 + len(angles))`` samples each.
        Rotation keeps the frame size (``reshape=False``) and uses SciPy's
        default spline interpolation, so rotated pixel values may fall
        slightly outside the input range.
    """
    X_parts = [X]
    y_parts = [y]
    for angle in angles:
        rotated_images = np.array([
            rotate(img.reshape(image_shape), angle, reshape=False).flatten()
            for img in X
        ])
        # reshape keeps the (n, pixels) layout even when X has no rows.
        X_parts.append(rotated_images.reshape(X.shape))
        y_parts.append(y)
    # Concatenating along axis 0 stacks samples for both 1-D and 2-D targets;
    # np.vstack would turn 1-D label vectors into a (copies, n) matrix.
    return np.concatenate(X_parts, axis=0), np.concatenate(y_parts, axis=0)

In [None]:
# Augment training data with rotated images (default angles of augment_with_rotations);
# the result holds the original samples first, followed by the rotated copies.
X_train_augmented, y_train_augmented = augment_with_rotations(X_train, y_train)
# Augment validation data with rotated images.
# NOTE(review): the training cell below passes X_val / y_val, not these augmented arrays.
X_val_augmented, y_val_augmented = augment_with_rotations(X_val, y_val)
# Augment testing data with rotated images; the evaluation cell scores predictions on this set.
X_test_augmented, y_test_augmented = augment_with_rotations(X_test, y_test)

# Report the shapes before and after augmentation as a sanity check.
print(f"Original Training Data Shape: {X_train.shape}")
print(f"Augmented Training Data Shape: {X_train_augmented.shape}\n")
print(f"Original Validation Data Shape: {X_val.shape}")
print(f"Augmented Validation Data Shape: {X_val_augmented.shape}\n")
print(f"Original testing Data Shape: {X_test.shape}")
print(f"Augmented testing Data Shape: {X_test_augmented.shape}")

## Train the model

In [None]:
# Network architecture
input_size = 28 * 28  # 28x28 images flattened
hidden_size, output_size = 128, 10

# Optimisation settings: each epoch is one full-batch gradient step.
learning_rate = 0.02
epochs = 20_000

In [None]:
# Fit the network on the rotation-augmented training set.
# Validation loss is measured on the un-augmented validation split.
W1, b1, W2, b2, train_losses, val_losses = train(
    X_train_augmented,
    y_train_augmented,
    X_val,
    y_val,
    input_size,
    hidden_size,
    output_size,
    epochs,
    lr=learning_rate,
)

# Loss curves: training loss has one point per epoch, while validation loss is
# recorded by train() only on every 10th epoch, hence the stride of 10 on its x-axis.
plt.plot(range(epochs), train_losses, label="Training Loss")
plt.plot(range(0, epochs, 10), val_losses, linestyle='--', label="Validation Loss")
plt.title('Training and Validation Loss (Augmented Data)')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.show()

## Evaluate on the test set

In [None]:
def predict(X, W1, b1, W2, b2):
    """Return the predicted class index for every row of ``X``.

    Runs a forward pass with the given parameters and picks, per sample, the
    index of the largest output probability.
    """
    probabilities = forward_propagation(X, W1, b1, W2, b2)[-1]
    return probabilities.argmax(axis=1)

# Score the model on the rotation-augmented test set (original images plus rotated copies).
y_test_pred = predict(X_test_augmented, W1, b1, W2, b2)
y_test_true = y_test_augmented.argmax(axis=1)  # one-hot rows back to class ids
accuracy = (y_test_pred == y_test_true).mean()
print(f"Test Accuracy: {accuracy:.2f}")


In [None]:
def plot_predictions(X, y_true, y_pred, num_images=5):
    """Show the first ``num_images`` samples with their true and predicted labels.

    Images are drawn in rows of up to 10, each row as its own figure. The
    subplot index restarts at 1 for every row and the last row may be
    partial, so any ``num_images`` works, not only multiples of 10.

    Args:
        X: Array of flattened 28x28 images, one per row.
        y_true: One-hot (or score) rows; the argmax of row ``i`` is shown as
            the true label.
        y_pred: Predicted class ids aligned with ``X``.
        num_images: Maximum number of images to display; capped at the number
            of available samples.
    """
    per_row = 10
    total = min(num_images, len(X), len(y_true), len(y_pred))
    for row_start in range(0, total, per_row):
        row_stop = min(row_start + per_row, total)
        # Fixed-width figure sized for one row of thumbnails.
        plt.figure(figsize=(per_row * 2, 2.5))
        for position, i in enumerate(range(row_start, row_stop), start=1):
            # Subplot positions are 1-based within the current row.
            plt.subplot(1, per_row, position)
            plt.imshow(X[i].reshape(28, 28), cmap='gray')
            plt.title(f"True: {np.argmax(y_true[i])}, Pred: {y_pred[i]}")
            plt.axis('off')
        plt.show()
# Plot sample predictions
# y_test_pred was computed on X_test_augmented, whose leading rows are the original X_test
# images (augment_with_rotations puts the un-rotated data first), so index i lines up with
# X_test[i] and y_test[i] for the first len(X_test) entries.
plot_predictions(X_test, y_test, y_test_pred, num_images=100)

## Save the submission file

In [None]:
# y_test_pred covers the augmented test set: the original test images first, then their
# rotated copies. Only the leading X_test.shape[0] predictions belong to real test images,
# so the submission is limited to those rather than listing every rotated copy as well.
submission_labels = y_test_pred[:X_test.shape[0]]
submission = pd.DataFrame({
    'ImageId': np.arange(1, len(submission_labels) + 1),  # 1-based ids
    'Label': submission_labels,
})
submission.to_csv('submission.csv', index=False)