In [2]:
import numpy as np
import os

In [3]:
train_image_path = "/home/sifat/AI/data/train_image"
train_label_path = "/home/sifat/AI/data/train_label"
test_image_path = "/home/sifat/AI/data/test_image"
test_label_path = "/home/sifat/AI/data/test_label"

In [4]:
# ---- Load MNIST ----
def load_images(path):
    with open(path, "rb") as f:
        data = f.read()
    magic, num, row, col = np.frombuffer(data[:16], dtype=">i4")
    if magic != 2051: raise ValueError("Invalid image file")
    imgs = np.frombuffer(data[16:], dtype=np.uint8).reshape(num, row*col)
    return imgs

def load_labels(path):
    with open(path, "rb") as f:
        data = f.read()
    magic, num = np.frombuffer(data[:8], dtype=">i4")
    if magic != 2049: raise ValueError("Invalid label file")
    labels = np.frombuffer(data[8:], dtype=np.uint8)
    return labels

In [5]:
X_train_raw, Y_train_raw = load_images(train_image_path), load_labels(train_label_path)
X_test_raw, Y_test_raw = load_images(test_image_path), load_labels(test_label_path)

In [6]:
def one_hot(y, num_classes=10):
    out = np.zeros((y.shape[0], num_classes))
    out[np.arange(y.shape[0]), y] = 1
    return out

In [7]:
X_train, X_test = X_train_raw.astype(np.float32)/255.0, X_test_raw.astype(np.float32)/255.0
Y_train, Y_test = one_hot(Y_train_raw), one_hot(Y_test_raw)
layer_sizes = [784, 728, 512, 256, 128, 64, 32, 10]
learning_rate = 0.03

In [None]:
weights = []
biases = []
for i in range(len(layer_sizes)-1):
    w_file = f"w{i+1}.npy"
    b_file = f"b{i+1}.npy"
    if os.path.exists(w_file):
        weights.append(np.load(w_file))
        biases.append(np.load(b_file))
    else:
        weights.append(np.random.randn(layer_sizes[i], layer_sizes[i+1]) * np.sqrt(2/layer_sizes[i]))
        biases.append(np.zeros(layer_sizes[i+1]))

In [9]:
# ---- Activation functions ----
def relu(x): return np.maximum(0, x)
def relu_deriv(x): return (x > 0).astype(float)
def softmax(x):
    x = x - np.max(x, axis=1, keepdims=True)
    e = np.exp(x)
    return e / np.sum(e, axis=1, keepdims=True)

# ---- Forward ----
def forward(x):
    activations = [x]
    pre_activations = []
    for i in range(len(weights)-1):
        z = activations[-1] @ weights[i] + biases[i]
        a = relu(z)
        pre_activations.append(z)
        activations.append(a)
    # Output layer
    z = activations[-1] @ weights[-1] + biases[-1]
    a = softmax(z)
    pre_activations.append(z)
    activations.append(a)
    return pre_activations, activations

# ---- Loss ----
def cross_entropy(y_true, y_pred):
    eps = 1e-15
    y_pred = np.clip(y_pred, eps, 1-eps)
    return -np.mean(np.sum(y_true*np.log(y_pred), axis=1))

# ---- Backprop ----
def train_step(x, y, lr=learning_rate):
    global weights, biases
    pre_act, act = forward(x)
    loss = cross_entropy(y, act[-1])

    # Gradients
    delta = (act[-1] - y) / x.shape[0]
    grad_w = []
    grad_b = []

    for i in reversed(range(len(weights))):
        grad_w.insert(0, act[i].T @ delta)
        grad_b.insert(0, np.sum(delta, axis=0))
        if i != 0:
            delta = (delta @ weights[i].T) * relu_deriv(pre_act[i-1])

    # Update weights
    for i in range(len(weights)):
        weights[i] -= lr * grad_w[i]
        biases[i] -= lr * grad_b[i]

    return loss

# ---- Accuracy ----
def accuracy(x, y):
    _, act = forward(x)
    preds = np.argmax(act[-1], axis=1)
    labels = np.argmax(y, axis=1)
    return np.mean(preds == labels)

# ---- Save model ----
def save_model():
    for i in range(len(weights)):
        np.save(f"Program/w{i+1}.npy", weights[i])
        np.save(f"Program/b{i+1}.npy", biases[i])

In [14]:
# ---- Training on full MNIST ----
epochs = 10
batch_size = 64

for e in range(epochs):
    # Shuffle data
    idx = np.random.permutation(X_train.shape[0])
    X_train_shuffled = X_train[idx]
    Y_train_shuffled = Y_train[idx]

    for i in range(0, X_train.shape[0], batch_size):
        X_batch = X_train_shuffled[i:i+batch_size]
        Y_batch = Y_train_shuffled[i:i+batch_size]
        loss = train_step(X_batch, Y_batch, lr=0.03)

    acc = accuracy(X_train, Y_train)
    print(f"Epoch {e+1}/{epochs}, Loss: {loss:.4f}, Accuracy: {acc*100:.2f}%")

# ---- Save all weights and biases after training ----
for i in range(len(weights)):
    np.save(f"w{i+1}.npy", weights[i])
    np.save(f"b{i+1}.npy", biases[i])

print("Training finished and weights saved! 🔥")


Epoch 1/10, Loss: 0.0028, Accuracy: 99.99%
Epoch 2/10, Loss: 0.0032, Accuracy: 100.00%
Epoch 3/10, Loss: 0.0004, Accuracy: 100.00%
Epoch 4/10, Loss: 0.0003, Accuracy: 100.00%
Epoch 5/10, Loss: 0.0012, Accuracy: 100.00%
Epoch 6/10, Loss: 0.0001, Accuracy: 100.00%
Epoch 7/10, Loss: 0.0005, Accuracy: 100.00%
Epoch 8/10, Loss: 0.0006, Accuracy: 100.00%
Epoch 9/10, Loss: 0.0005, Accuracy: 100.00%
Epoch 10/10, Loss: 0.0000, Accuracy: 100.00%
Training finished and weights saved! 🔥


In [15]:
test_acc = accuracy(X_test, Y_test)
print(f"Test accuracy is: {test_acc*100:.2f}%")

Test accuracy is: 98.14%
