In [1]:
import sys
import os
sys.path.append(os.path.abspath('..'))
from layers import ConvLayer, ReLULayer, MaxPoolLayer, FullyConnectedLayer
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [2]:
print("Downloading MNIST dataset...")
# Step 1: Fetch MNIST from OpenML as plain arrays (as_frame=False), not a DataFrame
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

# Extract data and labels; the labels are cast to integers
X = mnist.data
y = mnist.target.astype(int)

# Reshape the data to 28x28 pixels (one 2-D image per sample)
X = X.reshape(-1, 28, 28)

# Normalize the data (convert pixel values to the range [0, 1])
X = X / 255.0

# Step 2: Split Data into Train and Test Sets (80/20, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: One-Hot Encode the Labels
# Initialize the OneHotEncoder
encoder = OneHotEncoder()

# Fit on the training labels only, then reuse the fitted encoder for the test labels.
# OneHotEncoder returns a SciPy sparse matrix by default; convert to dense NumPy arrays
# because the training/evaluation loops iterate the labels row by row and call len() on
# them, and len() raises TypeError on a sparse matrix.
y_train_onehot = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
y_test_onehot = encoder.transform(y_test.reshape(-1, 1)).toarray()

# Print dataset shapes
print(f"Training data shape: {X_train.shape}, Training labels shape: {y_train_onehot.shape}")
print(f"Test data shape: {X_test.shape}, Test labels shape: {y_test_onehot.shape}")

Downloading MNIST dataset...
Training data shape: (56000, 28, 28), Training labels shape: (56000, 10)
Test data shape: (14000, 28, 28), Test labels shape: (14000, 10)


In [3]:
class CNNModel:
    """Minimal CNN: convolution -> ReLU -> max pooling -> fully connected layer.

    The four layer objects are stored as the attributes ``conv``, ``relu``,
    ``pool`` and ``fc``.
    """

    def __init__(self):
        """Create the four layers with fixed hyper-parameters."""
        self.conv = ConvLayer(num_filters=8, filter_size=3)
        self.relu = ReLULayer()
        self.pool = MaxPoolLayer(pool_size=2)
        # The flattened input length 13*13*8 is hard-coded and assumes a 28x28 input image.
        self.fc = FullyConnectedLayer(input_len=13*13*8, output_len=10)

    def forward(self, input):
        """Feed ``input`` through conv, ReLU, pool and FC (in that order).

        Returns whatever the final fully connected layer's ``forward`` returns.
        """
        activation = input
        for stage in (self.conv, self.relu, self.pool, self.fc):
            activation = stage.forward(activation)
        return activation

In [4]:
def softmax_loss(logits, label):
    """
    Computes the softmax and cross-entropy loss

    Args:
        logits: 1-D array-like of raw class scores.
        label: integer index of the true class.

    Returns:
        (loss, d_out): the cross-entropy loss for ``label`` and the gradient of
        that loss with respect to ``logits`` (softmax probabilities with 1
        subtracted at ``label``). ``d_out`` is a new array; ``logits`` is not
        modified.
    """
    logits = np.asarray(logits, dtype=float)

    # Subtract the maximum so the largest exponent is exp(0) = 1 (no overflow).
    shifted = logits - np.max(logits)
    exp_shifted = np.exp(shifted)
    sum_exp = np.sum(exp_shifted)

    # Log-sum-exp form of -log(softmax[label]). Taking the log of the probability
    # directly returns inf when that probability underflows to 0; this form stays
    # finite because sum_exp >= 1.
    loss = np.log(sum_exp) - shifted[label]

    d_out = exp_shifted / sum_exp
    d_out[label] -= 1
    return loss, d_out

In [None]:
def _labels_to_class_indices(labels):
    """Convert labels to a 1-D array of integer class indices.

    Accepts a sparse one-hot matrix (any object exposing ``toarray``), a dense
    2-D one-hot array, or a 1-D array that already holds class indices.
    """
    if hasattr(labels, "toarray"):
        # Densify sparse input once so everything below works on a plain ndarray.
        labels = labels.toarray()
    labels = np.asarray(labels)
    if labels.ndim == 1:
        return labels.astype(int)
    return np.argmax(labels, axis=1)


def train(model, X_train, y_train, epochs, batch_size, learning_rate):
    """Train ``model`` with per-sample gradient steps and print per-epoch metrics.

    Args:
        model: object exposing ``forward`` and the layers ``fc``, ``pool``,
            ``relu`` and ``conv``, each with a ``backward`` method.
        X_train: array of training samples, indexed along the first axis.
        y_train: one-hot labels (dense or sparse) or a 1-D array of class indices.
        epochs: number of passes over the training data.
        batch_size: size of the slices the shuffled data is walked in. The
            backward pass runs once per sample with ``learning_rate``, so this
            only groups the iteration.
        learning_rate: passed to ``fc.backward`` and ``conv.backward``.

    Raises:
        ValueError: if ``X_train`` is empty or the number of labels does not
            match the number of samples.

    The data is reshuffled every epoch using NumPy's global random state. The
    caller's arrays are never copied or reordered; only an index permutation is
    drawn.
    """
    num_samples = X_train.shape[0]
    if num_samples == 0:
        raise ValueError("X_train is empty; nothing to train on")

    # Decode the labels once instead of calling argmax on every row, every epoch.
    class_ids = _labels_to_class_indices(y_train)
    if len(class_ids) != num_samples:
        raise ValueError(
            f"X_train has {num_samples} samples but y_train has {len(class_ids)} labels"
        )

    for epoch in range(epochs):
        total_loss = 0
        num_correct = 0

        # Shuffle data: permute indices rather than copying the whole dataset
        order = np.arange(num_samples)
        np.random.shuffle(order)

        for start in range(0, num_samples, batch_size):
            for idx in order[start:start + batch_size]:
                target = class_ids[idx]

                # Forward pass
                logits = model.forward(X_train[idx])

                # Compute loss and gradient
                loss, d_out = softmax_loss(logits, target)
                total_loss += loss

                # Backward pass (reverse layer order)
                grad = model.fc.backward(d_out, learning_rate)
                grad = model.pool.backward(grad)
                grad = model.relu.backward(grad)
                model.conv.backward(grad, learning_rate)

                # Check prediction
                if np.argmax(logits) == target:
                    num_correct += 1

        print(f"Epoch {epoch+1}, Loss: {total_loss/num_samples:.3f}, Accuracy: {num_correct/num_samples:.3f}")

In [6]:
def test(model, X_test, y_test):
    num_correct = 0
    for image, label in zip(X_test, y_test):
        logits = model.forward(image)
        if np.argmax(logits) == np.argmax(label):
            num_correct += 1
    print(f"Test Accuracy: {num_correct / len(y_test):.3f}")

In [9]:
# Seed NumPy's global RNG so the run is repeatable: train() shuffles the data with
# np.random.shuffle. NOTE(review): the layers' weight initialisation presumably draws
# from np.random as well - confirm in layers.py.
np.random.seed(42)

model = CNNModel()

# Train the model
train(model, X_train, y_train_onehot, epochs=5, batch_size=32, learning_rate=0.01)


great
great2
great3
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great3
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5
great3
great4
great5
great4
great5
great4
great5
great4
great5
great4
great5


KeyboardInterrupt: 

In [None]:
# Evaluate the trained model on the held-out test split
test(model=model, X_test=X_test, y_test=y_test_onehot)

In [None]:
import pickle

# Serialize the model object to cnn_model.pkl (path is relative to the working directory).
# The with-block closes the file before the confirmation message is printed.
with open('cnn_model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

print("Model saved successfully!")