In [None]:
import numpy as np
import matplotlib.pyplot as plt
from torchvision import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Load MNIST dataset (download=True fetches it into data/ when it is missing)
train_dataset = datasets.MNIST(root='data/', train=True, download=True, transform=None)
test_dataset = datasets.MNIST(root='data/', train=False, download=True, transform=None)

# Extract images and labels as NumPy arrays
train_imgs = train_dataset.data.numpy()
train_labels = train_dataset.targets.numpy()

test_imgs = test_dataset.data.numpy()
test_labels = test_dataset.targets.numpy()

# Normalize pixel values (divide by 255; the result is floating point)
train_imgs = train_imgs / 255.0
test_imgs = test_imgs / 255.0

# Flatten images: keep the sample axis, collapse everything else into one row
train_imgs = train_imgs.reshape(train_imgs.shape[0], -1)
test_imgs = test_imgs.reshape(test_imgs.shape[0], -1)

# One-hot encode labels.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old spelling, so try the current keyword first and fall back to
# the legacy one on older installations.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)
# Fit the category set on the training labels only, then reuse it for the test labels.
train_labels = enc.fit_transform(train_labels.reshape(-1, 1))
test_labels = enc.transform(test_labels.reshape(-1, 1))

# Split training set into train and validation sets (80% / 20%, fixed seed)
train_imgs, val_imgs, train_labels, val_labels = train_test_split(train_imgs, train_labels, test_size=0.2, random_state=42)

# Define neural network architecture
class NeuralNetwork:
    """Fully connected network with one sigmoid hidden layer and a softmax output.

    ``forward`` stores its intermediate results (``z1``, ``a1``, ``z2``, ``a2``)
    on the instance and ``backward`` reads ``a1`` and ``a2`` from there, so
    ``backward`` must be called after ``forward`` on the same batch and before
    any other ``forward`` call.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Draw the initial weights from NumPy's global RNG and zero the biases.

        Args:
            input_size: number of features per sample.
            hidden_size: number of hidden units.
            output_size: number of output classes.
        """
        # Scale each weight matrix by 1/sqrt(fan_in). Plain N(0, 1) weights make
        # the pre-activations grow with the number of inputs, which drives the
        # sigmoid units into their flat region and starves them of gradient.
        self.W1 = np.random.randn(input_size, hidden_size) * np.sqrt(1.0 / input_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * np.sqrt(1.0 / hidden_size)
        self.b2 = np.zeros((1, output_size))

    def forward(self, X):
        """Run a forward pass and return the softmax outputs.

        Args:
            X: array with one sample per row and ``input_size`` columns.

        Returns:
            Array with one row per sample and ``output_size`` columns; each
            row sums to 1.
        """
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y, learning_rate):
        """Apply one gradient-descent step using the activations cached by ``forward``.

        Args:
            X: the batch that was just passed to ``forward``.
            y: target array with the same shape as the network output
               (one-hot rows in this notebook).
            learning_rate: step size applied to every parameter.
        """
        m = X.shape[0]

        # Output-layer error; this is the form the gradient takes for a softmax
        # output paired with a cross-entropy loss.
        delta2 = self.a2 - y
        dW2 = (1 / m) * np.dot(self.a1.T, delta2)
        db2 = (1 / m) * np.sum(delta2, axis=0, keepdims=True)

        # Propagate the error through W2 and the hidden sigmoid. The derivative
        # helper expects the activation a1, not the pre-activation z1.
        delta1 = np.dot(delta2, self.W2.T) * self.sigmoid_derivative(self.a1)
        dW1 = (1 / m) * np.dot(X.T, delta1)
        # keepdims=True keeps db1 shaped like b1, matching how db2 is computed.
        db1 = (1 / m) * np.sum(delta1, axis=0, keepdims=True)

        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1

    def sigmoid(self, x):
        """Element-wise logistic function 1 / (1 + exp(-x)).

        Evaluated as exp(-log(1 + exp(-x))) through ``np.logaddexp`` so that
        very negative inputs do not overflow inside ``exp``.
        """
        return np.exp(-np.logaddexp(0, -x))

    def sigmoid_derivative(self, x):
        """Sigmoid derivative expressed in terms of the sigmoid *output* ``x``."""
        return x * (1 - x)

    def softmax(self, x):
        """Row-wise softmax of a 2-D array of scores.

        The row maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``exp`` from overflowing to
        inf (which would turn the row into NaN).
        """
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_scores = np.exp(shifted)
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

# Initialize neural network
input_size = train_imgs.shape[1]  # number of features per flattened image
hidden_size = 64
output_size = 10
learning_rate = 0.1
num_epochs = 100

# NeuralNetwork draws its initial weights from NumPy's global RNG; seed it so
# a fresh "Restart & Run All" reproduces the same weights and accuracies.
np.random.seed(42)
model = NeuralNetwork(input_size, hidden_size, output_size)

# Training loop: full-batch gradient descent, one parameter update per epoch.
train_accs = []
val_accs = []

# The integer class ids never change between epochs, so decode the one-hot
# label matrices once instead of on every iteration.
train_true = np.argmax(train_labels, axis=1)
val_true = np.argmax(val_labels, axis=1)

for epoch in range(num_epochs):
    # Forward pass
    outputs = model.forward(train_imgs)

    # Backward pass (reads the activations cached by the forward call above)
    model.backward(train_imgs, train_labels, learning_rate)

    # Compute training accuracy.
    # NOTE: `outputs` was produced before this epoch's weight update, while the
    # validation accuracy below is measured after it.
    train_preds = np.argmax(outputs, axis=1)
    train_acc = np.mean(train_preds == train_true)
    train_accs.append(train_acc)

    # Compute validation accuracy
    val_outputs = model.forward(val_imgs)
    val_preds = np.argmax(val_outputs, axis=1)
    val_acc = np.mean(val_preds == val_true)
    val_accs.append(val_acc)

    print(f"Epoch {epoch+1}/{num_epochs}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

# Accuracy curves: one point per epoch for the training and validation sets.
epochs = range(1, num_epochs+1)
fig, ax = plt.subplots()
for history, series_name in ((train_accs, 'Train Accuracy'),
                             (val_accs, 'Validation Accuracy')):
    ax.plot(epochs, history, label=series_name)
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.legend()
plt.show()

# Test the model: decode the one-hot test labels once and reuse them for both
# the accuracy figure and the confusion matrix.
test_true = np.argmax(test_labels, axis=1)
test_outputs = model.forward(test_imgs)
test_preds = test_outputs.argmax(axis=1)
test_acc = (test_preds == test_true).mean()
print(f"Test Accuracy: {test_acc:.4f}")

# Confusion matrix: rows are the true digits, columns the predicted ones.
cm = confusion_matrix(test_true, test_preds)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)
ax.set_xlabel('Predicted Labels')
ax.set_ylabel('True Labels')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
import matplotlib.pyplot as plt
from torchvision import datasets
from tqdm import tqdm

# Fetch both MNIST splits (download=True retrieves them into data/ when missing).
train_dataset = datasets.MNIST(root='data/', train=True, download=True, transform=None)
test_dataset = datasets.MNIST(root='data/', train=False, download=True, transform=None)

# Training split: divide the pixels by 255 and flatten every image to a
# 28*28-long row. Labels are kept as integer class ids (not one-hot).
train_labels = train_dataset.targets.numpy()
train_pixels = train_dataset.data.numpy()
train_imgs = (train_pixels / 255).reshape(train_pixels.shape[0], 28 * 28)

# Test split: identical treatment.
test_labels = test_dataset.targets.numpy()
test_pixels = test_dataset.data.numpy()
test_imgs = (test_pixels / 255).reshape(test_pixels.shape[0], 28 * 28)

In [None]:
import numpy as np
import struct
import matplotlib.pyplot as plt

# Function for reading idx file format
def read_idx(filename):
    """Read an IDX-format file (the container used by the MNIST files) into an array.

    The header is two zero bytes, one data-type code, one dimension count, and
    then one big-endian uint32 per dimension. The payload is decoded with the
    element type named in the header rather than assuming unsigned bytes.

    Args:
        filename: path of the IDX file to read.

    Returns:
        A read-only NumPy array with the shape stored in the header. Multi-byte
        element types keep the file's big-endian byte order in their dtype.

    Raises:
        ValueError: if the file does not start with two zero bytes, or if the
            data-type code is not one defined by the IDX format.
    """
    # IDX data-type codes -> NumPy dtypes (multi-byte values are stored MSB first).
    idx_dtypes = {
        0x08: np.dtype(np.uint8),
        0x09: np.dtype(np.int8),
        0x0B: np.dtype('>i2'),
        0x0C: np.dtype('>i4'),
        0x0D: np.dtype('>f4'),
        0x0E: np.dtype('>f8'),
    }
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0:
            raise ValueError(f"{filename}: not an IDX file (magic number must start with two zero bytes)")
        if data_type not in idx_dtypes:
            raise ValueError(f"{filename}: unsupported IDX data type code 0x{data_type:02X}")
        # All dimension sizes are read with a single unpack call.
        shape = struct.unpack('>' + 'I' * dims, f.read(4 * dims))
        return np.frombuffer(f.read(), dtype=idx_dtypes[data_type]).reshape(shape)

# Preprocessing functions
def normalize_images(images):
    """Return ``images`` divided element-wise by 255."""
    divisor = 255
    return images / divisor

def one_hot_labels(labels, num_classes=None):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels: array-like of non-negative integer class ids. Any shape is
            accepted; it is flattened first, so a column vector of shape
            (n, 1) gives the same result as a flat vector of length n.
        num_classes: number of columns in the result. When omitted it is
            inferred as ``labels.max() + 1``. Pass it explicitly whenever a
            subset of the data might not contain the highest class id.

    Returns:
        Float array of shape ``(labels.size, num_classes)`` with a single 1 in
        each row. Empty input yields an array with zero rows.

    Raises:
        ValueError: if a label is negative or not smaller than ``num_classes``.
    """
    labels = np.asarray(labels).ravel()
    if labels.size == 0:
        return np.zeros((0, 0 if num_classes is None else num_classes))
    if num_classes is None:
        # int() avoids wrap-around when the labels use a narrow dtype such as uint8.
        num_classes = int(labels.max()) + 1
    if labels.min() < 0 or labels.max() >= num_classes:
        raise ValueError("labels must lie in the range [0, num_classes)")
    one_hot = np.zeros((labels.size, num_classes))
    one_hot[np.arange(labels.size), labels] = 1
    return one_hot

# Activation functions and their derivatives
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    Computed as exp(-log(1 + exp(-x))) via ``np.logaddexp`` so that very
    negative inputs do not overflow inside ``exp``; the result is the same up
    to floating-point rounding.
    """
    return np.exp(-np.logaddexp(0, -x))

def sigmoid_derivative(x):
    """Derivative of the sigmoid evaluated at the pre-activation ``x``.

    Returns ``sigmoid(x) * (1 - sigmoid(x))``. The sigmoid is evaluated once
    and reused rather than computed twice.
    """
    s = sigmoid(x)
    return s * (1 - s)

# Parameter initialization
def initialize_parameters(n_inputs, n_hidden, n_outputs):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from NumPy's global RNG and scaled by 1/sqrt(fan_in).
    Unscaled N(0, 1) weights make the pre-activations grow with the number of
    inputs, which pushes the sigmoid units into their flat region where the
    gradient is close to zero.

    Args:
        n_inputs: number of input features.
        n_hidden: number of hidden units.
        n_outputs: number of output units.

    Returns:
        Tuple ``(W1, b1, W2, b2)`` with shapes ``(n_hidden, n_inputs)``,
        ``(n_hidden, 1)``, ``(n_outputs, n_hidden)`` and ``(n_outputs, 1)``.
        Biases start at zero.
    """
    W1 = np.random.randn(n_hidden, n_inputs) * np.sqrt(1.0 / n_inputs)
    b1 = np.zeros((n_hidden, 1))
    W2 = np.random.randn(n_outputs, n_hidden) * np.sqrt(1.0 / n_hidden)
    b2 = np.zeros((n_outputs, 1))
    return W1, b1, W2, b2

# Forward propagation
def forward_propagation(X, W1, b1, W2, b2):
    """Forward pass through the two sigmoid layers.

    Args:
        X: batch of samples; it is reshaped to ``(m, n_inputs)`` where
            ``n_inputs`` is the number of columns of ``W1``, so image stacks
            and already-flattened rows are both accepted.
        W1, b1: hidden-layer weights ``(n_hidden, n_inputs)`` and biases
            ``(n_hidden, 1)``.
        W2, b2: output-layer weights ``(n_outputs, n_hidden)`` and biases
            ``(n_outputs, 1)``.

    Returns:
        Tuple ``(Z1, A1, Z2, A2)`` of pre-activations and activations. Every
        array holds one column per sample.
    """
    # Take the feature count from W1 instead of hard-coding 28*28, so the same
    # code works for any input size the weights were created for.
    X = X.reshape(-1, W1.shape[1])
    Z1 = np.dot(W1, X.T) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    return Z1, A1, Z2, A2

# Backward propagation
def backward_propagation(Z1, A1, Z2, A2, W1, W2, X, Y):
    """Compute parameter gradients for one batch.

    Args:
        Z1, A1, Z2, A2: values returned by the forward pass for this batch
            (one column per sample). ``Z1`` and ``Z2`` are accepted for
            interface compatibility but are not read.
        W1: hidden-layer weights; only its column count is used, to flatten ``X``.
        W2: output-layer weights ``(n_outputs, n_hidden)``.
        X: the input batch; reshaped to ``(m, n_inputs)``.
        Y: targets with the same shape as ``A2`` (one column per sample).

    Returns:
        Tuple ``(dW1, db1, dW2, db2)`` shaped like the corresponding parameters.
    """
    # Feature count comes from W1 rather than a hard-coded 28*28.
    X = X.reshape(-1, W1.shape[1])
    m_batch = X.shape[0]
    dZ2 = A2 - Y
    dW2 = (1./m_batch) * np.dot(dZ2, A1.T)
    db2 = (1./m_batch) * np.sum(dZ2, axis=1, keepdims=True)
    dA1 = np.dot(W2.T, dZ2)
    # A1 is sigmoid(Z1), so the sigmoid derivative is A1 * (1 - A1); reusing it
    # avoids evaluating the exponential again on Z1.
    dZ1 = dA1 * A1 * (1 - A1)
    dW1 = (1./m_batch) * np.dot(dZ1, X)
    db1 = (1./m_batch) * np.sum(dZ1, axis=1, keepdims=True)
    return dW1, db1, dW2, db2

# Parameter update
def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Take one gradient-descent step on every parameter.

    Each parameter is replaced by ``param - learning_rate * grad``. The inputs
    are not modified; new values are returned as ``(W1, b1, W2, b2)``.
    """
    params = (W1, b1, W2, b2)
    grads = (dW1, db1, dW2, db2)
    return tuple(p - learning_rate * g for p, g in zip(params, grads))

# Neural network training
def train(X, Y, n_hidden=64, learning_rate=0.01, n_epochs=10, batch_size=64):
    """Train the two-layer network with mini-batch gradient descent.

    Args:
        X: training samples, first axis indexing samples. The remaining axes
            are treated as features, so both image stacks ``(m, h, w)`` and
            flattened rows ``(m, n_features)`` are accepted.
        Y: targets of shape ``(m, n_outputs)`` (one row per sample).
        n_hidden: number of hidden units.
        learning_rate: gradient-descent step size.
        n_epochs: number of passes over the data.
        batch_size: samples per mini-batch; the last batch may be smaller.

    Returns:
        The trained parameters ``(W1, b1, W2, b2)``.
    """
    # Product of all non-sample axes; the previous X.shape[1]*X.shape[2] raised
    # IndexError when X was already flattened.
    n_inputs = int(np.prod(X.shape[1:]))
    n_outputs = Y.shape[1]
    W1, b1, W2, b2 = initialize_parameters(n_inputs, n_hidden, n_outputs)
    for epoch in range(n_epochs):
        # Reshuffle every epoch so the mini-batches differ between passes.
        permutation = np.random.permutation(X.shape[0])
        X_shuffled = X[permutation]
        Y_shuffled = Y[permutation]
        for i in range(0, X.shape[0], batch_size):
            X_batch = X_shuffled[i:i+batch_size]
            # Transposed so targets are one column per sample, like the activations.
            Y_batch = Y_shuffled[i:i+batch_size].T
            Z1, A1, Z2, A2 = forward_propagation(X_batch, W1, b1, W2, b2)
            dW1, db1, dW2, db2 = backward_propagation(Z1, A1, Z2, A2, W1, W2, X_batch, Y_batch)
            W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)
    return W1, b1, W2, b2

# Predictions
def predict(X, W1, b1, W2, b2):
    """Return, for each sample, the index of the largest output activation.

    Runs the forward pass and takes the argmax over axis 0 of the final
    activations (one column per sample).
    """
    output_activations = forward_propagation(X, W1, b1, W2, b2)[-1]
    return output_activations.argmax(axis=0)

# Load the data

