In [7]:
import numpy as np

class NeuralNetwork:
    def __init__(self, sizes):
        # `sizes` is a list containing the number of neurons in each layer (input, hidden, output)
        self.num_layers = len(sizes)
        self.sizes = sizes
        # Initialize weights and biases with random values from a normal distribution
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

    def sigmoid(self, z):
        # Sigmoid activation function
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoid_derivative(self, z):
        # Derivative of the sigmoid function
        return self.sigmoid(z) * (1 - self.sigmoid(z))

    def feedforward(self, a):
        # Pass input `a` through the network
        for b, w in zip(self.biases, self.weights):
            a = self.sigmoid(np.dot(w, a) + b)
        return a

    def backpropagation(self, x, y):
        # Backpropagation algorithm to compute the gradient of the loss function
        # Initialize gradient arrays
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # Forward pass
        activation = x
        activations = [x]  # List to store all activations, layer by layer
        zs = []  # List to store all z vectors, layer by layer (z = w*x + b)
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)

        # Backward pass
        # Compute the output error (delta)
        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_derivative(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Backpropagate the error
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = self.sigmoid_derivative(z)
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())

        return (nabla_b, nabla_w)

    def update_mini_batch(self, mini_batch, eta):
        # Update the network's weights and biases by applying gradient descent using backpropagation
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backpropagation(x, y)
            nabla_b = [nb+dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw+dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        self.weights = [w-(eta/len(mini_batch))*nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b-(eta/len(mini_batch))*nb for b, nb in zip(self.biases, nabla_b)]

    def train(self, training_data, epochs, mini_batch_size, eta):
        # Train the neural network using mini-batch stochastic gradient descent
        for epoch in range(epochs):
            np.random.shuffle(training_data)
            mini_batches = [
                training_data[k:k+mini_batch_size]
                for k in range(0, len(training_data), mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
            print(f"Epoch {epoch} complete")

    def cost_derivative(self, output_activations, y):
        # Return the derivative of the cost function
        return (output_activations - y)

# Example usage:
# Create a neural network with 784 input neurons, 30 hidden neurons, and 10 output neurons (MNIST dataset format)
nn = NeuralNetwork([784, 30, 10])

# Example random training data (In practice, use actual MNIST data)
# Each `x` is a 784x1 vector (flattened 28x28 image), each `y` is a 10x1 vector (one-hot encoded digit label)
training_data = [(np.random.randn(784, 1), np.random.randn(10, 1)) for _ in range(1000)]

# Train the neural network for 30 epochs with a mini-batch size of 10 and a learning rate of 3.0
nn.train(training_data, epochs=30, mini_batch_size=10, eta=3.0)


Epoch 0 complete
Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Epoch 4 complete
Epoch 5 complete
Epoch 6 complete
Epoch 7 complete
Epoch 8 complete
Epoch 9 complete
Epoch 10 complete
Epoch 11 complete
Epoch 12 complete
Epoch 13 complete
Epoch 14 complete
Epoch 15 complete
Epoch 16 complete
Epoch 17 complete
Epoch 18 complete
Epoch 19 complete
Epoch 20 complete
Epoch 21 complete
Epoch 22 complete
Epoch 23 complete
Epoch 24 complete
Epoch 25 complete
Epoch 26 complete
Epoch 27 complete
Epoch 28 complete
Epoch 29 complete


In [11]:
import numpy as np
import tensorflow as tf

# Function to load and preprocess MNIST dataset
def load_mnist_data():
    # Load the MNIST dataset using tensorflow.keras.datasets
    (train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

    # Normalize the images (convert from 0-255 to 0-1)
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    # Flatten the 28x28 images into vectors of size 784
    train_images = [image.reshape(784, 1) for image in train_images]
    test_images = [image.reshape(784, 1) for image in test_images]

    # Convert labels to one-hot encoded vectors
    train_labels = [one_hot_encode(label) for label in train_labels]
    test_labels = [one_hot_encode(label) for label in test_labels]

    # Combine images and labels for training and testing data
    training_data = list(zip(train_images, train_labels))
    test_data = list(zip(test_images, test_labels))

    return training_data, test_data

def one_hot_encode(label):
    # Create a 10x1 vector with all zeros, except a 1 at the index of the label
    one_hot = np.zeros((10, 1))
    one_hot[label] = 1.0
    return one_hot

# Now let's load the data
training_data, test_data = load_mnist_data()

# You can now train your neural network using this data:
# Example of creating and training the neural network
nn = NeuralNetwork([784, 40, 10])  # 784 input, 30 hidden, 10 output neurons
nn.train(training_data, epochs=40, mini_batch_size=10, eta=3.0)

# Optionally, write a method to test the accuracy on the test data
def evaluate(network, test_data):
    test_results = [(np.argmax(network.feedforward(x)), np.argmax(y)) for (x, y) in test_data]
    return sum(int(x == y) for (x, y) in test_results)

# Evaluate the network performance
accuracy = evaluate(nn, test_data)
print(f"Test Accuracy: {accuracy} / {len(test_data)}")


Epoch 0 complete
Epoch 1 complete
Epoch 2 complete
Epoch 3 complete
Epoch 4 complete
Epoch 5 complete
Epoch 6 complete
Epoch 7 complete
Epoch 8 complete
Epoch 9 complete
Epoch 10 complete
Epoch 11 complete
Epoch 12 complete
Epoch 13 complete
Epoch 14 complete
Epoch 15 complete
Epoch 16 complete
Epoch 17 complete
Epoch 18 complete
Epoch 19 complete
Epoch 20 complete
Epoch 21 complete
Epoch 22 complete
Epoch 23 complete
Epoch 24 complete
Epoch 25 complete
Epoch 26 complete
Epoch 27 complete
Epoch 28 complete
Epoch 29 complete
Epoch 30 complete
Epoch 31 complete
Epoch 32 complete
Epoch 33 complete
Epoch 34 complete
Epoch 35 complete
Epoch 36 complete
Epoch 37 complete
Epoch 38 complete
Epoch 39 complete
Test Accuracy: 9557 / 10000


In [12]:
pip install torch torchvision




In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Hyperparameters
input_size = 784    # 28x28 images
hidden_size = 40    # 40 neurons in the hidden layer
output_size = 10    # 10 classes for digits 0-9
num_epochs = 40     # Number of times the training data is used
batch_size = 64     # Batch size for training
learning_rate = 0.003  # Learning rate

# MNIST dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform)

# Data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Neural Network Model using PyTorch
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)  # Fully connected layer (784 -> 30)
        self.fc2 = nn.Linear(hidden_size, output_size)  # Fully connected layer (30 -> 10)

    def forward(self, x):
        x = x.view(-1, input_size)  # Flatten the images into 784x1
        x = torch.sigmoid(self.fc1(x))  # Apply Sigmoid activation
        x = torch.sigmoid(self.fc2(x))  # Apply Sigmoid activation
        return x

# Initialize the model, loss function, and optimizer
model = NeuralNetwork(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()  # Cross entropy loss for multi-class classification
optimizer = optim.SGD(model.parameters(), lr=learning_rate)  # Stochastic gradient descent optimizer

# Training the model
def train_model():
    for epoch in range(num_epochs):
        for images, labels in train_loader:
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# Test the model
def test_model():
    model.eval()  # Set the model to evaluation mode (disable dropout, batchnorm, etc.)
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)  # Get the class with the highest score
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the model on the 10,000 test images: {100 * correct / total:.2f}%')

# Train and test the model
train_model()
test_model()


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 18079446.12it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 493532.97it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3906583.62it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 11839980.59it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Epoch [1/40], Loss: 2.2740
Epoch [2/40], Loss: 2.2755
Epoch [3/40], Loss: 2.2700
Epoch [4/40], Loss: 2.2542
Epoch [5/40], Loss: 2.2193
Epoch [6/40], Loss: 2.2070
Epoch [7/40], Loss: 2.2002
Epoch [8/40], Loss: 2.1627
Epoch [9/40], Loss: 2.1128
Epoch [10/40], Loss: 2.1439
Epoch [11/40], Loss: 2.1158
Epoch [12/40], Loss: 2.0648
Epoch [13/40], Loss: 2.0254
Epoch [14/40], Loss: 2.0659
Epoch [15/40], Loss: 2.0166
Epoch [16/40], Loss: 2.0155
Epoch [17/40], Loss: 1.9986
Epoch [18/40], Loss: 1.9459
Epoch [19/40], Loss: 1.9465
Epoch [20/40], Loss: 1.9289
Epoch [21/40], Loss: 1.9095
Epoch [22/40], Loss: 1.9458
Epoch [23/40], Loss: 1.8982
Epoch [24/40], Loss: 1.9278
Epoch [25/40], Loss: 1.8677
Epoch [26/40], Loss: 1.8423
Epoch [27/40], Loss: 1.8772
Epoch [28/40], Loss: 1.8529
Epoch [29/40], Loss: 1.8015
Epoch [30/40], Loss: 1.8671
Epoch [31/40], Loss: 1.7842
Epoch [32/40], Loss: 1.7801
Epoch [33/40], Loss: 1.8351
Epoch [34/