In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms,datasets
from torch.utils.data import DataLoader


1. You have been given a partially implemented code for a feed-forward neural network using PyTorch. Your task is to complete the missing parts of the code to make it functional.

In [None]:
# Define the neural network architecture
class NeuralNetwork(nn.Module):
    """Feed-forward classifier with one hidden ReLU layer.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        output_size: number of classes (length of the logit vector).

    ``forward`` returns raw, unbounded logits of shape
    ``(batch, output_size)``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to class logits."""
        x = self.fc1(x)
        x = self.relu(x)
        # No activation on the output layer: nn.CrossEntropyLoss applies
        # log-softmax itself, and a ReLU here would clamp every logit to be
        # non-negative and zero the gradient of any negative logit.
        return self.fc2(x)

# Hyperparameters of the toy classification problem
input_size, hidden_size, label_size = 10, 20, 5
learning_rate = 0.001
num_epochs = 1000

# Network, loss and optimizer
model = NeuralNetwork(input_size, hidden_size, label_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Dummy training set: 100 random feature vectors with random integer labels
train_data = torch.randn(100, input_size)
train_labels = torch.randint(label_size, (100,))

# Full-batch training: every epoch is a single gradient step on all samples
for epoch in range(num_epochs):
    optimizer.zero_grad()

    outputs = model(train_data)
    loss = criterion(outputs, train_labels)

    loss.backward()
    optimizer.step()

    # Report the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch: {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Run the trained model on 10 fresh random samples, without gradient tracking
test_data = torch.randn(10, input_size)
with torch.no_grad():
    test_outputs = model(test_data)

    # torch.max over dim 1 returns (values, indices); the indices are the
    # predicted class ids
    predicted = torch.max(test_outputs.data, 1).indices
    print("Predictions:", predicted)

2. In this coding exercise, you need to implement the training of a deep MLP on the MNIST dataset using PyTorch and manually tune the hyperparameters. Follow the steps below to proceed:

* Load the MNIST dataset using torchvision.datasets.MNIST. The dataset contains handwritten digit images, and it can be easily accessed through PyTorch's torchvision module.

In [None]:
# Load the MNIST dataset

# Constants handed to transforms.Normalize for the single image channel
mean = 0.13066048920154572
std = 0.30810779333114624

# Convert each image to a tensor, then normalize it with the constants above
mnist_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(mean,), std=(std,)),
]
transform = transforms.Compose(mnist_steps)

# Both splits share the same root directory, download flag and transform
mnist_kwargs = dict(root="./data", download=True, transform=transform)
train_mnist = datasets.MNIST(train=True, **mnist_kwargs)
test_mnist = datasets.MNIST(train=False, **mnist_kwargs)

* Define your deep MLP model. Specify the number of hidden layers, the number of neurons in each layer, and the activation function to be used. You can use the nn.Sequential container to stack the layers.

In [None]:
class MLP(nn.Module):
    """MLP for flattened 784-feature inputs producing 10 output scores.

    Three hidden blocks (784->400->200->30), each ``Linear -> SELU ->
    Dropout(0.2)``, are followed by a final ``Linear(30, 10)``.
    The input must already be flattened to ``(batch, 784)``.
    """

    def __init__(self):
        super().__init__()
        widths = (784, 400, 200, 30)

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.SELU(), nn.Dropout(0.2)]
        # Output layer: one score per class, no activation
        stack.append(nn.Linear(widths[-1], 10))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Return the 10 output scores for a ``(batch, 784)`` input."""
        return self.model(x)

* Set up the training loop and the hyperparameters. You can use the CrossEntropyLoss as the loss function and the Stochastic Gradient Descent (SGD) optimizer.

In [None]:
# Set hyperparameters
learning_rate = 1e-3
epochs = 10
batch_size = 10

# Create data loaders.
# Only the training set is shuffled: evaluation does not depend on sample
# order, so the test loader keeps a fixed order.
train_dloader = DataLoader(train_mnist, batch_size=batch_size, shuffle=True)
test_dloader = DataLoader(test_mnist, batch_size=batch_size, shuffle=False)

# Create an instance of the model
model = MLP()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

* Train the model by iterating over the training dataset for the specified number of epochs. Compute the loss, perform backpropagation, and update the model's parameters. 

In [None]:
for epoch in range(epochs):
    # Sum of per-batch losses, averaged over batches at the end of the epoch
    running_loss = 0.0
    # Training mode keeps the Dropout layers active
    model.train()

    for images, labels in train_dloader:
        # Flatten each image to a 784-feature vector
        images = images.view(-1, 784)

        # Zero the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Compute the loss
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    epoch_loss = running_loss / len(train_dloader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")



* Evaluate the trained model on the test dataset and calculate the accuracy (Please take a moment to consider the code below!)

In [None]:
# Evaluation
correct = 0
total = 0

# Switch to evaluation mode so the Dropout layers are disabled; in training
# mode they randomly zero activations and make the measured accuracy
# lower and non-deterministic.
model.eval()
with torch.no_grad():
    for images, labels in test_dloader:
        # Flatten each image to a 784-feature vector
        images = images.view(-1, 784)
        outputs = model(images)
        # Index of the largest score per row is the predicted class
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print("Accuracy: {:.2f}%".format(accuracy))



* Manually tune the hyperparameters, such as the learning rate, by experimenting with different values and observing the performance. You can also search for the optimal learning rate by using techniques like learning rate range test, where you gradually increase the learning rate and monitor the loss.

3. In this coding exercise, you'll have an opportunity to explore the behavior of a deep neural network trained on the CIFAR10 image dataset. Follow the steps below:

* a. Construct a deep neural network (DNN) using 20 hidden layers, each comprising 100 neurons. To facilitate this exploration, employ the Swish activation function for each layer. Utilize nn.ModuleList to manage the layers effectively.

* b. Load the CIFAR10 dataset for training your network. Utilize the appropriate function, such as torchvision.datasets.CIFAR10. The dataset consists of 60,000 color images, with dimensions of 32×32 pixels. It is divided into 50,000 training samples and 10,000 testing samples. With 10 classes in the dataset, ensure that your network has a softmax output layer comprising 10 neurons (in PyTorch, nn.CrossEntropyLoss applies log-softmax internally, so the model itself should end with a 10-neuron linear layer that outputs raw logits). When modifying the model's architecture or hyperparameters, conduct a search to identify an appropriate learning rate. Implement early stopping during training and employ the Nadam optimization algorithm.

* c. Experiment by adding batch normalization to your network. Compare the learning curves obtained with and without batch normalization. Analyze whether the model converges faster with batch normalization and observe any improvements in its performance. Additionally, assess the impact of batch normalization on training speed.

* d. As an additional experiment, substitute batch normalization with SELU (Scaled Exponential Linear Units). Make the necessary adjustments to ensure the network self-normalizes. This involves standardizing the input features, initializing the network's weights using LeCun normal initialization (nn.init.kaiming_normal_ with mode='fan_in' and nonlinearity='linear', which gives std = sqrt(1 / fan_in); the default arguments give He initialization instead), and ensuring that the DNN consists solely of dense layers. Observe the effects of utilizing SELU activation and self-normalization on the network's training stability and performance.

In [None]:
# TODO
class MyNN1(nn.Module):
    """Deep MLP with Swish (SiLU) hidden layers and a linear classifier head.

    Args:
        input_size: number of features in each flattened input sample.
        hidden_size: width of every hidden layer.
        num_classes: number of classes (length of the returned logit vector).
        num_hidden_layers: number of ``Linear -> SiLU`` hidden blocks
            (default 20). Must be at least 1.

    Raises:
        ValueError: if ``num_hidden_layers`` is smaller than 1.

    ``forward`` returns raw logits of shape ``(batch, num_classes)``. No
    Softmax module is applied because ``nn.CrossEntropyLoss`` performs
    log-softmax internally; use ``torch.softmax(logits, dim=1)`` if
    probabilities are needed.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_hidden_layers=20):
        super(MyNN1, self).__init__()
        if num_hidden_layers < 1:
            raise ValueError("num_hidden_layers must be at least 1")

        layers = [nn.Linear(input_size, hidden_size), nn.SiLU()]
        for _ in range(num_hidden_layers - 1):
            layers.append(nn.Linear(hidden_size, hidden_size))
            layers.append(nn.SiLU())

        # Classifier head: project the last hidden layer onto one logit per class
        layers.append(nn.Linear(hidden_size, num_classes))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for a ``(batch, input_size)`` tensor."""
        return self.model(x)

In [None]:
## LOADING DATASET
# Convert images to tensors, then normalize each of the three channels with
# mean 0.5 and std 0.5
cifar_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
transform_CIFAR10 = transforms.Compose(cifar_steps)

train_CIFAR10 = datasets.CIFAR10(
    root="./data", train=True, download=True, transform=transform_CIFAR10
)
print("train size: ", len(train_CIFAR10))

test_CIFAR10 = datasets.CIFAR10(
    root="./data", train=False, download=True, transform=transform_CIFAR10
)
print("test size: ", len(test_CIFAR10))

# Batches of 50; only the training split is shuffled
train_dloader = DataLoader(dataset=train_CIFAR10, batch_size=50, shuffle=True)
test_dloader = DataLoader(dataset=test_CIFAR10, batch_size=50, shuffle=False)

## SETTING HYPERPARAMETERS
learning_rate = 1e-3
epochs = 10
input_size = 32 * 32 * 3  # length of a flattened 32x32 image with 3 channels
output_size = 100         # NOTE: passed below as MyNN1's hidden_size argument
num_class = 10

# Create an instance of the model
model = MyNN1(input_size, output_size, num_class)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.NAdam(model.parameters(), lr=learning_rate)


In [None]:
for epoch in range(epochs):
    # --- optimization pass over the training set ---
    model.train()
    for inputs, labels in train_dloader:
        optimizer.zero_grad()
        # Flatten every image in the batch to one feature vector
        flat_inputs = inputs.view(inputs.size(0), -1)
        loss = criterion(model(flat_inputs), labels)
        loss.backward()
        optimizer.step()

    # --- loss on the test loader, without gradient tracking ---
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, labels in test_dloader:
            batch_outputs = model(inputs.view(inputs.size(0), -1))
            total_loss += criterion(batch_outputs, labels).item()

    # The reported loss is the mean per-batch loss on the test loader
    epoch_loss = total_loss / len(test_dloader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")


In [None]:
# Evaluation
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_dloader:
        images = images.view(inputs.size(0), -1)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print("Accuracy: {:.2f}%".format(accuracy))

In [None]:
class MyNN_WithBatch(nn.Module):
    """Deep MLP whose hidden blocks are ``Linear -> BatchNorm1d -> SiLU``.

    Args:
        input_size: number of features in each flattened input sample.
        hidden_size: width of every hidden layer.
        num_classes: number of classes (length of the returned logit vector).
        num_hidden_layers: number of hidden blocks (default 20). Must be at
            least 1.

    Raises:
        ValueError: if ``num_hidden_layers`` is smaller than 1.

    ``forward`` returns raw logits of shape ``(batch, num_classes)``. No
    Softmax module is applied because ``nn.CrossEntropyLoss`` performs
    log-softmax internally.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_hidden_layers=20):
        super(MyNN_WithBatch, self).__init__()
        if num_hidden_layers < 1:
            raise ValueError("num_hidden_layers must be at least 1")

        layers = []
        fan_in = input_size
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(fan_in, hidden_size))
            layers.append(nn.BatchNorm1d(hidden_size))
            layers.append(nn.SiLU())
            fan_in = hidden_size

        # Classifier head: project the last hidden layer onto one logit per class
        layers.append(nn.Linear(hidden_size, num_classes))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits for a ``(batch, input_size)`` tensor."""
        return self.model(x)
    

In [None]:
model_Batch = MyNN_WithBatch(input_size,output_size,num_class)

# This model needs its own optimizer: an optimizer built from another model's
# parameters would never update model_Batch's weights.
optimizer_Batch = optim.NAdam(model_Batch.parameters(), lr=learning_rate)

for epoch in range(epochs):
    model_Batch.train()

    for inputs, labels in train_dloader:
        # Flatten every image in the batch to one feature vector
        images = inputs.view(inputs.size(0), -1)
        optimizer_Batch.zero_grad()
        outputs = model_Batch(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_Batch.step()

    # Validation: eval mode makes BatchNorm use its running statistics
    model_Batch.eval()
    with torch.no_grad():
        total_loss = 0
        for inputs, labels in test_dloader:
            outputs = model_Batch(inputs.view(inputs.size(0), -1))
            total_loss += criterion(outputs, labels).item()

    # The reported loss is the mean per-batch loss on the test loader
    epoch_loss = total_loss / len(test_dloader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

In [None]:
class MyNN_WithSELU(nn.Module):
    """Deep dense network with SELU hidden layers and LeCun normal weights.

    Args:
        input_size: number of features in each flattened input sample.
        hidden_size: width of every hidden layer.
        num_classes: number of classes (length of the returned logit vector).
        num_hidden_layers: number of ``Linear -> SELU`` hidden blocks
            (default 20). Must be at least 1.

    Raises:
        ValueError: if ``num_hidden_layers`` is smaller than 1.

    ``forward`` returns raw logits of shape ``(batch, num_classes)``. No
    Softmax module is applied because ``nn.CrossEntropyLoss`` performs
    log-softmax internally.
    """

    def __init__(self, input_size, hidden_size, num_classes, num_hidden_layers=20):
        super(MyNN_WithSELU, self).__init__()
        if num_hidden_layers < 1:
            raise ValueError("num_hidden_layers must be at least 1")

        layers = []
        fan_in = input_size
        for _ in range(num_hidden_layers):
            layers.append(nn.Linear(fan_in, hidden_size))
            # nn.SELU's only argument is the `inplace` flag, so the layer
            # width must not be passed to it.
            layers.append(nn.SELU())
            fan_in = hidden_size

        # Classifier head: project the last hidden layer onto one logit per class
        layers.append(nn.Linear(hidden_size, num_classes))

        self.model = nn.Sequential(*layers)

        # LeCun normal initialization (std = sqrt(1 / fan_in)): kaiming_normal_
        # with a gain of 1 ('linear') in fan_in mode. Biases start at zero.
        for layer in self.model:
            if isinstance(layer, nn.Linear):
                nn.init.kaiming_normal_(
                    layer.weight, mode="fan_in", nonlinearity="linear"
                )
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return class logits for a ``(batch, input_size)`` tensor."""
        return self.model(x)

In [None]:
model_Selu = MyNN_WithSELU(input_size,output_size,num_class)

# This model needs its own optimizer: an optimizer built from another model's
# parameters would never update model_Selu's weights.
optimizer_Selu = optim.NAdam(model_Selu.parameters(), lr=learning_rate)

for epoch in range(epochs):
    model_Selu.train()

    for inputs, labels in train_dloader:
        # Flatten every image in the batch to one feature vector
        images = inputs.view(inputs.size(0), -1)
        optimizer_Selu.zero_grad()
        outputs = model_Selu(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_Selu.step()

    # Validation
    model_Selu.eval()
    with torch.no_grad():
        total_loss = 0
        for inputs, labels in test_dloader:
            outputs = model_Selu(inputs.view(inputs.size(0), -1))
            total_loss += criterion(outputs, labels).item()

    # The reported loss is the mean per-batch loss on the test loader
    epoch_loss = total_loss / len(test_dloader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")