1. You have been given a partially implemented code for a feed-forward neural network using PyTorch. Your task is to complete the missing parts of the code to make it functional.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the neural network architecture
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)  # Complete this line to pass the output of the first layer through the activation function
        x = self.relu(x) # Complete this line to pass the output of the activation function through the second layer
        return x

# Define the hyperparameters
input_size = 10
hidden_size = 20
label_size = 5
learning_rate = 0.001
num_epochs = 1000

# Create the neural network object
model = NeuralNetwork(input_size, hidden_size, label_size)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Generate some dummy data for training
train_data = torch.randn(100, input_size)
train_labels = torch.randint(label_size, (100,))

# Training loop
for epoch in range(num_epochs):
    # Forward pass
    # Complete this line to pass the training data through the model and obtain the predictions
    outputs = model(train_data)

    # Compute the loss
    loss = criterion(outputs, train_labels)

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 100 == 0:
        print(f'Epoch: {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Test the trained model
test_data = torch.randn(10, input_size)
with torch.no_grad():
    # Complete this line to pass the test data through the model and obtain the predictions
    test_outputs = model(test_data)

    # Print the predictions
    _, predicted = torch.max(test_outputs.data, 1)
    print("Predictions:", predicted)

Epoch: 100/1000, Loss: 1.6241873502731323
Epoch: 200/1000, Loss: 1.6221901178359985
Epoch: 300/1000, Loss: 1.6203598976135254
Epoch: 400/1000, Loss: 1.6187623739242554
Epoch: 500/1000, Loss: 1.6173750162124634
Epoch: 600/1000, Loss: 1.6161226034164429
Epoch: 700/1000, Loss: 1.6148784160614014
Epoch: 800/1000, Loss: 1.6136634349822998
Epoch: 900/1000, Loss: 1.612565040588379
Epoch: 1000/1000, Loss: 1.611538052558899
Predictions: tensor([0, 3, 0, 4, 3, 4, 1, 0, 1, 2])


2. In this coding exercise, you need to implement the training of a deep MLP on the MNIST dataset using PyTorch and manually tune the hyperparameters. Follow the steps below to proceed:

* Load the MNIST dataset using torchvision.datasets.MNIST. The dataset contains handwritten digit images, and it can be easily accessed through PyTorch's torchvision module.

In [3]:
# Load the MNIST dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


transform = transforms.Compose([
    transforms.ToTensor(),  # Convert PIL Image or numpy.ndarray to tensor
    transforms.Normalize((0.5,), (0.5,))  # Normalize the data with mean and standard deviation
])

trainset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)

testset = datasets.MNIST(root="./data", train=False, download=True, transform=transform)


* Define your deep MLP model. Specify the number of hidden layers, the number of neurons in each layer, and the activation function to be used. You can use the nn.Sequential container to stack the layers.

In [4]:
class MLP(nn.Module):
    def __init__(self, in_features : int, output_features : int, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, 300),
            nn.BatchNorm1d(300),
            nn.ReLU(),
            nn.Linear(300, 100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Linear(100, output_features)
        )
    
    def forward(self, x):
        return self.mlp(x)
    

* Set up the training loop and the hyperparameters. You can use the CrossEntropyLoss as the loss function and the Stochastic Gradient Descent (SGD) optimizer.

In [5]:
# Set hyperparameters
learning_rate = 0.01
epochs = 100
batch_size = 64

# Create data loaders
train_loader = DataLoader(trainset, batch_size=64, shuffle = False)
test_loader = DataLoader(testset, batch_size=64, shuffle=False)

# Create an instance of the model
input_size = 784
output_size = 10

model = MLP(input_size, output_size)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = learning_rate)

* Train the model by iterating over the training dataset for the specified number of epochs. Compute the loss, perform backpropagation, and update the model's parameters. 

In [6]:
import sys
sys.path.append('/home/mohe/Documents/DeepLearning/deeplearning-bootcamp-pytorch')

from bootcamp_libs import device



device = torch.device(device.find_device())
device
gpu_available = torch.cuda.is_available()
gpu_available

False

In [20]:
# Training loop
running_loss = 0
for epoch in range(epochs):
    for images, labels in train_loader:
        # Flatten the images
#         images = images.view(-1, 784)
#         # Zero the gradients
        optimizer.zero_grad()

#         # Forward pass
        outputs = model(images)

        loss = criterion(outputs, labels)
        running_loss += loss.item()


#         # Backward pass and optimization
        loss.backward()
        optimizer.step()

    

    average_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {average_loss}')



Epoch [1/1000], Loss: 0.4802203975570227
Epoch [2/1000], Loss: 0.6672806992157817
Epoch [3/1000], Loss: 0.7959766029439834
Epoch [4/1000], Loss: 0.8921253565343411
Epoch [5/1000], Loss: 0.9667225991358865
Epoch [6/1000], Loss: 1.025702881324949
Epoch [7/1000], Loss: 1.0728031671715221
Epoch [8/1000], Loss: 1.1105547639296087
Epoch [9/1000], Loss: 1.1409444954188694
Epoch [10/1000], Loss: 1.165555464466716
Epoch [11/1000], Loss: 1.185666803904409
Epoch [12/1000], Loss: 1.2023218810051453
Epoch [13/1000], Loss: 1.2162381918698266
Epoch [14/1000], Loss: 1.228026491900492
Epoch [15/1000], Loss: 1.2381492407172363
Epoch [16/1000], Loss: 1.2469521545490914
Epoch [17/1000], Loss: 1.2546883847028054
Epoch [18/1000], Loss: 1.2615545139633002
Epoch [19/1000], Loss: 1.2677000267823328
Epoch [20/1000], Loss: 1.2732416835317795
Epoch [21/1000], Loss: 1.2782756065238523
Epoch [22/1000], Loss: 1.2828765173020316
Epoch [23/1000], Loss: 1.2871090147848692
Epoch [24/1000], Loss: 1.2910172154254806
Epoch

* Evaluate the trained model on the test dataset and calculate the accuracy (Please take a moment to consider the code below!)

In [7]:
# Evaluation
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, 784)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print("Accuracy: {:.2f}%".format(accuracy))

Accuracy: 12.00%


* Manually tune the hyperparameters, such as the learning rate, by experimenting with different values and observing the performance. You can also search for the optimal learning rate by using techniques like learning rate range test, where you gradually increase the learning rate and monitor the loss.

In [None]:
# TODO

3. In this coding exercise, you'll have an opportunity to explore the behavior of a deep neural network trained on the CIFAR10 image dataset. Follow the steps below:

* a. Construct a deep neural network (DNN) using 20 hidden layers, each comprising 100 neurons. To facilitate this exploration, employ the Swish activation function for each layer. Utilize nn.ModuleList to manage the layers effectively.

* b. Load the CIFAR10 dataset for training your network. Utilize the appropriate function, such as torchvision.datasets.CIFAR10. The dataset consists of 60,000 color images, with dimensions of 32×32 pixels. It is divided into 50,000 training samples and 10,000 testing samples. With 10 classes in the dataset, ensure that your network has a softmax output layer comprising 10 neurons. When modifying the model's architecture or hyperparameters, conduct a search to identify an appropriate learning rate. Implement early stopping during training and employ the Nadam optimization algorithm.

* c. Experiment by adding batch normalization to your network. Compare the learning curves obtained with and without batch normalization. Analyze whether the model converges faster with batch normalization and observe any improvements in its performance. Additionally, assess the impact of batch normalization on training speed.

* d. As an additional experiment, substitute batch normalization with SELU (Scaled Exponential Linear Units). Make the necessary adjustments to ensure the network self-normalizes. This involves standardizing the input features, initializing the network's weights using LeCun normal initialization (nn.init.kaiming_normal_), and ensuring that the DNN consists solely of dense layers. Observe the effects of utilizing SELU activation and self-normalization on the network's training stability and performance.

In [11]:
# a construct DNN W Swish

class Swish(nn.Module):
    def forward(self, x):
        return x * torch.sigmoid(x)
    

class DeepNeuralNetwork(nn.Module):
    def __init__(self):
        super(DeepNeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        layers = []
        for _ in range(20):
            layers.append(nn.Linear(100, 100))
            layers.append(Swish())
        self.hidden_layers = nn.ModuleList(layers)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        for i in range(0, len(self.hidden_layers), 2):
            x = self.hidden_layers[i](x)
            x = self.hidden_layers[i + 1](x)
        return self.output_layer(x)
    
# b Load Data set
    
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

model2 = DeepNeuralNetwork().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.NAdam(model2.parameters())
print(type(train_dataset))

Files already downloaded and verified
Files already downloaded and verified
<class 'torchvision.datasets.cifar.CIFAR10'>


In [9]:

# b

def train(model, criterion, optimizer, train_loader, test_loader, epochs=10, early_stopping_patience=5):
    best_loss = float('inf')
    patience = 0

    for epoch in range(epochs):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            total_loss = 0.0
            total_correct = 0
            total_samples = 0
        
            
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                total_loss += loss.item()

                _, predicted = outputs.max(1)
                total_correct += predicted.eq(labels).sum().item()
                total_samples += labels.size(0)

            average_loss = total_loss / len(test_loader)
            accuracy = total_correct / total_samples * 100.0
            
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {average_loss:.4f}, Accuracy: {accuracy:.2f}%")

            if average_loss < best_loss:
                best_loss = average_loss
                patience = 0
            else:
                patience += 1
                if patience >= early_stopping_patience:
                    print("Early stopping.")
                    break


train(model2, criterion, optimizer, train_loader, test_loader, epochs = 10, early_stopping_patience=5)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6144x32 and 100x100)

In [18]:
# d train with SELU

class DNNWithSELU(nn.Module):
    def __init__(self):
        super(DNNWithSELU, self).__init__()
        layers = []
        for _ in range(20):
            layers.append(nn.Linear(100, 100))
            layers.append(nn.SELU())
        self.hidden_layers = nn.ModuleList(layers)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        for layer in self.hidden_layers:
            x = layer(x)
        return self.output_layer(x)
    
model_with_selu = DNNWithSELU().to(device)
optimizer_selu = optim.NAdam(model_with_selu.parameters())

train(model_with_selu, criterion, optimizer_selu, train_loader, test_loader)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (6144x32 and 100x100)

In [None]:

# c batch norm
class DNNWithBatchNorm(nn.Module):
    def __init__(self):
        super(DNNWithBatchNorm, self).__init__()
        layers = []
        for _ in range(20):
            layers.append(nn.Linear(100, 100))
            layers.append(nn.BatchNorm1d(100))
            layers.append(Swish())
        self.hidden_layers = nn.ModuleList(layers)
        self.output_layer = nn.Linear(100, 10)

    def forward(self, x):
        for layer in self.hidden_layers:
            x = layer(x)
        return self.output_layer(x)
    

model_with_batch_norm = DNNWithBatchNorm().to(device)
optimizer_batch_norm = optim.Nadam(model_with_batch_norm.parameters())


# train with BN
train(model_with_batch_norm, criterion, optimizer_batch_norm, train_loader, test_loader)
