In [1]:
#Download CIFAR 10 dataset for training and validation purposes and apply the following changes on each image:
# 1) make it a tensor
# 2) normalize it based on the mean and standard deviation among all pixels in each channel (RGB).
#Print the size of training and validation datasets

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize images
])

# Download the CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

print("Size of training dataset:", len(train_dataset))
print("Size of validation dataset:", len(val_dataset))


Files already downloaded and verified
Files already downloaded and verified
Size of training dataset: 50000
Size of validation dataset: 10000


In [15]:
#We want to make a tertiary classifier that distinguishes between deers, dogs, and horses, labeled as 4, 5, and 7, resp.
#Create the subset training and validation datasets for this purpose.
#Print the size of these datasets.
target_classes = [4, 5, 7]
train_subset = torch.utils.data.Subset(train_dataset, [i for i in range(len(train_dataset)) if train_dataset[i][1] in target_classes])
val_subset = torch.utils.data.Subset(val_dataset, [i for i in range(len(val_dataset)) if val_dataset[i][1] in target_classes])

print("Size of subset training dataset:", len(train_subset))
print("Size of subset validation dataset:", len(val_subset))

Size of subset training dataset: 307
Size of subset validation dataset: 58


In [26]:
#Create a parameterized CNN with the following details. 
# The parameter is the number of output channels n after the first convolution.
# All kernels are of size 3 by 3.
# Convolutions must not change the height and width.
# Each convolution is followed by hyperbolic tangent as the activation function, and max pooling of size 2 by 2.
# Convolution ayers:
# 1) First convolution layer works on the input RGB input. Let's assume there are n kernels in this layer.
# 2) Second convolution layer works on the result of the preceding max pooling layer. 
#    Let's assume there are n/2 kernels in this layer.
# 3) Third convolution layer works on the result of the preceding max pooling layer. 
#    Let's assume there are n/2 kernels in this layer. 
# Fully connected layers:
# 1) First fully connected layer works on the result of the preceding max pooling layer. 
#    This layer is followed by hyperbolic tangent as its activation function.
# 2) Second fully connected layer works on the result of the preceding activation function, and emits numbers associated
#    with each class.
# We will use negative log likelihood to compute the loss. So you may add additional layer(s) to your network.
# Note: Since the network is parameterized (n), you'd rather define the CNN as a subclass of nn.Module.
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, n, num_classes):
        super(CNN, self).__init__()

        # First convolutional layer
        self.conv1 = nn.Conv2d(3, n, kernel_size=3, padding=1)
        self.relu1 = nn.Tanh()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Second convolutional layer
        self.conv2 = nn.Conv2d(n, n // 2, kernel_size=3, padding=1)
        self.relu2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Third convolutional layer
        self.conv3 = nn.Conv2d(n // 2, n // 2, kernel_size=3, padding=1)
        self.relu3 = nn.Tanh()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected layers
        self.fc1 = nn.Linear(n // 2 * 4 * 4, n // 2)  # Assuming input size of 32x32
        self.relu4 = nn.Tanh()
        self.fc2 = nn.Linear(n // 2, num_classes)

    def forward(self, x):
        # Convolutional layers
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))

        # Flatten the feature maps
        x = x.view(x.size(0), -1)

        # Fully connected layers
        x = self.relu4(self.fc1(x))
        x = self.fc2(x)

        return x

# Example usage:
n = 64  # You can change this value as needed
num_classes = 10  # Adjust this based on your classification task
model = CNN(n, num_classes)

In [27]:
#Create two networks as instances of the CNN you defined above, with n = 16 and n = 32 respectively. 
#Print the total number of parameters in each of these instances.
# Define the CNN class (as shown in the previous response)

n1 = 16
model1 = CNN(n1, num_classes)

n2 = 32
model2 = CNN(n2, num_classes)

# Calculate and print the number of parameters for each model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())

total_params_model1 = count_parameters(model1)
total_params_model2 = count_parameters(model2)

print(f"Total parameters in the model with n = 16: {total_params_model1}")
print(f"Total parameters in the model with n = 32: {total_params_model2}")

Total parameters in the model with n = 16: 3314
Total parameters in the model with n = 32: 12122


In [28]:
#Our training functionality is supposed to compute gradient on batches of training data, randlomy selected each time.
#To this end, create a training data loader with batch size 32 that randomizes access to each batch.
#Also, create a validation data loader with the same batch size that does not randomize access to each batch (no need!)
#Print the number of batches in training and validation data loaders
import torch
from torch.utils.data import DataLoader, Dataset

# Define a custom dataset for your training and validation data
class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Assuming you have your training and validation data and labels in tensors.
train_data = torch.randn(1000, 3, 32, 32)  # Example random data for training
train_labels = torch.randint(0, 10, (1000,))  # Example random labels for training

val_data = torch.randn(200, 3, 32, 32)  # Example random data for validation
val_labels = torch.randint(0, 10, (200,))  # Example random labels for validation

# Create custom datasets
train_dataset = CustomDataset(train_data, train_labels)
val_dataset = CustomDataset(val_data, val_labels)

# Create data loaders
batch_size = 32

# Training data loader with random shuffling of batches
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Validation data loader without random shuffling
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Print the number of batches in the training and validation data loaders
print(f"Number of batches in training data loader: {len(train_loader)}")
print(f"Number of batches in validation data loader: {len(val_loader)}")

Number of batches in training data loader: 32
Number of batches in validation data loader: 7


In [29]:
#Define your training function that receives the training loader, model, loss function, optimizer, the device (cpu/gpu), and 
# number of epochs.
#In each epoch, you should go through each training data batch, and:
# 1) move data to device
# 1) compute the output batch, and accordingly the loss
# 2) compute the gradient of loss wrt parameters, and update the parameters
#After covering all epochs, your training function must report the training accuracy
import torch

def train(model, train_loader, loss_fn, optimizer, device, num_epochs):
    model.to(device)
    model.train()
    
    for epoch in range(num_epochs):
        total_correct = 0
        total_samples = 0
        total_loss = 0.0
        
        for batch_data, batch_labels in train_loader:
            # Move data to the device
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
            
            # Forward pass
            outputs = model(batch_data)
            
            # Compute the loss
            loss = loss_fn(outputs, batch_labels)
            
            # Backpropagation and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total_samples += batch_labels.size(0)
            total_correct += (predicted == batch_labels).sum().item()
            
            # Accumulate loss
            total_loss += loss.item()
        
        # Calculate and print training accuracy for this epoch
        accuracy = 100 * total_correct / total_samples
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss:.4f}, Accuracy: {accuracy:.2f}%')

In [30]:
#Define a separate function that receives the validation data loader as well as the model and computes the validation 
# accuracy of the model.
import torch

def validate(model, val_loader, device):
    model.to(device)
    model.eval()
    
    total_correct = 0
    total_samples = 0
    
    with torch.no_grad():
        for batch_data, batch_labels in val_loader:
            # Move data to the device
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
            
            # Forward pass
            outputs = model(batch_data)
            
            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            total_samples += batch_labels.size(0)
            total_correct += (predicted == batch_labels).sum().item()
    
    accuracy = 100 * total_correct / total_samples
    return accuracy
    accuracy = validate(model, val_loader, device)
    print(f'Validation Accuracy: {accuracy:.2f}%')

In [21]:
#Define device dynamically based on whether CUDA is available or not.
#Call the training function on the created training data loader, the created CNN  with n = 16, 
# negative log likelihood loss function, stochastic gradient descent optimizer,
# the device you defined, and 100 epochs. Next, call validation accuracy function.
#Is the model overfit? (Yes/No) Why?

#Define the device (CPU in the absence of a GPU)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#16-node model training and validation.
model_n16 = CNN(16)  # Create a 16-n CNN model
criterion = nn.CrossEntropyLoss()  # Loss function
optimizer = optim.SGD(model_n16.parameters(), lr=learning_rate)  # Optimizer

train(trainloader, model_n16, criterion, optimizer, device, epochs) # Update parameters, calculate loss, and train the model
validate(testloader, model_n16, device) # Utilize data validation to validate the training model

NameError: name 'learning_rate' is not defined

In [22]:
#Call the training function on the created training data loader, the created CNN  with n = 32, 
# negative log likelihood loss function, stochastic gradient descent optimizer,
# the device you defined, and 100 epochs. Next, call validation accuracy function.
#Is the model overfit? (Yes/No) Why? 
# (This can be compared to the fully connected network we created in the last set of exercises.)

model_n32 = CNN(32) # Make a 32-n CNN model
optimizer = optim.SGD(model_n32.parameters(), lr=learning_rate)  # Optimizer
train_model(trainloader, model_n32, criterion, optimizer, device, epochs) # Update parameters, calculate loss, and train the model
validate_model(testloader, mo#Next, let's consider L2 regularization with weight decay 0.002 for CNN with n = 32. 
# Is the model overfit? (Yes/No) Why?

weight_decay = 0.002 # Decrease in weight in L2 regularization
optimizer_l2 = optim.SGD(model_n32.parameters(), lr=learning_rate, weight_decay=weight_decay)  # an L2 regularized optimizer
train_model(trainloader, model_n32, criterion, optimizer_l2, device, epochs) # Update parameters, calculate loss, and train the model
validate_model(testloader, model_n32, device) # Utilize data validation to validate the training model

SyntaxError: '(' was never closed (456803862.py, line 10)

In [None]:
#Next, let's consider L2 regularization with weight decay 0.002 for CNN with n = 32. 
# Is the model overfit? (Yes/No) Why?

weight_decay = 0.002 # Decrease in weight in L2 regularization
optimizer_l2 = optim.SGD(model_n32.parameters(), lr=learning_rate, weight_decay=weight_decay)  # an L2 regularized optimizer
train_model(trainloader, model_n32, criterion, optimizer_l2, device, epochs) # Update parameters, calculate loss, and train the model
validate_model(testloader, model_n32, device) # Utilize data validation to validate the training model

In [23]:
#Add a skip connection in your CNN from the output of second max pooling to the input of 3rd max pooling.
#Train the updated CNN with the same parameters including (n = 32).
#Is the model overfit? (Yes/No) Why?

class SkipCNN(nn.Module):
    # skip connections modified CNN model
    def __init__(self, n):
        super(SkipeCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, n, 3) # 3 input channels, n output channels, and a 3x3 kernel are used in the convolutional layer
        self.conv2 = nn.Conv2d(n, n//2, 3) # n input channels, n//2 output channels, and a 3x3 kernel are used in the convolutional layer
        self.conv3 = nn.Conv2d(n//2, n//2, 3) # n//2 input channels, n//2 output channels, and a 3x3 kernel are used in the convolutional layer
        self.fc1 = nn.Linear(n//2 * 2 * 2, n//2) # n//2 * 2 * 2 feature mappings to n//2 neurons are flattened by the first connected layer
        self.fc2 = nn.Linear(n//2, len(class_indices)) # n//2 input neurons, output neurons based on class indices
        self.activation = nn.Tanh() # Hyperbolic Tangent is the activation function (Tanh)
        self.pool = nn.MaxPool2d(2, 2) # 2x2 kernel, stride 2, maximum pooling layer
    def forward(self, x):
        x1 = self.pool(self.activation(self.conv1(x))) # Convolution, activation, and pooling are applied to the first layer
        x2 = self.pool(self.activation(self.conv2(x1))) # Convolution, activation, and pooling are applied to the second layer
        x3 = self.pool(self.activation(self.conv3(x2))) # Convolution, activation, and pooling are applied to the third layer
        x = x3.view(-1, self.num_flat_features(x3)) # Tensor flattening for connected layers
        x = self.activation(self.fc1(x)) # activating the top layer that is connected
        x = self.fc2(x) # Applied a second connected layer.
        return x
    def num_flat_features(self, x): # Total the features in a tensor without using the batch dimension
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
        
model_skip = SkipCNN(32) # Model a 32-n SkipCNN
optimizer_skip = optim.SGD(model_skip.parameters(), lr=learning_rate) # Optimizer
train_model(trainloader, model_skip, criterion, optimizer_skip, device, epochs) # Update parameters, calculate loss, and train the model
validate_model(testloader, model_skip, device) #  Utilize data validation to validate the training model

NameError: name 'SkipeCNN' is not defined

In [24]:
#Consider dropout layers after each max pooling in the original CNN, where the probability of zeroing output features is 30%.
#Train the updated CNN with the same parameters including (n = 32).
#Is the model overfit? (Yes/No) Why?

#From nn.Module, create the DropoutCNN neural network class.
class DropoutCNN(nn.Module):
    # The 'n' input in the class constructor denotes the number of channels in a convolutional layer.
    def __init__(self, n):
        super(DropoutCNN, self).__init__() 
        self.conv1 = nn.Conv2d(3, n, 3) # 3 input channels, 'n' output channels, and 3 kernel sizes make up the first convolutional layer
        self.conv2 = nn.Conv2d(n, n//2, 3) # 'n' input channels, 'n//2' output channles, and 3 kernel sizes make up the second convolutional layer
        self.conv3 = nn.Conv2d(n//2, n//2, 3) # 'n//2' input channles, 'n//2' output channels, and 3 kernel sizes make up the third convulutional layer
        self.fc1 = nn.Linear(n//2 * 2 * 2, n//2) # With input characteristics of "n//2 * 2 * 2" and output features of "n//2," the first connected linear layer
        self.fc2 = nn.Linear(n//2, len(class_indices)) # Create a linear layer with n//2 input features and class_indices length output features
        self.activation = nn.Tanh() # Make the activation function's initial value a hyperbolic tangent (Tanh)
        self.pool = nn.MaxPool2d(2, 2) # Make a maximum 2x2 pooling layer
        self.dropout = nn.Dropout(p=0.3) # Create a layer with a 0.3% dropout rate

    # Network forward pass is defined by the forward method
    def forward(self, x):
        x = self.pool(self.activation(self.conv1(x))) # Applying the first convolutional layer, then activating and max pooling
        x = self.dropout(x) # To the output, apply dropout
        x = self.pool(self.activation(self.conv2(x))) # Applying the second convolutional layer, then activating and max pooling
        x = self.dropout(x) # To the output, apply dropout
        x = self.pool(self.activation(self.conv3(x))) # Applying the third convolutional layer, then activating and max pooling
        x = x.view(-1, self.num_flat_features(x)) # Connected layer shape tensor
        x = self.activation(self.fc1(x)) # Activate the output of the first connected layer
        x = self.fc2(x) # apply the connected second layer
        return x # Return the final tensor
    
    def num_flat_features(self, x): # Total the features in a tensor without using the batch dimension
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

model_dropout = DropoutCNN(32) # Create a 32-n DropoutCNN model.
optimizer_dropout = optim.SGD(model_dropout.parameters(), lr=learning_rate) # Optimizer
train_model(trainloader, model_dropout, criterion, optimizer_dropout, device, epochs) # Update parameters, calculate loss, and train the model
validate_model(testloader, model_dropout, device) #  Utilize data validation to validate the training model

NameError: name 'class_indices' is not defined

In [25]:
#Considering all the modifications which one works better? Plain CNN, CNN+L2, CNN+Skip, CNN+Dropout?