In [13]:
#Download CIFAR 10 dataset for training and validation purposes and apply the following changes on each image:
# 1) make it a tensor
# 2) normalize it based on the mean and standard deviation among all pixels in each channel (RGB).
#Print the size of training and validation datasets
#Plot the last training image
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Load the raw training images once (tensor conversion only) so the per-channel
# statistics can be measured before any normalization is applied.
# NOTE(review): if this download fails with CERTIFICATE_VERIFY_FAILED, the local
# Python installation is presumably missing root certificates; repair the
# certificate store instead of disabling SSL verification.
raw_train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transforms.ToTensor(), download=True)

# `.data` holds the training images as a uint8 array with the colour channel last.
# ToTensor rescales pixels to [0, 1], so the statistics are divided by 255 to be
# expressed on the same scale as the tensors that Normalize will receive.
channel_mean = tuple(float(raw_train_dataset.data[..., c].mean()) / 255 for c in range(3))
channel_std = tuple(float(raw_train_dataset.data[..., c].std()) / 255 for c in range(3))
del raw_train_dataset  # only needed for the statistics
print("Per-channel mean:", channel_mean)
print("Per-channel std:", channel_std)

# Define transformations for dataset
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(channel_mean, channel_std)  # Normalize with the measured per-channel statistics
])

# Download CIFAR-10 dataset (validation data is normalized with the training statistics)
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)
val_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, transform=transform, download=True)

# Print dataset sizes
print("Size of training dataset:", len(train_dataset))
print("Size of validation dataset:", len(val_dataset))

# Plot the last training image
last_image, _ = train_dataset[-1]
# Undo the normalization for display: imshow expects float RGB values in [0, 1]
# and would otherwise clip the normalized pixels.
mean_tensor = torch.tensor(channel_mean).view(3, 1, 1)
std_tensor = torch.tensor(channel_std).view(3, 1, 1)
displayable_image = (last_image * std_tensor + mean_tensor).clamp(0, 1)
plt.imshow(displayable_image.permute(1, 2, 0))
plt.title("Last Training Image")
plt.show()

URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:992)>

In [None]:
#We want to make a ternary (three-class) classifier that distinguishes between deer, dogs, and horses, labeled as 4, 5, and 7, respectively.
#Create the subset training and validation datasets for this purpose.
#Print the size of these datasets.
#Plot the last training image

# Select specific classes (deer, dog, horse)
target_classes = [4, 5, 7]
# The classifier has one output per selected class and NLLLoss expects targets in
# [0, number of classes), so the original labels 4, 5, 7 are mapped onto 0, 1, 2.
label_map = {original: new for new, original in enumerate(target_classes)}


class RemappedSubset(torch.utils.data.Dataset):
    """View of a labelled dataset restricted to some classes, with labels renumbered.

    Args:
        dataset: source dataset exposing a `targets` sequence of integer labels and
            returning `(image, label)` pairs when indexed.
        label_map: dict mapping each original label to keep onto its new label.
    """

    def __init__(self, dataset, label_map):
        self.dataset = dataset
        self.label_map = label_map
        # Filter on the stored labels; indexing the dataset itself would decode and
        # transform every image just to read its label.
        self.indices = [i for i, label in enumerate(dataset.targets) if label in label_map]

    def __len__(self):
        return len(self.indices)

    def __getitem__(self, idx):
        image, label = self.dataset[self.indices[idx]]
        return image, self.label_map[label]


train_subset = RemappedSubset(train_dataset, label_map)
val_subset = RemappedSubset(val_dataset, label_map)

# Print subset dataset sizes
print("Size of subset training dataset:", len(train_subset))
print("Size of subset validation dataset:", len(val_subset))

# Plot the last training image
# Show the stored raw pixels: the transformed tensor is normalized and imshow
# would clip any values outside [0, 1].
plt.imshow(train_dataset.data[train_subset.indices[-1]])
plt.title("Last Subset Training Image")
plt.show()

In [None]:
#Create a NN consisting of 
# 1) a linear layer that receives appropriate number of input features and outputs 1024 features, followed by hyperbolic
#    tangent as the activation function,
# 2) a linear layer that receives appropriate number of input features and outputs 256 features, followed by hyperbolic
#    tangent as the activation function,
# 3) the final linear layer with appropriate input and output number of features, followed by logarithm of softmax
#Print total number of existing parameters in the NN with respect to which gradient of loss must be computed.

import torch.nn as nn
# Define a simple fully connected network operating on flattened 3x32x32 images
n_in = 3 * 32 * 32
n_out = 3  # one output per selected class
model = nn.Sequential(
    nn.Linear(n_in, 1024),
    nn.Tanh(),
    nn.Linear(1024, 256),
    nn.Tanh(),
    nn.Linear(256, n_out),
    nn.LogSoftmax(dim=1)  # log-probabilities over the class dimension
)

# Count only the parameters for which a gradient is computed (requires_grad=True)
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of existing parameters in the NN:", total_params)

In [None]:
#Our training functionality is supposed to compute gradient on batches of training data, randomly selected each time.
#To this end, create a training data loader with batch size 32 that randomizes access to each batch.
#Also, create a validation data loader with the same batch size that does not randomize access to each batch (no need!)
#Print the number of batches in training and validation data loaders
#Print the size of the last batch in each of the training and validation loaders (they are not necessarily 32)

# Create data loaders
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_subset, batch_size=batch_size, shuffle=False)


def last_batch_size(num_samples, batch_size):
    """Return the size of the final batch when `num_samples` items are split into
    consecutive batches of `batch_size` and the incomplete batch is kept.

    Returns 0 when there are no samples at all.
    """
    if num_samples == 0:
        return 0
    # A remainder of 0 means the final batch is a full one, not an empty one.
    return num_samples % batch_size or batch_size


# Print number of batches and size of the last batch
print("Number of batches in training loader:", len(train_loader))
print("Number of batches in validation loader:", len(val_loader))
print("Size of last batch in the training loader:", last_batch_size(len(train_loader.dataset), batch_size))
print("Size of last batch in the validation loader:", last_batch_size(len(val_loader.dataset), batch_size))

In [None]:
#Define your training function that receives the data loaders, model, loss function, optimizer, and number of epochs.
#In each epoch, you should go through each training data batch, and:
# 1) compute the output batch, and accordingly the loss
# 2) compute the gradient of loss wrt parameters, and update the parameters
#After covering all epochs, your training function must report
# 1) the training accuracy, and
# 2) the validation accuracy

# Define training function
def train(train_loader, val_loader, model, loss_fun, optimizer, epochs):
    """Train `model` for `epochs` epochs and report accuracy after every epoch.

    Each batch is flattened to shape (batch, features) before being passed to the
    model. The training accuracy is accumulated while the parameters are being
    updated, so it is a running figure over the epoch rather than a separate
    evaluation pass; the validation accuracy is measured in eval mode without
    gradient tracking.

    Args:
        train_loader: iterable of (images, labels) batches used for optimization.
        val_loader: iterable of (images, labels) batches used for evaluation only.
        model: module mapping flattened images to per-class scores.
        loss_fun: callable taking (outputs, labels) and returning a scalar loss.
        optimizer: optimizer holding the parameters of `model`.
        epochs: number of passes over `train_loader`.

    Returns:
        Tuple (train_accuracy, val_accuracy) from the last epoch, or (0.0, 0.0)
        when `epochs` is 0. An accuracy is 0.0 when its loader yields no samples.
    """
    train_accuracy = 0.0
    val_accuracy = 0.0

    # Training loop
    for epoch in range(epochs):
        model.train()
        correct_train = 0
        seen_train = 0

        for images, labels in train_loader:
            optimizer.zero_grad()
            # flatten() also handles non-contiguous batches, unlike view()
            outputs = model(images.flatten(start_dim=1))
            loss = loss_fun(outputs, labels)
            loss.backward()
            optimizer.step()
            predicted = outputs.argmax(dim=1)
            correct_train += (predicted == labels).sum().item()
            seen_train += labels.size(0)

        # Validation loop
        model.eval()
        correct_val = 0
        seen_val = 0
        with torch.no_grad():
            for images, labels in val_loader:
                outputs = model(images.flatten(start_dim=1))
                predicted = outputs.argmax(dim=1)
                correct_val += (predicted == labels).sum().item()
                seen_val += labels.size(0)

        # Divide by the samples actually iterated: this stays correct when the
        # loader drops the last batch or samples only part of the dataset.
        train_accuracy = correct_train / seen_train if seen_train else 0.0
        val_accuracy = correct_val / seen_val if seen_val else 0.0
        print(f"Epoch [{epoch+1}/{epochs}] - Train accuracy: {train_accuracy:.4f}, Validation accuracy: {val_accuracy:.4f}")

    return train_accuracy, val_accuracy

In [14]:
#Call the training function on the created data loaders, the created NN, negative log likelihood loss function, 
# stochastic gradient descent optimizer, and 100 epochs.
#Is the model overfit for this problem? (Yes/No) Why?

# Fit the network: negative log likelihood loss, plain SGD, 100 passes over the data
loss_fun = nn.NLLLoss()
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)
train(
    train_loader=train_loader,
    val_loader=val_loader,
    model=model,
    loss_fun=loss_fun,
    optimizer=optimizer,
    epochs=100,
)

NameError: name 'train_loader' is not defined