In [1]:
# run this to shorten the data import from the files
path_data = '/home/nero/Documents/Estudos/DataCamp/Python/courses/Intermediate_Deep_Learning_with_PyTorch/datasets/'
import torch

# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs available: {num_gpus}")
    
    # Get the name and properties of each GPU
    for i in range(num_gpus):
        gpu_properties = torch.cuda.get_device_properties(i)
        print(f"GPU {i} - Name: {gpu_properties.name}, "
              f"Memory Capacity: {gpu_properties.total_memory / (1024 ** 2)} MB")
else:
    print("No GPU available. Using CPU.")

# Set the default device to GPU ("cuda") if available, otherwise use CPU ("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)


torch.cuda.set_device(0)

Number of GPUs available: 1
GPU 0 - Name: NVIDIA GeForce MX110, Memory Capacity: 2002.9375 MB
cuda


In [2]:
# exercise 01

"""
Two-input dataset

Building a multi-input model starts with crafting a custom dataset that can supply all the inputs to the model. In this exercise, you will build the Omniglot dataset that serves triplets consisting of:

    The image of a character to be classified,
    The one-hot encoded alphabet vector of length 30, with zeros everywhere but for a single one denoting the ID of the alphabet the character comes from,
    The target label, an integer between 0 and 963.

You are provided with train_samples, a list of 3-tuples comprising an image's file path, its alphabet vector, and the target label. Also, the following imports have already been done for you, so let's get to it!

from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms

"""
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
# Instructions

"""

    Assign transform and samples to class attributes with the same names.
---

    Implement the .__len()__ method such that it return the number of samples stored in the class' samples attribute.
---

    Unpack the sample at index idx assigning its contents to img_path, alphabet, and label.
    Transform the loaded image with self.transform() and assign it to img_transformed.
---
Question

Nice done! With your implementation of OmniglotDataset ready, you can actually create the dataset and DataLoader, just like you did it before.

dataset_train = OmniglotDataset(
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((64, 64)),
    ]),
    samples=samples,
)

dataloader_train = DataLoader(
    dataset_train, shuffle=True, batch_size=3,
)

What will dataloader_train produce? To answer, feel free to execute the code above and peek at the first batch of data returned by the DataLoader!
(Images, Alphabet Vectors, labels)
"""

# solution

class OmniglotDataset(Dataset):
    def __init__(self, transform, samples):
		# Assign transform and samples to class attributes
        self.transform = transform
        self.samples = samples
                    
    def __len__(self):
		# Return number of samples
        return len(self.samples)

    def __getitem__(self, idx):
      	# Unpack the sample at index idx
        img_path, alphabet, label = self.samples[idx]
        img = Image.open(img_path).convert('L')
        # Transform the image 
        img_transformed = self.transform(img)
        return img_transformed, alphabet, label

#----------------------------------#

# Conclusion

"""
Well done! You've built a DataLoader that can serve multiple inputs to a model. The next task is to craft a model architecture capable of processing all of these inputs!
"""

"\nWell done! You've built a DataLoader that can serve multiple inputs to a model. The next task is to craft a model architecture capable of processing all of these inputs!\n"

In [4]:
# exercise 02

"""
Two-input model

With the data ready, it's time to build the two-input model architecture! To do so, you will set up a model class with the following methods:

    .__init__(), in which you will define sub-networks by grouping layers; this is where you define the two layers for processing the two inputs, and the classifier that returns a classification score for each class.

    forward(), in which you will pass both inputs through corresponding pre-defined sub-networks, concatenate the outputs, and pass them to the classifier.

torch.nn is already imported for you as nn. Let's do it!
"""

# Instructions

"""

    Define image, alphabet and classifier sub-networks as sequential models, assigning them to self.image_layer, self.alphabet_layer and self.classifier, respectively.
---

    Pass the image and alphabet through the appropriate model layers.
---

    Concatenate the outputs from image and alphabet layers and assign the result to x.

"""

# solution

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Define sub-networks as sequential models
        self.image_layer = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ELU(),
            nn.Flatten(),
            nn.Linear(16*32*32, 128)
        )
        self.alphabet_layer = nn.Sequential(
            nn.Linear(30, 8),
            nn.ELU(), 
        )
        self.classifier = nn.Sequential(
            nn.Linear(128 + 8, 964), 
        )
        
    def forward(self, x_image, x_alphabet):
		# Pass the x_image and x_alphabet through appropriate layers
        x_image = self.image_layer(x_image)
        x_alphabet = self.alphabet_layer(x_alphabet)
        # Concatenate x_image and x_alphabet
        x = torch.cat((x_image, x_alphabet), dim=1)
        return self.classifier(x)

#----------------------------------#

# Conclusion

"""
Fantastic job! With both the dataset and the model defined, let's take a look at the training loop for our two-input model!
"""

NameError: name 'nn' is not defined

In [2]:
# exercise 03

"""
Two-output Dataset and DataLoader

In this and the following exercises, you will build a two-output model to predict both the character and the alphabet it comes from based on the character's image. As always, you will start with getting the data ready.

The OmniglotDataset class you have created before is available for you to use along with updated samples. Let's use it to build the Dataset and the DataLoader.

The following imports have already been done for you:

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

"""

# Instructions

"""

    Print the element of samples at index 100 and examine its structure.
---

    Use your OmniglotDataset to create dataset_train, passing the two image transforms you have used before: parse the image to a tensor and resize it to size (64, 64).
---

    Create dataloader_train from dataset_train; shuffle the training images and set batch size to 32.

"""

# solution

# Print the sample at index 100
print(samples[100])

# Create dataset_train
dataset_train = OmniglotDataset(
    transform=transforms.Compose([
        transforms.ToTensor(),
      	transforms.Resize((64, 64)),
    ]),
    samples=samples,
)

# Create dataloader_train
dataloader_train = DataLoader(
    dataset_train, shuffle=True, batch_size=32,
)

#----------------------------------#

# Conclusion

"""
Perfect! Notice how samples now contain, next to the image path, the target labels for the character and the alphabet. In the next exercise, you will examine the architecture of the two-output model.
"""

'\n\n'

In [3]:
# exercise 04

"""
Two-output model architecture

In this exercise, you will construct a multi-output neural network architecture capable of predicting the character and the alphabet.

Recall the general structure: in the .__init__() method, you define layers to be used in the forward pass later. In the forward() method, you will first pass the input image through a couple of layers to obtain its embedding, which in turn is fed into two separate classifier layers, one for each output.

torch.nn is already imported under its usual alias, so let's build a model!
"""

# Instructions

"""

    Define self.classifier_alpha and self.classifier_char as linear layers with input shapes matching the output of image_layer, and output shapes corresponding to the number of alphabets (30) and the number of characters (964), respectively.
---

    Pass the image embedding x_image separately through each of the classifiers, assigning the results to output_alpha and output_char, respectively, and return them in this order.

"""

# solution

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.image_layer = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),
            nn.MaxPool2d(kernel_size=2),
            nn.ELU(),
            nn.Flatten(),
            nn.Linear(16*32*32, 128)
        )
        # Define the two classifier layers
        self.classifier_alpha = nn.Linear(128, 30)
        self.classifier_char = nn.Linear(128, 964)
        
    def forward(self, x):
        x_image = self.image_layer(x)
        # Pass x_image through the classifiers and return both results
        output_alpha = self.classifier_alpha(x_image)
        output_char = self.classifier_char(x_image)
        return output_alpha, output_char

#----------------------------------#

# Conclusion

"""
Good job defining the model architecture! It is now ready for training. Let's do that next!
"""

'\n\n'

In [4]:
# exercise 05

"""
Training multi-output models

When training models with multiple outputs, it is crucial to ensure that the loss function is defined correctly.

In this case, the model produces two outputs: predictions for the alphabet and the character. For each of these, there are corresponding ground truth labels, which will allow you to calculate two separate losses: one incurred from incorrect alphabet classifications, and the other from incorrect character classification. Since in both cases you are dealing with a multi-label classification task, the Cross-Entropy loss can be applied each time.

Gradient descent can optimize only one loss function, however. You will thus define the total loss as the sum of alphabet and character losses.
"""

# Instructions

"""

    Calculate the alphabet classification loss and assign it to loss_alpha.
    Calculate the character classification loss and assign it to loss_char.
    Compute the total loss as the sum of the two partial losses and assign it to loss.

"""

# solution

net = Net()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.05)

for epoch in range(1):
    for images, labels_alpha, labels_char in dataloader_train:
        optimizer.zero_grad()
        outputs_alpha, outputs_char = net(images)
        # Compute alphabet classification loss
        loss_alpha = criterion(outputs_alpha, labels_alpha)
        # Compute character classification loss
        loss_char = criterion(outputs_char, labels_char)
        # Compute total loss
        loss = loss_alpha + loss_char
        loss.backward()
        optimizer.step()

#----------------------------------#

# Conclusion

"""
Well done! Defining the total loss as the sum of the two task-specific losses is a simple way to obtain the single optimization objective required by gradient descent. There are, however, other ways to combine the partial losses. Let's explore this topic in the final lesson of this course!
"""

'\n\n'

In [5]:
# exercise 06

"""
Multi-output model evaluation

In this exercise, you will practice model evaluation for multi-output models. Your task is to write a function called evaluate_model() that takes an alphabet-and-character-predicting model as input, runs the evaluation loop, and prints the model's accuracy in the two tasks.

You can assume that the function will have access to dataloader_test. The following imports have already been run for you:

import torch
from torchmetrics import Accuracy

Once you have implemented evaluate_model(), you will use it in the following exercise!
"""

# Instructions

"""

    Define acc_alpha and acc_char as multi-class Accuracy() metrics for the two outputs, alphabets and characters, with the appropriate number of classes each (there are 30 alphabets and 964 characters in the dataset).
---

    Define the evaluation loop by iterating over test images, labels_alpha, and labels_char.
    Inside the for-loop, obtain model results for the test data batch and assign them to outputs_alpha, outputs_char.
---

    Update the two accuracy metrics with the current batch's data.
---
"""

# solution

def evaluate_model(model):
    # Define accuracy metrics
    acc_alpha = Accuracy(task="multiclass", num_classes=30)
    acc_char = Accuracy(task="multiclass", num_classes=964)

    model.eval()
    with torch.no_grad():
        for images, labels_alpha, labels_char in dataloader_test:
            # Obtain model outputs
            outputs_alpha, outputs_char = model(images)
            _, pred_alpha = torch.max(outputs_alpha, 1)
            _, pred_char = torch.max(outputs_char, 1)
			# Update both accuracy metrics
            acc_alpha(pred_alpha, labels_alpha)
            acc_char(pred_char, labels_char)
    
    print(f"Alphabet: {acc_alpha.compute()}")
    print(f"Character: {acc_char.compute()}")

#----------------------------------#

# Conclusion

"""
Great job implementing the evaluating function! Let's use it to evaluate a range of models trained with different loss functions in the next exercise.
"""

'\n\n'

In [6]:
# exercise 07

"""
Loss weighting

Three versions of the two-output model for alphabet and character prediction that you built before have been trained: model_a, model_b, and model_c. For all three, the loss was defined as follows:

loss_alpha = criterion(outputs_alpha, labels_alpha)
loss_char = criterion(outputs_char, labels_char)
loss = ((1 - char_weight) * loss_alpha) + (char_weight * loss_char)

However, each of the three models was trained with a different char_weight: 0.1, 0.5, or 0.9.

Use the function you have defined in the previous, evaluate_model(), to check the accuracy of each model. Which char_weight was used to train which model?
"""

# Instructions

"""

"""

# solution
models = [model_a, model_b, model_c]

for model in models:
    evaluate_model(model)

"""Alphabet: 0.2808536887168884
Character: 0.1869264841079712
Alphabet: 0.35044848918914795
Character: 0.01783689111471176
Alphabet: 0.30363956093788147
Character: 0.23837509751319885"""
#----------------------------------#

# Conclusion

"""
Well done, that was a tricky one! Notice how the model with 90% of its focus on alphabet recognition (char_weight=0.1) does very poorly on the character task. As we increase char_weight to 0.5, the alphabet accuracy drops slightly due to the increased focus on characters, but when it reaches char_weight=0.9, the alphabet accuracy increases slightly with the character accuracy, highlighting the synergy between the tasks.
"""

'\n\n'