In [35]:
# run this to shorten the data import from the files
path_data = '/home/nero/Documents/Estudos/DataCamp/Python/courses/Intermediate_Deep_Learning_with_PyTorch/datasets/'
import torch

# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs available: {num_gpus}")
    
    # Get the name and properties of each GPU
    for i in range(num_gpus):
        gpu_properties = torch.cuda.get_device_properties(i)
        print(f"GPU {i} - Name: {gpu_properties.name}, "
              f"Memory Capacity: {gpu_properties.total_memory / (1024 ** 2)} MB")
else:
    print("No GPU available. Using CPU.")

# Set the default device to GPU ("cuda") if available, otherwise use CPU ("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)


torch.cuda.set_device(0)

Number of GPUs available: 1
GPU 0 - Name: NVIDIA GeForce MX110, Memory Capacity: 2002.9375 MB
cuda


In [36]:
# exercise 01

"""
PyTorch Dataset

Time to refresh your PyTorch Datasets knowledge!

Before model training can commence, you need to load the data and pass it to the model in the right format. In PyTorch, this is handled by Datasets and DataLoaders. Let's start with building a PyTorch Dataset for our water potability data.

In this exercise, you will define a class called WaterDataset to load the data from a CSV file. To do this, you will need to implement the three methods which PyTorch expects a Dataset to have:

    .__init__() to load the data,
    .__len__() to return data size,
    .__getitem()__ to extract features and label for a single sample.

The following imports that you need have already been done for you:
"""

# Instructions

"""

    In the .__init__() method, load the data from csv_path to a pandas DataFrame and assign it to df.
    Convert df to a NumPy array and assign the result to self.data.
---

    Implement the .__len__() method to return the number of data samples.
---
In the .__getitem__() method, get the label by slicing self.data to extract its last column for the index idx, similarly to how it's done for the features.
"""

# solution
import pandas as pd
from torch.utils.data import Dataset

class WaterDataset(Dataset):
    def __init__(self, csv_path):
        super().__init__()
        # Load data to pandas DataFrame
        df = pd.read_csv(csv_path)
        # Convert data to a NumPy array and assign to self.data
        self.data = df.to_numpy()
        
    # Implement __len__ to return the number of data samples
    def __len__(self):
        return self.data.shape[0]
    
    def __getitem__(self, idx):
        features = self.data[idx, :-1]
        # Assign last data column to label
        label = self.data[idx, -1]
        return features, label

#----------------------------------#

# Conclusion

"""
That's a neat Dataset! Next, let's feed it to the DataLoader to serve training data to the model!
"""

"\nThat's a neat Dataset! Next, let's feed it to the DataLoader to serve training data to the model!\n"

In [37]:
# exercise 02

"""
PyTorch DataLoader

Good job defining the Dataset class! The WaterDataset you just created is now available for you to use.

The next step in preparing the training data is to set up a DataLoader. A PyTorch DataLoader can be created from a Dataset to load data, split it into batches, and perform transformations on the data if desired. Then, it yields a data sample ready for training.

In this exercise, you will build a DataLoader based on the WaterDataset. The DataLoader class you will need has already been imported for you from torch.utils.data. Let's get to it!
"""

# Instructions

"""

    Create an instance of WaterDataset from water_train.csv, assigning it to dataset_train.
    Create dataloader_train based on dataset_train, using a batch size of two and shuffling the samples.
    Get a batch of features and labels from the DataLoader and print them.

"""

# solution
from torch.utils.data import DataLoader

# Create an instance of the WaterDataset
dataset_train = WaterDataset(path_data+'water_train.csv')

# Create a DataLoader based on dataset_train
dataloader_train = DataLoader(
    dataset_train,
    batch_size=2,
    shuffle=True,
)

# Get a batch of features and labels
features, labels = next(iter(dataloader_train))
print(features, labels)

#----------------------------------#

# Conclusion

"""
Good job! You now have the data loaded and served for model training. It's time to build the model itself!
"""

tensor([[0.4508, 0.4901, 0.3429, 0.5325, 0.5281, 0.3227, 0.4312, 0.5827, 0.3897],
        [0.5206, 0.4566, 0.3468, 0.5991, 0.7334, 0.4111, 0.4411, 0.4658, 0.4950]],
       dtype=torch.float64) tensor([1., 0.], dtype=torch.float64)


"\nGood job! You now have the data loaded and served for model training. It's time to build the model itself!\n"

In [38]:
# exercise 03

"""
PyTorch Model

You will use the OOP approach to define the model architecture. Recall that this requires setting up a model class and defining two methods inside it:

    .__init__(), in which you define the layers you want to use;

    forward(), in which you define what happens to the model inputs once it receives them; this is where you pass inputs through pre-defined layers.

Let's build a model with three linear layers and ReLU activations. After the last linear layer, you need a sigmoid activation instead, which is well-suited for binary classification tasks like our water potability prediction problem. Here's the model defined using nn.Sequential(), which you may be more familiar with:

net = nn.Sequential(
  nn.Linear(9, 16),
  nn.ReLU(),
  nn.Linear(16, 8),
  nn.ReLU(),
  nn.Linear(8, 1),
  nn.Sigmoid(),
)

Let's rewrite this model as a class!
"""

# Instructions

"""

    In the .__init__() method, define the three linear layers with dimensions corresponding to the model definition provided and assign them to self.fc1, self.fc2, and self.fc3, respectively.
    In the forward() method, pass the model input x through all the layers, remembering to add activations on top of them, similarly how it's already done for the first layer.

"""

# solution

import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Define the three linear layers
        self.fc1 = nn.Linear(9,16, bias=True, dtype=torch.float64)
        self.fc2 = nn.Linear(16,8, bias=True, dtype=torch.float64)
        self.fc3 = nn.Linear(8,1, bias=True, dtype=torch.float64)
        
    def forward(self, x):
        # Pass x through linear layers adding activations
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.sigmoid(self.fc3(x))
        return x

#----------------------------------#

# Conclusion

"""
That's a neat model definition, well done! Next, you'll build upon what you have created to revisit model training, evaluation, and optimizers. See you there!
"""

"\nThat's a neat model definition, well done! Next, you'll build upon what you have created to revisit model training, evaluation, and optimizers. See you there!\n"

In [39]:
def train_model(optimizer, net, num_epochs):
    criterion = nn.BCELoss()
    for epoch in range(num_epochs):
        running_loss = 0.
        for features, labels in dataloader_train:
            optimizer.zero_grad()
            features = features
            labels = labels
            outputs = net(features)
            loss = criterion(outputs, labels.view(-1, 1))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
    train_loss = running_loss / len(dataloader_train)
    print(f"Training loss after {num_epochs} epochs: {train_loss}")

In [40]:
# exercise 04

"""
Optimizers

It's time to explore the different optimizers that you can use for training your model.

A custom function called train_model(optimizer, net, num_epochs) has been defined for you. It takes the optimizer, the model, and the number of epochs as inputs, runs the training loops, and prints the training loss at the end.

Let's use train_model() to run a few short trainings with different optimizers and compare the results!
"""

# Instructions

"""

    Define the optimizer as Stochastic Gradient Descent.
---
    Define the optimizer as Root Mean Square Propagation (RMSprop), passing the model's parameters as its first argument.
---
    Define the optimizer as Adaptive Moments Estimation (Adam), setting the learning rate to 0.001.

"""

# solution

import torch.optim as optim

net = Net()

# Define the SGD optimizer
optimizer = optim.SGD(net.parameters(), lr=0.001)

train_model(
    optimizer=optimizer,
    net=net,
    num_epochs=10,
)

#----------------------------------#

net = Net()

# Define the RMSprop optimizer
optimizer = optim.RMSprop(net.parameters(), lr=0.001)

train_model(
    optimizer=optimizer,
    net=net,
    num_epochs=10,
)

#----------------------------------#

net = Net()

# Define the Adam optimizer
optimizer = optim.Adam(net.parameters(), lr=0.001)

train_model(
    optimizer=optimizer,
    net=net,
    num_epochs=10,
)

#----------------------------------#

# Conclusion

"""
Great job! Model training has some randomness to it and each time you get slightly different results, but it's very likely that you saw RMSprop and Adam decreasing the loss more than a simple SGD even after just 10 training epochs.
"""

Training loss after 10 epochs: 0.6739504223191554
Training loss after 10 epochs: 0.6619711513373563
Training loss after 10 epochs: 0.6688601887595025


"\nGreat job! Model training has some randomness to it and each time you get slightly different results, but it's very likely that you saw RMSprop and Adam decreasing the loss more than a simple SGD even after just 10 training epochs.\n"

In [41]:
# Create an instance of the WaterDataset
dataset_test = WaterDataset(path_data+'water_test.csv')

# Create a DataLoader based on dataset_train
dataloader_test = DataLoader(
    dataset_train,
    batch_size=2,
    shuffle=True,
)

In [49]:
# exercise 05

"""
Model evaluation

With the training loop sorted out, you have trained the model for 1000 epochs, and it is available to you as net. You have also set up a test_dataloader in exactly the same way as you did with train_dataloader before—just reading the data from the test rather than the train directory.

You can now evaluate the model on test data. To do this, you will need to write the evaluation loop to iterate over the batches of test data, get the model's predictions for each batch, and calculate the accuracy score for it. Let's do it!
"""

# Instructions

"""

    Set up the evaluation metric as Accuracy for binary classification and assign it to acc.
    For each batch of test data, get the model's outputs and assign them to outputs.
    After the loop, compute the total test accuracy and assign it to test_accuracy.

"""

# solution

from torchmetrics import Accuracy

# Set up binary accuracy metric
acc = Accuracy(task='binary')

net.eval()
with torch.no_grad():
    for features, labels in dataloader_test:
        # Get predicted probabilities for test data batch
        outputs = net(features)
        preds = (outputs >= 0.5).float()
        acc(preds, labels.view(-1, 1))

# Compute total test accuracy
test_accuracy = acc.compute()
print(f"Test accuracy: {test_accuracy}")

#----------------------------------#

# Conclusion

"""
Great job evaluating the model! And not a bad result, too, considering the small dataset size and the simplicity of the model architecture!
"""

Test accuracy: 0.6007957458496094


'\nGreat job evaluating the model! And not a bad result, too, considering the small dataset size and the simplicity of the model architecture!\n'

In [51]:
# exercise 06

"""
Initialization and activation

The problems of unstable (vanishing or exploding) gradients are a challenge that often arises in training deep neural networks. In this and the following exercises, you will expand the model architecture that you built for the water potability classification task to make it more immune to those problems.

As a first step, you'll improve the weights initialization by using He (Kaiming) initialization strategy. To do so, you will need to call the proper initializer from the torch.nn.init module, which has been imported for you as init. Next, you will update the activations functions from the default ReLU to the often better ELU.
"""

# Instructions

"""

    Call the He (Kaiming) initializer on the weight attribute of the second layer, fc2, similarly to how it's done for fc1.
    Call the He (Kaiming) initializer on the weight attribute of the third layer, fc3, accounting for the different activation function used in the final layer.
    Update the activation functions in the forward() method from relu to elu.

"""
import torch.nn.init as init
# solution

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(9, 16).cuda()
        self.fc2 = nn.Linear(16, 8).cuda()
        self.fc3 = nn.Linear(8, 1).cuda()
        
        # Apply He initialization
        init.kaiming_uniform_(self.fc1.weight)
        init.kaiming_uniform_(self.fc2.weight)
        init.kaiming_uniform_(self.fc3.weight,nonlinearity="sigmoid")

    def forward(self, x):
        # Update ReLU activation to ELU
        x = nn.functional.elu(self.fc1(x.cuda()))
        x = nn.functional.elu(self.fc2(x.cuda()))
        x = nn.functional.sigmoid(self.fc3(x.cuda()))
        return x

#----------------------------------#

# Conclusion

"""
Good job! With the weights properly initialized and the activations updated, let's see if you remember why we are switching to ELU in the first place!
"""

"\nGood job! With the weights properly initialized and the activations updated, let's see if you remember why we are switching to ELU in the first place!\n"

In [66]:
# exercise 07

"""
Batch Normalization

As a final improvement to the model architecture, let's add the batch normalization layer after each of the two linear layers. The batch norm trick tends to accelerate training convergence and protects the model from vanishing and exploding gradients issues.

Both torch.nn and torch.nn.init have already been imported for you as nn and init, respectively. Once you implement the change in the model architecture, be ready to answer a short question on how batch normalization works!
"""

# Instructions

"""

    Add two BatchNorm1d layers assigning them to self.bn1 and self.bn2.
---

    In the forward() method, pass x through the second set of layers: the linear layer, the batch norm layer, and the activations, similarly to how it's done for the first set of layers.
---

"""

# solution

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(9, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, 1)
        # Add two batch normalization layers
        self.bn1 = nn.BatchNorm1d(16)
        self.bn2 = nn.BatchNorm1d(8)
        
        init.kaiming_uniform_(self.fc1.weight)
        init.kaiming_uniform_(self.fc2.weight)
        init.kaiming_uniform_(self.fc3.weight, nonlinearity="sigmoid") 
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = nn.functional.elu(x)

        # Pass x through the second set of layers
        x = self.fc2(x)
        x = self.bn2(x)
        x = nn.functional.elu(x)

        x = nn.functional.sigmoid(self.fc3(x))
        return x

#----------------------------------#

# Conclusion

"""
That's correct! By learning how to optimally re-scale the next layer's inputs, batch normalization mitigates the unstable gradients problems! Congratulations on finishing Chapter 1 of this course! See you in Chapter 2, where we will build convolutional neural networks (CNNs)—models designed for image processing!
"""

"\nThat's correct! By learning how to optimally re-scale the next layer's inputs, batch normalization mitigates the unstable gradients problems! Congratulations on finishing Chapter 1 of this course! See you in Chapter 2, where we will build convolutional neural networks (CNNs)—models designed for image processing!\n"