In [19]:
# run this to shorten the data import from the files
import os
# Parent of the current working directory; both paths below are derived from it.
_parent_dir = os.path.dirname(os.getcwd())

# Parent directory with a trailing slash, ready for string concatenation.
cwd = f"{_parent_dir}/"

# Location of the datasets folder that sits next to the notebook's directory.
path_data = os.path.join(_parent_dir, "datasets/")


# The number of classes

To determine whether to use a binary or a multi-class model for a classification problem, you need to know the number of classes in the data. The Torch dataset is available in your workspace as train_dataset.

Considering the number of classes in train_dataset, which image classification task would be appropriate?

```python
train_dataset.classes
['cat', 'dog']
```


### Possible answers
    
    Binary (2 classes) {Answer}
    
    Multi-Class (3 classes)
    
    Multi-Class (5 classes)

In [20]:
import torch
import torch.nn as nn

In [21]:
# exercise 01

"""
Binary classification model

As a deep learning practitioner, one of your main tasks is training models for image classification. You often encounter binary classification, where you need to distinguish between two classes. To streamline your workflow and ensure reusability, you have decided to create a template for a binary image classification CNN model, which can be applied to future projects.

The packages torch and torch.nn as nn have been imported. All image sizes are 64x64 pixels.
"""

# Instructions

"""


    Create a convolutional layer with 3 channels, 16 output channels, kernel size of 3, stride of 1, and padding of 1.

    Create a fully connected layer with an input size of 16x32x32 and a number of classes equal to 1; include only the values in the provided order (16*32*32, 1).

    Create a sigmoid activation function.


"""

# solution

class BinaryImageClassifier(nn.Module):
    """Template CNN for binary classification of 64x64 RGB images.

    Architecture: one 3x3 convolution (3 -> 16 channels, padding 1 so the
    spatial size is kept), ReLU, 2x2 max pooling (64x64 -> 32x32), flatten,
    a fully connected layer producing a single value, and a sigmoid.
    """

    def __init__(self):
        super(BinaryImageClassifier, self).__init__()

        # Create a convolutional layer
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

        # Create a fully connected layer; 16*32*32 is the flattened size of the
        # 16 feature maps after pooling halves 64x64 down to 32x32
        self.fc = nn.Linear(16*32*32, 1)

        # Create an activation function
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: Tensor of shape (batch, 3, 64, 64). Other spatial sizes do not
                match the fixed input size of ``self.fc``.

        Returns:
            Tensor of shape (batch, 1) with sigmoid outputs in [0, 1].
        """
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.fc(self.flatten(x))
        return self.sigmoid(x)

#----------------------------------#

# Conclusion

"""
Great work! Notice that the fully connected (fc) layer has an input size of 16x32x32. This is due to the original image input size being 64x64. After passing through the max pooling layer, the spatial dimensions are reduced in half, resulting in 32x32.
"""

'\nGreat work! Notice that the fully connected (fc) layer has an input size of 16x32x32. This is due to the original image input size being 64x64. After passing through the max pooling layer, the spatial dimensions are reduced in half, resulting in 32x32.\n'

In [22]:
# exercise 02

"""
Multi-class classification model

With a template for a binary classification model in place, you can now build on it to design a multi-class classification model. The model should handle different numbers of classes via a parameter, allowing you to tailor the model to a specific multi-class classification task in the future.

The packages torch and torch.nn as nn have been imported. All image sizes are 64x64 pixels.
"""

# Instructions

"""


    Define the __init__ method including self and num_classes as parameters.

    Create a fully connected layer with the input size of 16*32*32 and the number of classes num_classes as output.

    Create an activation function softmax with dim=1.

"""

# solution

class MultiClassImageClassifier(nn.Module):
    """Template CNN for multi-class classification of 64x64 RGB images.

    Same layout as the binary template (conv -> ReLU -> 2x2 max pool ->
    flatten -> fully connected), but the final layer emits ``num_classes``
    values which are turned into probabilities with a softmax over dim 1.

    Args:
        num_classes: Number of output classes.
    """

    # Define the init method
    def __init__(self, num_classes):
        super(MultiClassImageClassifier, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()

        # Create a fully connected layer; 16*32*32 is the flattened size of the
        # 16 feature maps after pooling halves 64x64 down to 32x32
        self.fc = nn.Linear(16*32*32, num_classes)

        # Create an activation function
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: Tensor of shape (batch, 3, 64, 64). Other spatial sizes do not
                match the fixed input size of ``self.fc``.

        Returns:
            Tensor of shape (batch, num_classes); each row is a softmax
            distribution (non-negative, sums to 1).
        """
        x = self.maxpool(self.relu(self.conv1(x)))
        x = self.fc(self.flatten(x))
        return self.softmax(x)

#----------------------------------#

# Conclusion

"""
Great job! You now have the foundations for both binary image and multi-class image classification tasks.
"""

'\nGreat job! You now have the foundations for both binary image and multi-class image classification tasks.\n'

# RGB, grayscale, or alpha?

Your team obtained a new image dataset for training. Before designing a model, you want to check whether the images are RGB, grayscale, or include an alpha (transparency) channel. Assuming all the images in the dataset have the same format, you only need to check one sample image.

The torchvision.transforms.functional module has been imported as F. The sample image to check has been loaded as image.

What kind of images does the dataset consist of?

```python
from PIL import Image
from torchvision.transforms import functional as F

image = Image.open('image.png')
F.get_image_num_channels(image)
3
```

### Possible answers
    
    Grayscale
    
    RGB {Answer}
    
    Alpha

In [25]:
class CNNModel(nn.Module):
    """Minimal CNN that holds a single convolutional layer, ``conv1``."""

    def __init__(self):
        super().__init__()

        # 3 input channels -> 16 feature maps, using a 3x3 kernel with
        # stride 1 and padding 1
        self.conv1 = nn.Conv2d(
            in_channels=3,
            out_channels=16,
            kernel_size=3,
            stride=1,
            padding=1,
        )

In [26]:
# exercise 03

"""
Adding a new convolutional layer

Your project lead provided you with a new CNN model. Let's take a look at the model's architecture and append a new convolutional layer to it.

The model is available as CNNModel. The packages torch and torch.nn as nn have been imported.
"""

# Instructions

"""


    Instantiate a model from the CNNModel class and access the convolutional layers.

    Create a new convolutional layer with in_channels equal to existing layer's out_channels, out_channels set to 32, and stride and padding both set to 1, and assign it to conv2.

    Append the new layer to the model, calling it "conv2".

"""

# solution

# Create a model
model = CNNModel()
print("Original model: ", model)

# Create a new convolutional layer. in_channels is read from the existing
# layer's out_channels (rather than hard-coded) so the two layers stay
# compatible if conv1 is ever changed.
conv2 = nn.Conv2d(model.conv1.out_channels, 32, kernel_size=3, stride=1, padding=1)

# Append the new layer to the model
model.add_module('conv2', conv2)
print("Extended model: ", model)

#----------------------------------#

# Conclusion

"""
Great job! Knowing the model architecture is a great way to learn about the model complexity.
"""

Original model:  CNNModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
Extended model:  CNNModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)


'\nGreat job! Knowing the model architecture is a great way to learn about the model complexity.\n'

In [27]:
# exercise 04

"""
Creating a sequential block

You decided to redesign your binary CNN model template by creating a block of convolutional layers. This will help you stack multiple layers sequentially. With this improved model, you will be able to easily design various CNN architectures.

torch and torch.nn as nn have been imported.
"""

# Instructions

"""

    In the __init__() method, define a block of convolutional layers and assign it to self.conv_block.

    In the forward() pass, pass the inputs through the convolutional block you defined.

"""

# solution

class BinaryImageClassification(nn.Module):
    """Two stacked convolution + ReLU stages wrapped in one sequential block.

    The stages are stored in ``self.conv_block`` (an ``nn.Sequential``) and
    ``forward`` runs the input through that block and returns the result.
    """

    def __init__(self):
        super().__init__()

        # Layers listed in execution order, then wrapped in a Sequential
        stages = [
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        ]
        self.conv_block = nn.Sequential(*stages)

    def forward(self, x):
        """Return the output of the convolutional block applied to ``x``."""
        return self.conv_block(x)

#----------------------------------#

# Conclusion

"""
Congratulations! With this knowledge, you can now efficiently stack multiple layers to build complex CNN architectures for various image processing tasks.
"""

'\nCongratulations! With this knowledge, you can now efficiently stack multiple layers to build complex CNN architectures for various image processing tasks.\n'

In [None]:
# exercise 05

"""
Save and load a model

A manufacturing company wants to classify their products based on images and determine the appropriate shipping packaging. Having trained a highly accurate model in PyTorch, you now plan to save the model and its pre-trained weights for future use and to share it with your team, making sure they can seamlessly load it.

torch and torch.nn as nn have been imported. The pre-trained model object is available in your workspace as model, and its architecture as ManufacturingCNN.
"""

# Instructions

"""


    Save the pre-trained model as ModelCNN.pth remembering to save the weights, not only the architecture.

    Create a model instance called loaded_model from the class ManufacturingCNN().

    Load ModelCNN.pth weights to loaded_model by passing the weights to .load_state_dict().

"""

# solution

# Save the model: state_dict() holds the learned weights, not the class definition
torch.save(model.state_dict(), 'ModelCNN.pth')

# Create a new model
loaded_model = ManufacturingCNN()

# Load the saved model. map_location='cpu' keeps the file loadable on machines
# without a GPU in case the weights were saved from a GPU-resident model;
# load_state_dict then copies the values into loaded_model's own parameters.
loaded_model.load_state_dict(torch.load('ModelCNN.pth', map_location='cpu'))
print(loaded_model)

#----------------------------------#

# Conclusion

"""
Great! Saving and later re-using models make your workflow more efficient.
"""

'\n\n'

In [None]:
# exercise 06

"""
Loading a pre-trained model

You are building an application to label images from social media. This task requires high accuracy and speed. You are going to use a pre-trained ResNet18 model to infer image classes.
"""

# Instructions

"""

    Import resnet18 and ResNet18_Weights from torchvision.models.

    Instantiate the model using resnet18(), setting the weights parameter to weights.

    Set model to the evaluation mode.

    Initialize the input transforms and assign them to transform.

"""

# solution

# Import resnet18 model
from torchvision.models import resnet18, ResNet18_Weights

# Select the default pre-trained weights
weights = ResNet18_Weights.DEFAULT

# Initialize the input transforms bundled with those weights
transform = weights.transforms()

# Build ResNet18 from the weights and put it in evaluation mode
# (eval() returns the module itself, so the call can be chained)
model = resnet18(weights=weights).eval()

#----------------------------------#

# Conclusion

"""
Well done! Using DEFAULT weights is very convenient as it will provide you with the latest version of weights.
"""

'\n\n'

In [None]:
# exercise 07

"""
Image classification with ResNet

You have created the model from the pre-trained ResNet18. Now, it is time to test it on an example image.

You are going to apply preprocessing transforms to an image and classify it. You will need to use the softmax() layer followed by the argmax(), since ResNet18 has been trained on a multi-class dataset.

You have selected the following image to use for prediction testing: A cup of espresso

The preprocessing transform is saved as transform. The PIL image is uploaded as img.
"""

# Instructions

"""

    Apply the preprocessing transforms to the image and reshape it using .unsqueeze(0) to add the batch dimension.

    Pass the image through the model, reshape the output using .squeeze(0) to remove the batch dimension, and add a softmax() layer.

    Apply argmax() to select the highest-probability class.

"""

# solution

# Apply preprocessing transforms and add the batch dimension.
# The transform object is named `transform` in this notebook.
batch = transform(img).unsqueeze(0)

# Apply model with softmax layer; gradients are not needed for inference
with torch.no_grad():
    prediction = model(batch).squeeze(0).softmax(0)

# Apply argmax to pick the most probable class, then look up its score and label
class_id = prediction.argmax().item()
score = prediction[class_id].item()
category_name = weights.meta["categories"][class_id]
print(category_name)

#----------------------------------#

# Conclusion

"""
Well done! With the ResNet pre-trained model you are able to classify images without training and without any additional labeled data!
"""

'\n\n'