# Handling Images with PyTorch

**Loading Images to PyTorch**

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Convert each image to a tensor, then resize it to 128x128 so all images share one size.
train_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(128, 128)),
    ]
)

# Training dataset rooted at the image directory, with the transforms applied.
dataset_train = ImageFolder(root="data/clouds_train", transform=train_transforms)

**Displaying images**

In [None]:
import matplotlib.pyplot as plt

# Shuffled loader that yields one image per batch.
dataloader_train = DataLoader(dataset=dataset_train, batch_size=1, shuffle=True)

# Pull a single (image, label) batch and show its shape as loaded.
image, label = next(iter(dataloader_train))
print(image.shape)

# Drop the size-1 dimensions, then move the first remaining axis to the end
# (presumably channels-first -> channels-last for imshow; the printed shape confirms it).
image = image.squeeze()
image = image.permute(1, 2, 0)
print(image.shape)

plt.imshow(image)
plt.show()

**Data Augmentation**

In [None]:
# Augmentation pipeline: the random flip and rotation are listed before the
# tensor conversion and the final resize to 128x128.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=45),
        transforms.ToTensor(),
        transforms.Resize(size=(128, 128)),
    ]
)

# NOTE(review): this path differs from the "data/clouds_train" used in the first cell — confirm which is intended.
dataset_train = ImageFolder(root="data/clouds/train", transform=train_transforms)

In [None]:
"""

Compose two transformations, the first to parse the image to a tensor and the second to resize it to 128 by 128, and assign them to train_transforms.
Use ImageFolder to define dataset_train, passing it the directory path to the data ("clouds_train") and the transforms defined earlier.

"""

from torchvision.datasets import ImageFolder
from torchvision import transforms

# Two-step pipeline: tensor conversion followed by a resize to 128x128.
train_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize(size=(128, 128)),
    ]
)

# Dataset built with ImageFolder from the "clouds_train" directory.
dataset_train = ImageFolder(root="clouds_train", transform=train_transforms)

In [None]:
"""

Add two more transformations to train_transforms to perform a random horizontal flip and then a rotation by a random angle between 0 and 45 degrees.
Reshape the image tensor from the DataLoader to make it suitable for display.
Display the image

"""


# The horizontal flip and the rotation are listed ahead of the tensor
# conversion and the resize.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=45),
        transforms.ToTensor(),
        transforms.Resize(size=(128, 128)),
    ]
)

dataset_train = ImageFolder(root="clouds_train", transform=train_transforms)
dataloader_train = DataLoader(dataset=dataset_train, batch_size=1, shuffle=True)

# Take one batch, drop its size-1 dimensions, and reorder the axes for plotting.
image, label = next(iter(dataloader_train))
image = image.squeeze()
image = image.permute(1, 2, 0)

# Display the image
plt.imshow(image)
plt.show()

# Convolutional Neural Networks

**Convolutional Layer**

In [None]:
"""

In a convolutional layer, parameters are collected in one or more small grids called filters. These filters slide over the input,
performing convolution operations at each position to create a feature map.

Here, we slide a 3-by-3 filter over a 5-by-5 input to get a 3-by-3 feature map. A feature map preserves spatial patterns from the input
and uses fewer parameters than a linear layer. In a convolutional layer, we can use many filters. Each results in a separate feature map.

Finally, we apply activations to each feature map. All the feature maps combined form the output of a convolutional layer.
In PyTorch, we use nn.Conv2d to define a convolutional layer. We pass it the number of input and output feature maps.
"""

**Convolution**

In [None]:
"""

In the context of deep learning, a convolution is the dot product between two arrays: the input patch and the filter.
To compute it, we first multiply the corresponding elements of the two arrays.

We then sum all values in the resulting array, returning a single value that becomes part of the output feature map.

"""

**Zero-padding**

In [None]:
"""

Before a convolutional layer processes its input, we often add zeros around it, a technique called zero-padding.
This is done with the padding argument in the convolutional layer. It helps maintain the spatial dimensions of the input and output,
and ensures equal treatment of border pixels. Without padding, the pixels at the border would have a filter slide over them fewer times resulting in information loss.

"""

# 3 input feature maps, 32 output feature maps, 3x3 filters, padding of 1.
nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)

**Max Pooling**

In [None]:
"""

Max Pooling is another operation commonly used after convolutional layers. In it, we slide a non-overlapping window over the input.
At each position, we select the maximum value from the window to pass forward. Using a window of two-by-two as shown here halves the input's height and width.
This operation reduces the spatial dimensions of the feature maps, reducing the number of parameters and computational complexity in the network.

In PyTorch, we use nn.MaxPool2d to define a max pooling layer, passing it the kernel size

"""

# Max pooling layer with a window (kernel) size of 2.
nn.MaxPool2d(2)

**Convolutional Neural Network**

In [None]:
"""

Our Convolutional Neural Network will have two parts: a feature extractor and a classifier.

Feature extractor has convolution, activation, and max pooling layers repeated twice. The first two arguments in Conv2d are the numbers of input
and output feature maps. The first Conv2d has three input feature maps corresponding to the RGB channels. We use filters of size 3 by 3 set by the kernel_size argument
and zero-padding by setting padding to 1. For max pooling, we use the MaxPool2d layer with a window of size 2 to halve the feature map in height and width.
Finally, we flatten the feature extractor output into a vector.

Our classifier consists of a single linear layer. Its output size is the number of target classes, which is passed in as the model's argument.

The forward method applies the extractor and classifier to the input image.

"""


class Net(nn.Module):
    """Small CNN image classifier: a convolutional feature extractor plus a linear classifier.

    Args:
        num_classes: Number of target classes, i.e. the size of the output vector.
        image_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 64, which
            yields the original ``64 * 16 * 16`` classifier input size.
    """

    def __init__(self, num_classes, image_size=64):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # Two rounds of convolution -> ELU -> max pooling, then flatten to a vector.
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
        )

        # The padded 3x3 convolutions keep height and width unchanged, while each
        # of the two 2x2 max-pooling layers halves them (rounding down).
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        self.classifier = nn.Linear(64 * pooled_height * pooled_width, num_classes)

    def forward(self, x):
        """Return class scores for a batch of images shaped (batch, 3, height, width)."""
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x

In [None]:
"""

Define the feature_extractor part of the model by adding another convolutional layer with 64 output feature maps, the ELU activation,
and a max pooling layer with a window of size two; at the end, flatten the output.

Define the classifier part of the model as a single linear layer with a number of inputs that reflects an input image of 64x64 and the feature extractor defined;
the classifier should have num_classes outputs.

In the forward() method, pass the input image x first through the feature extractor and then through the classifier.

"""

class Net(nn.Module):
    """CNN image classifier made of a feature extractor and a single linear classifier.

    Args:
        num_classes: Number of outputs of the classifier layer.
        image_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 64, which
            yields the original ``64 * 16 * 16`` classifier input size.
    """

    def __init__(self, num_classes, image_size=64):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # Define feature extractor
        self.feature_extractor = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
        )

        # Define classifier: the padded convolutions preserve height and width,
        # and each of the two 2x2 poolings halves them (rounding down), so the
        # flattened vector has 64 * (height // 4) * (width // 4) entries.
        pooled_height = height // 2 // 2
        pooled_width = width // 2 // 2
        self.classifier = nn.Linear(64 * pooled_height * pooled_width, num_classes)

    def forward(self, x):
        """Return class scores for a batch of images shaped (batch, 3, height, width)."""
        # Pass input through feature extractor and classifier
        x = self.feature_extractor(x)
        x = self.classifier(x)
        return x

# Training Image Classifiers

**Image classifier training loop**

In [None]:
# Seven-class model, cross-entropy loss, and Adam with a learning rate of 0.001.
net = Net(num_classes=7)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=0.001)

for epoch in range(10):
    for images, labels in dataloader_train:
        # Reset the gradients before processing this batch.
        optimizer.zero_grad()

        # Forward pass and loss for the batch.
        outputs = net(images)
        loss = criterion(outputs, labels)

        # Backward pass, then one optimizer step.
        loss.backward()
        optimizer.step()

In [None]:
"""

Define train_transforms by composing together five transformations: a random horizontal flip, random rotation (by angle from 0 to 45 degrees),
random automatic contrast adjustment, parsing to tensor, and resizing to 64 by 64 pixels.

"""


# Define transforms: three random augmentations, then tensor conversion and a 64x64 resize.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=45),
        transforms.RandomAutocontrast(),
        transforms.ToTensor(),
        transforms.Resize(size=(64, 64)),
    ]
)

# Dataset from the "clouds_train" directory, served in shuffled batches of 16.
dataset_train = ImageFolder(root="clouds_train", transform=train_transforms)
dataloader_train = DataLoader(dataset=dataset_train, batch_size=16, shuffle=True)

In [None]:
"""

Define the model using your Net class with num_classes set to 7 and assign it to net.
Define the loss function as cross-entropy loss and assign it to criterion.
Define the optimizer as Adam, passing it the model's parameters and the learning rate of 0.001, and assign it to optimizer.
Start the training for-loop by iterating over training images and labels of dataloader_train.

"""



# Model, loss function, and optimizer.
net = Net(num_classes=7)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=0.001)

for epoch in range(3):
    # Sum of the per-batch losses seen in this epoch.
    running_loss = 0.0

    for images, labels in dataloader_train:
        optimizer.zero_grad()

        outputs = net(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Report the mean batch loss: the summed losses divided by the number of batches.
    epoch_loss = running_loss / len(dataloader_train)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

# Evaluating Image Classifiers

**Averaging multi-class metrics**

In [None]:
"""

For example, for recall, we pass average as none to get seven recall scores, one for each class, or we can set it to micro, macro, or weighted.
But when to use each of them? If our dataset is highly imbalanced, micro-average is a good choice because it takes into account the class imbalance.
Macro-averaging treats all classes equally regardless of their size. It can be a good choice if you care about performance on smaller classes,
even if those classes have fewer data points. Weighted averaging is a good choice when class imbalance is a concern and you consider errors in larger classes as more important.



When to use each:
 Micro: Imbalanced datasets
 Macro: Care about performance on small classes
 Weighted: Consider errors in larger classes as more important

"""

from torchmetrics import Recall
# One seven-class Recall metric per averaging mode: none (per class), micro, macro, weighted.
recall_per_class, recall_micro, recall_macro, recall_weighted = (
    Recall(task="multiclass", num_classes=7, average=averaging)
    for averaging in (None, "micro", "macro", "weighted")
)

**Evaluation Loop**

In [None]:
from torchmetrics import Precision, Recall

# Macro-averaged precision and recall over the seven classes.
# The keyword is `task`; it was previously misspelled as `ask`.
metric_precision = Precision(
    task="multiclass", num_classes=7, average="macro"
)

metric_recall = Recall(
    task="multiclass", num_classes=7, average="macro"
)

# NOTE(review): dataloader_test is not defined in the visible cells — confirm it is created elsewhere.
net.eval()
with torch.no_grad():
    for images, labels in dataloader_test:
        outputs = net(images)
        # Index of the largest output along dim 1 is taken as the predicted class.
        _, preds = torch.max(outputs, 1)
        # Feed this batch's predictions and labels to both metrics.
        metric_precision(preds, labels)
        metric_recall(preds, labels)

# Final scores over everything the metrics were given.
precision = metric_precision.compute()
recall = metric_recall.compute()

In [None]:
"""

Let's evaluate our cloud classifier with precision and recall to see how well it can classify the seven cloud types. In this multi-class classification task it is important how you average the scores over classes. Recall that there are four approaches:

Not averaging, and analyzing the results per class;
Micro-averaging, ignoring the classes and computing the metrics globally;
Macro-averaging, computing metrics per class and averaging them;
Weighted-averaging, just like macro but with the average weighted by class size.

"""



"""

Define precision and recall metrics calculated globally on all examples.

"""

# Define metrics: micro-averaged precision and recall for seven classes.
metric_precision, metric_recall = (
    metric_cls(task="multiclass", num_classes=7, average="micro")
    for metric_cls in (Precision, Recall)
)

# Evaluate in eval mode with gradient tracking disabled.
net.eval()
with torch.no_grad():
    for images, labels in dataloader_test:
        outputs = net(images)
        # Predicted class = index of the largest output along dim 1.
        _, preds = torch.max(outputs, dim=1)
        metric_precision(preds, labels)
        metric_recall(preds, labels)

precision = metric_precision.compute()
recall = metric_recall.compute()

print(f"Precision: {precision}")
print(f"Recall: {recall}")


"""

Change your code to compute separate recall and precision metrics for each class and average them with a simple average.

"""

# Define metrics: macro-averaged precision and recall for seven classes.
metric_precision, metric_recall = (
    metric_cls(task="multiclass", num_classes=7, average="macro")
    for metric_cls in (Precision, Recall)
)

# Evaluate in eval mode with gradient tracking disabled.
net.eval()
with torch.no_grad():
    for images, labels in dataloader_test:
        outputs = net(images)
        # Predicted class = index of the largest output along dim 1.
        _, preds = torch.max(outputs, dim=1)
        metric_precision(preds, labels)
        metric_recall(preds, labels)

precision = metric_precision.compute()
recall = metric_recall.compute()

print(f"Precision: {precision}")
print(f"Recall: {recall}")

In [None]:
"""

While aggregated metrics are useful indicators of the model's performance, it is often informative to look at the metrics per class.
This could reveal classes for which the model underperforms.

In this exercise, you will run the evaluation loop again to get our cloud classifier's precision, but this time per-class.
Then, you will map these scores to the class names to interpret them. As usual, Precision has already been imported for you. Good luck!

"""





"""

Define a precision metric appropriate for per-class results.
Calculate the precision per class by finishing the dict comprehension, iterating over the .items() of the .class_to_idx attribute of dataset_test.

"""

# Define precision metric with no averaging (average=None).
metric_precision = Precision(task="multiclass", num_classes=7, average=None)

# Evaluate in eval mode with gradient tracking disabled.
net.eval()
with torch.no_grad():
    for images, labels in dataloader_test:
        outputs = net(images)
        # Predicted class = index of the largest output along dim 1.
        _, preds = torch.max(outputs, dim=1)
        metric_precision(preds, labels)

precision = metric_precision.compute()

# Get precision per class: look up each class name's score by its index.
precision_per_class = {
    class_name: precision[class_idx].item()
    for class_name, class_idx in dataset_test.class_to_idx.items()
}
print(precision_per_class)