# Exercise 2

In this exercise, we are going to use a ResNet and a vision transformer for roof-type classification. You will learn how to apply the idea of transfer learning to image classification. First, we need to download the dataset.

In [None]:
# Import necessary modules:
import gdown
import os

# Mount your Google Drive for local file access. 
# This allows saving files, e.g., trained model files, into your Google Drive (optional):
#from google.colab import drive
#drive.mount('/content/drive')

# Obtain the dataset using its downloadable public Google Drive link and unzip it. 
# This is a good website for generating downloadable links for Google Drive files:
# https://sites.google.com/site/gdocs2direct/home

fileURL = 'https://drive.google.com/uc?export=download&id=11STs84PFHXdfZm-VhvRoM8AnxotIdbIu'
fileOut = 'dataset.zip'
gdown.download(fileURL, fileOut, quiet=False)

!unzip dataset.zip -d dataset


In [None]:
# List the top-level entries of the extracted `dataset` folder
# (a quick check of what the unzip step above produced).
os.listdir('dataset')

Import the necessary PyTorch modules.

In [None]:
import torch
import torchvision
from torchvision import transforms

# Report whether PyTorch can see a CUDA device in this runtime (prints True or False).
print(torch.cuda.is_available())

## Roof-type classification with pre-trained ResNet and transformer

Set the random seed so that results are repeatable.

In [None]:
# Seed value handed to PyTorch's random number generator below.
random_seed = 9999
# Switch the cuDNN backend off entirely.
# NOTE(review): presumably done to avoid non-deterministic cuDNN kernels; it may
# slow down GPU training — confirm this trade-off is intended.
torch.backends.cudnn.enabled = False
torch.manual_seed(random_seed)

Load the dataset using the PyTorch API.

In [None]:
# Mini-batch sizes used by the training and the validation/test data loaders below.
batch_size_train = 64
batch_size_test = 1000

def get_transform():
    """Create the image pre-processing pipelines for training and validation.

    The training pipeline applies random augmentations (resized crop to 224,
    grayscale, colour jitter, horizontal and vertical flips) before converting
    to a tensor and normalising. The validation pipeline only resizes to
    224x224, converts to a tensor and normalises.

    Returns:
        tuple: ``(train_transforms, val_transforms)``, each a
        ``transforms.Compose`` object.
    """
    # One Normalize instance is shared by both pipelines. The mean/std values
    # are the ImageNet channel statistics commonly used with pretrained
    # torchvision models.
    channel_norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])

    augmentation_steps = [
        transforms.RandomResizedCrop(224, scale=(0.7, 1.)),
        transforms.RandomGrayscale(p=0.5),
        transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        channel_norm,
    ]
    evaluation_steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        channel_norm,
    ]

    return (transforms.Compose(augmentation_steps),
            transforms.Compose(evaluation_steps))


def Get_RoofTypeImages(transforms, mode = 'train'):
    """Open one split of the roof-type image dataset as an ``ImageFolder``.

    Args:
        transforms: Transform pipeline applied to every loaded image.
        mode: ``'train'`` selects the training image folder; any other value
            selects the validation image folder.

    Returns:
        The ``torchvision.datasets.ImageFolder`` for the chosen split.
    """
    use_training_split = (mode == 'train')
    root_dir = (
        "./dataset/roof_type/Roof_Training_Images_V1/"
        if use_training_split
        else "./dataset/roof_type/satellite-images-val/"
    )
    return torchvision.datasets.ImageFolder(root=root_dir, transform=transforms)



# Build both pre-processing pipelines once; they are reused for the datasets below.
train_transforms, val_transforms = get_transform()

# Training split: augmented images, reshuffled by the loader.
train_dataset = Get_RoofTypeImages(train_transforms, 'train')
print(len(train_dataset))
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size_train,
    shuffle=True,
    num_workers=2,
)

# Validation split: deterministic pre-processing, fixed order.
# Note that the loader over the validation images is named `test_loader`.
val_dataset = Get_RoofTypeImages(val_transforms, 'val')
print(len(val_dataset))
test_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size_test,
    shuffle=False,
    num_workers=2,
)


Let's show some example images.

In [None]:
# Pull the first batch from the validation loader to have some images to display.
# `batch_idx` is the batch index produced by enumerate; `example_data` holds the
# image tensors and `example_targets` the matching class indices.
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)

In [None]:
import matplotlib.pyplot as plt


# The loader returns normalised tensors, so undo the normalisation
# (same mean/std as in get_transform) to display the images in true colour.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

# Show at most five images; the batch may hold fewer.
n_examples = min(5, len(example_data))

fig, axes = plt.subplots(1, n_examples, figsize = (20,10), squeeze=False)

for i, ax in enumerate(axes[0]):
  image = (example_data[i] * norm_std + norm_mean).clamp(0, 1)
  # imshow expects height x width x channels; tensors are channels x height x width.
  ax.imshow(image.permute(1, 2, 0).numpy())
  ax.set_title('Class index: {}'.format(example_targets[i].item()))
  ax.set_xticks([])
  ax.set_yticks([])

fig.tight_layout()

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In the first experiment, use a pretrained ResNet18 from torchvision and train the model for one epoch. In the second experiment, use a pretrained vision transformer (`vit_tiny_patch16_224`); you may need to `pip install timm` to load pretrained transformers.

In [None]:
### your code goes here
# NOTE: store the model in a variable named `network`; this cell prints it and
# the optimizer, train() and test() cells below all use that name.

# Load the pretrained model using torchvision.models
# (for the transformer experiment, load the pretrained model with timm instead)


# Get the dimension of the input for the last layer


# Change the number of classes to 3



print (network)

For training the network, we need to specify the optimizer. 

In [None]:
# Training hyper-parameters.
n_epochs = 1          # number of passes over the training set
learning_rate = 0.01  # step size passed to SGD
momentum = 0.9        # momentum factor passed to SGD
log_interval = 10     # train() prints the loss every `log_interval` batches

# SGD with momentum over all parameters of `network`.
optimizer = optim.SGD(network.parameters(), lr=learning_rate,
                      momentum=momentum)


In [None]:
# Loss histories: train() and test() each append one average loss value per call.
train_losses = []
test_losses = []

In [None]:
def train(epoch):
  """Train the global ``network`` for one pass over ``train_loader``.

  Uses the global ``optimizer`` and cross-entropy loss, prints the batch loss
  every ``log_interval`` batches, and appends the average per-sample loss of
  the epoch to the global ``train_losses`` list.

  Args:
    epoch: Epoch number; only used in the progress messages.
  """
  network.train()

  # Feed batches to whatever device the model lives on, so the loop also works
  # after the model has been moved to a GPU.
  first_param = next(network.parameters(), None)
  device = first_param.device if first_param is not None else None

  train_loss = 0

  for batch_idx, (data, target) in enumerate(train_loader):
    if device is not None:
      data, target = data.to(device), target.to(device)

    optimizer.zero_grad()

    output = network(data)

    loss = F.cross_entropy(output, target)

    # `loss` is the mean over the batch. Weight it by the batch size so that
    # dividing by the dataset size below gives a true per-sample average,
    # even when the last batch is smaller.
    train_loss += loss.item() * len(data)

    loss.backward()

    optimizer.step()

    if batch_idx % log_interval == 0:
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, batch_idx * len(data), len(train_loader.dataset),
        100. * batch_idx / len(train_loader), loss.item()))

  train_loss /= len(train_loader.dataset)
  train_losses.append(train_loss)
        

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sn

def test():
  network.eval()

  test_loss = 0
  correct = 0


  predictions = []
  ground_truths = []

  with torch.no_grad():
    for data, target in test_loader:
      output = network(data)
      test_loss += F.cross_entropy(output, target, size_average=False).item()
      pred = output.data.max(1, keepdim=True)[1]
      correct += pred.eq(target.data.view_as(pred)).sum()

      predictions.extend(pred.detach().cpu().tolist())  # Should do the detach internally
      ground_truths.extend(target.detach().cpu().tolist())

  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)

  print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(test_loader.dataset),
    100. * correct / len(test_loader.dataset)))
  

  cf = confusion_matrix(ground_truths, predictions)

  fig = plt.figure()
  ax = fig.add_subplot(111)

  df_cm = pd.DataFrame(cf, range(3), range(3))
  print (df_cm)

  #sn.set(font_scale=1.4) # for label size
  sn.heatmap(df_cm, annot=True, cmap='Blues', annot_kws={"size": 10}, fmt='g') # font size

  plt.xlabel("Class Index", fontsize=14)
  plt.ylabel("Class Index", fontsize=14)
  plt.title(" Confusion matrix", fontsize=16)
  ax.tick_params(axis='x', colors='red')
  ax.tick_params(axis='y', colors='red')


  plt.show()

In [None]:
# Run one training pass followed by one evaluation pass per epoch
# (epochs are numbered starting from 1).
for epoch in range(1, n_epochs + 1):
  
  train(epoch)
  test()