# <center> Object recognition and computer vision
# <center> 2021/2022 
# <center> Assignment 3: Image classification         

### Realized by: Siwar Mhadhbi

---

In this notebook, we describe our methodology and the steps we took to classify bird images and to improve the accuracy of our final model.

In [None]:
# Mount Google Drive at /content/drive so that the dataset (and, later, the
# saved models) can be read from and written to Drive directly.

from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


# 1. Data preparation

In [None]:
# Root folder of the dataset on the mounted Drive; the cells below expect the
# sub-folders train_images/, val_images/ and test_images/ inside it.
path_data = '/content/drive/MyDrive/bird_dataset'
# Number of target classes; used as the output size of every model's last layer.
nclasses = 20

# 1.1. Validation set distribution

In [None]:
import shutil
import os

# Top up every class's validation folder so that it holds ~20% of that class's
# images (train + val together), by moving files out of the training folder.
# The cell is safe to re-run: once the 20% target is reached nothing is moved.
train_root = os.path.join(path_data, 'train_images')
val_root = os.path.join(path_data, 'val_images')

for categ in sorted(os.listdir(train_root)):

  path_categ_train = os.path.join(train_root, categ)
  path_categ_val = os.path.join(val_root, categ)

  # Ignore stray files (e.g. hidden OS files) that are not class folders.
  if not os.path.isdir(path_categ_train):
    continue
  # A class may not have a validation folder yet: create it instead of crashing.
  os.makedirs(path_categ_val, exist_ok=True)

  # os.listdir returns entries in arbitrary order; sorting makes the selection
  # of moved files reproducible across runs and machines.
  train_files = sorted(os.listdir(path_categ_train))

  nbr_training_images = len(train_files)
  nbr_val_images = len(os.listdir(path_categ_val))
  nbr_images_toTransfer = int((nbr_training_images + nbr_val_images) * 0.2 - nbr_val_images)

  # A non-positive count means the validation folder is already large enough.
  for file in train_files[:max(nbr_images_toTransfer, 0)]:
    shutil.move(os.path.join(path_categ_train, file), os.path.join(path_categ_val, file))

# 1.2. Crop birds using Faster R-CNN

In [None]:
import PIL.Image as Image
import torchvision
import torch
import numpy as np
import torchvision.transforms as transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained detector, used for inference only (eval mode, no gradients).
faster_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
faster_rcnn.eval().to(device)

transform = transforms.Compose([
    transforms.ToTensor()
  ])

# Minimum detection score required to accept a bird box.
confidence = 0.7
# Label id that the detector assigns to birds.
label_birds = 16

# WARNING: images are overwritten in place with their crop, so running this
# cell a second time crops the already-cropped images again. Keep a backup of
# the raw dataset if the cell may have to be re-run.
with torch.no_grad():  # inference only: avoids building the autograd graph
  for dirs in sorted(os.listdir(path_data)):
    path_split = os.path.join(path_data, dirs)
    if not os.path.isdir(path_split):
      continue  # skip files stored next to the train/val/test folders

    for categ_dir in sorted(os.listdir(path_split)):
      path_categ = os.path.join(path_split, categ_dir)
      if not os.path.isdir(path_categ):
        continue

      for img in sorted(os.listdir(path_categ)):
        path_image = os.path.join(path_categ, img)

        # Force 3 channels: grayscale / RGBA files would otherwise reach the
        # detector with an unexpected number of channels.
        with Image.open(path_image) as raw_image:
          pil_image = raw_image.convert('RGB')

        image = transform(pil_image).to(device)
        outputs = faster_rcnn(image.unsqueeze(0))[0]

        labels = outputs['labels'].cpu().numpy()
        scores = outputs['scores'].cpu().numpy()

        bird_indices = np.flatnonzero(labels == label_birds)
        if bird_indices.size == 0:
          continue  # no bird detected: keep the original image

        # Keep the highest-scoring bird detection.
        index = int(bird_indices[scores[bird_indices].argmax()])
        if scores[index] < confidence:
          continue

        x_min, y_min, x_max, y_max = (int(v) for v in outputs['boxes'][index].cpu().numpy())
        if x_max <= x_min or y_max <= y_min:
          continue  # degenerate box after integer truncation: nothing to crop

        # Crop the decoded PIL image directly instead of converting the
        # float tensor back to an image.
        crop_image = pil_image.crop((x_min, y_min, x_max, y_max))
        crop_image.save(path_image)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

"\nfor dirs in os.listdir(path_data):\n    for dir in os.listdir(os.path.join(path_data, dirs)):\n      for img in os.listdir(os.path.join(os.path.join(path_data, dirs),dir)):\n        path_image = os.path.join(os.path.join(os.path.join(path_data, dirs),dir),img)\n        image = Image.open(path_image)\n        image = transform(image).to(device)\n        im = image.unsqueeze(0)\n\n        outputs = faster_rcnn(im)\n\n        label_birds = 16\n        index = None\n        if label_birds in outputs[0]['labels'].cpu().numpy():\n          index = list(outputs[0]['labels'].cpu().numpy()).index(label_birds)\n          max_score_bird = outputs[0]['scores'].detach().cpu().numpy()[index]\n          if max_score_bird >= 0.7 :\n            pred_boxes = outputs[0]['boxes'].detach().cpu().numpy()\n            bbox = pred_boxes[index].astype(np.int32)\n            im = transforms.ToPILImage()(image)\n            crop_image = im.crop(bbox)\n            crop_image.save(path_image)\n"

# 1.3. Data augmentation

In [None]:
image_size = 300
# Values shared by the training and the evaluation pipelines.
_resize_to = (image_size, image_size)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random flip / rotation, then tensor + normalization.
data_transforms = transforms.Compose([
    transforms.Resize(_resize_to),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(30, fill=None),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
])

# Evaluation pipeline: same resize and normalization, without any random step.
data_transforms_test = transforms.Compose([
    transforms.Resize(_resize_to),
    transforms.ToTensor(),
    transforms.Normalize(mean=_norm_mean, std=_norm_std),
])

# 2. Modeling

* Resnet-50
* VGG-16
* Densenet-161


In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torch.optim import lr_scheduler

# 2.1. Model 1. ResNet-50

In [None]:
# Instantiate torchvision's ResNet-50 with its pretrained weights.
model_resnet = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [None]:
# Freeze the first six top-level children of the network; the remaining ones
# stay trainable.
# NOTE(review): for torchvision's ResNet these should be conv1, bn1, relu,
# maxpool, layer1 and layer2 -- confirm against the installed version.
n_frozen_children = 6
for frozen_child in list(model_resnet.children())[:n_frozen_children]:
  for weights in frozen_child.parameters():
    weights.requires_grad = False

# Replace the final fully-connected layer by a small two-layer classifier
# that outputs one score per class.
num_ftrs = model_resnet.fc.in_features
n_units = 64
head_layers = [
    torch.nn.Linear(num_ftrs, n_units),
    torch.nn.ReLU(),
    torch.nn.Linear(n_units, nclasses),
]
model_resnet.fc = torch.nn.Sequential(*head_layers)

model_resnet = model_resnet.to(device)

# 2.2. Model 2. VGG-16

In [None]:
# Instantiate torchvision's VGG-16 with its pretrained weights.
model_vgg = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


  0%|          | 0.00/528M [00:00<?, ?B/s]

In [None]:
# Freeze every pretrained parameter of the network.
# The previous loop iterated over model_vgg.children() with a threshold of 7,
# but torchvision's VGG only has three top-level children (features, avgpool,
# classifier): the `break` was never reached and all parameters were frozen.
# This version states that behaviour explicitly, so results are unchanged.
# NOTE(review): if the intent was to freeze only the first convolutional
# layers, iterate over model_vgg.features.children() instead -- this makes far
# more parameters trainable and increases GPU memory use.
for param in model_vgg.parameters():
  param.requires_grad = False

# Swap the last classifier layer for a new one with `nclasses` outputs; being
# newly created, it is the only trainable layer.
num_ftrs = model_vgg.classifier[-1].in_features
model_vgg.classifier[-1] = nn.Linear(num_ftrs, nclasses)

model_vgg.to(device);

# 2.3. Model 3. DenseNet-161

In [None]:
# Instantiate torchvision's DenseNet-161 with its pretrained weights.
model_densenet = models.densenet161(pretrained=True)

Downloading: "https://download.pytorch.org/models/densenet161-8d451a50.pth" to /root/.cache/torch/hub/checkpoints/densenet161-8d451a50.pth


  0%|          | 0.00/110M [00:00<?, ?B/s]

In [None]:
# Freeze every pretrained parameter of the network.
# The previous loop iterated over model_densenet.children() with a threshold
# of 7, but torchvision's DenseNet only has two top-level children (features,
# classifier): the `break` was never reached and all parameters were frozen.
# This version states that behaviour explicitly, so results are unchanged.
# NOTE(review): if the intent was to freeze only the first blocks, iterate over
# model_densenet.features.children() instead -- this makes far more parameters
# trainable and increases GPU memory use.
for param in model_densenet.parameters():
  param.requires_grad = False

# Replace the classifier by a new layer with `nclasses` outputs; being newly
# created, it is the only trainable layer.
num_ftrs = model_densenet.classifier.in_features
model_densenet.classifier = nn.Linear(num_ftrs, nclasses)

model_densenet.to(device);

# 3. Pre-training



* Batch size : `batch_size = 64`
* Learning rate : `lr = 0.005`
* Momentum : `momentum = 0.8`
* Number of epochs for training : `epochs = 100`

In [None]:
import torch.optim as optim
from torchvision import datasets

use_cuda = torch.cuda.is_available()
# Seed PyTorch's RNG (shuffling, random augmentations, new layer init).
seed = 1
torch.manual_seed(seed)

batch_size = 64

# Training data: shuffled and randomly augmented.
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(path_data + '/train_images',
                         transform=data_transforms),
    batch_size=batch_size, shuffle=True, num_workers=1)

# Validation data: deterministic preprocessing only. Using the random
# flip/rotation pipeline here would make the validation accuracy noisy, and it
# drives both the LR scheduler and the choice of checkpoint.
val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(path_data + '/val_images',
                         transform=data_transforms_test),
    batch_size=batch_size, shuffle=False, num_workers=1)

In [None]:
# Training hyper-parameters (read as globals by train / validation / training)

# Learning rate given to the SGD optimizer.
lr = 0.005
# Momentum given to the SGD optimizer.
momentum = 0.8 
# The training loss is printed every `log_interval` batches.
log_interval = 10 
# Number of training epochs; a checkpoint is written after each one.
epochs = 100
# Sub-folder of Saved_models/ in which the checkpoints are stored.
experiment = 'experiment'

def train(epoch, model,optimizer):
    """Run one training epoch of `model` over the global `train_loader`.

    Args:
        epoch: epoch number, only used in the progress messages.
        model: network to optimize; it is switched to training mode.
        optimizer: optimizer that updates the parameters of `model`.

    The mean cross-entropy loss of the current batch is printed every
    `log_interval` batches.
    """
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    n_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), n_samples,
            100. * batch_idx / n_batches, loss.item()))

def validation(model):
    """Evaluate `model` on the global `val_loader`.

    Prints the average per-sample cross-entropy loss and the accuracy.

    Args:
        model: network to evaluate; it is switched to evaluation mode.

    Returns:
        The validation accuracy in percent, as a Python float.
    """
    model.eval()
    # Sum the per-sample losses so that dividing by the number of samples
    # gives a true average (batch means divided by the sample count would
    # under-report the loss by roughly a factor of the batch size).
    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
    validation_loss = 0.0
    correct = 0

    # No gradients are needed for evaluation; this saves memory and time.
    with torch.no_grad():
        for data, target in val_loader:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            validation_loss += criterion(output, target).item()
            # Predicted class = index of the largest score.
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()

    n_samples = len(val_loader.dataset)
    validation_loss /= n_samples
    validation_accuracy = 100. * correct / n_samples
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, n_samples, validation_accuracy))
    return validation_accuracy

# 4. Training

In [None]:
def training(model,name):
    """Fine-tune `model` for `epochs` epochs and save a checkpoint per epoch.

    Args:
        model: network to train (moved to the GPU when one is available).
        name: short model name used in the checkpoint file names; checkpoints
            are written as model_<name><epoch>.pth in the experiment folder.

    The learning rate is multiplied by 0.8 whenever the validation accuracy
    has not improved for 5 consecutive epochs.
    """
    if use_cuda:
        print('Using GPU')
        model.cuda()
    else:
        print('Using CPU')

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # mode='max' because the monitored quantity is an accuracy.
    plateau_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.8, patience=5)

    # Create the checkpoint folder up front; otherwise the first torch.save
    # fails after a whole epoch of training.
    save_dir = '/content/drive/MyDrive/Saved_models/' + experiment
    os.makedirs(save_dir, exist_ok=True)

    for epoch in range(1, epochs + 1):
      train(epoch, model,optimizer)
      validation_accuracy = validation(model)
      plateau_scheduler.step(validation_accuracy)
      model_file = save_dir + '/model_' +name+ str(epoch) + '.pth'

      torch.save(model.state_dict(), model_file)
      print('Saved model to ' + model_file + '.\n')

# 4.1. Training Model 1. ResNet-50

In [None]:
# Fine-tune the ResNet-50 model; checkpoints are saved as model_resnet<epoch>.pth.
training(model=model_resnet, name='resnet')

Using GPU

Validation set: Average loss: 0.0511, Accuracy: 29/230 (13%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_resnet1.pth.


Validation set: Average loss: 0.0487, Accuracy: 74/230 (32%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_resnet2.pth.


Validation set: Average loss: 0.0432, Accuracy: 112/230 (49%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_resnet3.pth.


Validation set: Average loss: 0.0354, Accuracy: 133/230 (58%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_resnet4.pth.


Validation set: Average loss: 0.0280, Accuracy: 145/230 (63%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_resnet5.pth.


Validation set: Average loss: 0.0222, Accuracy: 152/230 (66%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_resnet6.pth.


Validation set: Average loss: 0.0177, Accuracy: 172/230 (75%)
Saved model to /content/drive/MyDrive/Saved_models/experime

# 4.2. Training Model 2. VGG-16

In [None]:
# Fine-tune the VGG-16 model; checkpoints are saved as model_vgg<epoch>.pth.
training(model=model_vgg, name='vgg')

Using GPU

Validation set: Average loss: 0.0327, Accuracy: 124/230 (54%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_vgg1.pth.


Validation set: Average loss: 0.0228, Accuracy: 157/230 (68%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_vgg2.pth.


Validation set: Average loss: 0.0199, Accuracy: 157/230 (68%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_vgg3.pth.


Validation set: Average loss: 0.0180, Accuracy: 169/230 (73%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_vgg4.pth.


Validation set: Average loss: 0.0179, Accuracy: 166/230 (72%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_vgg5.pth.


Validation set: Average loss: 0.0165, Accuracy: 171/230 (74%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_vgg6.pth.


Validation set: Average loss: 0.0154, Accuracy: 167/230 (73%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_vgg7.pt

# 4.3. Training Model 3. DenseNet-161

In [None]:
# Fine-tune the DenseNet-161 model; checkpoints are saved as model_densenet<epoch>.pth.
training(model=model_densenet, name='densenet')

Using GPU

Validation set: Average loss: 0.0455, Accuracy: 74/230 (32%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_densenet1.pth.


Validation set: Average loss: 0.0376, Accuracy: 128/230 (56%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_densenet2.pth.


Validation set: Average loss: 0.0323, Accuracy: 149/230 (65%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_densenet3.pth.


Validation set: Average loss: 0.0274, Accuracy: 164/230 (71%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_densenet4.pth.


Validation set: Average loss: 0.0245, Accuracy: 165/230 (72%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_densenet5.pth.


Validation set: Average loss: 0.0218, Accuracy: 178/230 (77%)
Saved model to /content/drive/MyDrive/Saved_models/experiment/model_densenet6.pth.


Validation set: Average loss: 0.0206, Accuracy: 176/230 (77%)
Saved model to /content/drive/MyDrive/Saved_mo

# 5. Evaluation

In [None]:
from tqdm import tqdm

use_cuda = torch.cuda.is_available()

# Models of the ensemble and the checkpoint to restore for each of them
# (file suffix = model name followed by the epoch number).
nn_models = [model_resnet, model_densenet] # ,model_vgg]#
name_models = ['resnet93','densenet68'] # ,'vgg51']
for model, name_model in zip(nn_models, name_models):
  model_file = '/content/drive/MyDrive/Saved_models/' + experiment + '/model_' + name_model + '.pth'
  # map_location lets checkpoints saved on a GPU load on a CPU-only runtime.
  state_dict = torch.load(model_file, map_location='cuda' if use_cuda else 'cpu')
  model.load_state_dict(state_dict)
  model.eval()

  if use_cuda:
      print('Using GPU')
      model.cuda()
  else:
      print('Using CPU')

test_dir = path_data + '/test_images/mistery_category'

def pil_loader(path):
    """Load the image stored at `path` and return it as an RGB PIL image."""
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('RGB')

outfile = '/content/drive/MyDrive/Saved_models/' + experiment + '/kaggle.csv'
# `with` closes the CSV even if a prediction fails; no_grad avoids building
# autograd graphs during inference.
with open(outfile, "w") as output_file, torch.no_grad():
    output_file.write("Id,Category\n")
    for f in tqdm(os.listdir(test_dir)):
        # Split on the real extension rather than assuming a 4-character
        # suffix, and skip anything that is not a .jpg file.
        image_id, extension = os.path.splitext(f)
        if extension.lower() != '.jpg':
            continue

        data = data_transforms_test(pil_loader(os.path.join(test_dir, f)))
        data = data.unsqueeze(0)  # add the batch dimension
        if use_cuda:
            data = data.cuda()

        # Weighted sum of the raw outputs (scores) of the two models.
        output = 0.6 * model_resnet(data) +  0.4 * model_densenet(data)

        pred = output.argmax(dim=1).item()
        output_file.write("%s,%d\n" % (image_id, pred))

print("Succesfully wrote " + outfile + ', you can upload this file to the kaggle competition website')

Using GPU
Using GPU


100%|██████████| 517/517 [01:28<00:00,  5.84it/s]

Succesfully wrote /content/drive/MyDrive/Saved_models/experiment/kaggle.csv, you can upload this file to the kaggle competition website



