# A3 JACOB Paul





## Download and unzip data

In [None]:
!wget -nc https://www.di.ens.fr/willow/teaching/recvis18orig/assignment3/bird_dataset.zip
!unzip -q bird_dataset

## Import libraries

In [None]:
import zipfile
import os
import argparse
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import PIL
import PIL.Image as Image

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F

import torchvision.models as models
import torchvision.transforms as transforms

from torchvision import datasets
from torch.autograd import Variable

## Data cropping using R-CNN

Loading the pretrained Mask R-CNN detector

In [None]:
# Pretrained Mask R-CNN (ResNet-50 + FPN) from torchvision, used below to locate birds.
use_cuda = torch.cuda.is_available()
rcnn = models.detection.maskrcnn_resnet50_fpn(pretrained=True)

print('Using GPU' if use_cuda else 'Using CPU')
if use_cuda:
    rcnn.cuda()

# The detector is only used for inference.
rcnn.eval()

# Trailing print so the cell does not end on the model expression.
print()

Generate cropped dataset

In [None]:
## Parameters

# Root folder of the original dataset; the loop below expects the sub-folders
# train_images, val_images and test_images inside it.
data_dir = 'bird_dataset'
# Margins are fractions of the detected box width/height added on every side of the box.
# Each training margin yields its own saved crop, prefixed with int(100*margin).
margins_train = [0.1,0.2,0.5]
margins_val = [0.2]
# Exactly one test margin is allowed (asserted in the loop): test crops keep the original file name.
margins_test = [0.2]
# True: use the first detection labelled 16 (bird) among all detections.
# False: only accept the image if the very first detection is labelled 16.
check_all_labels = True

## Core loop

# Start from an empty output folder so crops from a previous run are not mixed in.
!rm -rf cropped_data/

# Paths of the images in which no bird was detected (they are saved uncropped).
outliers = []

def pil_loader(path):
    """Read the image file at `path` and return it converted to an RGB PIL image."""
    with open(path, 'rb') as stream, Image.open(stream) as picture:
        return picture.convert('RGB')
  

# Label id this notebook treats as "bird" in the detector output.
BIRD_LABEL = 16

# Crop margins to apply for each dataset split.
split_margins = {
  "train_images": margins_train,
  "val_images": margins_val,
  "test_images": margins_test,
}
# Test crops are saved under the original file name, so only one margin is possible.
assert len(margins_test) == 1

to_tensor = transforms.ToTensor()

for split in os.listdir(data_dir):
  dataset = os.path.join(data_dir, split)

  # Ignore anything that is not one of the three expected split folders
  # (stray files, hidden folders, ...): there is no margin setting for it.
  if split not in split_margins or not os.path.isdir(dataset):
    continue

  print()
  print("Retrieving birds in", split)
  margins = split_margins[split]

  for bird_class in os.listdir(dataset):

    print("Working on class", bird_class)

    bird_class_data = os.path.join(dataset, bird_class)
    os.makedirs(os.path.join("cropped_data", bird_class_data), exist_ok=True)

    for bird in os.listdir(bird_class_data):

      # Load the image as a (1, C, H, W) tensor and run the detector on it.
      # The detector is on the GPU only when one is available, so the input follows it.
      bird_path = os.path.join(bird_class_data, bird)
      data = to_tensor(pil_loader(bird_path)).unsqueeze(0)
      with torch.no_grad():  # inference only: no autograd graph needed
        outputs = rcnn(data.cuda() if use_cuda else data)

      # Index of the detection to crop around, or None when no bird is accepted.
      labels = outputs[0]['labels'].tolist()
      if check_all_labels:
        bird_idx = labels.index(BIRD_LABEL) if BIRD_LABEL in labels else None
      else:
        bird_idx = 0 if labels[:1] == [BIRD_LABEL] else None

      if bird_idx is not None:

        box_x1, box_y1, box_x2, box_y2 = outputs[0]['boxes'][bird_idx].tolist()
        box_w = box_x2 - box_x1
        box_h = box_y2 - box_y1
        img_h, img_w = data.size(2), data.size(3)

        for margin in margins:

          # Every margin is applied to the ORIGINAL detection box; the box
          # coordinates are never overwritten, so margins do not accumulate.
          x1 = max(int(box_x1 - margin*box_w), 0)
          x2 = min(int(box_x2 + margin*box_w), img_w - 1)
          y1 = max(int(box_y1 - margin*box_h), 0)
          y2 = min(int(box_y2 + margin*box_h), img_h - 1)

          # (C, H, W) -> (H, W, C), the layout expected by plt.imsave
          cropped_data = data[0, :, y1:y2, x1:x2].permute(1, 2, 0).numpy()

          if split == "test_images":
            # Test images keep their name so predictions map back to the original ids.
            out_name = bird
          else:
            out_name = str(int(100*margin)) + bird
          plt.imsave(os.path.join("cropped_data", bird_class_data, out_name), cropped_data)

      # Otherwise save the image unchanged and remember it as an outlier
      else:
        print("WARNING: Bird has not been cropped in this image:", bird_path)
        outliers.append(bird_path)
        cropped_data = data[0].permute(1, 2, 0).numpy()
        plt.imsave(os.path.join("cropped_data", bird_path), cropped_data)

Check that the transformed test dataset has been adequately processed

In [None]:
# Every original test image must have a counterpart with the same relative path
# under cropped_data/, otherwise the submission would miss some ids.
dataset = os.path.join(data_dir, "test_images")
for bird_class in os.listdir(dataset):
  bird_class_data = os.path.join(dataset, bird_class)
  for bird in os.listdir(bird_class_data):
    # Path of the image currently being checked (not a leftover from the cropping loop)
    bird_path = os.path.join(bird_class_data, bird)
    if not os.path.isfile(os.path.join("cropped_data", bird_path)):
      raise ValueError("One image has not been processed !")

print("The transformed test set looks good")

(Optional) Take a look at the list of outliers

In [None]:
look_at_outliers = True

if look_at_outliers:
  for outlier_path in outliers:

    img = pil_loader(outlier_path)

    # (1, C, H, W) batch; moved to the GPU only when the detector lives there
    data = transforms.ToTensor()(img).unsqueeze(0)
    data_c = data.cuda() if use_cuda else data
    with torch.no_grad():  # inference only: no autograd graph needed
      outputs = rcnn(data_c)
    labels = outputs[0]['labels'].cpu().numpy()

    print("Path:",outlier_path)
    print("Labels:",labels)
    print("Original image:")
    plt.imshow(img)
    plt.show()

    # Index of the first detection labelled 16 (bird), or -1 when there is none
    good_label = -1
    for i,lab in enumerate(labels):
      if lab == 16:
        good_label = i
        break

    if good_label > -1 :

      # Show the crop a 20% margin around that detection would give
      margin = 0.2
      box_x1, box_y1, box_x2, box_y2 = outputs[0]['boxes'][good_label].tolist()
      box_h = box_y2 - box_y1
      box_w = box_x2 - box_x1

      x1 = max(int(box_x1 - margin*box_w), 0)
      x2 = min(int(box_x2 + margin*box_w), data.size(3) - 1)
      y1 = max(int(box_y1 - margin*box_h), 0)
      y2 = min(int(box_y2 + margin*box_h), data.size(2) - 1)

      # (C, H, W) -> (H, W, C) for plt.imshow
      cropped_data = data[0, :, y1:y2, x1:x2].permute(1, 2, 0).numpy()
      print("First bird found:",labels)
      plt.imshow(cropped_data)
      plt.show()

(Optional) Zip cropped data to save it

In [None]:
# NOTE(review): the crops are written to the relative folder "cropped_data", so these
# absolute paths only find them when the notebook's working directory is /content
# (presumably Google Colab - confirm before running elsewhere).
!zip -q -r /content/cropped_data.zip /content/cropped_data

## Data transformations

In [None]:
# Augmentation settings. img_size, random_crop_scale and the two colour scales are
# only referenced by the disabled transforms below.
img_size = (256,256)
img_crop_size = (224,224)
persp_distortion_scale=0.25
rotation_range=(-10.0,10.0)
random_crop_scale = (0.9, 1.0)
hue_variation_scale = .02
saturarion_variation_scale = .02

# Training pipeline: resize to the network input size, then random flip,
# perspective warp and small rotation before tensor conversion and normalisation.
data_transforms_train_cropped = transforms.Compose([
                                            #transforms.RandomResizedCrop(img_crop_size, scale=random_crop_scale),
                                            #transforms.ColorJitter(hue=hue_variation_scale, saturation=saturarion_variation_scale),
                                            transforms.Resize(img_crop_size),
                                            transforms.RandomHorizontalFlip(),
                                            # interpolation=3 is PIL's bicubic filter code
                                            transforms.RandomPerspective(distortion_scale=persp_distortion_scale, p=0.5, interpolation=3),
                                            # Only the angle range is passed: the former resample/expand/center/fill
                                            # arguments were all default values, and `resample` is rejected by
                                            # recent torchvision releases.
                                            transforms.RandomRotation(rotation_range),
                                            transforms.ToTensor(),
                                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                            ])


# Validation / test pipeline: deterministic resize and the same normalisation, no augmentation.
data_transforms_val_test_cropped = transforms.Compose([
                                          transforms.Resize(img_crop_size),
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                                          ])

## Classification Models

In [None]:
# Number of output categories; used as the output size of the final linear layer
# that each classifier wrapper in this notebook installs.
num_classes = 20

In [None]:
class TransferedResnet(nn.Module):
  """torchvision ResNet whose final fully-connected layer is replaced by a
  `num_classes`-way linear classifier.

  Args:
    resnet: depth of the network to build; one of 18, 34, 50, 101 or 152.
    pretrained: forwarded as `pretrained=` to the torchvision constructor.
    layers_to_freeze: iterable of submodule names whose parameters get
      `requires_grad = False`. Recognised names are 'conv1', 'bn1', 'layer1',
      'layer2', 'layer3' and 'layer4'; any other entry is ignored.

  Raises:
    ValueError: if `resnet` is not one of the supported depths.
  """

  # Supported depth -> name of the torchvision constructor
  _BUILDERS = {18: 'resnet18', 34: 'resnet34', 50: 'resnet50', 101: 'resnet101', 152: 'resnet152'}
  # Submodules that may be frozen
  _FREEZABLE = ('conv1', 'bn1', 'layer1', 'layer2', 'layer3', 'layer4')

  def __init__(self,resnet=50, pretrained = True, layers_to_freeze=()):
    super(TransferedResnet,self).__init__()

    # Fail early with a clear message instead of an AttributeError on self.resnet later on.
    if resnet not in self._BUILDERS:
      raise ValueError("Unsupported resnet depth %r, expected one of %s" % (resnet, sorted(self._BUILDERS)))
    self.resnet = getattr(models, self._BUILDERS[resnet])(pretrained=pretrained)

    for layer in layers_to_freeze:
      if layer in self._FREEZABLE:
        for param in getattr(self.resnet, layer).parameters():
          param.requires_grad = False

    # New classification head; created after freezing, so it stays trainable.
    num_features = self.resnet.fc.in_features
    self.resnet.fc = nn.Linear(num_features, num_classes)

  def forward(self, input):
    """Return the class scores computed by the wrapped network for `input`."""
    return self.resnet(input)

In [None]:
class TransferedInception(nn.Module):
  """torchvision Inception v3 with `aux_logits` turned off and the final
  fully-connected layer replaced by a `num_classes`-way linear classifier.

  Args:
    pretrained: forwarded as `pretrained=` to `models.inception_v3`.
    frozen: when true, every parameter of the loaded network gets
      `requires_grad = False`; the new head is created afterwards and stays trainable.
  """

  def __init__(self, pretrained = True, frozen=True):
    super(TransferedInception,self).__init__()

    backbone = models.inception_v3(pretrained=pretrained)
    backbone.aux_logits = False

    if frozen:
      for weight in backbone.parameters():
        weight.requires_grad = False

    # Replace the head last so that freezing above does not affect it.
    backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
    self.inception = backbone

  def forward(self, input):
    """Return the class scores computed by the wrapped network for `input`."""
    return self.inception(input)

In [None]:
class TransferedResneXt(nn.Module):
  """torchvision ResNeXt whose final fully-connected layer is replaced by a
  `num_classes`-way linear classifier.

  Args:
    resneXt: variant to build; 50 (resnext50_32x4d) or 101 (resnext101_32x8d).
    pretrained: forwarded as `pretrained=` to the torchvision constructor.
    layers_to_freeze: iterable of submodule names whose parameters get
      `requires_grad = False`. Recognised names are 'conv1', 'bn1', 'layer1',
      'layer2', 'layer3' and 'layer4'; any other entry is ignored.

  Raises:
    ValueError: if `resneXt` is not one of the supported variants.
  """

  # Supported variant -> name of the torchvision constructor
  _BUILDERS = {50: 'resnext50_32x4d', 101: 'resnext101_32x8d'}
  # Submodules that may be frozen
  _FREEZABLE = ('conv1', 'bn1', 'layer1', 'layer2', 'layer3', 'layer4')

  def __init__(self,resneXt=50, pretrained = True, layers_to_freeze=()):

    super(TransferedResneXt,self).__init__()

    # Fail early with a clear message instead of an AttributeError on self.resnext later on.
    if resneXt not in self._BUILDERS:
      raise ValueError("Unsupported resneXt variant %r, expected one of %s" % (resneXt, sorted(self._BUILDERS)))
    self.resnext = getattr(models, self._BUILDERS[resneXt])(pretrained=pretrained)

    for layer in layers_to_freeze:
      if layer in self._FREEZABLE:
        for param in getattr(self.resnext, layer).parameters():
          param.requires_grad = False

    # New classification head; created after freezing, so it stays trainable.
    num_features = self.resnext.fc.in_features
    self.resnext.fc = nn.Linear(num_features, num_classes)

  def forward(self, input):
    """Return the class scores computed by the wrapped network for `input`."""
    return self.resnext(input)

In [None]:
class TransferedDensenet(nn.Module):
  """torchvision DenseNet whose classifier is replaced by a `num_classes`-way
  linear layer.

  Args:
    densenet: variant to build; one of 121, 161, 169 or 201.
    pretrained: forwarded as `pretrained=` to the torchvision constructor.
    layers_to_freeze: iterable of names of submodules of `features` whose
      parameters get `requires_grad = False`. Recognised names are 'conv0',
      'norm0', 'denseblock1'..'denseblock4' and 'transition1'..'transition3';
      any other entry is ignored.

  Raises:
    ValueError: if `densenet` is not one of the supported variants.
  """

  # Supported variant -> name of the torchvision constructor
  _BUILDERS = {121: 'densenet121', 161: 'densenet161', 169: 'densenet169', 201: 'densenet201'}
  # Submodules of `features` that may be frozen
  _FREEZABLE = ('conv0', 'norm0',
                'denseblock1', 'transition1',
                'denseblock2', 'transition2',
                'denseblock3', 'transition3',
                'denseblock4')

  def __init__(self,densenet=169, pretrained = True, layers_to_freeze=()):

    super(TransferedDensenet,self).__init__()

    # Fail early with a clear message instead of an AttributeError on self.densenet later on.
    if densenet not in self._BUILDERS:
      raise ValueError("Unsupported densenet variant %r, expected one of %s" % (densenet, sorted(self._BUILDERS)))
    self.densenet = getattr(models, self._BUILDERS[densenet])(pretrained=pretrained)

    for layer in layers_to_freeze:
      if layer in self._FREEZABLE:
        for param in getattr(self.densenet.features, layer).parameters():
          param.requires_grad = False

    # New classification head; created after freezing, so it stays trainable.
    num_features = self.densenet.classifier.in_features
    self.densenet.classifier = nn.Linear(num_features, num_classes)

  def forward(self, input):
    """Return the class scores computed by the wrapped network for `input`."""
    return self.densenet(input)

## Training

Settings

In [None]:
# Locations
data_dir = 'bird_dataset'
results_dir = 'results'

# Optimisation settings
batch_size = 128
epochs = 2
log_interval = 10   # print the training loss every `log_interval` batches
random_seed = 1

torch.manual_seed(random_seed)
use_cuda = torch.cuda.is_available()

# Checkpoints and submission files are written here
os.makedirs(results_dir, exist_ok=True)

Loading data

In [None]:
# True: train on the detector crops under cropped_data/; False: train on the raw dataset.
load_cropped_data = True

# Folder that contains train_images/ and val_images/
images_root = os.path.join("cropped_data", data_dir) if load_cropped_data else data_dir

# The *_cropped pipelines are the only transforms defined in this notebook and contain
# nothing crop-specific (resize + augmentation + normalisation), so they are used for
# both data sources instead of names that are never defined.
train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(os.path.join(images_root, 'train_images'), transform=data_transforms_train_cropped),
    batch_size=batch_size, shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(os.path.join(images_root, 'val_images'), transform=data_transforms_val_test_cropped),
    batch_size=batch_size, shuffle=False, num_workers=0)


Selecting model and optimizer

In [None]:
## Hyper-parameters

lr = 0.0001
pretrained = True

# Variant of each candidate backbone
which_resnet = 152
which_resnext = 101
which_densenet = 201

# Submodules kept fixed while fine-tuning (the ResNet list is also the one passed to ResNeXt)
layers_to_freeze_resnet = ['conv1','bn1','layer1','layer2','layer3']
layers_to_freeze_densenet = ['conv0','norm0','denseblock1','transition1','denseblock2','transition2','denseblock3','transition3']

## Model: keep exactly one of these assignments active

model = TransferedResnet(resnet=which_resnet,pretrained=pretrained,layers_to_freeze=layers_to_freeze_resnet)
#model = TransferedResneXt(resneXt=which_resnext,pretrained=pretrained,layers_to_freeze=layers_to_freeze_resnet)
#model = TransferedInception(pretrained=pretrained)
#model = TransferedDensenet(densenet=which_densenet,pretrained=pretrained,layers_to_freeze=layers_to_freeze_densenet)

## Optimizer: keep exactly one of these assignments active

optimizer = optim.Adam(model.parameters(), lr=lr)
#optimizer = optim.SGD(model.parameters(),lr=lr,momentum=0.9)

# Halves the learning rate when the monitored value stops improving for 3 scheduler steps
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=3, verbose=True)

print('Using GPU' if use_cuda else 'Using CPU')
if use_cuda:
    model.cuda()

Training and validation methods

In [None]:
def train(epoch):
    """Run one pass over `train_loader`, updating `model` with `optimizer`.

    `epoch` is only used in the progress message, which is printed every
    `log_interval` batches together with the loss of the current batch.
    """
    criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    model.train()
    for batch_idx, (images, labels) in enumerate(train_loader):
        if use_cuda:
            images = images.cuda()
            labels = labels.cuda()

        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval != 0:
            continue
        seen = batch_idx * len(images)
        progress = 100. * batch_idx / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, len(train_loader.dataset), progress, loss.item()))

def validation():
    """Evaluate `model` on `val_loader`, step `scheduler` with the average
    validation loss and print the loss and accuracy.

    The loss is the true per-sample average: per-sample losses are summed over
    the whole set and divided by the number of samples.
    """
    model.eval()
    # reduction='sum' so that dividing by the dataset size below gives a per-sample mean
    criterion = torch.nn.CrossEntropyLoss(reduction='sum')
    validation_loss = 0.0
    correct = 0
    # No gradients are needed for evaluation; this avoids building the autograd graph.
    with torch.no_grad():
        for data, target in val_loader:
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            validation_loss += criterion(output, target).item()
            # index of the highest score = predicted class
            pred = output.argmax(dim=1)
            # .item() keeps `correct` a plain int, so it prints as a number
            correct += (pred == target).sum().item()

    validation_loss /= len(val_loader.dataset)
    scheduler.step(validation_loss)
    print('\nValidation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        validation_loss, correct, len(val_loader.dataset),
        100. * correct / len(val_loader.dataset)))

Training loop

In [None]:
# Train for `epochs` epochs, validating and writing a checkpoint after each one.
for epoch in range(1, epochs + 1):
    train(epoch)
    validation()

    model_file = '{}/model_{}.pth'.format(results_dir, epoch)
    torch.save(model.state_dict(), model_file)
    print('Saved model to ' + model_file + '. You can run `python evaluate.py --model ' + model_file + '` to generate the Kaggle formatted csv file\n')


## Evaluation

Evaluation settings

In [None]:
# Checkpoint to evaluate; the training loop writes one '<results_dir>/model_<epoch>.pth' per epoch.
model_path = 'results/model_2.pth'
# Destination of the submission CSV written by the evaluation cell.
outfile = 'results/kaggle_resnet152_batch_128_frozen_layers_crops_02_05_08_2.csv'

Load model and weights

In [None]:
# Deserialise onto the CPU so that a checkpoint saved during a GPU run can also be
# loaded on a CPU-only machine; load_state_dict then copies the weights to
# whatever device the model's parameters are on.
state_dict = torch.load(model_path, map_location='cpu')

model.load_state_dict(state_dict)
model.eval()
if use_cuda:
    print('Using GPU')
    model.cuda()
else:
    print('Using CPU')

Evaluation

In [None]:
# Folder holding the test images: detector crops or the raw dataset.
if load_cropped_data:
    test_dir = os.path.join("cropped_data",data_dir + '/test_images/mistery_category')
else:
    test_dir = data_dir + '/test_images/mistery_category'

def pil_loader(path):
    """Read the image file at `path` and return it converted to an RGB PIL image."""
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        with Image.open(f) as img:
            return img.convert('RGB')


# `with` closes the CSV even if a prediction fails half-way through.
with open(outfile, "w") as output_file:
    output_file.write("Id,Category\n")
    with torch.no_grad():  # inference only: no autograd graph needed
        for f in tqdm(os.listdir(test_dir)):
            if 'jpg' in f:
                data = data_transforms_val_test_cropped(pil_loader(test_dir + '/' + f))
                data = data.unsqueeze(0)  # add the batch dimension
                if use_cuda:
                    data = data.cuda()
                output = model(data)
                # index of the highest score, as a plain int
                pred = output.argmax(dim=1).item()
                # the id is the file name without its 4-character extension
                output_file.write("%s,%d\n" % (f[:-4], pred))

print("Succesfully wrote " + outfile + ', you can upload this file to the kaggle competition website')

## Majority voting

This part requires the CSV files to be uploaded manually to the folder given by `csv_folder` before it is run.

In [None]:
# Folder scanned for the '.csv' submissions that take part in the vote.
csv_folder = 'csv_submissions'
# Destination of the CSV holding the majority-vote predictions.
majority_outfile = 'results/kaggle_majority_4.csv'

In [None]:
# One Series per submission, mapping image Id -> predicted Category.
# Ids are read as strings so they compare equal to the file names even when they
# look numeric; for a duplicated Id the first row is kept.
csvs = []

for file in os.listdir(csv_folder):
    if file.endswith(".csv"):
        submission = pd.read_csv(os.path.join(csv_folder,file), dtype={'Id': str})
        csvs.append(submission.drop_duplicates('Id').set_index('Id')['Category'])

if not csvs:
    raise ValueError("No CSV submission found in " + csv_folder)

# nb_labels[k-1]: number of images for which the submissions proposed k distinct labels
nb_labels = [0] * len(csvs)
# occurences_max[k-1]: number of images whose winning label received k votes
occurences_max = [0] * len(csvs)

# NOTE: `test_dir` comes from the evaluation cell, which must have been run first.
with open(majority_outfile, "w") as output_file:
  output_file.write("Id,Category\n")

  for f in tqdm(os.listdir(test_dir)):
    if 'jpg' in f:
      img_name = f[:-4]
      labels = [votes.loc[img_name] for votes in csvs]

      # np.unique returns the labels sorted and argmax the first maximum,
      # so ties are resolved in favour of the smallest label.
      unique, counts = np.unique(labels, return_counts=True)
      majority_label = unique[np.argmax(counts)]

      nb_labels[len(unique)-1] +=1
      occurences_max[max(counts)-1] +=1

      output_file.write("%s,%d\n" % (img_name, majority_label))

print("Succesfully wrote " + majority_outfile + ', you can upload this file to the kaggle competition website')
print("The distribution of the number of labels per image is:", nb_labels)
print("The distribution of the number of occurences of the most common label is:", occurences_max)