# Imports and loaders

In [0]:
## Getting the datasets
# Download the bird dataset archive with an IPython shell escape (wget writes
# the file into the current working directory).
!wget https://www.di.ens.fr/willow/teaching/recvis18/assignment3/bird_dataset.zip 
import zipfile
# NOTE(review): the archive is opened from /content, so this presumably assumes
# the notebook's working directory is /content — confirm.
with zipfile.ZipFile("/content/bird_dataset.zip", 'r') as zip_ref:
    # No destination is given, so the archive is unpacked into the current
    # working directory.
    zip_ref.extractall()

In [0]:
import seaborn as sns
from sklearn.metrics import confusion_matrix
import pandas as pd
from tqdm import tqdm
import os
import PIL.Image as Image
import torch.nn as nn
import torch
import glob
from __future__ import print_function, division
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import copy
# Import the Model Zoo package to get Inceptionresnetv2 architecture
from cnn_finetune import make_model

In [0]:
# Pre-processing pipelines, keyed by the dataset sub-folder they apply to.
# Training images get random augmentation; validation images are only resized
# and normalised.
data_transforms = dict(
    train_images=transforms.Compose([
        transforms.Resize((400, 400)),
        # The colour/affine/flip group is applied as a whole with probability 0.75.
        transforms.RandomApply(
            [
                transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.05, hue=0),
                transforms.RandomAffine(degrees=(-45, 45), shear=(20, 20), resample=False),
                transforms.RandomHorizontalFlip(),
            ],
            p=0.75,
        ),
        torchvision.transforms.RandomResizedCrop((299, 299)),
        torchvision.transforms.RandomPerspective(distortion_scale=0.3, p=0.5, interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    val_images=transforms.Compose([
        transforms.Resize((400, 400)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
)

# Root folder of the original (uncropped) dataset, relative to the working directory.
data_dir = 'bird_dataset/'

# One ImageFolder dataset, one DataLoader and one sample count per split.
image_datasets = {}
dataloaders = {}
dataset_sizes = {}
for split in ('train_images', 'val_images'):
    image_datasets[split] = datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    dataloaders[split] = torch.utils.data.DataLoader(
        image_datasets[split], batch_size=64, shuffle=True, num_workers=1
    )
    dataset_sizes[split] = len(image_datasets[split])

# Class names as discovered by ImageFolder in the training split.
class_names = image_datasets['train_images'].classes

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")



# Creation of a new dataset with Mask RCNN
---
> The new dataset is called **birds_cropped**. It consists of the same images as the original dataset, cropped to the bird bounding box detected by the Mask R-CNN algorithm.
>
> If no bird is detected, we keep the **original** image.
> 
> If multiple birds are detected, we take the bounding box that has the **highest** score.
>
>
> The original code comes from the Python demo at this link:
>https://github.com/matterport/Mask_RCNN/tree/master/mrcnn
>
> If you want to reproduce the results, you should get the following files from the GitHub repository:
* utils.py
* model.py
* visualize.py
* parallel_model.py
* config.py
* \_\_init\_\_.py

>
> Also, the imports between these files (at the beginning of each file) must be modified, since the files will sit in the same directory (on Colab) rather than inside the `mrcnn` package.

In [0]:
## Load the model : COCO 
import sys
import random
import math
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
os.mkdir("coco")
# Root directory of the project
ROOT_DIR = os.path.abspath("coco")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
import utils
import model as modellib
import visualize
# Import COCO config
#sys.path.append(os.path.join(ROOT_DIR, "samples/coco/"))  # To find local version
import coco

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# Directory of images to run detection on
IMAGE_DIR = "/content/birds_dataset/test_images/mistery_category"

In [0]:
from config import Config
class CocoConfig(Config):
    """Configuration for training on MS COCO.

    Derives from the base Config class and overrides values specific
    to the COCO dataset. In this notebook it only serves as the base class
    of ``InferenceConfig``.
    """
    # Give the configuration a recognizable name
    NAME = "coco"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Uncomment to train on 8 GPUs (default is 1)
    # GPU_COUNT = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes


# CocoConfig is defined locally in this notebook, so the original imports from
# the Mask RCNN repository layout are left disabled:
#sys.path.append(ROOT_DIR)
#from coco import CocoConfig
class InferenceConfig(CocoConfig):
    """COCO configuration restricted to single-image inference."""
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

# Instantiate the inference configuration and show it via the base class's
# display() method.
config = InferenceConfig()
config.display()

In [0]:
# Create model object in inference mode, using the InferenceConfig instance
# (`config`) and MODEL_DIR as the model directory.
model_mrcnn = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load weights trained on MS-COCO from the local .h5 file (by_name=True is
# passed through to load_weights).
model_mrcnn.load_weights(COCO_MODEL_PATH, by_name=True)

In [0]:
# Helper that picks which detected region to crop
def verif_roi_results(r):
  """Return the index of the best-scoring detection with class id 15, or -1.

  `r` is one detection result: `r['class_ids']` holds one class id per detected
  region and `r['scores']` the matching scores (it is indexed with a list of
  positions). Class id 15 is the id this notebook treats as "bird".
  """
  # Positions of every detection labelled 15
  bird_positions = [pos for pos, class_id in enumerate(r['class_ids']) if class_id == 15]
  if not bird_positions:
    return -1
  # argmax runs over the bird-only scores, so map the winner back to its
  # position in the full detection list
  best = np.argmax(r['scores'][bird_positions])
  return bird_positions[best]

def _save_bird_crop(model, src_path, dst_path):
  """Detect the best-scoring bird in `src_path` and save its crop to `dst_path`.

  When no bird is detected the whole image is saved instead. The output is
  always written as PNG data, whatever the extension of `dst_path`.
  """
  image = skimage.io.imread(src_path)
  # Run detection on a single-image batch
  r = model.detect([image])[0]
  # Index of the highest-scoring bird, or -1 when there is none
  idx = verif_roi_results(r)
  with Image.open(src_path) as im:
    if idx == -1:
      # No bird detected: keep the original image
      im.save(dst_path, 'PNG')
      return
    # Each row of r['rois'] is used as (top, left, bottom, right);
    # crop() expects (top, left, height, width).
    top, left, bottom, right = (int(v) for v in r['rois'][idx])
    newimg = torchvision.transforms.functional.crop(im, top, left, bottom - top, right - left)
  newimg.save(dst_path, "PNG")


def _extract_split(model, src_root, dst_root):
  """Mirror every category folder of `src_root` into `dst_root` with cropped images."""
  os.makedirs(dst_root, exist_ok=True)
  for cat_path in sorted(glob.glob(os.path.join(src_root, "*"))):
    if not os.path.isdir(cat_path):
      continue
    out_dir = os.path.join(dst_root, os.path.basename(cat_path))
    os.makedirs(out_dir, exist_ok=True)
    for p in next(os.walk(cat_path))[2]:
      src_path = os.path.join(cat_path, p)
      try:
        _save_bird_crop(model, src_path, os.path.join(out_dir, p))
      except Exception as err:
        # Still best effort (an unreadable image is skipped), but the failure
        # is reported instead of being silently swallowed.
        print("Skipping {}: {}".format(src_path, err))


def extract_maskrcnn(model, type_dataset="train"):
  """Build the `birds_cropped` dataset from `bird_dataset` using Mask R-CNN.

  Args:
    model: detector exposing `detect(list_of_images)`, e.g. the Mask R-CNN
      model in inference mode.
    type_dataset: 'test' processes test_images/mistery_category; any other
      value (default "train") processes both train_images and val_images.

  For every image, the bounding box of the highest-scoring bird is cropped and
  saved under /content/birds_cropped with the same relative path and file name;
  images without a detected bird are copied unchanged. Output directories are
  created as needed, so the function can be re-run.

  Side effect: the working directory is set to /content and left there.
  """
  os.chdir("/content")
  src_root = "/content/bird_dataset"
  dst_root = "/content/birds_cropped"
  if not os.path.exists(dst_root):
    os.mkdir(dst_root)
    print("Folder birds_cropped created")

  if type_dataset == 'test':
    image_dir = os.path.join(src_root, "test_images", "mistery_category")
    out_dir = os.path.join(dst_root, "test_images", "mistery_category")
    os.makedirs(out_dir, exist_ok=True)
    # No try/except here: every test image must get an output file, so a
    # failure should be visible rather than produce an incomplete test set.
    for p in next(os.walk(image_dir))[2]:
      _save_bird_crop(model, os.path.join(image_dir, p), os.path.join(out_dir, p))
  else:
    for split in ("train_images", "val_images"):
      _extract_split(model, os.path.join(src_root, split), os.path.join(dst_root, split))

In [0]:
## Extract birds from val and train images
# Pass the Mask R-CNN detector built earlier (`model_mrcnn`); no variable named
# `model` is defined in this notebook, so the previous call raised a NameError.
extract_maskrcnn(model_mrcnn)
# Later cells use paths relative to /content.
os.chdir('/content')

In [0]:
## Extract birds from test images
# Pass the Mask R-CNN detector built earlier (`model_mrcnn`); no variable named
# `model` is defined in this notebook, so the previous call raised a NameError.
extract_maskrcnn(model_mrcnn, type_dataset='test')

# Training models
---
> We got the Inception-ResNet-v2 architecture from the `cnn_finetune` library, which is available on **Model Zoo**.
>


In [0]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train `model` and return it loaded with its best validation weights.

    Each epoch runs a training pass over ``dataloaders['train_images']`` and an
    evaluation pass over ``dataloaders['val_images']``. The weights giving the
    highest validation accuracy are kept and restored before returning.

    Relies on the notebook-level globals ``dataloaders``, ``dataset_sizes``
    and ``device``.

    Args:
        model: network to train; called as ``model(inputs)``.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer updating the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch after the
            training pass. ``ReduceLROnPlateau`` receives the epoch training
            loss; any other scheduler is stepped without argument.
        num_epochs: number of epochs to run.

    Returns:
        The same `model` object, with the best validation weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Only ReduceLROnPlateau takes a metric in step(). Epoch-based schedulers
    # such as StepLR interpret a positional argument as the epoch index, so
    # passing the loss to them prevents the learning rate from ever decaying.
    metric_scheduler = isinstance(scheduler, lr_scheduler.ReduceLROnPlateau)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train_images', 'val_images']:
            is_training = phase == 'train_images'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics (loss is weighted by the batch size so the epoch
                # loss below is a per-sample average)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            if is_training:
                if metric_scheduler:
                    scheduler.step(epoch_loss)
                else:
                    scheduler.step()
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model when validation accuracy improves
            if phase == 'val_images' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

## Inception Resnet v2 trained on full images and then cropped images
---

In [0]:
# Classification head plugged onto a pre-trained backbone
def make_classifier(in_features, num_classes):
    """Return a two-layer head: Linear(in_features, 4096) -> ReLU -> Linear(4096, num_classes)."""
    hidden_units = 4096
    layers = [
        nn.Linear(in_features, hidden_units),
        nn.ReLU(inplace=True),
        nn.Linear(hidden_units, num_classes),
    ]
    return nn.Sequential(*layers)

# Inception-ResNet-v2 from cnn_finetune, with the custom head and 20 output classes.
model_i = make_model(
    'inceptionresnetv2',
    num_classes=20,
    pretrained=True,
    input_size=(400, 400),
    classifier_factory=make_classifier,
)

## Freeze the whole feature extractor, then re-enable gradients for its
## sub-modules 9 to 14 so that only those (and the classifier) are fine-tuned.
for name, child in model_i.named_children():
    if name != '_features':
        continue
    for param in child.parameters():
        param.requires_grad = False
    for n in range(9, 15):
        for param in child[n].parameters():
            param.requires_grad = True

In [0]:
# Move the network to the selected device before building the optimizer.
model_i = model_i.to(device)

# Cross-entropy averaged over the batch.
criterion = nn.CrossEntropyLoss(reduction= 'mean')
# Every parameter is handed to the optimizer, including those whose
# requires_grad flag was set to False above.
optimizer_ft = optim.SGD(model_i.parameters(), lr=0.01, momentum=0.9)
# StepLR configured with step_size=7 and gamma=0.1
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
# Run 50 epochs; train_model returns the model with its best validation weights.
model_i = train_model(model_i, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=50)

## Resnet 152 trained on full images
---

In [0]:
# ResNet-152 fine-tuned on the full (uncropped) images.
model_resnet152_fi = models.resnet152(pretrained=True)
num_ftrs = model_resnet152_fi.fc.in_features
# Only these top-level children are (partly) fine-tuned; everything else is frozen.
lay_unfreeze = ['layer4','layer3']
for name, child in model_resnet152_fi.named_children():
    if name not in lay_unfreeze:
        for param in child.parameters():
            param.requires_grad = False
    elif name == 'layer3':
        # In layer3 only the blocks with index > 21 are trained; the earlier
        # blocks stay frozen.
        for n, block in enumerate(child):
            for param in block.parameters():
                param.requires_grad = n > 21
    else:
        # layer4 is trained entirely.
        print(name + ' has been unfrozen.')
        for param in child.parameters():
            param.requires_grad = True
# Replace the final fully connected layer with a fresh one producing 20 outputs
# (one per class). It could be generalized to nn.Linear(num_ftrs, len(class_names)).
model_resnet152_fi.fc = nn.Linear(num_ftrs, 20)

model_resnet152_fi = model_resnet152_fi.to(device)

criterion = nn.CrossEntropyLoss(reduction= 'mean')

# Every parameter is handed to the optimizer, including the frozen ones.
optimizer_ft = optim.SGD(model_resnet152_fi.parameters(), lr=0.01, momentum=0.9)

# StepLR configured with step_size=7 and gamma=0.1
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_resnet152_fi = train_model(model_resnet152_fi, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=50)

In [0]:
        # NOTE(review): this cell holds a single indented expression with a trailing
        # comma, so running it raises an IndentationError. It looks like a leftover
        # variant of the augmentation (ColorJitter brightness=0.4, contrast=0.2,
        # saturation=0.2) and is not referenced elsewhere in the visible notebook —
        # confirm whether it should be removed or moved into `data_transforms`.
        transforms.RandomApply([transforms.ColorJitter(brightness=0.4, contrast=0.2, saturation=0.2, hue=0),transforms.RandomAffine(degrees=(-45,45), shear=(20,20), resample=False),transforms.RandomHorizontalFlip()], p=0.75),


## Resnet 152 trained on birds_cropped
---


In [0]:
# Pre-processing pipelines for the cropped dataset, keyed by sub-folder name.
# Same augmentation as for the full images, but images are resized to 299x299.
data_transforms = dict(
    train_images=transforms.Compose([
        transforms.Resize((299, 299)),
        # The colour/affine/flip group is applied as a whole with probability 0.75.
        transforms.RandomApply(
            [
                transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.05, hue=0),
                transforms.RandomAffine(degrees=(-45, 45), shear=(20, 20), resample=False),
                transforms.RandomHorizontalFlip(),
            ],
            p=0.75,
        ),
        torchvision.transforms.RandomResizedCrop((299, 299)),
        torchvision.transforms.RandomPerspective(distortion_scale=0.3, p=0.5, interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    val_images=transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
)

# Root folder of the Mask R-CNN cropped dataset, relative to the working directory.
data_dir = 'birds_cropped/'

# One ImageFolder dataset, one DataLoader and one sample count per split.
# These rebind the notebook-level names used by train_model.
image_datasets = {}
dataloaders = {}
dataset_sizes = {}
for split in ('train_images', 'val_images'):
    image_datasets[split] = datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    dataloaders[split] = torch.utils.data.DataLoader(
        image_datasets[split], batch_size=64, shuffle=True, num_workers=1
    )
    dataset_sizes[split] = len(image_datasets[split])

# Class names as discovered by ImageFolder in the training split.
class_names = image_datasets['train_images'].classes

# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")



################################################################################


# ResNet-152 fine-tuned on the cropped images (birds_cropped).
model_ft = models.resnet152(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Only these top-level children are (partly) fine-tuned; everything else is frozen.
lay_unfreeze = ['layer4','layer3']
for name, child in model_ft.named_children():
    if name not in lay_unfreeze:
        for param in child.parameters():
            param.requires_grad = False
    elif name == 'layer3':
        # In layer3 only the blocks with index > 21 are trained; the earlier
        # blocks stay frozen.
        for n, block in enumerate(child):
            for param in block.parameters():
                param.requires_grad = n > 21
    else:
        # layer4 is trained entirely.
        print(name + ' has been unfrozen.')
        for param in child.parameters():
            param.requires_grad = True
# Replace the final fully connected layer with a fresh one producing 20 outputs
# (one per class). It could be generalized to nn.Linear(num_ftrs, len(class_names)).
model_ft.fc = nn.Linear(num_ftrs, 20)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss(reduction= 'mean')

# Every parameter is handed to the optimizer, including the frozen ones.
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.01, momentum=0.9)

# StepLR configured with step_size=7 and gamma=0.1
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# Fine-tune on the cropped dataset, with the same call as the ResNet-152 cell
# for full images. The call was missing here, which left model_ft with a
# randomly initialised fc layer when it is used in the ensemble.
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,num_epochs=50)

# Evaluation
---
> Here we implement our **ensemble method** to output a CSV file that can be uploaded to Kaggle.

In [0]:
## Two evaluation pipelines that differ only in the target image size
# 299x299 pipeline
data_transforms_2 = dict(
    val_images=transforms.Compose([
        transforms.Resize((299, 299)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
)

# 400x400 pipeline
data_transforms_1 = dict(
    val_images=transforms.Compose([
        transforms.Resize((400, 400)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
)

In [0]:
## The test images are first processed to detect birds in them. If no bird is detected, we keep the original image.
# test_dir_2: test images from the cropped dataset (birds_cropped)
test_dir_2 = '/content/birds_cropped' + '/test_images/mistery_category'
# test_dir_1: test images from the original dataset (bird_dataset)
test_dir_1 = '/content/bird_dataset' + '/test_images/mistery_category'

def pil_loader(path):
    """Load the image stored at `path` and return it converted to RGB."""
    # The file is opened here rather than by PIL to avoid a ResourceWarning
    # (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as handle, Image.open(handle) as picture:
        return picture.convert('RGB')

## Models are in eval mode
# The full-image ResNet-152 is defined as `model_resnet152_fi` in its training
# cell; the previous spelling `model_resnet152fi` raised a NameError.
model_resnet152_fi.eval()
model_i.eval()
model_ft.eval()

## Create the output csv file (the with-block closes it even if a prediction fails)
with open("kaggle_ensemble3.csv", "w") as output_file:
    output_file.write("Id,Category\n")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        ## Image names in the 2 datasets are identical, only their paths differ
        for f in tqdm(os.listdir(test_dir_1)):
            if 'jpg' in f:
                # Full image at 400x400 and cropped image at 299x299
                data = data_transforms_1["val_images"](pil_loader(test_dir_1 + '/' + f))
                data2 = data_transforms_2["val_images"](pil_loader(test_dir_2 + '/' + f))
                # Add the batch dimension and move to the model device
                data = data.unsqueeze(0).to(device)
                data2 = data2.unsqueeze(0).to(device)

                # Class probabilities of each model (softmax over the class dimension)
                output1 = torch.nn.functional.softmax(model_i(data2), dim=1)
                output2 = torch.nn.functional.softmax(model_ft(data2), dim=1)
                output3 = torch.nn.functional.softmax(model_resnet152_fi(data), dim=1)

                # Unweighted sum of the three probability vectors; the predicted
                # class is the one with the highest summed probability.
                w_mean = output1 + output2 + output3
                pred = w_mean.max(1)[1].item()

                # f[:-4] drops the 4-character file extension to form the Id
                output_file.write("%s,%d\n" % (f[:-4], pred))

print("Succesfully wrote " + "kaggle_ensemble3.csv" + ', you can upload this file to the kaggle competition website')