# TP 3 : Object Recognition and Computer Vision
## Kaggle Challenge


In [None]:
# Mount Google Drive at /content/drive. force_remount=True asks Colab to
# remount (presumably even when the drive is already mounted — TODO confirm).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

In [None]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install timm

In [None]:
import os
import shutil
# Absolute path (the drive is mounted at /content/drive in the first cell), so
# that re-running this cell does not fail on a relative path that no longer
# resolves once the working directory has already been changed.
os.chdir('/content/drive/MyDrive/recvis20_a3')

## Utility function to delete a folder and recreate it empty
def clear_folder(output_path):
    """Reset ``output_path`` to an empty directory.

    An existing directory is removed together with its contents and an empty
    one is created in its place. A missing directory is not an error, so the
    function also works on the very first run; missing parent directories are
    created as well.

    Args:
        output_path: path of the directory to (re)create.
    """
    # Calling rmtree unconditionally raises FileNotFoundError on a first run.
    if os.path.isdir(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path, exist_ok=True)

## Detection using Faster R-CNN and cropping images

In [None]:
import torchvision
import torch
import cv2
import matplotlib.pyplot as plt
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import numpy as np
# Detector: Faster R-CNN (ResNet-50 FPN) loaded with pre-trained weights
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
use_cuda = torch.cuda.is_available()
print('Using GPU' if use_cuda else 'Using CPU')
if use_cuda:
    model.cuda()
# The detector is only used for inference in this notebook
model.eval()
from PIL import Image



# Root of the raw dataset (under the current working directory) and the
# sibling '<root>_cropped' directory passed to crop_images as its output path.
path_raw_images = os.getcwd() + '/bird_dataset'
path_cropped_images = path_raw_images + '_cropped'

# Class names indexed by the integer label ids returned by the detector
# (get_prediction looks names up with COCO_INSTANCE_CATEGORY_NAMES[label_id]).
# The 'N/A' entries presumably pad ids that have no category in the COCO
# label map — TODO confirm against the torchvision documentation.
COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_prediction(img_path, threshold):
    """Return the highest-scoring 'bird' detection for one image.

    The image is run through the module-level detector ``model``; only
    detections whose class name is 'bird' are considered.

    Args:
        img_path: path of the image file to analyse.
        threshold: score the best bird detection must strictly exceed.

    Returns:
        ``(box, class_name, score)`` where ``box`` is a 2x2 array
        ``[[x1, y1], [x2, y2]]``, or ``(None, None, 0)`` when no bird
        detection scores above ``threshold``.
    """
    # Force three channels so that e.g. RGBA or palette images do not reach
    # the detector with an unexpected channel count; close the file afterwards.
    with Image.open(img_path) as img_file:
        img = torchvision.transforms.ToTensor()(img_file.convert('RGB'))
    # Follow the device the model lives on instead of assuming CUDA, so the
    # function also works when the notebook runs on CPU.
    img = img.to(next(model.parameters()).device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        try:
            pred = model([img])  # Pass the image to the model
        except TypeError:
            pred = model(img[None, :])
    label_ids = pred[0]['labels'].cpu().numpy()
    pred_class = np.array([COCO_INSTANCE_CATEGORY_NAMES[i] for i in label_ids])
    # One [[x1, y1], [x2, y2]] entry per detection
    pred_boxes = pred[0]['boxes'].cpu().numpy().reshape(-1, 2, 2)
    pred_scores = pred[0]['scores'].cpu().numpy()
    if len(pred_scores):
        where_birds = np.where(pred_class == 'bird')[0]
        if len(where_birds):
            pred_class = pred_class[where_birds]
            pred_boxes = pred_boxes[where_birds]
            pred_scores = pred_scores[where_birds]
            idx_max_score = np.argmax(pred_scores)
            if pred_scores[idx_max_score] > threshold:
                return pred_boxes[idx_max_score], pred_class[idx_max_score], pred_scores[idx_max_score]
            # A bird was detected, but with too low a score: show the candidates
            print(pred_class, idx_max_score, pred_scores)
    print("NOT CROPPED : ", img_path)
    return None, None, 0
  
def crop_img(path, threshold, margin=5):
    """Crop an image around its best 'bird' detection.

    Args:
        path: path of the image file.
        threshold: detection score threshold forwarded to ``get_prediction``.
        margin: number of pixels added on every side of the detected box.

    Returns:
        ``(image, cropped)``: the cropped PIL image and ``True`` when a bird
        was found, otherwise the unmodified image and ``False``.
    """
    img = Image.open(path)
    boxes, _, _ = get_prediction(path, threshold)
    if boxes is None:
        return img, False
    width, height = img.size
    # Clamp the padded box to the image: PIL fills any part of a crop box that
    # lies outside the image with black pixels.
    left = max(0, int(round(float(boxes[0][0]) - margin)))
    upper = max(0, int(round(float(boxes[0][1]) - margin)))
    right = min(width, int(round(float(boxes[1][0]) + margin)))
    lower = min(height, int(round(float(boxes[1][1]) + margin)))
    return img.crop((left, upper, right, lower)), True


def object_detection_api(img_path, threshold=0.5, rect_th=3, text_size=3, text_th=3):
    """Display an image with its best 'bird' detection drawn on top.

    Args:
        img_path: path of the image file.
        threshold: detection score threshold forwarded to ``get_prediction``.
        rect_th: thickness of the rectangle outline.
        text_size: font scale of the class label.
        text_th: thickness of the label text.

    The image is shown without annotation when no bird is detected.
    """
    # get_prediction returns (box, class_name, score) for ONE detection, or
    # (None, None, 0) when nothing passes the threshold.
    box, pred_cls, _ = get_prediction(img_path, threshold)
    img = cv2.imread(img_path)  # Read image with cv2
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert to RGB
    if box is not None:
        # cv2 drawing functions need integer pixel coordinates
        top_left = tuple(int(v) for v in box[0])
        bottom_right = tuple(int(v) for v in box[1])
        cv2.rectangle(img, top_left, bottom_right, color=(0, 255, 0), thickness=rect_th)
        cv2.putText(img, str(pred_cls), top_left, cv2.FONT_HERSHEY_SIMPLEX, text_size, (0, 255, 0), thickness=text_th)
    plt.figure(figsize=(20, 30))  # display the output image
    plt.imshow(img)
    plt.xticks([])
    plt.yticks([])
    plt.show()



def crop_images(input_path, output_path, threshold):
    """Crop every image of the dataset around its detected bird.

    The 'train_images' and 'val_images' splits contain one sub-folder per
    class, 'test_images' a single 'mistery_category' sub-folder, and
    'Inat_mini' is a flat folder. The folder layout is reproduced under
    ``output_path``. Images in which no bird is detected are saved unchanged.

    Args:
        input_path: root folder of the raw dataset.
        output_path: root folder receiving the cropped dataset (created if
            needed).
        threshold: detection score threshold forwarded to ``crop_img``.

    After each split a summary line is printed; the counters are cumulative
    over the splits processed so far.
    """
    count_positive = 0
    count_total = 0
    for step in ['train_images', 'test_images', 'val_images', 'Inat_mini']:
        input_path_step = os.path.join(input_path, step)
        output_path_step = os.path.join(output_path, step)
        # makedirs also creates output_path itself when it does not exist yet
        os.makedirs(output_path_step, exist_ok=True)

        # Collect the (source file, destination file) pairs of this split
        jobs = []
        if step == 'Inat_mini':
            # NOTE(review): these images are read from 'Inat_mini' relative to
            # the working directory, not from input_path — confirm intended.
            for name in os.listdir('Inat_mini'):
                jobs.append((os.path.join('Inat_mini', name), os.path.join(output_path_step, name)))
        else:
            if step == 'test_images':
                class_dirs = ['mistery_category']
            else:
                class_dirs = os.listdir(input_path_step)
            for class_dir in class_dirs:
                os.makedirs(os.path.join(output_path_step, class_dir), exist_ok=True)
                for name in os.listdir(os.path.join(input_path_step, class_dir)):
                    # Destination is always derived from output_path (no
                    # string replacement on the source path).
                    jobs.append((os.path.join(input_path_step, class_dir, name),
                                 os.path.join(output_path_step, class_dir, name)))

        for src_file, dst_file in jobs:
            cropped_img, cropped = crop_img(src_file, threshold)
            cropped_img.save(dst_file)
            count_total += 1
            if cropped:
                count_positive += 1

        # Avoid a ZeroDivisionError when nothing has been processed yet
        missed_ratio = 1 - count_positive / count_total if count_total else float('nan')
        print("STEP : %s, TOTAL birds detected : %s, misdetected bird percentage  %.3f" % (step, count_positive, missed_ratio))




In [None]:
# Crop every split; a detection is used only when its score is above 0.6.
crop_images(path_raw_images, path_cropped_images, threshold = 0.6)

## Re-sample data (with the option to add pseudo-labelled data)

In [None]:
import random
from PIL import Image
def merge_dicts(a, b):
    """Merge two dictionaries, combining the values of shared keys with ``+``.

    For a key present in both inputs the result holds ``b[key] + a[key]``;
    other keys keep the value of the dictionary they come from. Neither input
    dictionary (nor the values of shared keys) is modified.

    Args:
        a: first dictionary.
        b: second dictionary.

    Returns:
        A new dictionary with the keys of both inputs.
    """
    c = b.copy()
    for k, v in a.items():
        # `c[k] + v` builds a new object; `c[k] += v` would extend b's own
        # list in place because b.copy() is only a shallow copy.
        c[k] = c[k] + v if k in c else v
    return c

def _find_split_source(main_dir, bird_class, file_bird):
    """Locate a file in the train split, then the val split, then 'Inat_mini'."""
    candidates = [
        os.path.join(main_dir, 'train_images', bird_class, file_bird),
        os.path.join(main_dir, 'val_images', bird_class, file_bird),
        os.path.join(main_dir, 'Inat_mini', file_bird),
    ]
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    raise FileNotFoundError(candidates[-1])


def ressample_train_test_data(split_percentage, main_dir, dict_inat, output_dir, seed = 41):
    """Pool the train and val images of each class and write a new split.

    Args:
        split_percentage: fraction of each class's pooled images that goes to
            the new training split; the remainder becomes the validation split.
        main_dir: dataset root containing 'train_images' and 'val_images'
            (one sub-folder per class) and, when ``dict_inat`` is used,
            'Inat_mini'.
        dict_inat: optional mapping ``class name -> list of file names`` found
            in ``main_dir/Inat_mini``; these files are added to the training
            split only. Classes missing from the mapping get no extra file.
        output_dir: root folder receiving the new 'train_images' and
            'val_images' folders (created if needed).
        seed: seed of the per-class shuffle.

    Raises:
        FileNotFoundError: when a selected file is found in none of the
            source folders.
    """
    new_dict_train, new_dict_val = {}, {}
    for bird_class in sorted(os.listdir(os.path.join(main_dir, 'val_images'))):
        # os.listdir returns entries in arbitrary order: sort before the seeded
        # shuffle so that the split is reproducible across machines.
        pooled = sorted(
            os.listdir(os.path.join(main_dir, 'train_images', bird_class))
            + os.listdir(os.path.join(main_dir, 'val_images', bird_class))
        )
        random.Random(seed).shuffle(pooled)
        cut = int(split_percentage * len(pooled))
        new_dict_train[bird_class] = pooled[:cut]
        new_dict_val[bird_class] = pooled[cut:]
        # The exclusion of '004.Groove_billed_Ani' is kept from the original
        # code; its reason is not visible here.
        if dict_inat and bird_class != '004.Groove_billed_Ani':
            # .get: a class without any pseudo-labelled image has no entry
            new_dict_train[bird_class] += dict_inat.get(bird_class, [])

    for step, files_per_class in (('train_images', new_dict_train), ('val_images', new_dict_val)):
        print(step)
        for bird_class, files in files_per_class.items():
            # Print a per-class count rather than the full file lists
            print(bird_class, ':', len(files), 'images')
            class_dir = os.path.join(output_dir, step, bird_class)
            os.makedirs(class_dir, exist_ok=True)
            for file_bird in files:
                # Byte-for-byte copy: re-saving through PIL would re-encode
                # (and, for JPEG, degrade) every image.
                shutil.copyfile(_find_split_source(main_dir, bird_class, file_bird),
                                os.path.join(class_dir, file_bird))


Create a folder with the train/val split of the resampled data

In [None]:
# Start from an empty output folder, then write a new split of the cropped
# images: 82% of each class for training, the rest for validation. No
# pseudo-labelled data is added here (dict_inat is None).
clear_folder('bird_dataset_cropped_shuffled')
ressample_train_test_data(0.82, main_dir = 'bird_dataset_cropped', dict_inat = None, output_dir= 'bird_dataset_cropped_shuffled')

{'016.Painted_Bunting': ['Painted_Bunting_0046_16535.jpg', 'Painted_Bunting_0011_16690.jpg', 'Painted_Bunting_0013_15294.jpg', 'Painted_Bunting_0040_16691.jpg', 'Painted_Bunting_0025_16722.jpg', 'Painted_Bunting_0053_16404.jpg', 'Painted_Bunting_0078_16565.jpg', 'Painted_Bunting_0056_16599.jpg', 'Painted_Bunting_0102_16642.jpg', 'Painted_Bunting_0100_16735.jpg', 'Painted_Bunting_0076_16765.jpg', 'Painted_Bunting_0079_15197.jpg', 'Painted_Bunting_0087_15232.jpg', 'Painted_Bunting_0060_15224.jpg', 'Painted_Bunting_0093_15212.jpg', 'Painted_Bunting_0029_16530.jpg', 'Painted_Bunting_0049_16869.jpg', 'Painted_Bunting_0058_16719.jpg', 'Painted_Bunting_0069_16462.jpg', 'Painted_Bunting_0066_15241.jpg', 'Painted_Bunting_0061_16930.jpg', 'Painted_Bunting_0001_16585.jpg', 'Painted_Bunting_0054_16711.jpg', 'Painted_Bunting_0019_15231.jpg', 'Painted_Bunting_0071_15209.jpg', 'Painted_Bunting_0070_16515.jpg', 'Painted_Bunting_0027_16536.jpg', 'Painted_Bunting_0086_16540.jpg', 'Painted_Bunting_0084_1

## Train model on the cropped + re-sampled data

In [None]:
# Run main.py on the cropped, re-split dataset: 100 epochs, batch size 10, model 'resnext101_32x8d'.
!python main.py --epochs 100 --data 'bird_dataset_cropped_shuffled' --batch-size 10 --modelname 'resnext101_32x8d'

## Load pretrained model and generate pseudo labels


In [None]:
### Load the trained model and generate predictions
# NOTE(review): `os` is used here before this cell's own `import os`, so this
# line relies on the import made in an earlier cell.
# CUDA_LAUNCH_BLOCKING=1 is presumably a debugging aid (synchronous CUDA
# kernel launches) — confirm it is still needed.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
import argparse
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets
from torch.autograd import Variable
from model import Net
import torchvision.transforms as transforms
from PIL import Image





# Class names indexed by the class index predicted by the classifier
# (generate_dict_pred maps a prediction to labels[predicted_class]).
# NOTE(review): the order presumably has to match the class order used at
# training time — verify against main.py.
labels = ['004.Groove_billed_Ani',
 '009.Brewer_Blackbird',
 '010.Red_winged_Blackbird',
 '011.Rusty_Blackbird',
 '012.Yellow_headed_Blackbird',
 '013.Bobolink',
 '014.Indigo_Bunting',
 '015.Lazuli_Bunting',
 '016.Painted_Bunting',
 '019.Gray_Catbird',
 '020.Yellow_breasted_Chat',
 '021.Eastern_Towhee',
 '023.Brandt_Cormorant',
 '026.Bronzed_Cowbird',
 '028.Brown_Creeper',
 '029.American_Crow',
 '030.Fish_Crow',
 '031.Black_billed_Cuckoo',
 '033.Yellow_billed_Cuckoo',
 '034.Gray_crowned_Rosy_Finch']


def generate_dict_pred(path_dataset_inat, confidence_threshold, model_id, modelname):
    """Pseudo-label a folder of images with a trained classifier.

    Args:
        path_dataset_inat: folder containing the images to label.
        confidence_threshold: minimum softmax probability of the predicted
            class for an image to be kept.
        model_id: path of the saved ``state_dict`` to load.
        modelname: architecture name passed to ``Net``; it also selects the
            input resolution and must be one of the names listed in
            ``dict_modelname_size``.

    Returns:
        A dictionary ``class name -> list of file names`` holding only the
        images predicted with enough confidence. Classes without any
        confident prediction have no entry.
    """
    dict_modelname_size = {'tf_efficientnet_b4_ns' : 380, 'tf_efficientnet_b5_ns' : 456, 'swsl_resnext101_32x8d' : 224, 'ig_resnext101_32x32d' : 224, 'resnext101_32x8d' : 224}
    size_ = dict_modelname_size[modelname]

    data_transforms = transforms.Compose([
        transforms.Resize((size_, size_)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    print("MODEL SELECTED  : ", model_id)

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host
    state_dict = torch.load(model_id, map_location=device)
    custom_model = Net(modelname)
    custom_model.load_state_dict(state_dict)
    custom_model.to(device)
    custom_model.eval()
    print('Using GPU' if use_cuda else 'Using CPU')

    dict_predicted = {}
    # Inference only: no autograd graph is needed
    with torch.no_grad():
        for f in os.listdir(path_dataset_inat):
            # Normalize above is defined for 3 channels: force RGB
            with Image.open(os.path.join(path_dataset_inat, f)) as img:
                data = data_transforms(img.convert('RGB'))
            data = data.unsqueeze(0).to(device)  # add the batch dimension
            proba = torch.nn.functional.softmax(custom_model(data), dim=1)
            confidence, predicted_class = proba.max(dim=1)
            if confidence.item() >= confidence_threshold:
                class_name = labels[predicted_class.item()]
                dict_predicted.setdefault(class_name, []).append(f)
    return dict_predicted


I selected model 13, which was trained on the cropped and resampled images.

# Generate pseudo labels as a dictionary

In [None]:
# Keep only the images whose predicted class has a softmax confidence of at least 0.90.
dict_predicted = generate_dict_pred('bird_dataset_cropped/Inat_mini', 0.90, 'experiment/model.pth', 'resnext101_32x8d')

## Resample the data, now made up of **true** data + **pseudo-labelled** data

In [None]:
# Same 82% / 18% split as before, written to a separate folder; the
# pseudo-labelled images in dict_predicted are added to the training split.
clear_folder('bird_dataset_cropped_shuffled_pl')
ressample_train_test_data(0.82, main_dir = 'bird_dataset_cropped', dict_inat = dict_predicted, output_dir = 'bird_dataset_cropped_shuffled_pl')

## Finally, I retrain a model on the true + pseudo-labelled data. Results should hopefully be better.

In [None]:
# Run main.py on the true + pseudo-labelled dataset: 100 epochs, batch size 8, model 'tf_efficientnet_b4_ns'.
!python main.py --epochs 100 --data 'bird_dataset_cropped_shuffled_pl' --batch-size 8 --modelname 'tf_efficientnet_b4_ns'

EfficientNet(
  (conv_stem): Conv2dSame(3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  (act1): SiLU(inplace=True)
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act1): SiLU(inplace=True)
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (act2): Identity()
      )
      (1): DepthwiseSeparableConv

## Evaluate model

In [None]:
# NOTE(review): the preceding training cell uses --modelname 'tf_efficientnet_b4_ns'
# while this cell evaluates with 'resnext101_32x8d' — confirm that
# experiment/model_pl.pth was trained with the architecture named here.
!python evaluate.py --model experiment/model_pl.pth --data 'bird_dataset_cropped' --modelname 'resnext101_32x8d'