Fine-tuning a pretrained Faster R-CNN for wheat head detection. Based on the torchvision object detection fine-tuning tutorial: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html

In [1]:
# Environment setup: install Cython, then pycocotools from the cocoapi
# repository (the wheel is built from source, see the log below).
!pip install cython
!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
# Copy the helper modules from the `rcnnutils` Kaggle input into the working
# directory so they can be imported; the training cell imports `engine` and
# `utils` from here.
!cp /kaggle/input/rcnnutils/engine.py .
!cp /kaggle/input/rcnnutils/utils.py .
!cp /kaggle/input/rcnnutils/coco_eval.py .
!cp /kaggle/input/rcnnutils/coco_utils.py .
!cp /kaggle/input/rcnnutils/transforms.py .

Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-ntcgq08f
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-ntcgq08f
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l- \ | / - \ | / done
[?25h  Created wheel for pycocotools: filename=pycocotools-2.0-cp37-cp37m-linux_x86_64.whl size=279395 sha256=6179d3a19c1cefed6b964cfd24e124b9afd86319fedff02b39c2cc1d8cb9deb7
  Stored in directory: /tmp/pip-ephem-wheel-cache-7co1vyuf/wheels/e2/6b/1d/344ac773c7495ea0b85eb228bc66daec7400a143a92d36b7b1
Successfully built pycocotools
Installing collected packages: pycocotools
Successfully installed pycocotools-2.0


In [2]:
import os
import ast
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import cv2

import torch
from PIL import Image
from tqdm.auto import tqdm

import albumentations

import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from matplotlib import pyplot as plt

In [3]:
# List the contents of the competition input directory.
!ls /kaggle/input/global-wheat-detection

sample_submission.csv  test  train  train.csv


In [4]:
# Constants
# Root of the competition data: `train.csv` and the `train/` image folder
# are both resolved relative to this directory.
BASE_DIR = '/kaggle/input/global-wheat-detection'

In [5]:
# Load the annotation table. As the preview shows, each row describes a single
# bounding box, so an `image_id` repeats once per annotated box.
train_df = pd.read_csv(os.path.join(BASE_DIR, 'train.csv'))
train_df.head()

Unnamed: 0,image_id,width,height,bbox,source
0,b6ab77fd7,1024,1024,"[834.0, 222.0, 56.0, 36.0]",usask_1
1,b6ab77fd7,1024,1024,"[226.0, 548.0, 130.0, 58.0]",usask_1
2,b6ab77fd7,1024,1024,"[377.0, 504.0, 74.0, 160.0]",usask_1
3,b6ab77fd7,1024,1024,"[834.0, 95.0, 109.0, 107.0]",usask_1
4,b6ab77fd7,1024,1024,"[26.0, 144.0, 124.0, 117.0]",usask_1


In [6]:
# Expand the stringified `bbox` column ("[x_min, y_min, width, height]") into
# numeric columns and add the area of every box.
#
# The `bbox` guard makes the cell safe to re-run: once the column has been
# expanded and dropped there is nothing left to parse. The result is built as
# a new frame (no assignment onto a column slice), which avoids pandas'
# SettingWithCopyWarning.
if 'bbox' in train_df.columns:
    bbox_values = pd.DataFrame(
        [ast.literal_eval(raw_bbox) for raw_bbox in train_df['bbox'].tolist()],
        columns=['x_min', 'y_min', 'width', 'height'],
        index=train_df.index,
    )
    # Note: `width`/`height` from here on are the *box* dimensions; the
    # image-level width/height columns of the raw CSV are intentionally dropped.
    train_df = (
        train_df[['image_id', 'source']]
        .join(bbox_values)
        .assign(area=lambda frame: frame['width'] * frame['height'])
    )
train_df.head()

Unnamed: 0,image_id,source,x_min,y_min,width,height,area
0,b6ab77fd7,usask_1,834.0,222.0,56.0,36.0,2016.0
1,b6ab77fd7,usask_1,226.0,548.0,130.0,58.0,7540.0
2,b6ab77fd7,usask_1,377.0,504.0,74.0,160.0,11840.0
3,b6ab77fd7,usask_1,834.0,95.0,109.0,107.0,11663.0
4,b6ab77fd7,usask_1,26.0,144.0,124.0,117.0,14508.0


In [7]:
# Debug aid: uncomment to keep only the first 2000 bounding-box rows for a quick run.
# train_df = train_df.iloc[:2000]

In [8]:
class WheatDataset(Dataset):
    """Detection dataset yielding one ``(image, target)`` pair per image id.

    Args:
        df: Annotation frame with one row per bounding box and the columns
            ``image_id``, ``x_min``, ``y_min``, ``width``, ``height``, ``area``.
        transforms: Optional callable invoked as ``transforms(image=image)``
            that returns a mapping with an ``'image'`` entry. ``None``
            disables it.
    """

    def __init__(self, df, transforms):
        self.df = df
        # Derive the ids from the frame that was passed in (not from the
        # notebook-level `train_df`), so a filtered or subset frame yields a
        # dataset of matching length.
        self.image_ids = df['image_id'].unique()
        self.transforms = transforms

    def __len__(self):
        """Return the number of distinct images in the annotation frame."""
        return len(self.image_ids)

    def __getitem__(self, index):
        """Load image ``index`` and build its detection target.

        Returns:
            tuple: ``(image, target)`` where ``image`` is a float32 tensor in
            channel-first layout scaled to ``[0, 1]`` and ``target`` is a dict
            with ``boxes`` (``[N, 4]`` as x_min, y_min, x_max, y_max),
            ``labels``, ``image_id``, ``area`` and ``iscrowd``.

        Raises:
            OSError: If the image file is missing or cannot be decoded.
        """
        image_id = self.image_ids[index]

        image_path = os.path.join(BASE_DIR, 'train', f'{image_id}.jpg')
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        # Convert from HWC to CHW, the per-image layout the model consumes.
        image = torch.from_numpy(np.transpose(image, (2, 0, 1)))

        # All annotation rows (one per wheat head) belonging to this image.
        rows = self.df[self.df['image_id'] == image_id]
        n_objects = len(rows)

        # Build [x_min, y_min, x_max, y_max] for every box in one shot. The sum
        # is done in float64 before the cast so values match a per-row loop.
        top_left = rows[['x_min', 'y_min']].to_numpy(dtype=np.float64)
        size = rows[['width', 'height']].to_numpy(dtype=np.float64)
        corners = np.hstack([top_left, top_left + size])
        # reshape keeps the [N, 4] shape even when an image has no boxes.
        boxes = torch.as_tensor(corners, dtype=torch.float32).reshape(-1, 4)

        target = {
            'boxes': boxes,
            # Only one foreground class (wheat head), encoded as label 1.
            'labels': torch.ones((n_objects, ), dtype=torch.int64),
            'image_id': torch.tensor([index]),
            'area': torch.tensor(rows['area'].tolist(), dtype=torch.float32),
            # Treat every instance as a regular (non-crowd) object.
            'iscrowd': torch.zeros((n_objects, ), dtype=torch.int64),
        }

        if self.transforms is not None:
            # NOTE(review): the transform receives the CHW tensor and the boxes
            # are not passed along, so geometric augmentations would leave the
            # boxes out of sync with the image -- confirm before enabling.
            image = self.transforms(image=image)['image']

        return image, target

In [9]:
def get_model(pre_trained=True, num_classes=2):
    """Build a Faster R-CNN (ResNet-50 FPN) with a fresh box-prediction head.

    Args:
        pre_trained: Forwarded as ``pretrained`` to
            ``fasterrcnn_resnet50_fpn``; when true the COCO weights are loaded.
        num_classes: Number of output classes of the new head, background
            included. Defaults to 2 (wheat head + background).

    Returns:
        The detection model with ``roi_heads.box_predictor`` replaced.
    """
    # load a model pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pre_trained)

    # the new head must accept the same feature size as the one it replaces
    in_features = model.roi_heads.box_predictor.cls_score.in_features

    # replace the pre-trained head with a new one sized for `num_classes`
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

In [10]:
# Not used for now: these are geometric augmentations, so the bounding boxes
# and their locations would have to be transformed together with the image.
def get_train_transforms():
    """Return the albumentations augmentation pipeline for training images."""
    affine_jitter = albumentations.ShiftScaleRotate(
        shift_limit=0.0625,
        scale_limit=0.15,
        rotate_limit=10,
        p=0.9,
    )
    horizontal_flip = albumentations.HorizontalFlip(p=0.5)
    vertical_flip = albumentations.VerticalFlip(p=0.5)
    return albumentations.Compose([affine_jitter, horizontal_flip, vertical_flip])

In [11]:
from engine import train_one_epoch, evaluate
import utils

# Seed for the train/hold-out split and size of the hold-out set. Seeding a
# dedicated generator keeps the split identical across kernel restarts
# without touching the global RNG state.
SPLIT_SEED = 42
N_HOLDOUT = 50

# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# our dataset has two classes only - background and wheat heads
num_classes = 2

# use our dataset and defined transformations
dataset = WheatDataset(train_df, None)
dataset_test = WheatDataset(train_df, None)

# split the dataset in train and test set (last N_HOLDOUT shuffled indices are held out)
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
indices = torch.randperm(len(dataset), generator=split_generator).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-N_HOLDOUT])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-N_HOLDOUT:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=16, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=16, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

# get the model using our helper function
model = get_model()

# move model to the right device
model.to(device)

# construct an optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler: multiply the learning rate by 0.1 every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

# let's train it for 25 epochs
num_epochs = 25

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the held-out images
    evaluate(model, data_loader_test, device=device)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


HBox(children=(FloatProgress(value=0.0, max=167502836.0), HTML(value='')))


Epoch: [0]  [  0/208]  eta: 0:35:25  lr: 0.000029  loss: 4.6920 (4.6920)  loss_classifier: 0.7300 (0.7300)  loss_box_reg: 0.3613 (0.3613)  loss_objectness: 3.3775 (3.3775)  loss_rpn_box_reg: 0.2232 (0.2232)  time: 10.2164  data: 4.8588  max mem: 11464
Epoch: [0]  [ 10/208]  eta: 0:09:13  lr: 0.000270  loss: 2.9558 (3.0932)  loss_classifier: 0.6965 (0.6756)  loss_box_reg: 0.4248 (0.4273)  loss_objectness: 1.6272 (1.7907)  loss_rpn_box_reg: 0.2003 (0.1996)  time: 2.7971  data: 0.5523  max mem: 11729
Epoch: [0]  [ 20/208]  eta: 0:07:45  lr: 0.000512  loss: 1.6672 (2.3873)  loss_classifier: 0.5643 (0.6060)  loss_box_reg: 0.4509 (0.4422)  loss_objectness: 0.4916 (1.1433)  loss_rpn_box_reg: 0.1925 (0.1958)  time: 2.0869  data: 0.1176  max mem: 11729
Epoch: [0]  [ 30/208]  eta: 0:06:53  lr: 0.000753  loss: 1.4668 (2.0524)  loss_classifier: 0.4994 (0.5547)  loss_box_reg: 0.4636 (0.4495)  loss_objectness: 0.2896 (0.8544)  loss_rpn_box_reg: 0.1853 (0.1938)  time: 2.0656  data: 0.1062  max mem: 

In [12]:
# Persist only the model weights (state dict) to the working directory.
torch.save(model.state_dict(), 'fasterrcnn_25.pth')