In [1]:
%%capture
!pip install --upgrade wandb
!wandb login 3da7a23df9fd940d985adf808de2b09ceb85f15b

import wandb
wandb.init(project="global-wheat-detection", name='FasterRCNN with ResNet101 backbone: fold2')

In [2]:
%%capture
!pip install cython
!pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
!cp /kaggle/input/rcnnutilswithwandb/engine.py .
!cp /kaggle/input/rcnnutilswithwandb/utils.py .
!cp /kaggle/input/rcnnutilswithwandb/coco_eval.py .
!cp /kaggle/input/rcnnutilswithwandb/coco_utils.py .
!cp /kaggle/input/rcnnutilswithwandb/transforms.py .

In [3]:
import os
import ast
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import cv2

import torch
from PIL import Image
from tqdm.auto import tqdm

import albumentations
from albumentations.pytorch.transforms import ToTensorV2

from torch import nn
import torchvision
import torch.utils.data as data_utils
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from matplotlib import pyplot as plt
import matplotlib.patches as patches

import utils
from engine import train_one_epoch, evaluate

In [4]:
# Constants
TEST_DIR = '/kaggle/input/global-wheat-detection/test'
BASE_DIR = '/kaggle/input/gwdaugmented/train'
BATCH_SIZE = 2
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# our dataset has two classes only - background and wheat heads
N_CLASSES = 2
N_EPOCHS = 5

In [5]:
train_df = pd.read_csv(os.path.join('/kaggle/input/gwdaugmented/', 'train.csv'))
train_df

Unnamed: 0,image_id,x_min,y_min,x_max,y_max,width,height,area,source,kfold
0,b6ab77fd7,834.0,222.0,890.0,258.0,56.0,36.0,2016.0,usask_1,2
1,b6ab77fd7,226.0,548.0,356.0,606.0,130.0,58.0,7540.0,usask_1,2
2,b6ab77fd7,377.0,504.0,451.0,664.0,74.0,160.0,11840.0,usask_1,2
3,b6ab77fd7,834.0,95.0,943.0,202.0,109.0,107.0,11663.0,usask_1,2
4,b6ab77fd7,26.0,144.0,150.0,261.0,124.0,117.0,14508.0,usask_1,2
...,...,...,...,...,...,...,...,...,...,...
295533,5e0747034_aug,876.0,619.0,960.0,714.0,84.0,95.0,7980.0,arvalis_2,1
295534,5e0747034_aug,625.0,549.0,732.0,631.0,107.0,82.0,8774.0,arvalis_2,1
295535,5e0747034_aug,749.0,228.0,890.0,299.0,141.0,71.0,10011.0,arvalis_2,1
295536,5e0747034_aug,410.0,13.0,594.0,92.0,184.0,79.0,14536.0,arvalis_2,1


In [6]:
train_df

Unnamed: 0,image_id,x_min,y_min,x_max,y_max,width,height,area,source,kfold
0,b6ab77fd7,834.0,222.0,890.0,258.0,56.0,36.0,2016.0,usask_1,2
1,b6ab77fd7,226.0,548.0,356.0,606.0,130.0,58.0,7540.0,usask_1,2
2,b6ab77fd7,377.0,504.0,451.0,664.0,74.0,160.0,11840.0,usask_1,2
3,b6ab77fd7,834.0,95.0,943.0,202.0,109.0,107.0,11663.0,usask_1,2
4,b6ab77fd7,26.0,144.0,150.0,261.0,124.0,117.0,14508.0,usask_1,2
...,...,...,...,...,...,...,...,...,...,...
295533,5e0747034_aug,876.0,619.0,960.0,714.0,84.0,95.0,7980.0,arvalis_2,1
295534,5e0747034_aug,625.0,549.0,732.0,631.0,107.0,82.0,8774.0,arvalis_2,1
295535,5e0747034_aug,749.0,228.0,890.0,299.0,141.0,71.0,10011.0,arvalis_2,1
295536,5e0747034_aug,410.0,13.0,594.0,92.0,184.0,79.0,14536.0,arvalis_2,1


In [7]:
class WheatDataset(Dataset):
    
    def __init__(self, df, folds, transforms=None):
        self.df = df[df.kfold.isin(folds)].reset_index(drop=True)
        self.image_ids = self.df['image_id'].unique()
        self.transforms = transforms

    def __len__(self):
        return len(self.image_ids)
    
    def __getitem__(self, index):
        image_id = self.image_ids[index]
        image = cv2.imread(os.path.join(BASE_DIR, 'train', f'{image_id}.jpg'), cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        # Convert from NHWC to NCHW as pytorch expects images in NCHW format
        image = np.transpose(image, (2, 0, 1))
        image = torch.from_numpy(image)
        
        # Get bbox coordinates for each wheat head(s)
        bboxes_df = self.df[self.df['image_id'] == image_id]
        boxes, areas = [], []
        n_objects = len(bboxes_df)  # Number of wheat heads in the given image

        for i in range(n_objects):
            x_min = bboxes_df.iloc[i]['x_min']
            x_max = bboxes_df.iloc[i]['x_max']
            y_min = bboxes_df.iloc[i]['y_min']
            y_max = bboxes_df.iloc[i]['y_max']

            boxes.append([x_min, y_min, x_max, y_max])
            areas.append(bboxes_df.iloc[i]['area'])

        boxes = torch.as_tensor(boxes, dtype=torch.int64)
        
        # Get the labels. We have only one class (wheat head)
        labels = torch.ones((n_objects, ), dtype=torch.int64)
        
        areas = torch.as_tensor(areas)
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((n_objects, ), dtype=torch.int64)
        
        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([index]),
            'area': areas,
            'iscrowd': iscrowd
        }
        
        if self.transforms:
            result_aug = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = result_aug['image'].float()
            
            target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*result_aug['bboxes'])))).permute(1, 0)

        return image, target

In [8]:
def get_model(pre_trained=True):
    
    # Reference: https://stackoverflow.com/questions/58362892/resnet-18-as-backbone-in-faster-r-cnn
    resnet_net = torchvision.models.resnet101(pretrained=True) 
    modules = list(resnet_net.children())[:-2]

    backbone = nn.Sequential(*modules)
    backbone.out_channels = 2048

    # let's make the RPN generate 5 x 3 anchors per spatial
    # location, with 5 different sizes and 3 different aspect
    # ratios. We have a Tuple[Tuple[int]] because each feature
    # map could potentially have different sizes and
    # aspect ratios
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=N_CLASSES,
                       rpn_anchor_generator=anchor_generator)
    return model

In [9]:
%%capture
# get the model using our helper function
model = get_model()

In [10]:
num_classes = 2
train_dataset = WheatDataset(train_df, folds=[0, 1, 3, 4])
train_loader = data_utils.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, collate_fn=utils.collate_fn)

val_dataset = WheatDataset(train_df, folds=[2])
val_loader = data_utils.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, collate_fn=utils.collate_fn)


# move model to the right device
model.to(DEVICE)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

for epoch in range(N_EPOCHS):
    # train for one epoch, printing every 100 iterations
    train_one_epoch(model, optimizer, train_loader, DEVICE, epoch, print_freq=100)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation dataset
    evaluate(model, val_loader, device=DEVICE)

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


Epoch: [0]  [   0/2699]  eta: 2:22:49  lr: 0.000010  loss: 1.7874 (1.7874)  loss_classifier: 0.6396 (0.6396)  loss_box_reg: 0.1284 (0.1284)  loss_objectness: 0.7402 (0.7402)  loss_rpn_box_reg: 0.2793 (0.2793)  time: 3.1752  data: 0.9778  max mem: 5070




Epoch: [0]  [ 100/2699]  eta: 0:23:33  lr: 0.000509  loss: 1.1918 (1.3667)  loss_classifier: 0.4222 (0.4635)  loss_box_reg: 0.2789 (0.1895)  loss_objectness: 0.3409 (0.5080)  loss_rpn_box_reg: 0.1850 (0.2057)  time: 0.5113  data: 0.0103  max mem: 6482
Epoch: [0]  [ 200/2699]  eta: 0:22:05  lr: 0.001009  loss: 1.0755 (1.2632)  loss_classifier: 0.3712 (0.4250)  loss_box_reg: 0.2984 (0.2353)  loss_objectness: 0.2696 (0.4082)  loss_rpn_box_reg: 0.1620 (0.1947)  time: 0.5142  data: 0.0105  max mem: 6482
Epoch: [0]  [ 300/2699]  eta: 0:20:59  lr: 0.001508  loss: 1.1828 (1.2219)  loss_classifier: 0.3860 (0.4070)  loss_box_reg: 0.4049 (0.2753)  loss_objectness: 0.2233 (0.3559)  loss_rpn_box_reg: 0.1530 (0.1837)  time: 0.5282  data: 0.0118  max mem: 6482
Epoch: [0]  [ 400/2699]  eta: 0:20:00  lr: 0.002008  loss: 1.0746 (1.1901)  loss_classifier: 0.3694 (0.3971)  loss_box_reg: 0.4010 (0.3054)  loss_objectness: 0.1602 (0.3134)  loss_rpn_box_reg: 0.1220 (0.1742)  time: 0.5195  data: 0.0101  max me

In [11]:
!rm -rf *

In [12]:
torch.save(model.state_dict(), 'fasterrcnn_resnet101_fold2.pth')

In [13]:
def get_bbox(bboxes, col, color='white'):
    
    for i in range(len(bboxes)):
        # Create a Rectangle patch
        rect = patches.Rectangle(
            (bboxes[i][0], bboxes[i][1]),
            bboxes[i][2] - bboxes[i][0], 
            bboxes[i][3] - bboxes[i][1], 
            linewidth=2, 
            edgecolor=color, 
            facecolor='none')

        # Add the patch to the Axes
        col.add_patch(rect)

In [14]:
for img in os.listdir(TEST_DIR)[:5]:
    image = cv2.imread(os.path.join(TEST_DIR, img), cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0
    preds = model([torch.from_numpy(np.transpose(image, (2, 0, 1))).to(device)])[0]
    
    pred_bboxes = preds['boxes'].cpu().detach().numpy()
    pred_scores = preds['scores'].cpu().detach().numpy()
    
    mask = pred_scores >= 0.4
    pred_scores = pred_scores[mask]
    pred_bboxes = pred_bboxes[mask]
    
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
    get_bbox(pred_bboxes, ax, color='red')
    ax.imshow(image)

NameError: name 'device' is not defined