In [1]:
import pandas as pd
import numpy as np
import cv2
import os
import re
import ast
from PIL import Image

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt

%load_ext lab_black

## Data Loading and formatting

In [2]:
# Reference Kaggle notebooks:
"https://www.kaggle.com/code/artgor/object-detection-with-pytorch-lightning"
"https://www.kaggle.com/code/pestipeti/pytorch-starter-fasterrcnn-train/notebook"

'https://www.kaggle.com/code/pestipeti/pytorch-starter-fasterrcnn-train/notebook'

In [3]:
# Local data locations (on Kaggle: DIR_INPUT = '/kaggle/input/global-wheat-detection').
# Plain string literals: there is nothing to interpolate, so no f-prefix is needed.
TRAIN_DIR = "../data/wheat/train"
TEST_DIR = "../data/wheat/test/"

In [4]:
# Load the training annotations: one row per bounding box, with the columns
# image_id, width, height, bbox and source (see the preview in the next cell).
df = pd.read_csv("../data/wheat/train.csv")

In [5]:
df.tail()

Unnamed: 0,image_id,width,height,bbox,source
147788,5e0747034,1024,1024,"[64.0, 619.0, 84.0, 95.0]",arvalis_2
147789,5e0747034,1024,1024,"[292.0, 549.0, 107.0, 82.0]",arvalis_2
147790,5e0747034,1024,1024,"[134.0, 228.0, 141.0, 71.0]",arvalis_2
147791,5e0747034,1024,1024,"[430.0, 13.0, 184.0, 79.0]",arvalis_2
147792,5e0747034,1024,1024,"[875.0, 740.0, 94.0, 61.0]",arvalis_2


In [6]:
def PIL_read(path: str) -> np.ndarray:
    """Read an image file with Pillow and return its pixels as a NumPy array.

    Args:
        path: Location of the image file on disk.

    Returns:
        The array produced by ``np.asarray`` on the opened image. No mode
        conversion is applied, so shape and dtype follow the file's own mode.
    """
    # Pillow opens files lazily; the context manager makes sure the underlying
    # file handle is released even if decoding raises.
    with Image.open(path) as img:
        # Materialise the pixels while the file is still open.
        return np.asarray(img)

### Train/Validate Split
The last 20% of the unique image IDs are held out for validation.

In [7]:
# Unique image ids in order of first appearance; the split below is done per
# image (not per box) so that all boxes of an image end up on the same side.
image_list = df["image_id"].unique()
image_list.size

3373

In [8]:
# Fraction of the unique images held out (from the end of the list) for validation.
VALID_FRACTION = 0.2

# Derive the count from the data instead of hard-coding the number of images.
n_valid = round(len(image_list) * VALID_FRACTION)

# Split on a positive index: a negative slice such as `image_list[-n_valid:]`
# would return *every* image when n_valid is 0.
split_at = len(image_list) - n_valid
train_image_list = image_list[:split_at]
valid_image_list = image_list[split_at:]
print(
    f"{len(train_image_list)} training images and {len(valid_image_list)} validation images"
)

2698 training images and 675 validation images


In [9]:
# train_paths = [
#     os.path.join(TRAIN_DIR, f"{filename}.jpg") for filename in train_image_list
# ]
# valid_paths = [
#     os.path.join(TRAIN_DIR, f"{filename}.jpg") for filename in valid_image_list
# ]
# train_images = [PIL_read(f) for f in train_paths]
# valid_images = [PIL_read(f) for f in valid_paths]

### Bounding boxes
Let's split the `bbox` column into four numeric columns (`x`, `y`, `w`, `h`) of our dataframe.

In [11]:
df["bbox"]

0          [834.0, 222.0, 56.0, 36.0]
1         [226.0, 548.0, 130.0, 58.0]
2         [377.0, 504.0, 74.0, 160.0]
3         [834.0, 95.0, 109.0, 107.0]
4         [26.0, 144.0, 124.0, 117.0]
                     ...             
147788      [64.0, 619.0, 84.0, 95.0]
147789    [292.0, 549.0, 107.0, 82.0]
147790    [134.0, 228.0, 141.0, 71.0]
147791     [430.0, 13.0, 184.0, 79.0]
147792     [875.0, 740.0, 94.0, 61.0]
Name: bbox, Length: 147793, dtype: object

In [12]:
# Strip the literal square brackets so each entry becomes "x, y, w, h".
bbox_text = df["bbox"].str.replace("[", "", regex=False)
bbox_text = bbox_text.str.replace("]", "", regex=False)
df["bbox"] = bbox_text

# One column per comma-separated field; entries that are not numeric become NaN.
bbox_fields = df["bbox"].str.split(",", expand=True)
df[["x", "y", "w", "h"]] = bbox_fields.apply(pd.to_numeric, errors="coerce")

In [13]:
# The raw string column is redundant once x/y/w/h hold the parsed values.
# Rebinding (rather than inplace=True) together with errors="ignore" keeps this
# cell safe to run again after the column has already been removed.
df = df.drop(columns="bbox", errors="ignore")

In [14]:
df.head()

Unnamed: 0,image_id,width,height,source,x,y,w,h
0,b6ab77fd7,1024,1024,usask_1,834.0,222.0,56.0,36.0
1,b6ab77fd7,1024,1024,usask_1,226.0,548.0,130.0,58.0
2,b6ab77fd7,1024,1024,usask_1,377.0,504.0,74.0,160.0
3,b6ab77fd7,1024,1024,usask_1,834.0,95.0,109.0,107.0
4,b6ab77fd7,1024,1024,usask_1,26.0,144.0,124.0,117.0


### Plot some samples

In [15]:
# selected_image_nr = 1
# image = train_images[selected_image_nr]
# image_id = image_list[selected_image_nr]
# boxes = df[df["image_id"] == image_id][["x", "y", "w", "h"]].astype(int).values
# boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
# boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

# fig, ax = plt.subplots(1, 1, figsize=(16, 8))
# for box in boxes:
#     cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (220, 0, 0), 3)

# ax.set_axis_off()
# ax.imshow(image)

In [16]:
# # clear memory
# del train_images
# del valid_images

In [17]:
# Row masks selecting the annotations that belong to each image subset.
in_train = df["image_id"].isin(train_image_list)
in_valid = df["image_id"].isin(valid_image_list)

train_df, valid_df = df[in_train], df[in_valid]
print(f"Training_df shape: {train_df.shape};  Validation_df shape: {valid_df.shape}")

Training_df shape: (122577, 8);  Validation_df shape: (25216, 8)


## Create Wheat Dataset

In [18]:
class WheatDataset(Dataset):
    """Detection dataset that serves one image together with all of its boxes.

    Args:
        dataframe: Annotation table with one row per box and the columns
            ``image_id``, ``x``, ``y``, ``w`` and ``h``.
        image_dir: Directory that contains the ``<image_id>.jpg`` files.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` and expected to
            return a mapping with ``"image"`` and ``"bboxes"`` entries.
    """

    def __init__(self, dataframe, image_dir, transforms=None) -> None:
        super().__init__()
        self.image_ids = dataframe["image_id"].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, target, image_id)`` for the image at ``index``.

        ``target`` is a dict with the keys ``boxes`` (float32 tensor of shape
        ``(N, 4)`` holding x_min, y_min, x_max, y_max), ``labels``, ``img_id``,
        ``area`` and ``iscrowd``. The image is scaled to ``[0, 1]`` float32; it
        is whatever ``transforms`` returns when transforms are set, otherwise
        the NumPy array as read from disk.
        """
        image_id = self.image_ids[index]
        # All annotation rows that belong to this image.
        records = self.df[self.df["image_id"] == image_id]

        image = PIL_read(f"{self.image_dir}/{image_id}.jpg").astype(np.float32)
        image /= 255.0

        # (x, y, w, h) -> (x_min, y_min, x_max, y_max), on a private float32 copy.
        xywh = records[["x", "y", "w", "h"]].to_numpy(dtype=np.float32)
        boxes = xywh.copy()
        boxes[:, 2] += boxes[:, 0]
        boxes[:, 3] += boxes[:, 1]

        area = torch.as_tensor(xywh[:, 2] * xywh[:, 3], dtype=torch.float32)

        # Single foreground class, so every box gets label 1.
        labels = torch.ones((records.shape[0],), dtype=torch.int64)

        # Suppose all instances are not crowd.
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels)
            image = sample["image"]
            transformed_boxes = sample["bboxes"]
        else:
            transformed_boxes = boxes

        # Always hand back a float32 (N, 4) tensor: callers move every target
        # value to the device with `.to(...)`, and the reshape keeps the
        # trailing dimension even when no boxes are left.
        boxes_tensor = torch.as_tensor(transformed_boxes, dtype=torch.float32)
        boxes_tensor = boxes_tensor.reshape(-1, 4)

        target = {
            "boxes": boxes_tensor,
            "labels": labels,
            "img_id": torch.tensor([index]),
            "area": area,
            "iscrowd": iscrowd,
        }
        return image, target, image_id

    def __len__(self) -> int:
        """Number of distinct images in the annotation table."""
        return self.image_ids.shape[0]

### Albumentations

In [19]:
def get_train_transform():
    """Build the training augmentation pipeline.

    Returns:
        An ``A.Compose`` that flips the image with probability 0.5 and then
        applies ``ToTensorV2``. Boxes are passed in ``pascal_voc`` format and
        kept in sync with the ``labels`` field.
    """
    return A.Compose(
        # Pass the probability by keyword: in albumentations releases whose
        # first positional parameter is `always_apply`, `A.Flip(0.5)` flips
        # every image instead of half of them.
        [A.Flip(p=0.5), ToTensorV2(p=1.0)],
        bbox_params={"format": "pascal_voc", "label_fields": ["labels"]},
    )


def get_valid_transform():
    """Build the validation pipeline: tensor conversion only, no augmentation.

    Returns:
        An ``A.Compose`` containing just ``ToTensorV2``, with boxes handled in
        ``pascal_voc`` format and tied to the ``labels`` field.
    """
    bbox_config = {"format": "pascal_voc", "label_fields": ["labels"]}
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, bbox_params=bbox_config)

## Work with the model

In [20]:
# Alternative spelling using the `weights=` argument, kept for reference:
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
#     weights="FasterRCNN_ResNet50_FPN_Weights.DEFAULT"
# )
# Load a Faster R-CNN (ResNet-50 + FPN backbone) pre-trained on COCO.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision releases in
# favour of `weights=`; confirm the installed version before switching.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)



In [21]:
# Two outputs: the wheat class plus the implicit background class.
num_classes = 2  # 1 class (wheat) + background

# Width of the feature vector that feeds the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Swap the pre-trained box head for a fresh one sized for our classes.
model.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features, num_classes=num_classes
)

In [22]:
class Averager:
    """Accumulate values and expose their running arithmetic mean."""

    def __init__(self):
        # Running sum of everything sent so far and how many values were sent.
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the observations so far, or 0 when nothing was sent yet."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget all observations and start over."""
        self.current_total, self.iterations = 0.0, 0.0

In [23]:
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    Used by the DataLoader so that images and targets of different sizes stay
    as separate items instead of being stacked into a single tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)


# Both datasets read from TRAIN_DIR: the validation images are a held-out
# subset of the training folder, only the annotation frames differ.
train_dataset = WheatDataset(
    dataframe=train_df, image_dir=TRAIN_DIR, transforms=get_train_transform()
)
valid_dataset = WheatDataset(
    dataframe=valid_df, image_dir=TRAIN_DIR, transforms=get_valid_transform()
)

# Random permutation of the training indices.
# NOTE(review): not consumed by the loaders below (shuffle=True handles ordering).
indices = torch.randperm(len(train_dataset)).tolist()

# Training batches are reshuffled every epoch.
train_data_loader = DataLoader(
    dataset=train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=0,
    collate_fn=collate_fn,
)

# Validation keeps a fixed order.
valid_data_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_fn,
)

In [24]:
# Set GPU
# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

## Let's get some samples from the dataloader

In [None]:
# Pull a single batch; collate_fn yields one tuple per field, so this unpacks
# into the images, their target dicts and their image ids.
images, targets, image_ids = next(iter(train_data_loader))
# Equivalent alternative via enumerate, kept for reference:
# examples = enumerate(train_data_loader)
# batch_idx, (images, targets, image_ids) = next(examples)

In [None]:
# Move every image of the batch onto the selected device.
images = [image.to(device) for image in images]

In [None]:
# Move every tensor inside each target dict onto the selected device.
targets = [
    {key: value.to(device) for key, value in target.items()} for target in targets
]

In [None]:
# First image of the batch as a channels-last NumPy array for plotting.
sample = images[0].permute(1, 2, 0).cpu().numpy()
# Its boxes as integer pixel coordinates.
boxes = targets[0]["boxes"].cpu().numpy().astype(np.int32)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# `sample` was produced from a permuted tensor, so its memory is not laid out
# contiguously; OpenCV needs a contiguous array to draw into.
sample = np.ascontiguousarray(sample)

for box in boxes:
    # Plain Python ints keep cv2's point parsing happy across versions.
    x_min, y_min, x_max, y_max = (int(coord) for coord in box)
    # The image holds floats in [0, 1], so the colour is given on that scale.
    cv2.rectangle(sample, (x_min, y_min), (x_max, y_max), (1.0, 0.0, 0.0), 3)

ax.set_axis_off()
ax.imshow(sample)

In [25]:
# Put the model on the selected device before handing its parameters to the optimizer.
model.to(device)

# Optimise only the parameters that are trainable.
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# No learning-rate schedule is used; the training loop skips it when None.
lr_scheduler = None

num_epochs = 2

In [26]:
# Running mean of the per-batch loss within the current epoch.
loss_hist = Averager()
# Global batch counter across epochs, used only for progress printing.
itr = 1

for epoch in range(num_epochs):
    # The detection model only returns its loss dict in training mode. Setting
    # the mode explicitly keeps this cell working after `model.eval()` has been
    # called elsewhere (e.g. by the validation cell).
    model.train()
    loss_hist.reset()

    for images, targets, image_ids in train_data_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of individual loss terms.
        loss_dict = model(images, targets)

        # Optimise the unweighted sum of all loss terms.
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        loss_hist.send(loss_value)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if itr % 50 == 0:
            print(f"Iteration #{itr} loss: {loss_value}")

        itr += 1

    # Step the learning-rate schedule once per epoch when one is configured.
    if lr_scheduler is not None:
        lr_scheduler.step()

    print(f"Epoch #{epoch} loss: {loss_hist.value}")

Iteration #50 loss: 0.7918315473514882
Iteration #100 loss: 0.8570312772404838
Iteration #150 loss: 0.8452534319788857
Iteration #200 loss: 0.8038154500725624
Iteration #250 loss: 0.967792148695238
Iteration #300 loss: 0.8165943277686709
Iteration #350 loss: 0.7341394441595273
Iteration #400 loss: 0.9020944098467849
Iteration #450 loss: 0.6313035340825157
Iteration #500 loss: 0.8855056619852486
Iteration #550 loss: 0.7835965218041538
Iteration #600 loss: 0.9741899441734624
Iteration #650 loss: 0.8618736936451081
Epoch #0 loss: 0.9226590766989357
Iteration #700 loss: 0.9478959960359298
Iteration #750 loss: 0.6344556888968363
Iteration #800 loss: 0.8844971659014966
Iteration #850 loss: 0.7784184843601033
Iteration #900 loss: 0.7863307221222076
Iteration #950 loss: 0.71720978258874
Iteration #1000 loss: 0.7681602481158698
Iteration #1050 loss: 0.829146007560338
Iteration #1100 loss: 0.6390534679483189
Iteration #1150 loss: 0.7646782628606876
Iteration #1200 loss: 0.9004667447788712
Iterat

## Validation data

In [None]:
# Take the first validation batch (the validation loader does not shuffle).
images, targets, image_ids = next(iter(valid_data_loader))

In [None]:
# Inference mode: the model returns predictions instead of losses.
model.eval()
cpu_device = torch.device("cpu")

# The batch comes off the loader on the CPU; it has to live on the same device
# as the model, otherwise the forward pass fails when the model is on the GPU.
images = [image.to(device) for image in images]

# No gradients are needed for prediction; skipping them saves memory and time.
with torch.no_grad():
    outputs = model(images)

# Bring every predicted tensor back to the CPU for inspection / plotting.
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

## Persist

In [None]:
# Persist only the learned parameters (the state_dict), not the model object;
# loading them back requires rebuilding the same architecture first.
torch.save(model.state_dict(), "fasterrcnn_resnet50_fpn.pth")