**Note:** The training script was used instead of this notebook, so the notebook may contain errors. Prefer running the script.

In [None]:
import set_path

Install dependencies:

In [None]:
# Install the packages listed in requirements.txt, then ClearML, into the kernel's environment.
# NOTE(review): clearml is installed without a version pin — consider pinning it for reproducibility.
%pip install -r requirements.txt
%pip install clearml

Import all libraries/dependencies required:

In [None]:
import copy
import os
import torch
import matplotlib.pyplot as plt
import numpy as np
import cv2

from torchvision import transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN_ResNet50_FPN_Weights

from src.training.fasterrcnn.dataset import CustomDataset, custom_collate_fn
from src.training.fasterrcnn.engine import train_model_k_folds, evaluate_on_test_set
from src.training.fasterrcnn.data import (
                                        create_processed_dataframes, 
                                        load_annotations, 
                                        train_test_split,
                                        create_train_val_folds,
                                        create_train_val_dataloaders,
                                        load_dataloader
                                        )
from src.training.fasterrcnn.utils import load_environment_variables, initialise_clearml_task

In [None]:
# Notebook-wide constants.
NUM_CLASSES = 2  # size of the replacement box-predictor head built in the model cell
IMAGE_DIR = "data/patches_dataset/"  # directory handed to the dataset/dataloader helpers
ANNOTATIONS_PATH = "data/annotations/annotations_patches.csv"  # CSV read by load_annotations

In [None]:
# Run configuration: passed to the ClearML task initialiser and to the
# dataloader / training helpers further down the notebook.
config_params = {
    "USE_GPU": True,
    "learning-rate": 1e-3,
    "batch-size": 8,
    "num-epochs": 100,  # for actual training: 20
    "weight-decay": 5e-4,
}

Load annotations for patch images

In [None]:
# Read the patch annotations from ANNOTATIONS_PATH via the project helper.
# Presumably returns a pandas DataFrame (later cells read `.shape` from it) — confirm in src.training.fasterrcnn.data.
annotations_df = load_annotations(ANNOTATIONS_PATH)

In [None]:
# Preview only the first rows; rendering the full annotation table bloats the notebook output.
annotations_df.head()

Create a combined feature used for stratification:

In [None]:
# Build the processed annotation frame plus the per-image statistics that the split helpers below consume.
# NOTE(review): this rebinds `annotations_df`, so re-running the cell feeds the already-processed frame
# back into the helper — confirm that is safe, or re-run the load cell first.
annotations_df, image_statistics = create_processed_dataframes(annotations_df=annotations_df)

First, split the data into a training set and a test set (50:50 ratio):
- Was originally split into 80:20, but the patch dataset contains at least 11k images (with 1 patch from each image).
- To improve training speeds, the training set has been reduced in size.

In [None]:
# Hold out half of the data (test_size=0.5) as the test set; the returned image-name lists
# identify which images went to each side.
# NOTE(review): no seed is passed here — confirm the helper fixes its own random state for reproducibility.
train_df, test_df, train_image_names, test_image_names = train_test_split(annotations_df, image_statistics, test_size=0.5)

In [None]:
# Show the shapes of the train split, the test split and the full annotation table side by side.
print(*(frame.shape for frame in (train_df, test_df, annotations_df)))

Split the training set into training/validation folds:
- Data is split into 5 folds.
- The images are split into training and validation sets based on the number of objects in each image and the mean area of all the bounding boxes for each image.

In [None]:
# Partition the training images into train/validation folds.
# The sanity-check cell below indexes the result as all_splits[i]["train"] / all_splits[i]["val"].
all_splits = create_train_val_folds(
    annotations_df=annotations_df,
    image_statistics=image_statistics,
    train_image_names=train_image_names,
)

In [None]:
# Sanity checks: for every fold, the train + val + test row counts must add up
# to the row count of the full annotation table.
total_rows = annotations_df.shape[0]
held_out_rows = test_df.shape[0]
for fold_idx in range(5):
    fold = all_splits[fold_idx]
    train_shape, val_shape = fold["train"].shape, fold["val"].shape
    print(fold_idx, train_shape, val_shape)
    assert train_shape[0] + val_shape[0] + held_out_rows == total_rows, "Data processing incorrect, missing objects."


In [None]:
# Preview only the first rows of the held-out test split rather than rendering the whole frame.
test_df.head()

Define image transforms (data augmentation)

In [None]:
# Transform pipeline shared by the train, validation and test datasets.
# Only tensor conversion is applied here; no augmentation transforms are included in the pipeline.
image_transforms = T.Compose([T.ToTensor()])

Check the device available

In [None]:
# Use the first CUDA device only when the configuration asks for a GPU AND one is visible;
# otherwise fall back to the CPU. A missing 'USE_GPU' key keeps the previous behaviour
# (use CUDA whenever it is available).
use_gpu = bool(config_params.get('USE_GPU', True)) and torch.cuda.is_available()
device = torch.device('cuda:0' if use_gpu else 'cpu')
print(device)

Define the model

In [None]:
# Build Faster R-CNN (ResNet-50 FPN backbone) initialised from torchvision's default pretrained weights.
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)

# Replace the box predictor with a fresh head sized for NUM_CLASSES,
# reusing the input width of the existing classification layer.
head_input_dim = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_channels=head_input_dim, num_classes=NUM_CLASSES)

# Place the model on the selected device (the cell output shows the module structure).
model.to(device)

Initialise the environment variables required to run training with ClearML.

In [None]:
# Load settings from the local .env file via the project helper.
# Presumably this exports the ClearML credentials needed below — confirm in src.training.fasterrcnn.utils.
load_environment_variables(env_path="./.env")

Run this in a terminal: `clearml-init`

Upload patches dataset zip using following terminal commands (deprecated):

In [None]:
# clearml-data create --project "Object Detection Project" --name "Patches Dataset"

# clearml-data add --files "data/patches_dataset.zip"

# clearml-data upload

# after the above command run successfully, run: clearml-data close

Initialise ClearML task

In [None]:
# Create the ClearML task for this run, handing it the run configuration.
# `task` is used later for the output directory name, the results artifact and closing the run.
task = initialise_clearml_task(configuration_params=config_params)

For remote execution: load the dataset we uploaded to ClearML (deprecated)

In [None]:
# dataset = ClearMLDataset.get(dataset_project="Object Detection Project", dataset_name="Patches Dataset")
# dataset_path = dataset.get_local_copy()

In [None]:
# Per-run output directory named "<task name>_<task id>/"; created if it does not already exist.
prefix = f"{task.name}_{task.id}/"
os.makedirs(prefix, exist_ok=True)

Define the dataloaders:
- 'dataloaders_train' will contain all of the data loaders for the training folds.
- 'dataloaders_val' will contain all of the data loaders for the validation folds.

In [None]:
# Load the dataloaders
dataloaders_train, dataloaders_val = create_train_val_dataloaders(
                                                                all_splits=all_splits, 
                                                                image_dir=IMAGE_DIR, 
                                                                image_transforms=image_transforms, 
                                                                config_params=config_params,
                                                                collate_fn=custom_collate_fn,
                                                                )

Creating test dataloader

In [None]:
# Wrap the held-out test annotations in a dataset and build its dataloader.
test_dataset = CustomDataset(test_df, IMAGE_DIR, transforms=image_transforms)
test_dl = load_dataloader(
    dataset=test_dataset,
    config_params=config_params,
    collate_fn=custom_collate_fn,
    shuffle=False,  # Do not shuffle the test set
)

# TEMP: Print the first 5 batches of the test loader.
# zip() stops as soon as range(5) is exhausted, so exactly 5 batches are printed
# and no extra batch is fetched from the loader.
print("num batches in dataloader", len(test_dl))
for i, (images, targets) in zip(range(5), test_dl):
    print(f"Batch {i}: {len(images)} images, {len(targets)} targets")
    print(targets)
    print(i)

Visualise the first 5 images along with their bounding boxes:

In [None]:


# Show the first image, with its ground-truth boxes drawn, from each of the first 5 batches
# of the 'fold_1_train' loader. zip() with range(5) stops after exactly 5 batches without
# fetching an extra one.
for i, (images, targets) in zip(range(5), dataloaders_train['fold_1_train']):
    print(f"Batch {i}: {len(images)} images, {len(targets)} targets")
    print(targets)
    print(i)

    # Scale to 0-255 and move the leading channel axis last so OpenCV / matplotlib can use the array.
    # Assumes a channel-first float image in [0, 1], as produced by T.ToTensor() — confirm against the dataset.
    image_np = (images[0].numpy() * 255).astype(np.uint8).transpose(1, 2, 0)

    # Boxes of the first image, truncated to integer pixel coordinates.
    bboxes = targets[0]["boxes"].numpy().astype(int)
    print(bboxes.shape)

    # Draw on a copy so the converted image itself is left untouched.
    image_copy = image_np.copy()

    # tolist() yields plain Python ints; some OpenCV builds reject NumPy integer scalars as point coordinates.
    for x_min, y_min, x_max, y_max in bboxes.tolist():
        image_copy = cv2.rectangle(image_copy, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

    plt.imshow(image_copy)
    plt.axis("off")
    plt.show()

Start training

In [None]:
# Train across all folds. The helper returns the best model together with the path of the
# best fold's saved weights, which the evaluation cell reloads.
# `prefix` is presumably the directory the checkpoints are written under — confirm in engine.
best_model, best_fold_model_path = train_model_k_folds(
    model=model,
    train_dataloaders=dataloaders_train,
    val_dataloaders=dataloaders_val,
    device=device,
    config_params=config_params,
    prefix=prefix,
)

Evaluate 'best' model

In [None]:
# Test model: evaluate the best fold's saved weights on the held-out test set.
# Work on a deep copy so that loading the checkpoint does not modify `best_model`.
final_model = copy.deepcopy(best_model)

# map_location remaps the saved tensors onto the current device, so a checkpoint written
# on a GPU still loads on a CPU-only machine.
# Adjust this path if you want to load the overall best fold.
best_state_dict = torch.load(best_fold_model_path, map_location=device, weights_only=True)
final_model.load_state_dict(best_state_dict)

# Call the test function to evaluate on the test set
test_results = evaluate_on_test_set(final_model, test_dl, device)

# Log the test results (e.g., mAP, Precision, Recall) to ClearML
task.upload_artifact(name="Test Results", artifact_object=test_results)

# Close the ClearML task now that the results artifact has been uploaded.
# Comment this out to keep the task open for further logging.
task.close()