In [16]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import os
import json


import torch
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchmetrics.detection import MeanAveragePrecision

from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
from PIL import Image

In [3]:
transform = T.Compose([
    T.ToTensor(),  
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  
])


In [4]:
class CustomDataset(Dataset):
    def __init__(self, img_dir, annotation_path, transforms=None):
        self.img_dir = img_dir
        self.transforms = transforms

        with open(annotation_path, "r") as f:
            self.annotations = json.load(f)

        self.image_info = {img["id"]: img for img in self.annotations["images"]}

        self.img_to_anns = {}
        for ann in self.annotations["annotations"]:
            img_id = ann["image_id"]
            if img_id not in self.img_to_anns:
                self.img_to_anns[img_id] = []
            self.img_to_anns[img_id].append(ann)

        self.img_ids = list(self.image_info.keys())

    def __len__(self):
        return len(self.img_ids)

    def __getitem__(self, idx):
        img_id = self.img_ids[idx]

        img_info = self.image_info[img_id]
        img_name = img_info["file_name"]
        img_path = os.path.join(self.img_dir, img_name)

        img = Image.open(img_path).convert("RGB")

        anns = self.img_to_anns.get(img_id, [])

        boxes = []
        labels = []
        for ann in anns:
            bbox = ann["bbox"]
            x_min, y_min, width, height = bbox
            x_max = x_min + width
            y_max = y_min + height
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(ann["category_id"])

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

In [5]:
img_dir = "/kaggle/input/tomatodiseasecocogp/train"
annotation_path = "/kaggle/input/tomatodiseasecocogp/train/_annotations.coco.json"
train_dataset = CustomDataset(img_dir, annotation_path, transforms=transform)

validation_dataset = CustomDataset('/kaggle/input/tomatodiseasecocogp/valid', '/kaggle/input/tomatodiseasecocogp/valid/_annotations.coco.json', transforms=transform)
test_dataset = CustomDataset('/kaggle/input/tomatodiseasecocogp/test', '/kaggle/input/tomatodiseasecocogp/test/_annotations.coco.json', transforms=transform)

Image shape: torch.Size([3, 512, 512])
Boxes: tensor([[254., 106., 338., 221.],
        [261., 233., 332., 401.],
        [163.,  48., 256., 192.],
        [357., 172., 466., 364.],
        [ 71., 284., 153., 465.],
        [  7., 105.,  65., 181.],
        [ 21., 266.,  65., 336.],
        [ 46., 238.,  90., 268.],
        [ 71., 212., 129., 276.],
        [328., 216., 367., 276.],
        [319., 155., 356., 211.],
        [190., 175., 233., 227.]])
Labels: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
Image shape: torch.Size([3, 512, 512])
Boxes: tensor([[  0.,   0., 428., 413.]])
Labels: tensor([4])
Image shape: torch.Size([3, 512, 512])
Boxes: tensor([[ 17.0000,   0.0000, 240.0000, 240.0000],
        [296.0000,  23.0000, 409.0000, 256.0000],
        [ 15.0000, 256.0000, 237.5000, 506.0000],
        [286.0000, 271.0000, 456.5000, 450.5000]])
Labels: tensor([1, 4, 5, 5])


In [12]:
print('Training Data Try')
img, target = train_dataset[0]
print("Image shape:", img.shape)
print("Boxes:", target["boxes"])
print("Labels:", target["labels"])
print('---------------------------------------------------------------------------')
print('Validation Data Try')
img, target = validation_dataset[0]
print("Image shape:", img.shape)
print("Boxes:", target["boxes"])
print("Labels:", target["labels"])
print('---------------------------------------------------------------------------')
print('Test Data Try')
img, target = test_dataset[0]
print("Image shape:", img.shape)
print("Boxes:", target["boxes"])
print("Labels:", target["labels"])

Training Data Try
Image shape: torch.Size([3, 512, 512])
Boxes: tensor([[254., 106., 338., 221.],
        [261., 233., 332., 401.],
        [163.,  48., 256., 192.],
        [357., 172., 466., 364.],
        [ 71., 284., 153., 465.],
        [  7., 105.,  65., 181.],
        [ 21., 266.,  65., 336.],
        [ 46., 238.,  90., 268.],
        [ 71., 212., 129., 276.],
        [328., 216., 367., 276.],
        [319., 155., 356., 211.],
        [190., 175., 233., 227.]])
Labels: tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
---------------------------------------------------------------------------
Validation Data Try
Image shape: torch.Size([3, 512, 512])
Boxes: tensor([[  0.,   0., 428., 413.]])
Labels: tensor([4])
---------------------------------------------------------------------------
Test Data Try
Image shape: torch.Size([3, 512, 512])
Boxes: tensor([[ 17.0000,   0.0000, 240.0000, 240.0000],
        [296.0000,  23.0000, 409.0000, 256.0000],
        [ 15.0000, 256.0000, 237.5000, 50

In [6]:
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
validation_loader = DataLoader(validation_dataset, batch_size=16, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

In [17]:
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
num_classes = 12  
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [18]:
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [19]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [None]:
num_epochs = 10
train_losses = {'box': [], 'cls': []}
val_losses = {'box': [], 'cls': []}
train_metrics = {'precision': [], 'recall': []}
val_metrics = {'precision': [], 'recall': [], 'mAP50': [], 'mAP50_95': []}


for epoch in range(num_epochs):
    model.train()
    train_loss_box, train_loss_cls = 0.0, 0.0
    all_preds_train, all_targets_train = [], []

    for images, targets in train_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        
        train_loss_box += loss_dict.get('loss_box_reg', 0.0).item()
        train_loss_cls += loss_dict.get('loss_classifier', 0.0).item()

        losses.backward()
        optimizer.step()

    train_loss_box /= len(train_loader)
    train_loss_cls /= len(train_loader)
    train_losses['box'].append(train_loss_box)
    train_losses['cls'].append(train_loss_cls)

    model.eval()
    val_loss_box, val_loss_cls = 0.0, 0.0
    all_preds_val, all_targets_val = [], []

    with torch.no_grad():
        for images, targets in validation_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            
            val_loss_box += loss_dict.get('loss_box_reg', 0.0).item()
            val_loss_cls += loss_dict.get('loss_classifier', 0.0).item()

            outputs = model(images)
            for output, target in zip(outputs, targets):
                pred_boxes = output['boxes'].cpu().numpy()
                pred_labels = output['labels'].cpu().numpy()
                true_labels = target['labels'].cpu().numpy()
                all_preds_val.extend(pred_labels)
                all_targets_val.extend(true_labels)

    val_loss_box /= len(validation_loader)
    val_loss_cls /= len(validation_loader)
    val_losses['box'].append(val_loss_box)
    val_losses['cls'].append(val_loss_cls)

    val_cm = confusion_matrix(all_targets_val, all_preds_val)
    val_precision, val_recall, val_f1, _ = precision_recall_fscore_support(all_targets_val, all_preds_val, average='weighted')

    map_metric = MeanAveragePrecision(iou_type="bbox")
    preds_val = [{'boxes': torch.tensor(o['boxes']), 'scores': torch.tensor(o['scores']), 'labels': torch.tensor(o['labels'])} for o in outputs]
    targets_val = [{'boxes': t['boxes'], 'labels': t['labels']} for t in targets]
    map_metric.update(preds_val, targets_val)
    map_results = map_metric.compute()
    val_metrics['precision'].append(val_precision)
    val_metrics['recall'].append(val_recall)
    val_metrics['mAP50'].append(map_results['map_50'].item())
    val_metrics['mAP50_95'].append(map_results['map'].item())

    lr_scheduler.step()

    print(f'Epoch: {epoch + 1}, Train Loss (Box/Cls): {train_loss_box:.4f}/{train_loss_cls:.4f}, '
          f'Val Loss (Box/Cls): {val_loss_box:.4f}/{val_loss_cls:.4f}, '
          f'Val Precision: {val_precision:.4f}, Val Recall: {val_recall:.4f}, Val mAP50: {map_results["map_50"].item():.4f}, Val mAP50:95: {map_results["map"].item():.4f}')

In [None]:
# Plotting results
plt.figure(figsize=(15, 10))

# Train Losses
plt.subplot(3, 4, 1)
plt.plot(train_losses['box'], label='train/box_loss')
plt.plot([x * 0.9 for x in train_losses['box']], '--', label='smooth')
plt.title('train/box_loss')
plt.legend()

plt.subplot(3, 4, 2)
plt.plot(train_losses['cls'], label='train/cls_loss')
plt.plot([x * 0.9 for x in train_losses['cls']], '--', label='smooth')
plt.title('train/cls_loss')
plt.legend()

# Validation Losses
plt.subplot(3, 4, 5)
plt.plot(val_losses['box'], label='val/box_loss')
plt.plot([x * 0.9 for x in val_losses['box']], '--', label='smooth')
plt.title('val/box_loss')
plt.legend()

plt.subplot(3, 4, 6)
plt.plot(val_losses['cls'], label='val/cls_loss')
plt.plot([x * 0.9 for x in val_losses['cls']], '--', label='smooth')
plt.title('val/cls_loss')
plt.legend()

# Metrics
plt.subplot(3, 4, 9)
plt.plot(train_metrics['precision'], label='metrics/precision(B)')
plt.plot([x * 0.95 for x in train_metrics['precision']], '--', label='smooth')
plt.title('metrics/precision(B)')
plt.legend()

plt.subplot(3, 4, 10)
plt.plot(train_metrics['recall'], label='metrics/recall(B)')
plt.plot([x * 0.95 for x in train_metrics['recall']], '--', label='smooth')
plt.title('metrics/recall(B)')
plt.legend()

plt.subplot(3, 4, 11)
plt.plot(val_metrics['mAP50'], label='metrics/mAP50(B)')
plt.plot([x * 0.95 for x in val_metrics['mAP50']], '--', label='smooth')
plt.title('metrics/mAP50(B)')
plt.legend()

plt.subplot(3, 4, 12)
plt.plot(val_metrics['mAP50_95'], label='metrics/mAP50-95(B)')
plt.plot([x * 0.95 for x in val_metrics['mAP50_95']], '--', label='smooth')
plt.title('metrics/mAP50-95(B)')
plt.legend()

plt.tight_layout()
plt.savefig('training_results.png')
plt.close()

# Confusion Matrix for Validation
val_cm_normalized = val_cm.astype('float') / val_cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(10, 8))
plt.imshow(val_cm_normalized, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Validation Confusion Matrix')
plt.colorbar()
plt.savefig('val_confusion_matrix.png')
plt.close()


In [None]:
# Confusion Matrix for Test 
model.eval()
all_preds_test, all_targets_test = [], []
with torch.no_grad():
    for images, targets in test_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        outputs = model(images)
        for output, target in zip(outputs, targets):
            pred_labels = output['labels'].cpu().numpy()
            true_labels = target['labels'].cpu().numpy()
            all_preds_test.extend(pred_labels)
            all_targets_test.extend(true_labels)

test_cm = confusion_matrix(all_targets_test, all_preds_test)
test_cm_normalized = test_cm.astype('float') / test_cm.sum(axis=1)[:, np.newaxis]
plt.figure(figsize=(10, 8))
plt.imshow(test_cm_normalized, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Test Confusion Matrix')
plt.colorbar()
plt.savefig('test_confusion_matrix.png')
plt.close()

# Save metrics to CSV
metrics_df = pd.DataFrame({
    'Epoch': range(1, num_epochs + 1),
    'Val_Precision': val_metrics['precision'],
    'Val_Recall': val_metrics['recall'],
    'Val_F1': [precision_recall_fscore_support(all_targets_val, all_preds_val, average='weighted')[2] for _ in range(num_epochs)],
    'Val_mAP50': val_metrics['mAP50'],
    'Val_mAP50_95': val_metrics['mAP50_95']
})
metrics_df.to_csv('metrics_results.csv', index=False)

print("Training complete! Results saved as 'training_results.png', 'val_confusion_matrix.png', 'test_confusion_matrix.png', and 'metrics_results.csv'")

In [None]:
torch.save(model.state_dict(), 'fasterrcnn_model.pth')
print("Final model saved as 'fasterrcnn_model.pth'")