## Connect to drive and import dataset

In [None]:
from google.colab import drive
# Mount Google Drive under /content/gdrive so the dataset stored there can be copied.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Work from /content/ so the relative annotation path opened later resolves there.
%cd /content/

# Copy the picture folder and the annotation file from the mounted Drive into /content/.
%cp -r "/content/gdrive/MyDrive/Afstudeerproject/Pictures/" "/content/"
%cp -r "/content/gdrive/MyDrive/Afstudeerproject/output_annotations.json" "/content/"

/content


In [None]:
import os

# Directory holding the copied pictures, and the prefix that has to go
pictures_dir = "/content/Pictures/"
prefix = "Copy of "

# Collect the affected names first, then rename each one without the prefix
prefixed_names = [name for name in os.listdir(pictures_dir) if name.startswith(prefix)]
for filename in prefixed_names:
    # Drop only the leading prefix; the rest of the name is kept untouched
    new_filename = filename[len(prefix):]

    old_path = os.path.join(pictures_dir, filename)
    new_path = os.path.join(pictures_dir, new_filename)
    os.rename(old_path, new_path)

## Define dataset

In [None]:
import json
import os
import torch
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.tv_tensors import BoundingBoxes
from torch import nn

# Read the annotation file (it provides "images", "annotations" and "categories")
with open("output_annotations.json", "r") as f:
    plant_data = json.load(f)

# Per-image lookup tables keyed by image id, filled in a single pass:
#   image_id_to_filename -> file name
#   image_id_to_size     -> (width, height)
#   image_annotations    -> list of annotations (starts empty for every image)
image_id_to_filename = {}
image_id_to_size = {}
image_annotations = {}
for img in plant_data["images"]:
    img_id = img["id"]
    image_id_to_filename[img_id] = img["file_name"]
    image_id_to_size[img_id] = (img["width"], img["height"])
    image_annotations[img_id] = []

# Attach every annotation to the image it belongs to
for ann in plant_data["annotations"]:
    owner_id = ann["image_id"]
    image_annotations[owner_id].append(ann)

class PlantDataset(Dataset):
    """Object-detection dataset backed by an image folder and per-image annotations.

    Args:
        img_dir: Directory that contains the image files.
        annotations: Mapping ``image_id -> list`` of annotation dicts; each dict
            provides ``"bbox"`` as ``[x, y, w, h]`` and ``"category_id"``.
        img_id_to_filename: Mapping ``image_id -> file name`` inside ``img_dir``.
        img_id_to_size: Mapping ``image_id -> (width, height)``.
        transform: Optional callable applied as ``transform(image, target)`` and
            returning the transformed ``(image, target)`` pair.

    Each item is ``(image, target)`` where ``target`` is a dict with ``"boxes"``
    (float32 tensor of shape ``(N, 4)`` in XYXY order) and ``"labels"`` (int64
    tensor of shape ``(N,)``).
    """

    def __init__(self, img_dir, annotations, img_id_to_filename, img_id_to_size, transform=None):
        self.img_dir = img_dir
        self.annotations = annotations
        self.img_id_to_filename = img_id_to_filename
        self.img_id_to_size = img_id_to_size
        self.transform = transform
        # Fix the index -> image id order once instead of rebuilding the list
        # on every __getitem__ call.
        self.img_ids = list(img_id_to_filename.keys())

    def __len__(self):
        return len(self.img_ids)

    def __getitem__(self, idx):
        # Resolve the image id and its file path
        img_id = self.img_ids[idx]
        img_path = os.path.join(self.img_dir, self.img_id_to_filename[img_id])

        # Load the image as RGB (no resizing here) and release the file handle
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        orig_w, orig_h = self.img_id_to_size[img_id]

        # Collect boxes in original pixel coordinates, converting XYWH -> XYXY
        boxes = []
        labels = []
        for ann in self.annotations.get(img_id, []):
            x, y, w, h = ann["bbox"]
            boxes.append([x, y, x + w, y + h])
            labels.append(ann["category_id"])

        # reshape(-1, 4) keeps the (N, 4) layout even when the image has no
        # annotations (an empty list would otherwise become a shape-(0,) tensor).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        # Wrap boxes in BoundingBoxes so geometric transforms update them too
        target = {
            "boxes": BoundingBoxes(boxes, format="XYXY", canvas_size=(orig_h, orig_w)),
            "labels": labels
        }

        # Apply transforms (these handle resizing and box scaling)
        if self.transform is not None:
            image, target = self.transform(image, target)

        # Hand a plain tensor to the model instead of the BoundingBoxes wrapper
        target["boxes"] = target["boxes"].as_subclass(torch.Tensor)

        return image, target

## Define transforms

In [None]:
import torchvision.transforms.v2 as transforms_v2

# Seed for the train/validation split so the split is reproducible across runs
SPLIT_SEED = 42

# Training pipeline: augmentations first, then resize and normalisation
train_transform = transforms_v2.Compose([
    # ToImage + ToDtype(scale=True) is the v2 replacement for the deprecated ToTensor
    transforms_v2.ToImage(),
    transforms_v2.ToDtype(torch.float32, scale=True),
    transforms_v2.RandomHorizontalFlip(p=0.5),
    transforms_v2.RandomVerticalFlip(p=0.3),
    transforms_v2.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.02),
    transforms_v2.RandomAffine(
        degrees=15,
        translate=(0.1, 0.1),
        scale=(0.8, 1.2),  # More aggressive scaling
        shear=5
    ),
    transforms_v2.Resize((640, 640), antialias=True),
    # Geometric augmentation can push a box out of the image or collapse it;
    # drop such boxes (and their labels) before they reach the detector.
    transforms_v2.SanitizeBoundingBoxes(),
    transforms_v2.RandomErasing(p=0.3, scale=(0.02, 0.15)),
    transforms_v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic, no augmentation
val_transform = transforms_v2.Compose([
    transforms_v2.ToImage(),
    transforms_v2.ToDtype(torch.float32, scale=True),
    transforms_v2.Resize((640, 640), antialias=True),
    transforms_v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Original dataset (no transforms)
full_dataset = PlantDataset(
    img_dir="/content/Pictures/",
    annotations=image_annotations,
    img_id_to_filename=image_id_to_filename,
    img_id_to_size=image_id_to_size,
    transform=None
)

# 80/20 split over a seeded permutation of the sample indices
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
shuffled_indices = torch.randperm(len(full_dataset), generator=split_generator).tolist()

# Each subset wraps its OWN PlantDataset instance. Two subsets produced by
# random_split share a single underlying dataset object, so assigning a
# transform through one subset would silently overwrite the other's.
train_dataset = torch.utils.data.Subset(
    PlantDataset(
        img_dir="/content/Pictures/",
        annotations=image_annotations,
        img_id_to_filename=image_id_to_filename,
        img_id_to_size=image_id_to_size,
        transform=train_transform,
    ),
    shuffled_indices[:train_size],
)
val_dataset = torch.utils.data.Subset(
    PlantDataset(
        img_dir="/content/Pictures/",
        annotations=image_annotations,
        img_id_to_filename=image_id_to_filename,
        img_id_to_size=image_id_to_size,
        transform=val_transform,
    ),
    shuffled_indices[train_size:],
)



## Define model

In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn

class FastRCNNPredictorWithDropout(nn.Module):
    """Box-predictor head with a dropout-regularised classification branch.

    Args:
        in_channels: Size of the per-proposal feature vector fed to the head.
        num_classes: Number of output classes (including background).

    The classification branch is Linear -> ReLU -> Dropout(0.3) -> Linear;
    the box-regression branch is a single Linear layer producing four values
    per class.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.cls_score = nn.Sequential(
            nn.Linear(in_channels, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),  # Higher dropout for small dataset
            nn.Linear(1024, num_classes)
        )
        self.bbox_pred = nn.Linear(in_channels, num_classes * 4)

    def forward(self, x):
        """Return ``(scores, bbox_deltas)`` for a batch of proposal features.

        ``x`` may be ``(N, in_channels)`` or ``(N, in_channels, 1, 1)``; it is
        flattened to two dimensions before the linear layers are applied.
        """
        # Collapse trailing singleton spatial dims so both input layouts work
        x = x.flatten(start_dim=1)
        scores = self.cls_score(x)
        bbox_deltas = self.bbox_pred(x)
        return scores, bbox_deltas

# Pretrained detector whose box predictor is swapped for the dropout variant
model = fasterrcnn_resnet50_fpn(weights='DEFAULT')
in_features = model.roi_heads.box_predictor.cls_score.in_features
num_classes = 1 + len(plant_data["categories"])  # background + dataset categories
model.roi_heads.box_predictor = FastRCNNPredictorWithDropout(in_features, num_classes)

# Training loader; the collate function regroups a batch of (image, target)
# pairs into a tuple of images and a tuple of targets instead of stacking them.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    collate_fn=lambda batch: tuple(zip(*batch)),
)



## Train model

In [None]:
# Install torchmetrics, which provides the MeanAveragePrecision metric used for validation.
%pip install torchmetrics



In [None]:
import torch
from torchmetrics.detection import MeanAveragePrecision

num_epochs = 40
# Number of batches whose gradients are summed before each optimizer step
accumulation_steps = 4

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=0.05, momentum=0.9)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

# Only the IoU thresholds 0.5 and 0.75 are evaluated here
# (not the full COCO 0.5:0.95 sweep).
metric = MeanAveragePrecision(iou_thresholds=[0.5, 0.75])

val_dataloader = DataLoader(
    val_dataset,
    batch_size=8,
    num_workers=2,
    collate_fn=lambda x: tuple(zip(*x)),
    pin_memory=True
)

for epoch in range(num_epochs):
    # ---- training ----
    model.train()
    epoch_loss = 0.0
    optimizer.zero_grad()
    num_batches = len(train_dataloader)

    # The step counter restarts every epoch so accumulation windows never
    # straddle an epoch boundary.
    for step, (images, targets) in enumerate(train_dataloader, start=1):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        batch_loss = sum(loss_dict.values())

        # Scale so the accumulated gradient matches one large-batch update
        (batch_loss / accumulation_steps).backward()

        # Log the unscaled loss exactly once per batch
        epoch_loss += batch_loss.item()

        # Step after every full window, and also on the last batch so a
        # trailing partial window is applied instead of being discarded by
        # the zero_grad() at the start of the next epoch.
        if step % accumulation_steps == 0 or step == num_batches:
            optimizer.step()
            optimizer.zero_grad()

    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.4f}")

    # ---- validation ----
    model.eval()
    metric.reset()

    with torch.no_grad():
        for images, targets in val_dataloader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            preds = model(images)
            metric.update(preds, targets)

    scheduler.step()

    results = metric.compute()
    print(f"Epoch {epoch+1} Metrics:")
    print(f" - mAP@0.5: {results['map_50']:.4f}")
    print(f" - mAP@0.75: {results['map_75']:.4f}")



Epoch 1, Loss: 20.8110
Epoch 1 Metrics:
 - mAP@0.5: 0.2388
 - mAP@0.75: 0.0155
Epoch 2, Loss: 20.6640
Epoch 2 Metrics:
 - mAP@0.5: 0.2608
 - mAP@0.75: 0.0366
Epoch 3, Loss: 20.5257
Epoch 3 Metrics:
 - mAP@0.5: 0.3050
 - mAP@0.75: 0.0518
Epoch 4, Loss: 19.9779
Epoch 4 Metrics:
 - mAP@0.5: 0.3366
 - mAP@0.75: 0.0641
Epoch 5, Loss: 19.6852
Epoch 5 Metrics:
 - mAP@0.5: 0.3682
 - mAP@0.75: 0.0867
Epoch 6, Loss: 19.1765
Epoch 6 Metrics:
 - mAP@0.5: 0.4384
 - mAP@0.75: 0.1179
Epoch 7, Loss: 18.5204
Epoch 7 Metrics:
 - mAP@0.5: 0.4745
 - mAP@0.75: 0.1082
Epoch 8, Loss: 17.6236
Epoch 8 Metrics:
 - mAP@0.5: 0.5076
 - mAP@0.75: 0.1564
Epoch 9, Loss: 17.0213
Epoch 9 Metrics:
 - mAP@0.5: 0.5270
 - mAP@0.75: 0.1379
Epoch 10, Loss: 16.6492
Epoch 10 Metrics:
 - mAP@0.5: 0.5470
 - mAP@0.75: 0.1649
Epoch 11, Loss: 16.1751
Epoch 11 Metrics:
 - mAP@0.5: 0.5589
 - mAP@0.75: 0.1466
Epoch 12, Loss: 15.9855
Epoch 12 Metrics:
 - mAP@0.5: 0.5810
 - mAP@0.75: 0.1547
Epoch 13, Loss: 15.3972
Epoch 13 Metrics:
 - m

In [None]:
model.eval()

import cv2
import matplotlib.pyplot as plt
import numpy as np

category_names = {1: "Crop", 2: "Weed"}

def plot_predictions(image_path, predictions, threshold=0.5):
    """Draw the detections of the first prediction on the image and show it.

    Args:
        image_path: Path of the image to draw on.
        predictions: List of prediction dicts with ``'boxes'``, ``'labels'``
            and ``'scores'`` tensors; only ``predictions[0]`` is drawn. Boxes
            must be in the pixel coordinates of the image at ``image_path``.
        threshold: Minimum score a detection needs to be drawn.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    boxes = predictions[0]['boxes'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()
    scores = predictions[0]['scores'].cpu().numpy()

    for box, label, score in zip(boxes, labels, scores):
        if score < threshold:  # Filter low-confidence detections
            continue

        x1, y1, x2, y2 = map(int, box)
        label = int(label)
        class_name = category_names.get(label, "Unknown")
        color = (0, 255, 0) if label == 1 else (255, 0, 0)  # Green for crops, red for weeds

        # Draw rectangle
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        text = f"{class_name}: {score:.2f}"

        # Put text above the rectangle, kept inside the image for boxes at the top edge
        cv2.putText(image, text, (x1, max(y1 - 10, 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    plt.figure(figsize=(8, 8))
    plt.imshow(image)
    plt.axis("off")
    plt.show()

def predict(image_path):
    """Run the detector on one image file.

    The image goes through ``val_transform`` (the same preprocessing used for
    validation). The predicted boxes are scaled from the transformed input
    size back to the original image size, so they can be drawn directly on
    the original image.

    Returns:
        The model's list of prediction dicts (one entry for the single image).
    """
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    orig_w, orig_h = image.size

    image_tensor = val_transform(image).to(device)

    with torch.no_grad():
        predictions = model([image_tensor])

    # Boxes come back in the coordinate frame of the tensor fed to the model;
    # map them to the original resolution.
    in_h, in_w = image_tensor.shape[-2:]
    scale_x = orig_w / in_w
    scale_y = orig_h / in_h
    for prediction in predictions:
        box_scale = torch.tensor(
            [scale_x, scale_y, scale_x, scale_y],
            dtype=prediction["boxes"].dtype,
            device=prediction["boxes"].device,
        )
        prediction["boxes"] = prediction["boxes"] * box_scale

    return predictions

# Run inference
# NOTE(review): absolute path on a local machine - adjust when running elsewhere (e.g. Colab).
image_path = "/home/remo/Afstudeerproject/AgronomischePerformanceMeting/AnnotationAndTraining/Annotation/images/frame10.jpg"
predictions = predict(image_path)
plot_predictions(image_path, predictions)

NameError: name 'model' is not defined

## Save model

In [None]:
# Save only the model weights (state dict); the file name is relative, so it is written
# to the current working directory.
# NOTE(review): presumably Colab's local storage is temporary - copy the file to Drive to keep it.
torch.save(model.state_dict(), "plant_dect_fastercnn_SDG_DO_0.3.pth")