## 8.1 Transfer Learning Faster R-CNN

⚠️⚠️⚠️ *Please open this notebook in Google Colab* by clicking the link below ⚠️⚠️⚠️<br><br>
<a href="https://colab.research.google.com/github/Muhammad-Yunus/Belajar-OpenCV-ObjectDetection/blob/main/Pertemuan%208/8.1%20transfer_learning_faster_rcnn.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a><br><br><br>
- Click the `Connect` button at the top right of the Google Colab notebook,<br>
<img src="https://github.com/Muhammad-Yunus/Belajar-OpenCV-ObjectDetection/blob/main/Pertemuan%207/resource/cl-connect-gpu.png?raw=1" width="250px">
- Once the connection has completed, the button will look something like this<br>
<img src="https://github.com/Muhammad-Yunus/Belajar-OpenCV-ObjectDetection/blob/main/Pertemuan%207/resource/cl-connect-gpu-success.png?raw=1" width="250px">

- Check that the GPU attached to the Colab environment is active

In [None]:
!nvidia-smi

- Install the necessary packages

In [None]:
!pip install torchmetrics
!pip install onnx
!pip install onnxruntime

#### 8.1.1 Download Dataset From Roboflow
- Follow the instructions in [4.1 dataset_annotation_roboflow.ipynb](https://github.com/Muhammad-Yunus/Belajar-OpenCV-ObjectDetection/blob/main/Pertemuan%204/4.1%20dataset_annotation_roboflow.ipynb) to prepare the example `Scissors Dataset`,
- Open `Roboflow` > `Project` > `Versions` menu
- Then click `Download Dataset`<br>
<img src="https://github.com/Muhammad-Yunus/Belajar-OpenCV-ObjectDetection/blob/main/Pertemuan%208/resource/rb-download-dataset.png?raw=1" width="850px">
- Choose `COCO` format and select `Show download code` then click `Continue` <br>
<img src="https://github.com/Muhammad-Yunus/Belajar-OpenCV-ObjectDetection/blob/main/Pertemuan%208/resource/rb-download-format.png?raw=1" width="350px">
- Click the `Copy` icon to copy the Roboflow download code<br>
<img src="https://github.com/Muhammad-Yunus/Belajar-OpenCV-ObjectDetection/blob/main/Pertemuan%208/resource/rb-copy-download-code.png?raw=1" width="350px">
- Then <font color="orange">replace the code below</font> with the Roboflow download code copied above,


In [None]:
!pip install roboflow

from roboflow import Roboflow
# Authenticate against Roboflow and download version 1 of the project in COCO format.
# NOTE(review): the API key below is hard-coded in the notebook. Replace this whole
# snippet with the download code copied from your own Roboflow project, and avoid
# committing a real key to a public repository.
rf = Roboflow(api_key="uwcaZm715IIKM3Ej7ix5")
# Workspace and project identifiers of the example dataset.
project = rf.workspace("learning-k0ow4").project("skissors-detections")
version = project.version(1)
# `dataset.location` is read later in the notebook as the dataset root folder
# (its `train` and `valid` subfolders are passed to CustomDataset).
dataset = version.download("coco")

In [None]:
import torch
import torchvision.transforms as T
from PIL import Image
import json
import os

class CustomDataset(torch.utils.data.Dataset):
    """Object-detection dataset backed by a COCO-style ``_annotations.coco.json``.

    Only images whose file name ends with ``.jpg`` and that have at least one
    annotation are exposed. Each item is an ``(image, target)`` pair, where
    ``target`` is a dict with:

    - ``boxes``: float32 tensor of ``[x_min, y_min, x_max, y_max]`` rows
    - ``labels``: int64 tensor of COCO ``category_id`` values

    Args:
        root: Folder containing the images and ``_annotations.coco.json``.
        transforms: Optional callable applied to the PIL image only. The
            boxes are NOT transformed, so geometric augmentations (resize,
            flip, crop, ...) must not be used here unless the boxes are
            adjusted elsewhere.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms

        # Load JSON annotations
        with open(os.path.join(root, "_annotations.coco.json"), encoding="utf-8") as f:
            data = json.load(f)

        # Index images (JPEG only), raw annotations and category names
        self.images = {img['id']: img for img in data['images'] if img['file_name'].endswith('.jpg')}
        self.annotations = data['annotations']
        self.categories = {cat['id']: cat['name'] for cat in data['categories']}

        # Group annotations by image ID for easy lookup.
        # Annotations that point at an image we do not keep (filtered out
        # above, or absent from the file) are skipped instead of raising
        # KeyError.
        self.img_to_anns = {img_id: [] for img_id in self.images}
        for ann in self.annotations:
            anns = self.img_to_anns.get(ann['image_id'])
            if anns is not None:
                anns.append(ann)

        # Store only images that have annotations
        self.imgs = [img_id for img_id, anns in self.img_to_anns.items() if anns]

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair for position ``idx``."""
        img_id = self.imgs[idx]
        img_info = self.images[img_id]
        img_path = os.path.join(self.root, img_info['file_name'])
        img = Image.open(img_path).convert("RGB")

        # COCO stores boxes as [x, y, width, height]; convert them to
        # [x_min, y_min, x_max, y_max].
        boxes = []
        labels = []
        for ann in self.img_to_anns[img_id]:
            x, y, w, h = ann['bbox'][:4]
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])

        # Convert boxes and labels to tensors
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {"boxes": boxes, "labels": labels}

        # Apply any transforms (image only, see class docstring)
        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.imgs)


In [None]:
# Preprocessing applied to each PIL image by CustomDataset: convert it to a tensor.
# CustomDataset passes only the image through this pipeline, not the boxes, so any
# augmentation added here must not move or rescale image content.
transform = T.Compose([
    T.ToTensor(),
    # Add other augmentations as needed
])


In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load the pre-trained Faster R-CNN (ResNet-50 FPN v2) with its default weights
model = fasterrcnn_resnet50_fpn_v2(weights='DEFAULT')
num_classes = 2  # Change this based on your dataset (including background)

# Read the input feature size of the existing classification layer so the
# replacement head can be built with a matching input size
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained box predictor with a new one sized for `num_classes`
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
from torch.utils.data import DataLoader
import torch.optim as optim
from torchmetrics.detection.mean_ap import MeanAveragePrecision

# Initialize the mAP metric used for validation
map_metric = MeanAveragePrecision(
    iou_thresholds=[0.5, 0.75, 0.9],  # IoU thresholds
    # max_detection_thresholds=[10, 100],  # Max detection count (optional)
    class_metrics=True,  # Separate mAP per class
)


def collate_batch(batch):
    """Turn a list of (image, target) samples into an (images, targets) pair of tuples.

    Detection samples have a variable number of boxes per image, so they are
    kept as tuples instead of being stacked into a single tensor.
    """
    return tuple(zip(*batch))


# Build the training and validation datasets from the downloaded Roboflow export
DATASET_ROOT_PATH = dataset.location
dataset_train = CustomDataset(root=f"{DATASET_ROOT_PATH}/train", transforms=transform)
dataset_val = CustomDataset(root=f"{DATASET_ROOT_PATH}/valid", transforms=transform)
data_loader_train = DataLoader(dataset_train, batch_size=4, shuffle=True, collate_fn=collate_batch)
data_loader_val = DataLoader(dataset_val, batch_size=4, shuffle=False, collate_fn=collate_batch)

# Move model to the GPU when one is available, otherwise stay on the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Define optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Evaluation function
def evaluate(model, validation_loader, device, metric=None):
    """Run the model over a validation loader and return the mAP results.

    Args:
        model: Detection model; called as ``model(images)`` in eval mode and
            expected to return one dict per image with ``boxes``, ``scores``
            and ``labels`` tensors.
        validation_loader: Iterable yielding ``(images, targets)`` batches,
            where each target is a dict with ``boxes`` and ``labels`` tensors.
        device: Device the images are moved to before inference.
        metric: Metric object providing ``reset()``, ``update(preds, gts)``
            and ``compute()``. Defaults to the module-level ``map_metric``.

    Returns:
        Whatever ``metric.compute()`` returns for this validation pass.
    """
    if metric is None:
        metric = map_metric

    model.eval()  # Ensure model is in eval mode

    # Start from a clean state: without this, predictions from earlier calls
    # (earlier epochs) stay accumulated in the metric and leak into the result.
    metric.reset()

    with torch.no_grad():
        for images, targets in validation_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            # Format predictions and targets for torchmetrics (on the CPU)
            preds = [
                {
                    "boxes": output["boxes"].cpu(),
                    "scores": output["scores"].cpu(),
                    "labels": output["labels"].cpu(),
                }
                for output in outputs
            ]

            # Targets are only consumed by the metric, so they are not moved
            # to `device` first.
            gts = [
                {
                    "boxes": target["boxes"].cpu(),
                    "labels": target["labels"].cpu(),
                }
                for target in targets
            ]

            # Update the metric with predictions and ground truths
            metric.update(preds, gts)

    # Compute the metric over the whole validation pass
    return metric.compute()

# Training loop: one pass over the training data per epoch, followed by a
# validation pass that reports mAP.
num_epochs = 10
for epoch in range(num_epochs):
    # evaluate() switches the model to eval mode, so switch back every epoch
    model.train()
    total_loss = 0  # Sum of per-batch losses, averaged when printing below
    for images, targets in data_loader_train:
        # Move the batch (images and every target tensor) to the training device
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Forward pass: called with targets, the model returns a dict of
        # losses, which are summed into a single scalar to optimize
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        total_loss += losses.item()

        # Backward pass: clear old gradients, backpropagate, update weights
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    # Validation step
    mAP = evaluate(model, data_loader_val, device)
    # Report the mean training loss per batch plus the 'map' and 'map_50' entries
    print(f"Epoch {epoch+1}, \tLoss: {total_loss/len(data_loader_train)}, \tmAP: {mAP['map']:.4f}, \tmAP-50: {mAP['map_50']:.4f}")


In [None]:
# Export the model to ONNX format
MODEL_NAME = "fasterrcnn_resnet50_fpn_v2_scissors.onnx"

# Export the inference path explicitly. In training mode the model is called
# with targets and returns losses instead of detections, so do not rely on an
# earlier cell having left the model in eval mode.
model.eval()

torch.onnx.export(
    model,
    torch.rand(1, 3, 224, 224).to(device),  # Dummy input used to trace the model
    MODEL_NAME,  # Output path
    export_params=True,  # Store trained weights
    opset_version=11,    # ONNX opset version
    do_constant_folding=True,  # Optimize model by folding constants
    input_names=["input"],  # Name of the input layer
    output_names=["boxes", "labels", "scores"],  # Names of output layers
    dynamic_axes={
        "input": {0: "batch_size"},  # Variable batch size
        "boxes": {0: "num_boxes"},   # Variable number of boxes
        "labels": {0: "num_boxes"},
        "scores": {0: "num_boxes"}
    }
)

In [None]:
import onnx
import onnxruntime as ort

# Load the ONNX model and validate its structure
onnx_model = onnx.load(MODEL_NAME)
onnx.checker.check_model(onnx_model)

# Run inference with ONNX Runtime
ort_session = ort.InferenceSession(MODEL_NAME)

# Load your image
image_path = "image2.jpg"
image = Image.open(image_path).convert("RGB")

# Preprocessing for inference. Training fed the model plain `ToTensor()` output
# (pixel values in [0, 1]) and torchvision's Faster R-CNN normalizes its input
# internally, so no ImageNet `Normalize` is applied here: doing so would give
# the exported model differently scaled pixels than it was trained on.
# NOTE: this rebinds the notebook-level `transform`; re-run the training
# transform cell before training again.
transform = T.Compose([
    T.Resize((224, 224)),  # Resize to the 224x224 input used for the ONNX export
    T.ToTensor(),          # Convert the image to a float tensor in [0, 1]
])

# Apply transformations and add batch dimension
input_tensor = transform(image).unsqueeze(0)  # Shape: [1, 3, 224, 224]

# Convert tensor to numpy format
input_image = input_tensor.numpy()

# Example inference; outputs follow the export order: boxes, labels, scores
outputs = ort_session.run(None, {"input": input_image})
print("ONNX model outputs:", outputs)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import numpy as np

# Define a function to display the image with detected boxes and labels
def display_detections(image_path, boxes, labels, scores, threshold=0.5, class_names=None,
                       image_size=(224, 224)):
    """
    Displays the image with bounding boxes, labels, and confidence scores.

    Parameters:
    - image_path: Path to the input image.
    - boxes: Bounding boxes from the model output, as
      [x_min, y_min, x_max, y_max] in the coordinates of the resized image.
    - labels: Class labels from the model output.
    - scores: Confidence scores from the model output.
    - threshold: Minimum confidence score to display a detection.
    - class_names: List of class names corresponding to label indices.
    - image_size: (width, height) the image is resized to before drawing, so
      that it matches the size the model was run on. Pass None to keep the
      original image size.
    """
    # Load the image and bring it to the size the boxes refer to
    image = Image.open(image_path).convert("RGB")
    if image_size is not None:
        image = image.resize(tuple(image_size))
    plt.figure(figsize=(10, 10))
    plt.imshow(image)

    # Create a plot overlay for bounding boxes
    ax = plt.gca()

    for box, label, score in zip(boxes, labels, scores):
        if score < threshold:  # Only display boxes above the confidence threshold
            continue

        # Extract box coordinates
        x_min, y_min, x_max, y_max = box

        # Draw the bounding box
        rect = patches.Rectangle(
            (x_min, y_min), x_max - x_min, y_max - y_min,
            linewidth=2, edgecolor="red", facecolor="none"
        )
        ax.add_patch(rect)

        # Add label and score; fall back to the numeric label when no name
        # is available for it instead of failing with IndexError
        label_idx = int(label)
        if class_names and 0 <= label_idx < len(class_names):
            label_text = f"{class_names[label_idx]}: {score:.2f}"
        else:
            label_text = f"Label {label}: {score:.2f}"
        plt.text(
            x_min, y_min - 10, label_text,
            color="red", fontsize=12, backgroundcolor="white"
        )

    # Show the plot
    plt.axis("off")
    plt.show()

# Example usage with outputs from the ONNX model.
# `outputs` is the list returned by `ort_session.run`, in the order of the
# `output_names` given at export time: boxes, labels, scores.
image_path = "image2.jpg"
boxes = outputs[0]  # Bounding boxes
labels = outputs[1]  # Class labels
scores = outputs[2]  # Confidence scores

# Class names indexed by label id, for example: class_names = ["background", "scissors", "other"]
class_names = ["background", "scissors"]  # Modify based on your dataset

# Display detections with a confidence score of at least 0.5
display_detections(image_path, boxes, labels, scores, threshold=0.5, class_names=class_names)
