<a href="https://colab.research.google.com/github/GaganKumar2375/Palm-Tree-Detection-using-YOLOv8-and-R-CNN/blob/main/Faster_R_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Faster R-CNN Code

In [None]:
# Install pyyaml and ensure PyTorch and torchvision are installed
!pip install pyyaml
!pip install --upgrade torch torchvision


In [None]:
import os
import torch
import torchvision
import torch.utils.data
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as T
import torchvision.transforms.functional as F
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import cv2


In [None]:
# Install Roboflow library
!pip install roboflow

# Import Roboflow
from roboflow import Roboflow
rf = Roboflow(api_key="A2wycrFsDr1E3JqnNRlT")
project = rf.workspace("capstone-p9zrm").project("palm-tree-jkpzn")
version = project.version(1)
dataset = version.download("yolov8")


In [None]:
# List the contents of the dataset directory.
# `{dataset.location}` is interpolated into the shell command by IPython.
!ls {dataset.location}


data.yaml  README.dataset.txt  README.roboflow.txt  test  train  valid


In [None]:
class PalmTreeDataset(Dataset):
    """Object-detection dataset over one YOLO-format split directory.

    ``root`` must contain an ``images`` and a ``labels`` sub-directory. Every
    label file holds one ``class x_center y_center width height`` row per
    object, with the four coordinates normalised by the image size.

    Images are paired with label files by file stem (name without extension),
    so an image without a label file is simply left out instead of shifting
    every following image/label pair. Images whose label file contains no
    well-formed row are left out as well.

    Args:
        root: Path of the split directory.
        transforms: Optional callable applied to the image only (a NumPy
            ``H x W x 3`` array). The target is not passed to it, so it must
            not change the image geometry.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms

        # Map "stem" -> label file name so that each image can look up its
        # own annotation file. Sorted for a deterministic result should two
        # files share a stem.
        label_by_stem = {
            os.path.splitext(name)[0]: name
            for name in sorted(os.listdir(os.path.join(root, "labels")))
        }

        # Keep only images that have a label file with at least one valid row.
        self.imgs = []
        self.labels = []
        for img_file in sorted(os.listdir(os.path.join(root, "images"))):
            label_file = label_by_stem.get(os.path.splitext(img_file)[0])
            if label_file is None:
                continue  # Image without a label file
            if not self._read_annotations(os.path.join(root, "labels", label_file)):
                continue  # Skip images with no annotations
            self.imgs.append(img_file)
            self.labels.append(label_file)

    @staticmethod
    def _read_annotations(label_path):
        """Return the well-formed ``(class_id, xc, yc, w, h)`` rows of a label file."""
        rows = []
        with open(label_path, 'r') as f:
            for line in f:
                values = line.split()
                if len(values) != 5:
                    continue  # Blank line or line with incorrect format
                try:
                    class_id, x_center, y_center, bbox_width, bbox_height = map(float, values)
                except ValueError:
                    continue  # Non-numeric field: treated like any other malformed line
                rows.append((int(class_id), x_center, y_center, bbox_width, bbox_height))
        return rows

    def _load_sample(self, idx):
        """Load sample ``idx``; return ``None`` when it has no usable box."""
        # Load image
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        img = np.array(Image.open(img_path).convert("RGB"))
        height, width = img.shape[:2]

        # Load annotations and convert them to pixel corner coordinates
        label_path = os.path.join(self.root, "labels", self.labels[idx])
        boxes = []
        labels = []
        for class_id, x_center, y_center, bbox_width, bbox_height in self._read_annotations(label_path):
            # Clamp to the image so that boxes never extend past its border.
            x_min = max((x_center - bbox_width / 2) * width, 0.0)
            y_min = max((y_center - bbox_height / 2) * height, 0.0)
            x_max = min((x_center + bbox_width / 2) * width, float(width))
            y_max = min((y_center + bbox_height / 2) * height, float(height))
            # Drop degenerate boxes: the detection model rejects training
            # targets whose width or height is not strictly positive.
            if x_max <= x_min or y_max <= y_min:
                continue
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(class_id + 1)  # +1 for background class

        if not boxes:
            return None

        # Convert to tensors; a non-empty list of 4-element rows is always 2D.
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx]),
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            "iscrowd": torch.zeros((len(labels),), dtype=torch.int64),
        }

        # Apply transforms (image only)
        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __getitem__(self, idx):
        """Return ``(image, target)`` for ``idx``.

        When the sample at ``idx`` has no usable box, the following samples
        are tried in turn (wrapping around), as a best-effort fallback.

        Raises:
            IndexError: If ``idx`` is out of range.
            RuntimeError: If no sample in the dataset has a usable box.
        """
        # The first attempt uses ``idx`` unmodified so that an out-of-range
        # index still raises IndexError (plain iteration relies on that).
        sample = self._load_sample(idx)
        attempts = 1
        while sample is None:
            # Bounded fallback: give up after every sample has been tried.
            if attempts >= len(self):
                raise RuntimeError("No sample with at least one valid bounding box was found.")
            idx = (idx + 1) % len(self)
            sample = self._load_sample(idx)
            attempts += 1
        return sample

    def __len__(self):
        return len(self.imgs)

In [None]:
def get_transform(train):
    """Build the image transform pipeline for a dataset split.

    Args:
        train: Whether the pipeline is meant for the training split. Kept
            for interface compatibility; both splits currently receive the
            same pipeline (see the note in the body).

    Returns:
        A ``T.Compose`` that converts the input image to a tensor.
    """
    # NOTE: RandomHorizontalFlip is intentionally NOT added for training.
    # PalmTreeDataset applies this pipeline to the image alone
    # (`self.transforms(img)`), so a geometric augmentation would mirror the
    # pixels while leaving target["boxes"] untouched, and the model would be
    # trained on boxes that no longer cover the objects. Flipping has to be
    # done in a place that can update the boxes together with the image.
    transforms = [T.ToTensor()]
    return T.Compose(transforms)


In [None]:
import os
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Size of the classification head: 1 class (palm tree) + background.
num_classes = 2

# Start from the detector with its default pre-trained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# The new head must accept the same number of input features as the head
# that ships with the pre-trained model, so read that size first ...
in_features = model.roi_heads.box_predictor.cls_score.in_features

# ... and then swap in a freshly initialised predictor for `num_classes`.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [None]:
# Resolve the train / validation split directories inside the download.
train_dir, valid_dir = (
    os.path.join(dataset.location, split) for split in ('train', 'valid')
)

# One dataset object per split, each with the transform for that split.
dataset_train = PalmTreeDataset(root=train_dir, transforms=get_transform(train=True))
dataset_valid = PalmTreeDataset(root=valid_dir, transforms=get_transform(train=False))

# Define data loaders
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    A batch ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked, so samples may
    have different sizes.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Options common to both loaders: worker count and the tuple-regrouping
# collate function defined above.
shared_loader_options = dict(num_workers=2, collate_fn=collate_fn)

# Training batches are shuffled; validation runs one image at a time in order.
data_loader_train = DataLoader(dataset_train, batch_size=4, shuffle=True, **shared_loader_options)
data_loader_valid = DataLoader(dataset_valid, batch_size=1, shuffle=False, **shared_loader_options)


In [None]:
# Sanity check: collect the indices of training samples whose target
# contains no boxes, report each of them, then print the total.
empty_indices = [
    idx
    for idx in range(len(dataset_train))
    if len(dataset_train[idx][1]['boxes']) == 0
]
for idx in empty_indices:
    print(f"Image at index {idx} has no annotations.")
num_empty = len(empty_indices)
print(f"Total images without annotations: {num_empty}")




Total images without annotations: 0


In [None]:
# Compare the file stems (names without extension) of each image/label pair
# kept by the dataset and report every pair whose stems differ.
stem_pairs = (
    (os.path.splitext(img_file)[0], os.path.splitext(label_file)[0])
    for img_file, label_file in zip(dataset_train.imgs, dataset_train.labels)
)
for img_name, label_name in stem_pairs:
    if img_name != label_name:
        print(f"Mismatch: {img_name} and {label_name}")



In [None]:
import torch.optim as optim

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Hand only the parameters that require gradients to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler (optional): multiply the learning rate by 0.1
# every 3 scheduler steps.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [None]:
# Re-create the training dataset with the same directory and transform.
# Note: `data_loader_train` was built in an earlier cell from the previous
# `dataset_train` object and is not rebuilt here.
dataset_train = PalmTreeDataset(train_dir, transforms=get_transform(train=True))


In [None]:
import os

# Access the image and label file names from the dataset instance
imgs = dataset_train.imgs
labels = dataset_train.labels

# Walk both lists position by position and report every position at which
# the image and the label file do not share the same stem.
for position in range(min(len(imgs), len(labels))):
    img_name = os.path.splitext(imgs[position])[0]
    label_name = os.path.splitext(labels[position])[0]
    if img_name != label_name:
        print(f"Mismatch: {img_name} and {label_name}")



In [None]:
# Per-epoch history (one entry per completed epoch).
train_losses, val_losses, box_losses = [], [], []

num_epochs = 10

for epoch in range(num_epochs):
    # ---- Training pass ----
    model.train()
    running_loss = 0
    running_box_loss = 0

    for i, (images, targets) in enumerate(data_loader_train):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In train mode the model returns a dict of loss terms; their sum
        # is the quantity that gets back-propagated.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())
        box_loss = loss_dict['loss_box_reg'].item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        running_loss += losses.item()
        running_box_loss += box_loss

        # Progress line every 10th step.
        if i % 10 == 0:
            print(f"Epoch [{epoch+1}/{num_epochs}] Step [{i}/{len(data_loader_train)}] "
                  f"Loss: {losses.item():.4f}, Box Loss: {box_loss:.4f}")

    lr_scheduler.step()

    avg_epoch_loss = running_loss / len(data_loader_train)
    avg_box_loss = running_box_loss / len(data_loader_train)
    train_losses.append(avg_epoch_loss)
    box_losses.append(avg_box_loss)

    print(f"✅ Epoch [{epoch+1}/{num_epochs}] Avg Loss: {avg_epoch_loss:.4f}, Avg Box Loss: {avg_box_loss:.4f}")

    # ---- Validation pass ----
    # The model only returns its loss dict in train mode, so train mode is
    # kept on here while gradient tracking is switched off.
    model.train()
    val_epoch_loss = 0
    with torch.no_grad():
        for images, targets in data_loader_valid:
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            val_loss_dict = model(images, targets)
            val_epoch_loss += sum(val_loss_dict.values()).item()

    # Leave the model in eval mode once the validation pass is done.
    model.eval()

    avg_val_loss = val_epoch_loss / len(data_loader_valid)
    val_losses.append(avg_val_loss)

    print(f"🔍 Validation Loss after Epoch [{epoch+1}/{num_epochs}]: {avg_val_loss:.4f}")


In [None]:
# torchmetrics provides the MeanAveragePrecision metric that the evaluation
# cell imports from `torchmetrics.detection.mean_ap`.
%pip install torchmetrics


In [None]:
import torch
from torchvision.transforms.functional import to_pil_image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import torchvision.ops as ops
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score, f1_score
import numpy as np

# Display names keyed by class label; add one entry per extra class when the
# model is trained on more than one class.
label_map = {1: "Palm Tree"}

def compute_iou(boxA, boxB):
    """Return the IoU of two single boxes as a Python float.

    Each box is converted to a tensor and given a leading batch dimension
    before being passed to ``ops.box_iou``; the single entry of the result
    is returned.
    """
    batched_a, batched_b = (torch.tensor(box)[None] for box in (boxA, boxB))
    pairwise_iou = ops.box_iou(batched_a, batched_b)
    return pairwise_iou.item()


def visualize_predictions(model, dataset, device, num_images=5, threshold=0.5):
    """Plot the model's detections for the first samples of ``dataset``.

    For each sample the predictions scoring at least ``threshold`` are drawn
    in red over the image, labelled with class name and score. While doing
    so, ground-truth boxes are matched against those predictions (same
    label, IoU > 0.5, each prediction used at most once).

    Args:
        model: Detection model; it is switched to eval mode.
        dataset: Indexable dataset yielding ``(image_tensor, target)`` where
            ``target`` has CPU tensors under ``"boxes"`` and ``"labels"``.
        device: Device the image is moved to for inference.
        num_images: Number of samples to show; capped at ``len(dataset)``.
        threshold: Minimum score for a prediction to be drawn and matched.

    Returns:
        float: Fraction of ground-truth boxes that were matched over the
        displayed samples (0.0 when there were no ground-truth boxes).
    """
    model.eval()

    total_gt = 0
    total_correct = 0

    # Cap the count so that a short dataset does not raise IndexError.
    for i in range(min(num_images, len(dataset))):
        img, target = dataset[i]
        img = img.to(device)

        # Gradient tracking is disabled for the forward pass only. Turning
        # it off globally would leave it off for every later cell and break
        # a subsequent training run.
        with torch.no_grad():
            prediction = model([img])[0]

        pil_img = to_pil_image(img.cpu())

        # Apply the score threshold once, so that matching and drawing work
        # on exactly the same set of predictions.
        confident = prediction['scores'] >= threshold
        pred_boxes = prediction['boxes'][confident].cpu().numpy()
        pred_scores = prediction['scores'][confident].cpu().numpy()
        pred_labels = prediction['labels'][confident].cpu().numpy()

        gt_boxes = target['boxes'].numpy()
        gt_labels = target['labels'].numpy()
        total_gt += len(gt_labels)

        # Match every ground-truth box to the first unused prediction of the
        # same class that overlaps it with IoU > 0.5.
        pred_matched = set()
        for gt_box, gt_label in zip(gt_boxes, gt_labels):
            for pred_idx, (p_box, p_label) in enumerate(zip(pred_boxes, pred_labels)):
                if pred_idx in pred_matched or p_label != gt_label:
                    continue
                if compute_iou(gt_box, p_box) > 0.5:
                    total_correct += 1
                    pred_matched.add(pred_idx)
                    break

        # Visualization
        fig, ax = plt.subplots(1, figsize=(12, 9))
        ax.imshow(pil_img)

        for box, score, label in zip(pred_boxes, pred_scores, pred_labels):
            x_min, y_min, x_max, y_max = box
            class_name = label_map.get(int(label), f"Class {label}")
            rect = patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                     linewidth=2, edgecolor='red', facecolor='none')
            ax.add_patch(rect)
            ax.text(x_min, y_min - 10, f'{class_name}: {score:.2f}', color='red', fontsize=12)

        plt.title(f"Prediction {i+1}")
        plt.axis('off')
        plt.show()
        # Release the figure so that many images do not accumulate in memory.
        plt.close(fig)

    accuracy = total_correct / total_gt if total_gt > 0 else 0.0
    return accuracy


def evaluate_model(model, dataset, device, threshold=0.5):
    """Print detection precision / recall / F1 and plot a confusion matrix.

    Predictions scoring at least ``threshold`` are processed in decreasing
    score order. Each one is matched to the not-yet-matched ground-truth box
    of the same class with the highest IoU, provided that IoU exceeds 0.5:

    * matched prediction           -> true positive (TP)
    * unmatched prediction         -> false positive (FP)
    * ground-truth box never matched -> false negative (FN)

    Precision, recall and F1 are computed from these totals over all classes.
    The confusion matrix adds a "Background" row/column: FPs appear as
    (true background, predicted class) and FNs as (true class, predicted
    background).

    Args:
        model: Detection model; it is switched to eval mode.
        dataset: Indexable dataset yielding ``(image_tensor, target)`` where
            ``target`` has CPU tensors under ``"boxes"`` and ``"labels"``.
        device: Device the image is moved to for inference.
        threshold: Minimum score for a prediction to be evaluated.

    Returns:
        None. Results are printed and plotted.
    """
    background = 0       # label used for "no object" in the confusion matrix
    iou_threshold = 0.5  # a match requires IoU strictly above this value

    model.eval()

    y_true_all = []
    y_pred_all = []

    total_tp = 0
    total_fp = 0
    total_fn = 0

    for i in range(len(dataset)):
        img, target = dataset[i]

        # Disable gradient tracking for the forward pass only; a global
        # switch would stay off for later cells and break further training.
        with torch.no_grad():
            output = model([img.to(device)])[0]

        # Keep confident predictions, highest score first, so that the most
        # confident prediction gets the first pick of ground-truth boxes.
        scores = output['scores'].cpu()
        order = torch.argsort(scores, descending=True)
        order = order[scores[order] >= threshold]
        pred_boxes = output['boxes'].cpu()[order]
        pred_labels = output['labels'].cpu()[order].tolist()

        gt_boxes = target['boxes']
        gt_labels = target['labels'].tolist()

        # Pairwise IoU, shape (num_predictions, num_ground_truth).
        iou_matrix = ops.box_iou(pred_boxes, gt_boxes)

        matched_gt = set()
        for pred_idx, p_label in enumerate(pred_labels):
            best_gt = None
            best_iou = iou_threshold
            for gt_idx, gt_label in enumerate(gt_labels):
                if gt_idx in matched_gt or gt_label != p_label:
                    continue
                iou = iou_matrix[pred_idx, gt_idx].item()
                if iou > best_iou:
                    best_gt, best_iou = gt_idx, iou

            if best_gt is None:
                total_fp += 1  # unmatched prediction
                y_true_all.append(background)
            else:
                total_tp += 1
                matched_gt.add(best_gt)
                y_true_all.append(gt_labels[best_gt])
            y_pred_all.append(p_label)

        # Every ground-truth box left unmatched is a missed detection.
        for gt_idx, gt_label in enumerate(gt_labels):
            if gt_idx not in matched_gt:
                total_fn += 1
                y_true_all.append(gt_label)
                y_pred_all.append(background)

    # Detection metrics from the TP / FP / FN totals. (Scoring only the
    # matched pairs would ignore every FP and FN and report ~100%.)
    precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
    recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0

    print(f"\nModel Evaluation (IoU > 0.5 & correct class):")
    print(f"Precision: {precision:.2%}")
    print(f"Recall:    {recall:.2%}")
    print(f"F1 Score:  {f1:.2%}")
    print(f"TP: {total_tp}, FP: {total_fp}, FN: {total_fn}")

    # Confusion matrix (background first, then the classes of label_map)
    if y_true_all:
        labels = [background] + list(label_map.keys())
        cm = confusion_matrix(y_true_all, y_pred_all, labels=labels)
        display_labels = ["Background"] + [label_map[lbl] for lbl in label_map]
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)

        disp.plot(cmap='Blues')
        plt.title("Confusion Matrix")
        plt.show()
from torchmetrics.detection.mean_ap import MeanAveragePrecision

def compute_map(model, dataset, device, threshold=0.5):
    """Compute and print mean average precision over ``dataset``.

    Every sample is run through the model and the predictions and the
    ground truth are accumulated in a ``MeanAveragePrecision`` metric.
    The ``map`` and ``map_50`` entries of the result are printed.

    Args:
        model: Detection model; it is switched to eval mode.
        dataset: Indexable dataset yielding ``(image_tensor, target)`` where
            ``target`` has tensors under ``"boxes"`` and ``"labels"``.
        device: Device the image is moved to for inference.
        threshold: Unused. Kept so that existing calls keep working; all
            predictions are passed to the metric regardless of score.

    Returns:
        None. Results are printed.
    """
    model.eval()

    metric = MeanAveragePrecision()
    for i in range(len(dataset)):
        img, target = dataset[i]
        img = img.to(device)

        # Gradient tracking is disabled for the forward pass only; the
        # global grad mode is left as the caller set it, so a later
        # training run in the same kernel still works.
        with torch.no_grad():
            pred = model([img])[0]

        # Format predictions
        pred_formatted = [{
            "boxes": pred["boxes"].cpu(),
            "scores": pred["scores"].cpu(),
            "labels": pred["labels"].cpu()
        }]

        # Format ground truth (moved to CPU to match the predictions)
        target_formatted = [{
            "boxes": target["boxes"].cpu(),
            "labels": target["labels"].cpu()
        }]

        metric.update(pred_formatted, target_formatted)

    results = metric.compute()
    print(f"\n✅ Mean Average Precision (mAP@0.5:0.95): {results['map']:.4f}")
    print(f"✅ Mean Average Precision (mAP@0.5): {results['map_50']:.4f}")

# All three evaluation helpers run on the validation split with the same
# model and device, so the shared arguments are collected once.
eval_args = dict(model=model, dataset=dataset_valid, device=device)

# Show prediction results on sample images, then report mAP
visualize_predictions(**eval_args, num_images=5, threshold=0.5)
compute_map(**eval_args)

# Evaluate model performance across all data
evaluate_model(**eval_args)





In [None]:
# Use the number of epochs actually recorded for the x-axis. Building it
# from `num_epochs` fails with a length mismatch when training was
# interrupted or `num_epochs` was changed after the last run.
epochs_recorded = range(1, len(train_losses) + 1)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs_recorded, train_losses, marker='o', linestyle='-')
ax.set_title("Training Loss Curve")
ax.set_xlabel("Epoch")
ax.set_ylabel("Average Loss")
ax.grid(True)
plt.show()


In [None]:
# Save the model's state dictionary under a relative path (i.e. in the
# current working directory). Only the state dict is written, so the
# reload cell has to rebuild the model structure before loading it.
torch.save(model.state_dict(), 'faster_rcnn_palm_tree.pth')


In [None]:
# Mount Google Drive at /content/drive. `google.colab` is only importable
# inside a Colab runtime, so this cell is Colab-specific.
from google.colab import drive
drive.mount('/content/drive')

# Copy the model to Google Drive (top level of "MyDrive")
!cp faster_rcnn_palm_tree.pth /content/drive/MyDrive/


Mounted at /content/drive


In [None]:
# Load the model structure (no pre-trained detector weights; they are
# replaced by the saved state dictionary below)
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)

# Modify the model for your number of classes, exactly as was done before
# training, so that the saved tensors fit the head
in_features = model.roi_heads.box_predictor.cls_score.in_features
num_classes = 2  # 1 class (palm tree) + background
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the saved state dictionary. `map_location` remaps the stored tensors
# onto the current device, so a checkpoint saved in a GPU session also
# loads on a CPU-only runtime.
model.load_state_dict(torch.load('faster_rcnn_palm_tree.pth', map_location=device))

# Move the model to the device
model.to(device)


In [None]:
# Load from Google Drive. `map_location` remaps the stored tensors onto the
# current device, so a checkpoint saved in a GPU session also loads on a
# CPU-only runtime.
model.load_state_dict(
    torch.load('/content/drive/MyDrive/faster_rcnn_palm_tree.pth', map_location=device)
)
