In [1]:
# Cell 1: Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import xml.etree.ElementTree as ET
import torch
import torchvision
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image
import os

In [2]:
# Cell 2: Custom Dataset Class
class CustomDataset(Dataset):
    def __init__(self, dataset_path, split, transform=None):
        self.dataset_path = dataset_path
        self.split = split
        self.transform = transform
        self.images = []
        self.annotations = []

        split_dir = os.path.join(dataset_path, split)
        images_dir = os.path.join(split_dir, "images")
        annotations_dir = os.path.join(split_dir, "annotations")

        for filename in os.listdir(images_dir):
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(images_dir, filename)
                self.images.append(image_path)

                annotation_path = os.path.join(annotations_dir, os.path.splitext(filename)[0] + ".xml")
                tree = ET.parse(annotation_path)
                root = tree.getroot()
                annotation = []

                for obj in root.findall("object"):
                    name = obj.find("name").text
                    bbox = obj.find("bndbox")
                    xmin = int(bbox.find("xmin").text)
                    ymin = int(bbox.find("ymin").text)
                    xmax = int(bbox.find("xmax").text)
                    ymax = int(bbox.find("ymax").text)
                    annotation.append((xmin, ymin, xmax, ymax))

                self.annotations.append(annotation)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image_path = self.images[index]
        image = Image.open(image_path).convert("L")  # Load thermal image as grayscale
        annotation = self.annotations[index]

        if self.transform:
            image = self.transform(image)

        boxes = torch.as_tensor(annotation, dtype=torch.float32)
        labels = torch.ones((len(annotation),), dtype=torch.int64)  # Set all labels to 1 (vehicle)

        return image, {"boxes": boxes, "labels": labels}



In [3]:
 # Test the dataset
dataset_path = "dataset"  # Replace with the path to your dataset
split = "train"  # Replace with the appropriate split (e.g., "train", "val", "test")
thermal_transform = transforms.Compose([
    transforms.Resize((800, 800)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

thermal_dataset = CustomDataset(dataset_path, split, thermal_transform)
print(f"Number of images in the dataset: {len(thermal_dataset)}")
image, target = thermal_dataset[0]
print(f"Image shape: {image.shape}")
print(f"Target boxes: {target['boxes']}")
print(f"Target labels: {target['labels']}")

Number of images in the dataset: 67
Image shape: torch.Size([1, 800, 800])
Target boxes: tensor([[454.,  79., 595., 116.],
        [438., 108., 477., 125.]])
Target labels: tensor([1, 1])


In [4]:
# Cell 3: Dataset Preprocessing
def preprocess_dataset(dataset):
    preprocessed_images = []
    preprocessed_annotations = []
    
    for image, target in dataset:
        if isinstance(image, torch.Tensor):
            image = transforms.ToPILImage()(image)
        else:
            image = Image.fromarray(image)
        
        image = thermal_transform(image)  # Use the same transform as in the dataset creation
        
        boxes = target['boxes']
        labels = target['labels']
        
        # Convert bounding box coordinates to relative values
        _, height, width = image.shape
        boxes[:, [0, 2]] /= width
        boxes[:, [1, 3]] /= height
        
        # Create target dictionary
        target = {'boxes': boxes, 'labels': labels}
        
        preprocessed_images.append(image)
        preprocessed_annotations.append(target)
    
    return preprocessed_images, preprocessed_annotations

# Test the preprocessing function
preprocessed_images, preprocessed_annotations = preprocess_dataset(thermal_dataset)
print(f"Number of preprocessed images: {len(preprocessed_images)}")
print(f"Number of preprocessed annotations: {len(preprocessed_annotations)}")
print(f"Preprocessed image shape: {preprocessed_images[0].shape}")
print(f"Preprocessed annotation boxes shape: {preprocessed_annotations[0]['boxes'].shape}")
print(f"Preprocessed annotation labels shape: {preprocessed_annotations[0]['labels'].shape}")

Number of preprocessed images: 67
Number of preprocessed annotations: 67
Preprocessed image shape: torch.Size([1, 800, 800])
Preprocessed annotation boxes shape: torch.Size([2, 4])
Preprocessed annotation labels shape: torch.Size([2])


In [5]:
# Cell 4: Dataset and DataLoader Creation
dataset_path = "dataset"
split = "train"

thermal_transform = transforms.Compose([
    transforms.Resize((800, 800)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

thermal_dataset = CustomDataset(dataset_path, split, thermal_transform)

class_labels = set()
for _, annotation in thermal_dataset:
    for obj in annotation:
        name = obj[0]
        class_labels.add("vehicle")  # Map all vehicle labels to "vehicle"

class_to_idx = {"vehicle": 0}  # Create a dictionary with a single "vehicle" class
print("Class labels:", class_to_idx)

preprocessed_thermal_images, preprocessed_thermal_annotations = preprocess_dataset(thermal_dataset)

def collate_fn(batch):
    images = [item[0] for item in batch]
    targets = [item[1] for item in batch]
    
    images = torch.stack(images, dim=0)
    
    return images, targets

train_thermal_dataset = list(zip(preprocessed_thermal_images, preprocessed_thermal_annotations))
train_thermal_loader = DataLoader(train_thermal_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)

# Test the dataloader
for images, targets in train_thermal_loader:
    print(f"Batch images shape: {images.shape}")
    print(f"Batch targets boxes shape: {targets[0]['boxes'].shape}")
    print(f"Batch targets labels shape: {targets[0]['labels'].shape}")
    break

Class labels: {'vehicle': 0}
Batch images shape: torch.Size([4, 1, 800, 800])
Batch targets boxes shape: torch.Size([3, 4])
Batch targets labels shape: torch.Size([3])


In [6]:
# Cell 5: Model Definition and Training
num_classes = len(class_to_idx) + 1

thermal_model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = thermal_model.roi_heads.box_predictor.cls_score.in_features
thermal_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
thermal_model.to(device)

thermal_optimizer = torch.optim.SGD(thermal_model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

num_epochs = 10

for epoch in range(num_epochs):
    thermal_model.train()
    
    thermal_epoch_loss = 0.0
    
    for thermal_images, thermal_targets in train_thermal_loader:
        thermal_images = list(image.to(device) for image in thermal_images)
        thermal_targets = [{k: v.to(device) for k, v in t.items()} for t in thermal_targets]
        
        thermal_loss_dict = thermal_model(thermal_images, thermal_targets)
        thermal_losses = sum(loss for loss in thermal_loss_dict.values())
        
        thermal_optimizer.zero_grad()
        thermal_losses.backward()
        thermal_optimizer.step()
        
        thermal_epoch_loss += thermal_losses.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Thermal Loss: {thermal_epoch_loss/len(train_thermal_loader):.4f}")

torch.save(thermal_model.state_dict(), "thermal_trained_model.pth")

# Test the trained model
thermal_model.eval()
with torch.no_grad():
    for images, targets in train_thermal_loader:
        images = list(image.to(device) for image in images)
        outputs = thermal_model(images)
        
        for i in range(len(images)):
            boxes = outputs[i]['boxes'].cpu().numpy()
            labels = outputs[i]['labels'].cpu().numpy()
            scores = outputs[i]['scores'].cpu().numpy()
            
            print(f"Image {i+1} - Boxes: {boxes}, Labels: {labels}, Scores: {scores}")
        
        break

Epoch [1/10], Thermal Loss: 1.3076
Epoch [2/10], Thermal Loss: 0.2855
Epoch [3/10], Thermal Loss: 0.2391
Epoch [4/10], Thermal Loss: 0.1688
Epoch [5/10], Thermal Loss: 0.1659
Epoch [6/10], Thermal Loss: 0.1625
Epoch [7/10], Thermal Loss: 0.1570
Epoch [8/10], Thermal Loss: 0.1405
Epoch [9/10], Thermal Loss: 0.1626
Epoch [10/10], Thermal Loss: 0.1093
Image 1 - Boxes: [[0.0000000e+00 8.4166527e-03 8.5089493e-01 2.9415581e+00]
 [0.0000000e+00 7.6106787e-03 3.7368519e+00 2.6598716e+00]
 [4.7867441e-01 1.9732863e-05 6.4908254e-01 1.0730572e-02]
 [9.8200750e-01 4.5942679e-02 1.1545837e+00 9.7001940e-02]
 [1.0332977e+00 2.7096623e-01 1.1705803e+00 2.9882103e-01]
 [0.0000000e+00 1.2505531e-02 7.7459931e+00 4.3148017e+00]
 [1.4372691e+00 2.7050617e-01 1.5814084e+00 3.0529585e-01]
 [4.8625040e-01 1.3814304e+00 6.2472200e-01 1.4154294e+00]
 [1.4855034e+00 6.5693390e-01 1.6462066e+00 6.9911933e-01]], Labels: [1 1 1 1 1 1 1 1 1], Scores: [0.7108599  0.7108599  0.6937962  0.53665274 0.41325888 0.2407

In [7]:
# Cell 6: Evaluation and Testing
def evaluate_model(model, dataloader, device):
    model.eval()
    
    all_predictions = []
    all_targets = []
    
    with torch.no_grad():
        for images, targets in dataloader:
            images = list(image.to(device) for image in images)
            outputs = model(images)
            
            for output in outputs:
                boxes = output['boxes'].cpu().numpy()
                labels = output['labels'].cpu().numpy()
                scores = output['scores'].cpu().numpy()
                
                # Apply non-maximum suppression to remove overlapping detections
                indices = torchvision.ops.nms(torch.tensor(boxes), torch.tensor(scores), iou_threshold=0.5)
                
                filtered_boxes = boxes[indices]
                filtered_labels = labels[indices]
                filtered_scores = scores[indices]
                
                all_predictions.append((filtered_boxes, filtered_labels, filtered_scores))
            
            for target in targets:
                boxes = target['boxes'].cpu().numpy()
                labels = target['labels'].cpu().numpy()
                
                all_targets.append((boxes, labels))
    
    return all_predictions, all_targets

# Test the evaluation function
test_split = "test"
test_thermal_dataset = CustomDataset(dataset_path, test_split, thermal_transform)
test_thermal_loader = DataLoader(test_thermal_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

test_predictions, test_targets = evaluate_model(thermal_model, test_thermal_loader, device)
print(f"Number of test predictions: {len(test_predictions)}")
print(f"Number of test targets: {len(test_targets)}")
print(f"Test prediction boxes shape: {test_predictions[0][0].shape}")
print(f"Test prediction labels shape: {test_predictions[0][1].shape}")
print(f"Test prediction scores shape: {test_predictions[0][2].shape}")

Number of test predictions: 10
Number of test targets: 10
Test prediction boxes shape: (6, 4)
Test prediction labels shape: (6,)
Test prediction scores shape: (6,)


In [15]:
# Cell 7: Load the trained model
thermal_model.load_state_dict(torch.load("thermal_trained_model.pth"))
thermal_model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [21]:
# Cell 8: Prepare the test dataset
test_split = "test"
test_thermal_dataset = CustomDataset(dataset_path, test_split, thermal_transform)
test_thermal_loader = DataLoader(test_thermal_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

In [23]:
# Cell 9: Evaluate the model on the test dataset
test_predictions, test_targets = evaluate_model(thermal_model, test_thermal_loader, device)

# Print the unique labels predicted by the model
unique_labels = np.unique(labels)
print("Unique labels:", unique_labels)
print(f"Boxes: {boxes}")
print(f"Labels: {labels}")
print(f"Scores: {scores}")

Unique labels: [1]
Boxes: [[2.9258728e-03 0.0000000e+00 1.9116384e+00 2.6771240e+00]
 [5.5000782e-03 0.0000000e+00 3.5934463e+00 6.2468156e-02]
 [0.0000000e+00 0.0000000e+00 4.9486938e+00 2.9298139e+00]
 [5.4076523e-01 4.9853525e-01 6.2079901e-01 5.1606959e-01]
 [1.5144279e+00 3.1305653e-01 1.5735066e+00 3.2516348e-01]
 [6.8590331e-01 9.3631053e-01 7.6117373e-01 9.5267284e-01]
 [2.1178732e+00 1.1545707e-01 2.1702194e+00 1.2670097e-01]]
Labels: [1 1 1 1 1 1 1]
Scores: [0.7458528  0.7458528  0.7403982  0.5833959  0.2661557  0.24714203
 0.12668324]


In [24]:
# Cell 10: Visualize the object detection results
def visualize_detections(image, boxes, labels, scores, class_labels, confidence_threshold=0.3):  # Adjust the confidence threshold
    image_with_detections = image.copy()
    
    for box, label, score in zip(boxes, labels, scores):
        if score >= confidence_threshold:
            xmin, ymin, xmax, ymax = box.astype(int)
            class_name = class_labels[label.item()]  # Convert label to a scalar value
            
            cv2.rectangle(image_with_detections, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
            cv2.putText(image_with_detections, f"{class_name}: {score:.2f}", (xmin, ymin - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    
    return image_with_detections

# Visualize the object detection results for the first few test images
num_visualizations = 5
class_labels = {1: "vehicle"}

for i in range(num_visualizations):
    image_path = test_thermal_dataset.images[i]
    image = cv2.imread(image_path)  # Read the image in color mode
    
    boxes, labels, scores = test_predictions[i]
    
    print(f"Boxes: {boxes}")
    print(f"Labels: {labels}")
    print(f"Scores: {scores}")
    
    image_with_detections = visualize_detections(image, boxes, labels, scores, class_labels)
    
    cv2.imshow(f"Thermal Object Detection - Image {i+1}", image_with_detections)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Boxes: [[8.9168549e-04 0.0000000e+00 7.3191488e-01 5.7565325e-01]
 [6.7140871e-01 6.0547167e-01 7.6231176e-01 6.2662452e-01]
 [6.5970236e-01 6.1804998e-01 7.4673384e-01 6.3826191e-01]
 [1.3548290e+00 2.9004845e-01 1.4111799e+00 3.0378476e-01]
 [1.2371109e+00 8.9516515e-01 1.3152559e+00 9.1440147e-01]
 [1.6977897e+00 6.3215578e-01 1.7721329e+00 6.4769292e-01]]
Labels: [1 1 1 1 1 1]
Scores: [0.75111425 0.5129781  0.5086281  0.39780572 0.13173448 0.10310223]
Boxes: [[0.0000000e+00 1.6462803e-04 4.3652678e+00 2.0668240e+00]
 [0.0000000e+00 0.0000000e+00 5.6748209e+00 3.2516885e+00]
 [8.3711225e-01 3.2933399e-01 9.2519683e-01 3.4744599e-01]
 [9.6014380e-01 6.0103106e-01 1.0416191e+00 6.1747348e-01]
 [1.1062247e+00 8.8230133e-01 1.1719178e+00 8.9647543e-01]
 [1.5080084e+00 6.2519759e-01 1.5917872e+00 6.3925952e-01]
 [1.8805952e+00 4.4338188e-01 1.9432111e+00 4.5799801e-01]
 [1.4109907e+00 9.7812682e-01 1.4805651e+00 9.9282295e-01]]
Labels: [1 1 1 1 1 1 1 1]
Scores: [0.7741793  0.7313582  0.5