In [None]:
# Cell 1: Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import xml.etree.ElementTree as ET
import torch
import torchvision
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image
import os

In [None]:
# Cell 2: Custom Dataset Class
class CustomDataset(Dataset):
    """Object-detection dataset backed by Pascal-VOC-style XML annotations.

    Expected layout on disk::

        {dataset_path}/{split}/images/NAME.jpg (or .png)
        {dataset_path}/{split}/annotations/NAME.xml

    Every ``object`` element of an annotation file contributes one
    ``(xmin, ymin, xmax, ymax)`` box. The class name stored in the XML is
    ignored: every object is given label ``1``.

    Args:
        dataset_path: Root directory of the dataset.
        split: Sub-directory of ``dataset_path`` to read (e.g. ``"train"``).
        transform: Optional callable applied to the PIL image.

    Each item is ``(image, {"boxes": FloatTensor[N, 4], "labels": Int64Tensor[N]})``.

    Note:
        When ``transform`` changes the image size, boxes are rescaled by the
        same factors. This assumes the size change is a plain resize; crops or
        flips inside ``transform`` are NOT reflected in the boxes.
    """

    # File extensions (case-sensitive) that are treated as images.
    IMAGE_EXTENSIONS = (".jpg", ".png")

    def __init__(self, dataset_path, split, transform=None):
        self.dataset_path = dataset_path
        self.split = split
        self.transform = transform
        self.images = []
        self.annotations = []

        split_dir = os.path.join(dataset_path, split)
        images_dir = os.path.join(split_dir, "images")
        annotations_dir = os.path.join(split_dir, "annotations")

        # os.listdir() returns entries in arbitrary order; sort so that a given
        # index always refers to the same sample across runs and machines.
        for filename in sorted(os.listdir(images_dir)):
            if not filename.endswith(self.IMAGE_EXTENSIONS):
                continue

            stem = os.path.splitext(filename)[0]
            annotation_path = os.path.join(annotations_dir, stem + ".xml")

            # Parse before appending so the two lists can never get out of step
            # if an annotation file is missing or malformed.
            boxes = self._parse_boxes(annotation_path)

            self.images.append(os.path.join(images_dir, filename))
            self.annotations.append(boxes)

    @staticmethod
    def _parse_boxes(annotation_path):
        """Return the list of (xmin, ymin, xmax, ymax) int tuples in one XML file."""
        root = ET.parse(annotation_path).getroot()
        boxes = []
        for obj in root.findall("object"):
            bbox = obj.find("bndbox")
            # int(float(...)) also accepts coordinates written as "12.0".
            boxes.append(tuple(
                int(float(bbox.find(tag).text))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            ))
        return boxes

    @staticmethod
    def _width_height(image):
        """Return (width, height) of a PIL-like image or a [..., H, W] tensor, else None."""
        if isinstance(image, torch.Tensor):
            if image.dim() < 2:
                return None
            height, width = image.shape[-2:]
            return int(width), int(height)
        size = getattr(image, "size", None)
        if isinstance(size, tuple) and len(size) == 2:
            return size
        return None

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image = Image.open(self.images[index]).convert("L")
        annotation = self.annotations[index]
        original_size = self._width_height(image)

        if self.transform:
            image = self.transform(image)

        # reshape(-1, 4) keeps the documented [N, 4] shape even when N == 0.
        boxes = torch.as_tensor(annotation, dtype=torch.float32).reshape(-1, 4)
        labels = torch.ones((len(annotation),), dtype=torch.int64)

        # Keep the boxes aligned with the image if the transform resized it.
        transformed_size = self._width_height(image)
        if (
            boxes.numel() > 0
            and original_size is not None
            and transformed_size is not None
            and all(original_size)
            and transformed_size != original_size
        ):
            boxes[:, [0, 2]] *= transformed_size[0] / original_size[0]
            boxes[:, [1, 3]] *= transformed_size[1] / original_size[1]

        return image, {"boxes": boxes, "labels": labels}



In [None]:
 # Test the dataset
dataset_path = "dataset"
split = "train"

# Image-only preprocessing for the detector.
# * RandomHorizontalFlip is intentionally omitted: torchvision's image-only
#   transforms cannot mirror the bounding boxes, so a flip would desynchronise
#   images and targets.
# * No Normalize step: torchvision detection models expect inputs in the 0-1
#   range and apply their own mean/std normalisation internally.
thermal_transform = transforms.Compose([
    transforms.Resize((800, 800)),
    transforms.ToTensor(),
])

thermal_dataset = CustomDataset(dataset_path, split, thermal_transform)
print(f"Number of images in the dataset: {len(thermal_dataset)}")

# Inspect the first sample, if there is one.
if len(thermal_dataset) > 0:
    image, target = thermal_dataset[0]
    print(f"Image shape: {image.shape}")
    print(f"Target boxes: {target['boxes']}")
    print(f"Target labels: {target['labels']}")
else:
    print("Dataset is empty; nothing to inspect.")

In [None]:
# Cell 3: Dataset Preprocessing
def preprocess_dataset(dataset, transform=None, normalize_boxes=True):
    """Materialise a detection dataset into parallel lists of images and targets.

    Args:
        dataset: Iterable of ``(image, target)`` pairs where ``target`` is a
            dict with ``'boxes'`` (N x 4, ``xmin, ymin, xmax, ymax``) and
            ``'labels'`` tensors.
        transform: Callable used to turn a non-tensor image (PIL image or
            NumPy array) into a ``[C, H, W]`` tensor. Defaults to the
            notebook-level ``thermal_transform``.
        normalize_boxes: When True (default, the historical behaviour) box
            coordinates are divided by the image width/height. Pass False to
            keep them in pixels.

    Returns:
        ``(images, targets)``: a list of image tensors and a list of
        ``{'boxes': FloatTensor[N, 4], 'labels': ...}`` dicts.

    Notes:
        * Images that are already tensors are assumed to have been through the
          dataset's own transform and are passed through unchanged. Converting
          them back to PIL and transforming again would resize/normalise twice.
        * Boxes are copied before scaling, so the tensors owned by ``dataset``
          are never modified.
        * NOTE(review): torchvision detection models document their targets in
          absolute pixel coordinates; confirm that normalised boxes are really
          what the downstream training loop wants.
    """
    preprocessed_images = []
    preprocessed_annotations = []

    for image, target in dataset:
        if not isinstance(image, torch.Tensor):
            pipeline = transform if transform is not None else thermal_transform
            if isinstance(image, np.ndarray):
                image = Image.fromarray(image)
            image = pipeline(image)

        # clone(): never scale the caller's tensor in place.
        # reshape(-1, 4): an image without objects still yields a [0, 4] tensor.
        boxes = target['boxes'].clone().to(torch.float32).reshape(-1, 4)
        labels = target['labels']

        if normalize_boxes and boxes.numel() > 0:
            height, width = image.shape[-2:]
            boxes[:, [0, 2]] /= width
            boxes[:, [1, 3]] /= height

        preprocessed_images.append(image)
        preprocessed_annotations.append({'boxes': boxes, 'labels': labels})

    return preprocessed_images, preprocessed_annotations

# Smoke check: push the whole dataset through preprocessing, then report how
# many samples came out and what the first one looks like.
preprocessed_images, preprocessed_annotations = preprocess_dataset(thermal_dataset)

print(
    f"Number of preprocessed images: {len(preprocessed_images)}",
    f"Number of preprocessed annotations: {len(preprocessed_annotations)}",
    sep="\n",
)

# Shapes of the first sample only.
print(f"Preprocessed image shape: {preprocessed_images[0].shape}")
print(f"Preprocessed annotation boxes shape: {preprocessed_annotations[0]['boxes'].shape}")
print(f"Preprocessed annotation labels shape: {preprocessed_annotations[0]['labels'].shape}")

In [None]:
# Cell 4: Dataset and DataLoader Creation
dataset_path = "dataset"
split = "train"

# Image-only preprocessing for the detector.
# * RandomHorizontalFlip is intentionally omitted: torchvision's image-only
#   transforms cannot mirror the bounding boxes, so a flip would desynchronise
#   images and targets.
# * No Normalize step: torchvision detection models expect inputs in the 0-1
#   range and apply their own mean/std normalisation internally.
thermal_transform = transforms.Compose([
    transforms.Resize((800, 800)),
    transforms.ToTensor(),
])

thermal_dataset = CustomDataset(dataset_path, split, thermal_transform)

# The dataset assigns label 1 to every annotated object, so there is exactly
# one foreground class. (No need to load every image just to discover that.)
class_labels = {"vehicle"}

# Index 0 is reserved for the background class by torchvision detection models,
# so the single foreground class is index 1, matching the dataset labels.
class_to_idx = {"vehicle": 1}
print("Class labels:", class_to_idx)

preprocessed_thermal_images, preprocessed_thermal_annotations = preprocess_dataset(thermal_dataset)

def collate_fn(batch):
    """Collate ``(image, target)`` samples into one image tensor and a target list.

    Args:
        batch: Sequence of ``(image_tensor, target)`` pairs; all image tensors
            must have the same shape so they can be stacked.

    Returns:
        ``(images, targets)`` where ``images`` is the samples' image tensors
        stacked along a new leading dimension and ``targets`` is the list of
        the samples' targets, in batch order.
    """
    image_tensors, targets = [], []
    for sample in batch:
        image_tensors.append(sample[0])
        targets.append(sample[1])

    return torch.stack(image_tensors, dim=0), targets

# Pair every preprocessed image with its target; the resulting list of tuples
# is used directly as a map-style dataset.
train_thermal_dataset = [
    pair
    for pair in zip(preprocessed_thermal_images, preprocessed_thermal_annotations)
]
train_thermal_loader = DataLoader(
    train_thermal_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn,
)

# Peek at the first batch only, to confirm the shapes the model will receive.
for images, targets in train_thermal_loader:
    print(f"Batch images shape: {images.shape}")
    print(f"Batch targets boxes shape: {targets[0]['boxes'].shape}")
    print(f"Batch targets labels shape: {targets[0]['labels'].shape}")
    break

In [None]:
# Cell 5: Model Definition and Training
# One extra output on top of the entries in class_to_idx.
num_classes = 1 + len(class_to_idx)

# Start from the default pretrained Faster R-CNN and replace its box predictor
# with a fresh head sized for num_classes outputs.
thermal_model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = thermal_model.roi_heads.box_predictor.cls_score.in_features
thermal_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
thermal_model.to(device)

thermal_optimizer = torch.optim.SGD(
    thermal_model.parameters(),
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

num_epochs = 10

# ---- training ----
for epoch in range(num_epochs):
    thermal_model.train()

    thermal_epoch_loss = 0.0

    for thermal_images, thermal_targets in train_thermal_loader:
        # The model takes a list of per-image tensors and a list of target dicts.
        thermal_images = [image.to(device) for image in thermal_images]
        thermal_targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in thermal_targets
        ]

        # In train mode the model returns a dict of losses; optimise their sum.
        thermal_loss_dict = thermal_model(thermal_images, thermal_targets)
        thermal_losses = sum(thermal_loss_dict.values())

        thermal_optimizer.zero_grad()
        thermal_losses.backward()
        thermal_optimizer.step()

        thermal_epoch_loss += thermal_losses.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Thermal Loss: {thermal_epoch_loss/len(train_thermal_loader):.4f}")

torch.save(thermal_model.state_dict(), "thermal_trained_model.pth")

# ---- sanity check: print raw detections for the first training batch ----
thermal_model.eval()
with torch.no_grad():
    for images, targets in train_thermal_loader:
        images = [image.to(device) for image in images]
        outputs = thermal_model(images)

        for i in range(len(images)):
            boxes, labels, scores = (
                outputs[i][key].cpu().numpy() for key in ('boxes', 'labels', 'scores')
            )

            print(f"Image {i+1} - Boxes: {boxes}, Labels: {labels}, Scores: {scores}")

        break

In [None]:
# Cell 6: Evaluation and Testing
def evaluate_model(model, dataloader, device, iou_threshold=0.5):
    """Run ``model`` over ``dataloader`` and collect predictions and targets.

    Args:
        model: Detection model; called in eval mode with a list of image
            tensors and expected to return one dict per image with ``'boxes'``,
            ``'labels'`` and ``'scores'`` tensors.
        dataloader: Iterable of ``(images, targets)`` batches, ``targets``
            being a sequence of dicts with ``'boxes'`` and ``'labels'`` tensors.
        device: Device the images are moved to before inference.
        iou_threshold: IoU threshold for the non-maximum suppression applied
            to each image's detections (default 0.5).

    Returns:
        ``(all_predictions, all_targets)``:

        * ``all_predictions``: one ``(boxes, labels, scores)`` tuple of NumPy
          arrays per image, with shapes ``(K, 4)``, ``(K,)``, ``(K,)``.
        * ``all_targets``: one ``(boxes, labels)`` tuple of NumPy arrays per
          ground-truth target.
    """
    model.eval()

    all_predictions = []
    all_targets = []

    with torch.no_grad():
        for images, targets in dataloader:
            images = [image.to(device) for image in images]
            outputs = model(images)

            for output in outputs:
                boxes = output['boxes'].detach().cpu()
                labels = output['labels'].detach().cpu()
                scores = output['scores'].detach().cpu()

                keep = torchvision.ops.nms(boxes, scores, iou_threshold=iou_threshold)

                # Filter while everything is still a tensor. Indexing a NumPy
                # array with a one-element index tensor can be interpreted as a
                # plain integer index, which collapses (1, 4) boxes to (4,) and
                # labels/scores to scalars.
                all_predictions.append((
                    boxes[keep].numpy(),
                    labels[keep].numpy(),
                    scores[keep].numpy(),
                ))

            for target in targets:
                all_targets.append((
                    target['boxes'].cpu().numpy(),
                    target['labels'].cpu().numpy(),
                ))

    return all_predictions, all_targets

# Build the held-out split with the same image transform and evaluate on it
# one image at a time, in a fixed order.
test_split = "test"
test_thermal_dataset = CustomDataset(
    dataset_path=dataset_path,
    split=test_split,
    transform=thermal_transform,
)
test_thermal_loader = DataLoader(
    test_thermal_dataset,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_fn,
)

test_predictions, test_targets = evaluate_model(
    model=thermal_model,
    dataloader=test_thermal_loader,
    device=device,
)

print(
    f"Number of test predictions: {len(test_predictions)}",
    f"Number of test targets: {len(test_targets)}",
    sep="\n",
)

# Shapes of the first image's (boxes, labels, scores) triple.
print(f"Test prediction boxes shape: {test_predictions[0][0].shape}")
print(f"Test prediction labels shape: {test_predictions[0][1].shape}")
print(f"Test prediction scores shape: {test_predictions[0][2].shape}")

In [None]:
# Cell 7: Load the trained model
# Restore the weights saved after training. map_location remaps the stored
# tensors onto the active device, so a checkpoint written on a GPU machine
# also loads on a CPU-only one.
thermal_model.load_state_dict(
    torch.load("thermal_trained_model.pth", map_location=device)
)
thermal_model.eval()

In [None]:
# Cell 8: Prepare the test dataset
# Test split, served one image per batch in a fixed order.
test_split = "test"
test_thermal_dataset = CustomDataset(
    dataset_path=dataset_path,
    split=test_split,
    transform=thermal_transform,
)
test_thermal_loader = DataLoader(
    test_thermal_dataset,
    batch_size=1,
    shuffle=False,
    collate_fn=collate_fn,
)

In [None]:
# Cell 9: Evaluate the model on the test dataset
test_predictions, test_targets = evaluate_model(thermal_model, test_thermal_loader, device)

# Summarise the predictions that were just computed. The bare names `boxes`,
# `labels` and `scores` are not defined by this cell; reading them would report
# whatever an earlier cell happened to leave in the kernel, not the test results.
predicted_labels = [
    np.atleast_1d(image_labels) for _, image_labels, _ in test_predictions
]
if predicted_labels:
    unique_labels = np.unique(np.concatenate(predicted_labels))
else:
    unique_labels = np.array([], dtype=np.int64)
print("Unique labels:", unique_labels)

# Show the detections of the first test image as a sample.
if test_predictions:
    first_boxes, first_labels, first_scores = test_predictions[0]
    print(f"Boxes: {first_boxes}")
    print(f"Labels: {first_labels}")
    print(f"Scores: {first_scores}")

In [None]:
# Cell 10: Visualize the object detection results
def visualize_detections(image, boxes, labels, scores, class_labels,
                         confidence_threshold=0.3, normalized=True):
    """Draw detections on a copy of ``image`` and return the copy.

    Args:
        image: Image array of shape ``(H, W, ...)``; it is not modified.
        boxes: ``(N, 4)`` array-like of ``xmin, ymin, xmax, ymax``. A single
            box given as a flat sequence of 4 values is accepted as well.
        labels: ``N`` integer class ids (a scalar is accepted for one box).
        scores: ``N`` confidence scores (a scalar is accepted for one box).
        class_labels: Mapping (or sequence) from class id to display name. Ids
            without an entry are drawn using the numeric id.
        confidence_threshold: Detections scoring below this are skipped.
        normalized: When True (default, the historical behaviour) coordinates
            are fractions of the image size and are multiplied by the image
            width/height. Pass False for pixel coordinates.

    Returns:
        Copy of ``image`` with a rectangle and a ``"name: score"`` caption for
        every kept detection.

    Raises:
        ValueError: If ``boxes`` cannot be interpreted as rows of 4 values.
    """
    image_with_detections = image.copy()

    height, width = image.shape[:2]

    # Bring every input to a predictable shape so that one detection and many
    # detections follow the same code path.
    boxes = np.asarray(boxes, dtype=np.float64)
    if boxes.size % 4 != 0:
        raise ValueError(
            f"boxes must contain 4 values per detection, got shape {boxes.shape}"
        )
    boxes = boxes.reshape(-1, 4)
    labels = np.atleast_1d(np.asarray(labels))
    scores = np.atleast_1d(np.asarray(scores))

    scale_x, scale_y = (width, height) if normalized else (1, 1)

    for box, label, score in zip(boxes, labels, scores):
        if score < confidence_threshold:
            continue

        xmin = int(box[0] * scale_x)
        ymin = int(box[1] * scale_y)
        xmax = int(box[2] * scale_x)
        ymax = int(box[3] * scale_y)

        try:
            class_name = class_labels[int(label)]
        except (KeyError, IndexError):
            # Unknown id: still draw the box, captioned with the raw id.
            class_name = str(int(label))

        cv2.rectangle(image_with_detections, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
        cv2.putText(image_with_detections, f"{class_name}: {score:.2f}", (xmin, ymin - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    return image_with_detections

num_visualizations = 5
class_labels = {1: "vehicle"}

# Never index past the images/predictions that actually exist.
num_to_show = min(
    num_visualizations,
    len(test_predictions),
    len(test_thermal_dataset.images),
)

for i in range(num_to_show):
    image_path = test_thermal_dataset.images[i]
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        print(f"Could not read image: {image_path}")
        continue

    boxes, labels, scores = test_predictions[i]

    print(f"Boxes: {boxes}")
    print(f"Labels: {labels}")
    print(f"Scores: {scores}")

    image_with_detections = visualize_detections(image, boxes, labels, scores, class_labels)

    # Render inline with matplotlib: cv2.imshow/waitKey need a GUI window and
    # block the kernel, which does not work in headless or remote notebooks.
    # OpenCV images are BGR, matplotlib expects RGB.
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.imshow(cv2.cvtColor(image_with_detections, cv2.COLOR_BGR2RGB))
    ax.set_title(f"Thermal Object Detection - Image {i+1}")
    ax.axis("off")
    plt.show()
    plt.close(fig)

In [None]:
# Cell 11: Image Classification Dataset
class ImageClassificationDataset(Dataset):
    """Image dataset whose labels are inferred from keywords in the file path.

    Args:
        image_paths: Sequence of image file paths.
        transform: Optional callable applied to each RGB PIL image.

    Each item is ``(image, label)``. The label is the id of the first keyword
    (checked in the order below) that occurs anywhere in the path, or ``-1``
    when none does:

        ``smallCar`` -> 0, ``largeCar`` -> 1, ``lightTruck`` -> 2, ``heavyTruck`` -> 3
    """

    # (keyword, class id) pairs, in the order they are tested.
    _KEYWORD_LABELS = (
        ("smallCar", 0),
        ("largeCar", 1),
        ("lightTruck", 2),
        ("heavyTruck", 3),
    )

    def __init__(self, image_paths, transform=None):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        image_path = self.image_paths[index]

        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        # First matching keyword wins; -1 marks "no known class in the path".
        label = next(
            (class_id for keyword, class_id in self._KEYWORD_LABELS if keyword in image_path),
            -1,
        )

        return image, label


# Smoke test for ImageClassificationDataset.
# The transform and the path list are defined here because the notebook only
# defines them in a later cell; without them this cell raises NameError on a
# fresh "Restart & Run All".
classification_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Sorted for reproducibility; filtered so stray non-image files are not opened.
vehicle_image_paths = [
    "vehicle_images/" + img
    for img in sorted(os.listdir("vehicle_images"))
    if img.lower().endswith((".jpg", ".jpeg", ".png"))
]

dataset = ImageClassificationDataset(vehicle_image_paths, transform=classification_transform)
if len(dataset) > 0:
    image, label = dataset[0]
    print("Image shape:", image.shape)
    print("Label:", label)
else:
    print("No images found in vehicle_images/")

In [None]:
# Cell 12: Image Classification Dataset Creation
# Preprocessing for the classifier: fixed 224x224 input, normalised with the
# mean/std values given below.
classification_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# os.listdir() returns every directory entry in arbitrary order. Keep only
# image files (anything else would make Image.open fail inside the dataset)
# and sort so the sample order is reproducible.
vehicle_image_paths = [
    "vehicle_images/" + img
    for img in sorted(os.listdir("vehicle_images"))
    if img.lower().endswith((".jpg", ".jpeg", ".png"))
]

classification_dataset = ImageClassificationDataset(vehicle_image_paths, transform=classification_transform)
classification_dataloader = DataLoader(classification_dataset, batch_size=32, shuffle=True)

# Peek at the first batch only.
for images, labels in classification_dataloader:
    print("Batch shape:", images.shape)
    print("Labels:", labels)
    break

In [None]:
# Cell 13: Image Classification Model
# Index i must name the class that ImageClassificationDataset labels as i
# (smallCar -> 0, largeCar -> 1, lightTruck -> 2, heavyTruck -> 3).
class_names = ["smallCar", "largeCar", "lightTruck", "heavyTruck"]
num_classes = len(class_names)

# weights="DEFAULT" is the same pretrained-weights API already used for the
# detector in this notebook (replaces the deprecated pretrained=True).
classification_model = models.resnet18(weights="DEFAULT")
num_features = classification_model.fc.in_features
# torch.nn is referenced through the imported `torch` package; the notebook
# never imports a bare `nn` name.
classification_model.fc = torch.nn.Linear(num_features, num_classes)

classification_model.to(device)

print("Model architecture:")
print(classification_model)

In [None]:
# Cell 14: Image Classification Training
# torch.nn is referenced through the imported `torch` package; the notebook
# never imports a bare `nn` name.
classification_criterion = torch.nn.CrossEntropyLoss()
classification_optimizer = torch.optim.Adam(classification_model.parameters(), lr=0.001)

num_epochs = 10

for epoch in range(num_epochs):
    classification_model.train()

    running_loss = 0.0
    num_batches = 0

    for images, labels in classification_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        # The dataset marks images with no recognised class as -1.
        # CrossEntropyLoss rejects negative targets, so drop those samples
        # (and skip the batch entirely if nothing is left).
        known = labels >= 0
        if not known.any():
            continue
        images, labels = images[known], labels[known]

        outputs = classification_model(images)
        loss = classification_criterion(outputs, labels)

        classification_optimizer.zero_grad()
        loss.backward()
        classification_optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch rather than the last batch only.
    epoch_loss = running_loss / max(num_batches, 1)
    print(f"Epoch [{epoch+1}/{num_epochs}], Classification Loss: {epoch_loss:.4f}")

torch.save(classification_model.state_dict(), "classification_model.pth")

# NOTE(review): accuracy below is measured on the same loader used for
# training, so it is a training accuracy, not a held-out estimate.
classification_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in classification_dataloader:
        images = images.to(device)
        labels = labels.to(device)

        known = labels >= 0
        if not known.any():
            continue

        outputs = classification_model(images[known])
        predicted = outputs.argmax(dim=1)

        total += int(known.sum().item())
        correct += (predicted == labels[known]).sum().item()

if total > 0:
    accuracy = 100 * correct / total
    print(f"Classification Accuracy: {accuracy:.2f}%")
else:
    print("Classification Accuracy: n/a (no labelled samples)")