In [1]:
import os
import json
import xml.etree.ElementTree as ET

In [2]:
# Convert the training split's Pascal VOC XML labels into one COCO-style JSON file.
# Bounding boxes are written as [x, y, width, height] in absolute pixels.

# Path to the directory containing the XML annotations
xml_dir = r"C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\train\labels"

# Output file path for the COCO annotations
output_file = r'C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\train\train_annotations.json'

# Initialize the COCO annotations dictionary
coco_annotations = {
    'images': [],
    'annotations': [],
    'categories': []
}

# Define your class labels and their corresponding IDs
class_labels = {'window': 1}

# os.listdir() returns entries in arbitrary order; sorting keeps the
# sequential image/annotation ids identical from one run to the next.
for filename in sorted(os.listdir(xml_dir)):
    if not filename.endswith('.xml'):
        continue

    # Parse the XML file
    root = ET.parse(os.path.join(xml_dir, filename)).getroot()

    # Image-level information; ids are 1-based and follow processing order
    image_id = len(coco_annotations['images']) + 1
    image_width = int(root.find('size/width').text)
    image_height = int(root.find('size/height').text)
    image_file_name = root.find('filename').text

    coco_annotations['images'].append({
        'id': image_id,
        'width': image_width,
        'height': image_height,
        'file_name': image_file_name
    })

    # Object-level information
    for obj in root.findall('object'):
        class_label = obj.find('name').text

        # Skip objects that are not in your class labels
        if class_label not in class_labels:
            continue

        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)

        box_width = xmax - xmin
        box_height = ymax - ymin

        # A box with no extent carries no training signal, and detection
        # models reject boxes whose width or height is not positive.
        if box_width <= 0 or box_height <= 0:
            print(f"Skipping degenerate box in {filename}: "
                  f"({xmin}, {ymin}, {xmax}, {ymax})")
            continue

        coco_annotations['annotations'].append({
            'id': len(coco_annotations['annotations']) + 1,
            'image_id': image_id,
            'category_id': class_labels[class_label],
            'bbox': [xmin, ymin, box_width, box_height],
            'area': box_width * box_height,
            'iscrowd': 0
        })

# Create category entries
for class_label, class_id in class_labels.items():
    coco_annotations['categories'].append({
        'id': class_id,
        'name': class_label,
        'supercategory': 'object'
    })

# Save the COCO annotations to a JSON file
with open(output_file, 'w') as f:
    json.dump(coco_annotations, f)


In [2]:
# Convert the validation split's Pascal VOC XML labels into one COCO-style JSON file.
# Boxes are kept in absolute pixel coordinates ([x, y, width, height]), the same
# convention as the training annotations. Dividing by the image size here would
# make the two splits disagree, and torchvision's Faster R-CNN expects ground-truth
# boxes in pixels, so no normalisation is applied.
import os
import xml.etree.ElementTree as ET
import json

# Path to the directory containing the XML annotations
xml_dir = r"C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\val\labels"

# Output file path for the COCO annotations
output_file = r'C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\val\val_annotations.json'

# Initialize the COCO annotations dictionary
coco_annotations = {
    'images': [],
    'annotations': [],
    'categories': []
}

# Define your class labels and their corresponding IDs
class_labels = {'window': 1}

# os.listdir() returns entries in arbitrary order; sorting keeps the
# sequential image/annotation ids identical from one run to the next.
for filename in sorted(os.listdir(xml_dir)):
    if not filename.endswith('.xml'):
        continue

    # Parse the XML file
    root = ET.parse(os.path.join(xml_dir, filename)).getroot()

    # Image-level information; ids are 1-based and follow processing order
    image_id = len(coco_annotations['images']) + 1
    image_width = int(root.find('size/width').text)
    image_height = int(root.find('size/height').text)
    image_file_name = root.find('filename').text

    coco_annotations['images'].append({
        'id': image_id,
        'width': image_width,
        'height': image_height,
        'file_name': image_file_name
    })

    # Object-level information
    for obj in root.findall('object'):
        class_label = obj.find('name').text

        # Skip objects that are not in your class labels
        if class_label not in class_labels:
            continue

        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)

        box_width = xmax - xmin
        box_height = ymax - ymin

        # A box with no extent carries no signal, and detection models
        # reject boxes whose width or height is not positive.
        if box_width <= 0 or box_height <= 0:
            print(f"Skipping degenerate box in {filename}: "
                  f"({xmin}, {ymin}, {xmax}, {ymax})")
            continue

        coco_annotations['annotations'].append({
            'id': len(coco_annotations['annotations']) + 1,
            'image_id': image_id,
            'category_id': class_labels[class_label],
            'bbox': [xmin, ymin, box_width, box_height],
            'area': box_width * box_height,
            'iscrowd': 0
        })

# Create category entries
for class_label, class_id in class_labels.items():
    coco_annotations['categories'].append({
        'id': class_id,
        'name': class_label,
        'supercategory': 'object'
    })

# Save the COCO annotations to a JSON file
with open(output_file, 'w') as f:
    json.dump(coco_annotations, f)


In [5]:
import json
import os

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision.datasets import CocoDetection
from torchvision.datasets.folder import default_loader
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import Compose, ToTensor, Normalize
from torchvision.transforms import functional as F

# --- Hyperparameters -------------------------------------------------------
# Class count for the detection head; the background class is included
num_classes = 2

# Samples per training batch
batch_size = 3

# --- Dataset locations (COCO-format annotation files and image folders) -----
train_image_root = r"C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\train\images"
train_annotations_path = r"C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\train\train_annotations.json"

val_image_root = r"C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\val\images"
val_annotations_path = r"C:\Users\gokul\Desktop\CSIS_SS23\F_RCNN_dataset\val\val_annotations.json"

# --- Compute device ---------------------------------------------------------
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
class CustomDataset(Dataset):
    """Object-detection dataset backed by a COCO-style annotation file.

    Each sample corresponds to one entry of the JSON ``images`` list and is
    returned as ``(image, targets)`` where ``targets`` is a dict of tensors:

    * ``boxes``    -- float32, shape (N, 4), corners ``[x1, y1, x2, y2]``
      converted from the COCO ``[x, y, width, height]`` boxes;
    * ``labels``   -- int64, shape (N,), the ``category_id`` of each box;
    * ``image_id`` -- int64, shape (1,), the COCO image id.

    Images without any annotation are kept and get an empty (0, 4) ``boxes``
    tensor.

    Args:
        image_root: Directory that contains the image files.
        annotations_path: Path to the COCO-format JSON file.
        transforms: Optional callable applied to the loaded RGB image. When
            ``None`` the image is returned as loaded.
    """

    def __init__(self, image_root, annotations_path, transforms=None):
        self.image_root = image_root
        self.annotations_path = annotations_path
        self.transforms = transforms
        self.annotations = self.load_annotations()

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return it with its target dict."""
        record = self.annotations[idx]

        # default_loader opens the file and converts it to RGB
        image = default_loader(record['image_path'])

        # transforms is optional, so only call it when one was supplied
        if self.transforms is not None:
            image = self.transforms(image)

        # Shallow copy so callers can add/replace keys without touching the cache
        return image, dict(record['targets'])

    def load_annotations(self):
        """Read the JSON file and build one record per image.

        Returns:
            A list of ``{'image_path': str, 'targets': dict}`` records, in the
            order of the JSON ``images`` list.
        """
        with open(self.annotations_path, 'r') as f:
            coco = json.load(f)

        # Group box corners and labels by the image they belong to
        boxes_by_image = {}
        labels_by_image = {}
        for annotation in coco['annotations']:
            x, y, width, height = annotation['bbox']
            if width <= 0 or height <= 0:
                # Boxes without positive extent are unusable for training
                continue
            image_id = annotation['image_id']
            boxes_by_image.setdefault(image_id, []).append([x, y, x + width, y + height])
            labels_by_image.setdefault(image_id, []).append(annotation['category_id'])

        dataset = []
        for image_info in coco['images']:
            image_id = image_info['id']
            targets = {
                # reshape keeps the (N, 4) layout even when N == 0
                'boxes': torch.tensor(boxes_by_image.get(image_id, []),
                                      dtype=torch.float32).reshape(-1, 4),
                'labels': torch.tensor(labels_by_image.get(image_id, []),
                                       dtype=torch.int64),
                'image_id': torch.tensor([image_id], dtype=torch.int64),
            }
            # The file name comes from the 'images' section; the numeric id is
            # not a file name. os.path.join inserts the path separator.
            image_path = os.path.join(self.image_root, image_info['file_name'])
            dataset.append({'image_path': image_path, 'targets': targets})

        return dataset

# torchvision's Faster R-CNN expects images as float tensors in the 0-1 range
# and applies its own normalisation internally, so the dataset transform only
# converts the image to a tensor; an extra Normalize would rescale it twice.
transform = Compose([ToTensor()])

# Create the training dataset instance
train_dataset = CustomDataset(train_image_root, train_annotations_path, transforms=transform)

# Create the validation dataset instance
val_dataset = CustomDataset(val_image_root, val_annotations_path, transforms=transform)


def collate_batch(batch):
    """Turn a list of (image, targets) samples into (images, targets) tuples.

    Detection samples differ in image size and box count, so they are kept as
    tuples instead of being stacked into a single tensor.
    """
    return tuple(zip(*batch))


# num_workers=0 loads batches in the main process. Worker processes need to
# pickle the dataset and collate function, which does not work for objects
# defined inside a notebook on Windows (the dataset paths above are Windows
# paths) -- NOTE(review): raise this only after moving CustomDataset and
# collate_batch into an importable module.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=collate_batch)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_batch)

# Load the pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(weights='FasterRCNN_ResNet50_FPN_Weights.DEFAULT')

# Replace the box classifier so that it predicts num_classes outputs
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move the model to the selected device
model.to(device)

# Set the model to training mode
model.train()

# Define the optimizer and learning rate scheduler
optimizer = SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
# Multiply the learning rate by 0.1 every 3 scheduler steps
lr_scheduler = StepLR(optimizer, step_size=3, gamma=0.1)
# Define the number of training epochs
num_epochs = 10

# Sanity check: pull a single batch and inspect it before training
train_iterator = iter(train_loader)
batch_images, batch_targets = next(train_iterator)

# Print the shape of the first few images and targets in the batch
num_samples = min(5, len(batch_images))  # Adjust the number of samples to display
for i in range(num_samples):
    print(f"Image Shape: {batch_images[i].shape}")
    print(f"Targets: {batch_targets[i]}")
    print()

In [None]:
# Training loop: one pass over train_loader, one loss-only pass over
# val_loader, then a checkpoint, for each epoch.
for epoch in range(num_epochs):
    # Print current epoch
    print(f"Epoch [{epoch+1}/{num_epochs}]")

    # Training
    model.train()
    for batch_idx, (images, targets) in enumerate(train_loader):
        # Move the images and targets to the device. Images that the dataset
        # transform already turned into tensors are used as they are;
        # F.to_tensor only accepts PIL images / ndarrays and would raise on a tensor.
        images = [(img if torch.is_tensor(img) else F.to_tensor(img)).to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Forward pass: in training mode the model returns a dict of losses
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass and optimization
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Print training status
        print(f"Batch [{batch_idx+1}/{len(train_loader)}], Loss: {losses.item():.4f}")

    # Update the learning rate
    lr_scheduler.step()

    # Validation. The model deliberately stays in training mode because the
    # torchvision detection models only return losses in that mode; no_grad
    # ensures no gradients are computed and no optimizer step follows.
    val_loss_total = 0.0
    with torch.no_grad():
        for batch_idx, (images, targets) in enumerate(val_loader):
            # Move the images and targets to the device
            images = [(img if torch.is_tensor(img) else F.to_tensor(img)).to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            val_loss_dict = model(images, targets)
            val_loss = sum(loss for loss in val_loss_dict.values())
            val_loss_total += val_loss.item()

            # Print validation status
            print(f"Validation Batch [{batch_idx+1}/{len(val_loader)}], Loss: {val_loss.item():.4f}")

    # Report the epoch-level validation loss (skipped for an empty loader)
    if len(val_loader) > 0:
        print(f"Mean validation loss: {val_loss_total / len(val_loader):.4f}")

    # Save the model checkpoint
    torch.save(model.state_dict(), f'faster_rcnn_epoch_{epoch + 1}.pth')


Epoch [1/10]
