Data Preprocessing

In [1]:
import os
import numpy as np
from PIL import Image

def parse_annotation(labels_path):
    objects = []
    with open(labels_path, 'r') as file:
        for line in file.readlines():
            parts = line.strip().split()
            class_id = int(parts[0])
            coordinates = list(map(float, parts[1:]))  # Convert all to float
            xs = coordinates[0::2]  # Extract all x coordinates
            ys = coordinates[1::2]  # Extract all y coordinates
            xmin, xmax = min(xs), max(xs)
            ymin, ymax = min(ys), max(ys)
            objects.append((class_id, xmin, ymin, xmax, ymax))
    return objects

# def check_annotation_example(annotation_dir):
#     example_annotation_file = next(os.path.join(annotation_dir, f) for f in os.listdir(annotation_dir) if f.endswith('.txt'))
#     with open(example_annotation_file, 'r') as file:
#         for i, line in enumerate(file):
#             print(f"Line {i + 1}: {line.strip()}")
#             if i >= 5:  # Print only the first 5 lines
#                 break
# 
# # Call this function to check the format
# annotation_dir = os.path.join(r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb', 'train', 'labels')  # Adjust as necessary
# check_annotation_example(annotation_dir)

def resize_image(image, new_width, new_height, boxes):
    width, height = image.size
    resize_ratio_w = new_width / width
    resize_ratio_h = new_height / height
    image = image.resize((new_width, new_height))
    
    resized_boxes = []
    for box in boxes:
        class_id, x1, y1, x2, y2 = box
        resized_boxes.append((
            class_id,
            x1 * resize_ratio_w,
            y1 * resize_ratio_h,
            x2 * resize_ratio_w,
            y2 * resize_ratio_h
        ))
    return image, resized_boxes

def preprocess_image(image_path, annotation_path, target_size=(128, 128)):
    image = Image.open(image_path)
    boxes = parse_annotation(annotation_path)
    
    image, boxes = resize_image(image, target_size[0], target_size[1], boxes)
    image_np = np.array(image) / 255.0  # Normalize pixel values
    
    return image_np, boxes

def process_directory(data_dir, annotation_dir, target_size=(128, 128)):
    processed_data = []
    for img_filename in os.listdir(data_dir):
        if img_filename.endswith('.jpg'):
            image_path = os.path.join(data_dir, img_filename)
            annotation_path = os.path.join(annotation_dir, img_filename.replace('.jpg', '.txt'))
            processed_image, processed_boxes = preprocess_image(image_path, annotation_path, target_size)
            processed_data.append((processed_image, processed_boxes))
    return processed_data

# Example usage
dataset_base = r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb'
partitions = ['train', 'valid', 'test']
target_size = (128, 128)  # Change as required by your model

all_data = {}
for part in partitions:
    print(f"Processing {part} data...")
    images_dir = os.path.join(dataset_base, part, 'images')
    annotations_dir = os.path.join(dataset_base, part, 'labels')  # Adjust if different
    all_data[part] = process_directory(images_dir, annotations_dir, target_size)
    print(f"Finished processing {len(all_data[part])} images from {part} set.")


Processing train data...
Finished processing 8691 images from train set.
Processing valid data...
Finished processing 2483 images from valid set.
Processing test data...
Finished processing 1242 images from test set.


Data Augmentation And Basic Resnet50 Model

In [9]:
import os
import torch
from torch.utils.data import Dataset
from PIL import Image
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

class PKLotDataset(Dataset):
    def __init__(self, image_dir, annotation_dir, transform=None):
        self.image_dir = image_dir
        self.annotation_dir = annotation_dir
        self.transform = transform
        self.images = [os.path.join(image_dir, img) for img in os.listdir(image_dir) if img.endswith('.jpg')]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        annotation_path = os.path.join(self.annotation_dir, os.path.basename(image_path).replace('.jpg', '.txt'))

        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)

        # Load and format your bounding boxes and labels correctly
        # Placeholder for actual annotation loading code
        boxes = torch.tensor([[0, 0, 100, 100]], dtype=torch.float32)  # Example box
        labels = torch.tensor([1], dtype=torch.int64)  # Example label

        target = {'boxes': boxes, 'labels': labels}

        return image, target
    
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # Resize the image
    transforms.ToTensor()           # Convert the resized PIL Image to a tensor
])

# Create instances of the PKLotDataset
train_dataset = PKLotDataset(
    image_dir=r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb/train/images',
    annotation_dir=r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb/train/labels',
    transform=transforms.Compose([
        transforms.Resize((128, 128)),  # Resize the image to the desired size
        transforms.ToTensor()  # Then convert it to a PyTorch Tensor
    ]),
    target_size=(128, 128)  # You may define the target size here if different from transform
)

val_dataset = PKLotDataset(
    image_dir=r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb/valid/images',
    annotation_dir=r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb/valid/labels',
    transform=transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor()
    ]),
    target_size=(128, 128)
)

test_dataset = PKLotDataset(
    image_dir=r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb/test/images',
    annotation_dir=r'C:/Users/SirM/Desktop/Swayam/Intro to Deep Learning/Intro-to-Deep-Learning/Final Project/PKLot.v1-raw.yolov8-obb/test/labels',
    transform=transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor()
    ]),
    target_size=(128, 128)
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
num_epochs = 50
learning_rate = 0.001
batch_size = 16

# Create data loaders
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

# Load a pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)
num_classes = 2  # Number of classes (e.g., empty, occupied)

# Replace the classifier with a new one
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)

# Loss and optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    model.train()
    for images, targets in train_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()

# Save the model
torch.save(model.state_dict(), 'model.pth')

TypeError: PKLotDataset.__init__() got an unexpected keyword argument 'target_size'

Semantics CNN