In [None]:
# Import Libraries
import os
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
import json 
import pandas as pd
from pathlib import Path
from torch import nn, optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image


# Crop the images (mask out everything outside the bounding boxes)

### Crop function

In [None]:
def crop_out_background(images_dir, labels_dir, output_dir):
    """
    Black out everything outside the labelled bounding boxes of every image.

    For each ``*.jpg`` file in ``images_dir`` the label file with the same stem
    (``<stem>.txt``) is read from ``labels_dir``. Every label line is expected
    to hold five whitespace-separated numbers, ``class_id x_center y_center
    width height``, where the four box values are fractions of the image width
    and height. Pixels covered by at least one box are kept, all other pixels
    are set to black. The output keeps the original image size and is written
    to ``output_dir`` under the original file name.

    Images without a label file and images OpenCV cannot read are reported and
    skipped. Blank label lines are ignored; label lines that do not hold
    exactly five numbers are reported and skipped instead of aborting the run.

    Parameters:
        images_dir (str): Directory containing the images.
        labels_dir (str): Directory containing the bounding box labels in text format.
        output_dir (str): Directory to save the masked images (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    for image_path in Path(images_dir).glob("*.jpg"):
        # Labels are matched to images purely by file stem.
        label_path = Path(labels_dir) / f"{image_path.stem}.txt"
        if not label_path.exists():
            print(f"No label found for {image_path.name}. Skipping.")
            continue

        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Could not read the image at {image_path}. Skipping.")
            continue

        img_height, img_width = image.shape[:2]

        # Collect (x_center, y_center, width, height) for every usable label line.
        bounding_boxes = []
        with open(label_path, 'r') as file:
            for line_number, line in enumerate(file, start=1):
                fields = line.split()
                if not fields:
                    # Trailing newlines / empty lines carry no box.
                    continue
                try:
                    values = [float(field) for field in fields]
                except ValueError:
                    values = []
                if len(values) != 5:
                    print(f"Malformed label line {line_number} in {label_path.name}. Skipping line.")
                    continue
                # The class id (first value) is not needed for masking.
                bounding_boxes.append(values[1:])

        # Single-channel mask: 255 inside any box, 0 elsewhere.
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        for (x_center, y_center, width, height) in bounding_boxes:
            # Convert the centre/size fractions to a top-left corner and size in pixels.
            x = int((x_center - width / 2) * img_width)
            y = int((y_center - height / 2) * img_height)
            w = int(width * img_width)
            h = int(height * img_height)

            # thickness=-1 fills the rectangle instead of outlining it.
            cv2.rectangle(mask, (x, y), (x + w, y + h), 255, thickness=-1)

        # Keep only the pixels where the mask is non-zero.
        result = cv2.bitwise_and(image, image, mask=mask)

        output_path = Path(output_dir) / image_path.name
        if not cv2.imwrite(str(output_path), result):
            print(f"Could not write the image to {output_path}.")

### Function call

In [None]:
# Source images, their bounding-box label files (.txt), and the folder that
# receives the processed images.
images_dir = "../data/with_labels/images"
labels_dir = "../data/processed/labels"
output_dir = "../data/with_labels/images_cropped"

crop_out_background(
    images_dir=images_dir,
    labels_dir=labels_dir,
    output_dir=output_dir,
)

# Orientation model: flip images to a common orientation

In [None]:
# Dataset Class for Orientation Classification
class OrientationDataset(Dataset):
    """
    Image dataset for binary left/right orientation classification.

    The label table is read with ``pandas.read_csv``. For each row, the first
    column is joined onto ``images_dir`` to locate the image file and the
    second column holds the orientation: the value ``"left"`` maps to class 0
    and every other value maps to class 1.
    """

    def __init__(self, csv_file, images_dir, transform=None):
        """
        Parameters:
            csv_file (str): Path to the CSV file listing image names and orientations.
            images_dir (str): Directory the image names are relative to.
            transform (callable, optional): Applied to each PIL image before it is returned.
        """
        self.labels = pd.read_csv(csv_file)
        self.images_dir = images_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """
        Load the sample at position ``idx``.

        Returns:
            tuple: ``(image, label)`` where ``image`` is an RGB PIL image (or the
            result of ``transform`` when one was given) and ``label`` is 0 for
            "left" and 1 otherwise.
        """
        image_path = os.path.join(self.images_dir, self.labels.iloc[idx, 0])
        label = 0 if self.labels.iloc[idx, 1] == "left" else 1  # 0 = left, 1 = right

        # Force three channels so grayscale, palette or RGBA files do not break
        # a 3-channel normalisation / network input. convert() also loads the
        # pixel data, so the file can be closed right away.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image, label


In [None]:
# Define Transforms
# Preprocessing pipeline applied to every PIL image before it reaches the model.
# NOTE(review): the mean/std values look like the ImageNet channel statistics
# commonly paired with torchvision's pretrained models - confirm.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


## Training the model

In [None]:
# Training Function
def train_orientation_classifier(train_csv, train_images_dir, num_epochs=10, checkpoint_path="orientation_classifier_checkpoint.pth", batch_size=8, learning_rate=0.001):
    """
    Train a two-class ResNet-18 orientation classifier and save a checkpoint.

    Samples come from ``OrientationDataset`` using the module-level
    ``transform``. The network is a torchvision ResNet-18 created with
    ``pretrained=True`` whose final fully connected layer is replaced by a
    2-output linear layer. All parameters are optimised with Adam on a
    cross-entropy loss. After the last epoch the model and optimizer state are
    written through ``save_checkpoint``.

    Parameters:
        train_csv (str): CSV file passed to ``OrientationDataset``.
        train_images_dir (str): Directory containing the training images.
        num_epochs (int): Number of passes over the training data.
        checkpoint_path (str): Where the final checkpoint is written.
        batch_size (int): Mini-batch size of the training loader.
        learning_rate (float): Learning rate passed to Adam.

    Returns:
        torch.nn.Module: The trained model, still on the device used for training.
    """
    # Dataset and DataLoader
    train_dataset = OrientationDataset(train_csv, train_images_dir, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Model
    # NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
    # of `weights=`; kept as-is because the installed version is not visible here.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = models.resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 2)  # Binary classification
    model = model.to(device)

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training Loop
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # The reported value is the sum of the per-batch losses for this epoch.
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss:.4f}")

    # Save Checkpoint
    save_checkpoint(model, optimizer, num_epochs, path=checkpoint_path)
    print("Training complete. Model checkpoint saved.")
    return model


# Save Checkpoint Function
def save_checkpoint(model, optimizer, epoch, path):
    """
    Write the model and optimizer state to ``path`` with ``torch.save``.

    The saved object is a dict with the keys ``"epoch"``,
    ``"model_state_dict"`` and ``"optimizer_state_dict"``. When ``path`` is a
    filesystem path whose parent directory does not exist yet, that directory
    is created first so a finished training run is not lost to a missing folder.

    Parameters:
        model (torch.nn.Module): Model whose ``state_dict()`` is stored.
        optimizer (torch.optim.Optimizer): Optimizer whose ``state_dict()`` is stored.
        epoch (int): Epoch number recorded in the checkpoint.
        path (str): Destination file.
    """
    if isinstance(path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(path))
        # An empty dirname means the current directory; nothing to create then.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    checkpoint = {
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
    }
    torch.save(checkpoint, path)
    print(f"Checkpoint saved to {path}")


In [None]:
# Training inputs and checkpoint destination (Kaggle paths).
# NOTE(review): the checkpoint is written to the working directory, while the
# loading cell further down reads it from ../checkpoints/ - confirm the file is
# moved there between the two steps.
train_csv = "data/manual_labeling/labels.csv"
train_images_dir = "data/manual_labeling/images"
checkpoint_path = "orientation_classifier_checkpoint.pth"

# Train for 10 epochs and keep the returned network in `model`.
model = train_orientation_classifier(
    train_csv,
    train_images_dir,
    num_epochs=10,
    checkpoint_path=checkpoint_path,
)


## Load the checkpoint of the model

In [None]:
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

def load_checkpoint(path, model, optimizer=None):
    """
    Restore model (and optionally optimizer) state from a checkpoint file.

    The file is read with ``torch.load``, using the module-level ``device`` as
    ``map_location``. The entry ``"model_state_dict"`` is loaded into
    ``model``; when an optimizer is supplied, ``"optimizer_state_dict"`` is
    loaded into it as well.

    Parameters:
        path (str): Path to the checkpoint file.
        model (torch.nn.Module): Model that receives the stored weights.
        optimizer (torch.optim.Optimizer, optional): Optimizer that receives the stored state.

    Returns:
        The value stored under ``"epoch"``, or None when the checkpoint has no such entry.
    """
    saved_state = torch.load(path, map_location=device)

    model.load_state_dict(saved_state["model_state_dict"])
    if optimizer:
        optimizer.load_state_dict(saved_state["optimizer_state_dict"])

    print(f"Checkpoint loaded from {path}")
    return saved_state.get("epoch")

# Example: restoring the trained orientation model from disk.
checkpoint_path = "../checkpoints/orientation_classifier_checkpoint.pth"

# Rebuild the architecture used for training (ResNet-18 with a 2-output head);
# the weights themselves come from the checkpoint, so none are downloaded here.
model = models.resnet18(pretrained=False)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
model = model.to(device)

load_checkpoint(path=checkpoint_path, model=model)
model.eval()


In [None]:
def predict_and_flip_images_with_labels(model, images_dir, labels_dir, output_image_dir, output_label_dir, desired_orientation="left"):
    """
    Predict the orientation of images, flip them if necessary, and adjust the corresponding labels.

    Every ``*.jpg`` in ``images_dir`` is preprocessed with the module-level
    ``transform``, moved to the module-level ``device`` and classified by
    ``model`` (class 0 = "left", otherwise "right"). When the prediction
    differs from ``desired_orientation`` the image is mirrored horizontally.
    The (possibly mirrored) image is written to ``output_image_dir``. If a
    ``<stem>.json`` annotation exists in ``labels_dir`` it is copied to
    ``output_label_dir``, with the x-coordinate of every shape point mirrored
    when the image was flipped. The predictions for all processed images are
    written to ``predictions.csv`` inside ``output_image_dir``; the file always
    has the columns ``image`` and ``label``, even when no image was processed.

    Images OpenCV cannot read are reported and skipped.

    Parameters:
        model (torch.nn.Module): Trained orientation model.
        images_dir (str): Directory containing input images.
        labels_dir (str): Directory containing corresponding JSON annotations (LabelMe format).
        output_image_dir (str): Directory to save flipped images.
        output_label_dir (str): Directory to save updated labels.
        desired_orientation (str): Desired car orientation ("left" or "right").

    Raises:
        ValueError: If ``desired_orientation`` is neither "left" nor "right".
    """
    # Any other value would never match a prediction and flip every image.
    if desired_orientation not in ("left", "right"):
        raise ValueError(
            f'desired_orientation must be "left" or "right", got {desired_orientation!r}'
        )

    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_label_dir, exist_ok=True)
    predictions = []

    model.eval()
    with torch.no_grad():
        for image_path in Path(images_dir).glob("*.jpg"):
            # Load the pixels that will be written back out; bail out early if
            # OpenCV cannot decode the file.
            original_image = cv2.imread(str(image_path))
            if original_image is None:
                print(f"Could not read the image at {image_path}. Skipping.")
                continue
            image_width = original_image.shape[1]

            # The classifier input goes through PIL + `transform`; forcing RGB
            # keeps grayscale / RGBA files compatible with 3-channel preprocessing.
            with Image.open(image_path) as raw_image:
                image_tensor = transform(raw_image.convert("RGB")).unsqueeze(0).to(device)

            # Predict orientation
            output = model(image_tensor)
            _, predicted = torch.max(output, 1)
            label = "left" if predicted.item() == 0 else "right"
            needs_flip = label != desired_orientation

            # Flip image if needed (flip code 1 = mirror around the vertical axis)
            flipped_image = cv2.flip(original_image, 1) if needs_flip else original_image

            # Save flipped (or unchanged) image
            output_image_path = Path(output_image_dir) / image_path.name
            cv2.imwrite(str(output_image_path), flipped_image)

            # Update labels if available
            label_path = Path(labels_dir) / f"{image_path.stem}.json"
            if label_path.exists():
                with open(label_path, "r") as f:
                    label_data = json.load(f)

                if needs_flip:
                    _flip_label_points_horizontally(label_data, image_width)

                # NOTE(review): all other JSON keys are written back unchanged;
                # if the files embed a copy of the image (e.g. LabelMe's
                # `imageData`), that copy is not flipped - confirm this is fine.
                output_label_path = Path(output_label_dir) / label_path.name
                with open(output_label_path, "w") as f:
                    json.dump(label_data, f, indent=4)

            print(f"Processed: {image_path.name}")

            # Save prediction for reference
            predictions.append({"image": image_path.name, "label": label})

    # Fixed columns keep the CSV readable even when no image was processed.
    pd.DataFrame(predictions, columns=["image", "label"]).to_csv(
        Path(output_image_dir) / "predictions.csv", index=False
    )
    print(f"Predictions, flipped images, and updated labels saved in {output_image_dir} and {output_label_dir}.")


def _flip_label_points_horizontally(label_data, image_width):
    """Mirror the x-coordinate of every point in ``label_data["shapes"]`` in place."""
    for shape in label_data["shapes"]:
        for point in shape["points"]:
            point[0] = image_width - point[0]


In [None]:
# Orientation every image should have after processing.
desired_orientation = "left"

# Directory containing all JSON labels
labels_dir = "../data/with_labels/json"

# Optional per-split processing of ../data/split/<split>/images.
# Disabled by default; set to ("train", "val", "test") to write
# images_flipped/ and labels_flipped/ next to each split's images.
splits_to_process = ()
for split_name in splits_to_process:
    split_dir = f"../data/split/{split_name}"
    predict_and_flip_images_with_labels(
        model,
        f"{split_dir}/images",
        labels_dir,
        f"{split_dir}/images_flipped",
        f"{split_dir}/labels_flipped",
        desired_orientation,
    )

# Full labelled set: flip the background-masked images and their annotations.
images_dir = "../data/with_labels/images_cropped"
output_image_dir = "../data/with_labels/images_flipped"
output_label_dir = "../data/with_labels/json_flipped"
predict_and_flip_images_with_labels(
    model,
    images_dir,
    labels_dir,
    output_image_dir,
    output_label_dir,
    desired_orientation
)
