# Load data

In [4]:
import numpy as np
from PIL import Image
from tqdm import tqdm


# Helper function to parse the tripod sequence file
def parse_tripod_seq_file(file_path):
    """Read the integer rows of a tripod sequence description file.

    The file is split into lines and five of them are parsed as
    whitespace-separated integers. Lines 2 and 3 (0-based) are not read.

    Args:
        file_path: Path of the text file to parse.

    Returns:
        A 5-tuple of integer lists taken from lines 0, 1, 4, 5 and 6, in the
        order ``(image_dims, num_frames, frames_360, frontal_frames,
        rotation_sense)``.

    Raises:
        IndexError: If the file has fewer than seven lines.
        ValueError: If a token on a parsed line is not an integer.
    """
    with open(file_path, 'r') as handle:
        rows = handle.read().splitlines()

    def ints_on(row_index):
        # One list of ints per whitespace-separated token on the row.
        return [int(token) for token in rows[row_index].split()]

    image_dims = ints_on(0)
    num_frames = ints_on(1)
    frames_360 = ints_on(4)
    frontal_frames = ints_on(5)
    rotation_sense = ints_on(6)
    return image_dims, num_frames, frames_360, frontal_frames, rotation_sense


# Function to load and resize an image using PIL
def load_and_resize_image(filename, img_height, img_width):
    """Load an image file, resize it, and return it as a NumPy array.

    Args:
        filename: Path of the image file to open.
        img_height: Target height in pixels.
        img_width: Target width in pixels.

    Returns:
        The resized image converted with ``np.array``.
    """
    # Image.open is lazy and keeps the file open until the pixel data is
    # read; the context manager guarantees the handle is released even if
    # resizing or the array conversion raises.
    with Image.open(filename) as img:
        # PIL expects the size as (width, height), the reverse of this
        # function's parameter order.
        resized = img.resize((img_width, img_height))
        return np.array(resized)


# Function to load and preprocess image and bbox data
def load_and_preprocess_data(base_path, sequence_ids, img_width, img_height,
                             frames_per_seq, frames_360, frontal_frames, rotation_sense):
    """Load every frame, rotation angle and bounding box of the given sequences.

    For each sequence the bounding boxes are read from
    ``{base_path}/bbox_{seq_id:02d}.txt`` and the frames from
    ``{base_path}/tripod_seq_{seq_id:02d}_{frame_id:03d}.jpg`` with
    ``frame_id`` running from 1 to the sequence's frame count.

    Args:
        base_path: Directory that holds the image and bbox files.
        sequence_ids: Sequence numbers to load.
        img_width: Width every frame is resized to.
        img_height: Height every frame is resized to.
        frames_per_seq: Frame count per sequence, aligned with
            ``sequence_ids`` by position.
        frames_360: Number of frames that make up a full turn, per sequence.
        frontal_frames: Frame id treated as angle zero, per sequence.
        rotation_sense: Multiplier applied to the angle, per sequence.

    Returns:
        ``(images, angles, bboxes)`` as three NumPy arrays with one entry per
        loaded frame. Angles are signed and are not wrapped into [0, 360)
        here; pixel values are returned as loaded, without normalization.
    """
    images, angles, boxes = [], [], []

    progress = tqdm(sequence_ids, desc='Loading sequences')
    for position, seq_id in enumerate(progress):
        # The per-sequence metadata lists are indexed by position, not by id.
        frame_count = frames_per_seq[position]
        full_turn_frames = frames_360[position]
        frontal = frontal_frames[position]
        direction = rotation_sense[position]

        seq_boxes = np.loadtxt(f"{base_path}/bbox_{seq_id:02d}.txt", delimiter=' ')

        for frame_id in range(1, frame_count + 1):
            frame = load_and_resize_image(
                f"{base_path}/tripod_seq_{seq_id:02d}_{frame_id:03d}.jpg",
                img_height,
                img_width,
            )

            # Frames past one full turn wrap around to the same offsets.
            offset = (frame_id - frontal) % full_turn_frames
            angle = offset * (360 / full_turn_frames) * direction

            images.append(frame)
            angles.append(angle)
            # Frame ids are 1-based; bbox rows are 0-based.
            boxes.append(seq_boxes[frame_id - 1])

    return np.array(images), np.array(angles), np.array(boxes)


def map_angle_to_0_360(angle):
    """Wrap an angle in degrees into the half-open range [0, 360).

    Args:
        angle: Angle in degrees; may be negative or larger than 360.

    Returns:
        ``angle`` modulo 360.
    """
    full_turn = 360
    wrapped = angle % full_turn
    return wrapped


def load_data():
    """Load the train and test splits of the tripod sequence dataset.

    Sequence metadata is parsed from ``tripod-seq.txt``; sequences 1-10 form
    the training split and sequences 11-20 the test split. Angles are wrapped
    into [0, 360) before being returned.

    Returns:
        ``(train_images, train_labels, train_bboxes, test_images,
        test_labels, test_bboxes)``. Images and bboxes are NumPy arrays; the
        label collections are plain Python lists.
    """
    index_file = r'./data/epfl-gims08/tripod-seq/tripod-seq.txt'
    dataset_dir = r'./data/epfl-gims08/tripod-seq'

    image_dims, num_frames, frames_360, frontal_frames, rotation_sense = parse_tripod_seq_file(index_file)
    # Entries 1 and 2 of the first metadata row are used as width and height.
    img_width = image_dims[1]
    img_height = image_dims[2]

    # The metadata lists hold one entry per sequence, so the first ten
    # entries describe sequences 1-10 and the rest describe 11-20.
    head = slice(None, 10)
    tail = slice(10, None)

    train_images, train_angles, train_bboxes = load_and_preprocess_data(
        dataset_dir, list(range(1, 11)), img_width, img_height,
        num_frames[head], frames_360[head], frontal_frames[head], rotation_sense[head])
    test_images, test_angles, test_bboxes = load_and_preprocess_data(
        dataset_dir, list(range(11, 21)), img_width, img_height,
        num_frames[tail], frames_360[tail], frontal_frames[tail], rotation_sense[tail])

    train_labels = list(map(map_angle_to_0_360, train_angles))
    test_labels = list(map(map_angle_to_0_360, test_angles))

    return train_images, train_labels, train_bboxes, test_images, test_labels, test_bboxes


# Train

In [5]:
import argparse
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
import torchvision.models as models
from torchvision import transforms

import load_data


class VehicleDataset(Dataset):
    """Dataset yielding a cropped, normalized image tensor and its angle label.

    Each item is produced by cropping the stored image to its bounding box,
    resizing the crop to 224x224, converting it to a tensor and normalizing
    it per channel.
    """

    def __init__(self, images, labels, bboxes):
        """Store the parallel collections and build the transform pipeline.

        Args:
            images: Indexable collection of image arrays.
            labels: Indexable collection of angle labels.
            bboxes: Indexable collection of boxes, read by ``__getitem__`` as
                ``(left, top, width, height)``.
        """
        self.images = images
        self.labels = labels
        self.bboxes = bboxes

        resize = transforms.Resize((224, 224))
        to_tensor = transforms.ToTensor()
        # NOTE(review): these look like the usual ImageNet channel statistics
        # for pretrained torchvision backbones - confirm.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.transforms = transforms.Compose([resize, to_tensor, normalize])

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label_tensor)`` for item ``idx``.

        The label is wrapped in a one-element tensor.
        """
        picture = Image.fromarray(self.images[idx])
        target = self.labels[idx]
        box = self.bboxes[idx]

        # PIL's crop wants (left, upper, right, lower); the stored box holds
        # the corner plus a width and height, so the far corner is derived.
        left, top = box[0], box[1]
        region = (left, top, left + box[2], top + box[3])

        patch_tensor = self.transforms(picture.crop(region))
        return patch_tensor, torch.tensor([target])


class AnglePredictor(nn.Module):
    """ResNet-50 feature extractor followed by an MLP that regresses one angle.

    Args:
        num_trainable_children: How many trailing top-level child modules of
            the ResNet stay trainable; all earlier ones are frozen. The value
            is clamped to ``[0, number of children]``. The default of 5 keeps
            the trainable set equal to what the previous hard-coded slice
            produced for ResNet-50.
    """

    def __init__(self, num_trainable_children=5):
        super().__init__()
        # NOTE(review): newer torchvision releases prefer the ``weights=``
        # argument over ``pretrained=True`` - confirm against the installed
        # version before changing.
        self.resnet_model = models.resnet50(pretrained=True)

        # Freeze the whole backbone first.
        for param in self.resnet_model.parameters():
            param.requires_grad = False

        # torchvision's ResNet exposes 10 top-level children (conv1, bn1,
        # relu, maxpool, layer1-4, avgpool, fc). The earlier start index
        # ``len(children) - 15`` was therefore negative and wrapped around,
        # selecting only the last 5 children instead of "the last 15". The
        # count is now explicit and clamped so the slice can never wrap.
        backbone_children = list(self.resnet_model.children())
        keep = min(max(num_trainable_children, 0), len(backbone_children))
        for child in backbone_children[len(backbone_children) - keep:]:
            for param in child.parameters():
                param.requires_grad = True

        # Everything except the final fully connected layer acts as the
        # feature extractor.
        self.features = nn.Sequential(*backbone_children[:-1])

        # Regression head: 2048 pooled features -> 512 -> 512 -> 1.
        self.regression_head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 1)  # Outputting a single value for angle
        )

    def forward(self, pixel_values):
        """Return the predicted angle for a batch of image tensors.

        Args:
            pixel_values: Batch of images fed to the ResNet feature stack.

        Returns:
            Output of the regression head, one value per batch item.
        """
        features = self.features(pixel_values)
        angle = self.regression_head(features)
        return angle


def get_data_loaders(train_images, train_labels, train_bboxes, test_images, test_labels, test_bboxes, batch_size=4):
    """Wrap the train and test splits in ``DataLoader`` objects.

    Args:
        train_images, train_labels, train_bboxes: Training split.
        test_images, test_labels, test_bboxes: Test split.
        batch_size: Batch size used by both loaders.

    Returns:
        ``(train_loader, test_loader)``. Only the training loader shuffles.
    """
    training_set = VehicleDataset(train_images, train_labels, train_bboxes)
    held_out_set = VehicleDataset(test_images, test_labels, test_bboxes)
    return (
        DataLoader(training_set, batch_size=batch_size, shuffle=True),
        DataLoader(held_out_set, batch_size=batch_size, shuffle=False),
    )


def evaluate(model, test_loader, device):
    """Compute and print the mean absolute error of ``model`` on ``test_loader``.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        model: Module called on each input batch.
        test_loader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The mean absolute error over all label elements, or ``nan`` when the
        loader yields no samples.
    """
    model.eval()
    # Summing per batch and dividing by the total element count gives the
    # true per-sample mean. Averaging per-batch means would over-weight a
    # smaller final batch.
    criterion = nn.L1Loss(reduction='sum')
    absolute_error = 0.0
    element_count = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            absolute_error += criterion(outputs, labels.float()).item()
            element_count += labels.numel()

    # NOTE(review): if the labels are angles, this plain L1 distance treats
    # 359 vs 1 as an error of 358, not 2 - confirm that is intended.
    if element_count == 0:
        # An empty loader has no defined mean; report NaN instead of raising
        # ZeroDivisionError.
        average_loss = float('nan')
    else:
        average_loss = absolute_error / element_count
    print(f"Test Loss: {average_loss}")
    return average_loss


def train_model(model, train_loader, test_loader, num_epochs=10):
    """Train ``model`` with Adam and MSE loss, evaluating after every epoch.

    The model is moved to CUDA when available, otherwise to the CPU. After
    each epoch the mean of the batch losses is printed and ``evaluate`` is
    run on ``test_loader``.

    Args:
        model: Module to optimize in place.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Batches passed to ``evaluate`` after each epoch.
        num_epochs: Number of passes over ``train_loader``.
    """
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model.to(device)

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch_index in tqdm(range(num_epochs)):
        # evaluate() leaves the model in eval mode, so training mode is
        # re-enabled at the start of every epoch.
        model.train()
        epoch_loss = 0.0

        for batch_inputs, batch_targets in train_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            predictions = model(batch_inputs)
            batch_loss = loss_fn(predictions, batch_targets.float())
            batch_loss.backward()
            optimizer.step()

            epoch_loss += batch_loss.item()

        print(f"Epoch {epoch_index + 1}, Loss: {epoch_loss / len(train_loader)}")
        evaluate(model, test_loader, device)


def main():
    """Load the dataset, build the loaders and train an ``AnglePredictor``.

    Batch size (4) and epoch count (30) are hard-coded here, not read from
    the command line.
    """
    batch_size = 4
    num_epochs = 30

    (train_images, train_labels, train_bboxes,
     test_images, test_labels, test_bboxes) = load_data.load_data()

    train_loader, test_loader = get_data_loaders(
        train_images, train_labels, train_bboxes,
        test_images, test_labels, test_bboxes,
        batch_size,
    )

    predictor = AnglePredictor()
    train_model(predictor, train_loader, test_loader, num_epochs=num_epochs)

# Main

In [6]:
# Run the pipeline: load the data, build the loaders, then train and evaluate.
main()

Loading sequences: 100%|██████████| 10/10 [00:02<00:00,  4.04it/s]
Loading sequences: 100%|██████████| 10/10 [00:04<00:00,  2.22it/s]
  0%|          | 0/30 [00:00<?, ?it/s]

Epoch 1, Loss: 6974.360015713966


  3%|▎         | 1/30 [00:15<07:42, 15.95s/it]

Test Loss: 59.117274958746776
Epoch 2, Loss: 5021.028327321198


  7%|▋         | 2/30 [00:26<06:01, 12.92s/it]

Test Loss: 54.33486444268908
Epoch 3, Loss: 3477.4082488496424


 10%|█         | 3/30 [00:40<06:04, 13.50s/it]

Test Loss: 47.92814289842333
Epoch 4, Loss: 3284.282176816261


 13%|█▎        | 4/30 [00:51<05:22, 12.42s/it]

Test Loss: 51.496084986414225
Epoch 5, Loss: 2689.7687944056624


 17%|█▋        | 5/30 [01:04<05:10, 12.40s/it]

Test Loss: 47.77960257530212
Epoch 6, Loss: 2148.249236759897


 20%|██        | 6/30 [01:21<05:36, 14.02s/it]

Test Loss: 55.63437574931553
Epoch 7, Loss: 1686.4527199276422


 23%|██▎       | 7/30 [01:37<05:36, 14.65s/it]

Test Loss: 52.83587896823883
Epoch 8, Loss: 2136.594041888997


 27%|██▋       | 8/30 [01:52<05:29, 14.99s/it]

Test Loss: 46.92001065186092
Epoch 9, Loss: 2139.6221013085315


 30%|███       | 9/30 [02:09<05:23, 15.39s/it]

Test Loss: 49.78110225881849
Epoch 10, Loss: 1387.9519858408783


 33%|███▎      | 10/30 [02:26<05:18, 15.91s/it]

Test Loss: 57.80732863289969
Epoch 11, Loss: 1496.9738524873378


 37%|███▋      | 11/30 [02:42<05:04, 16.03s/it]

Test Loss: 48.60942623274667
Epoch 12, Loss: 933.9934881622509


 40%|████      | 12/30 [02:59<04:50, 16.17s/it]

Test Loss: 50.19071375642504
Epoch 13, Loss: 875.7337358684863


 43%|████▎     | 13/30 [03:15<04:36, 16.28s/it]

Test Loss: 46.5194636123521
Epoch 14, Loss: 884.1041913889222


 47%|████▋     | 14/30 [03:29<04:10, 15.66s/it]

Test Loss: 44.59114382352148
Epoch 15, Loss: 798.3635876348463


 50%|█████     | 15/30 [03:41<03:36, 14.44s/it]

Test Loss: 47.65998021619661
Epoch 16, Loss: 856.9130673036736


 53%|█████▎    | 16/30 [03:57<03:29, 15.00s/it]

Test Loss: 49.56666464975902
Epoch 17, Loss: 783.6413563291906


 57%|█████▋    | 17/30 [04:10<03:08, 14.48s/it]

Test Loss: 45.35184080260141
Epoch 18, Loss: 680.1430963807187


 60%|██████    | 18/30 [04:21<02:40, 13.40s/it]

Test Loss: 45.15794289963586
Epoch 19, Loss: 871.705000977597


 63%|██████▎   | 19/30 [04:33<02:20, 12.77s/it]

Test Loss: 47.27036942243576
Epoch 20, Loss: 854.5469924215543


 67%|██████▋   | 20/30 [04:48<02:13, 13.40s/it]

Test Loss: 47.091832340615134
Epoch 21, Loss: 865.4591229844901


 70%|███████   | 21/30 [04:58<01:53, 12.62s/it]

Test Loss: 46.33852631705148
Epoch 22, Loss: 888.3969518887794


 73%|███████▎  | 22/30 [05:09<01:36, 12.07s/it]

Test Loss: 45.64767815555845
Epoch 23, Loss: 814.7963433896081


 77%|███████▋  | 23/30 [05:23<01:28, 12.59s/it]

Test Loss: 46.26126361489296
Epoch 24, Loss: 715.2617471921242


 80%|████████  | 24/30 [05:39<01:21, 13.66s/it]

Test Loss: 45.30663699422564
Epoch 25, Loss: 550.1137787867401


 83%|████████▎ | 25/30 [05:55<01:11, 14.36s/it]

Test Loss: 46.579953144277845
Epoch 26, Loss: 542.3718723782039


 87%|████████▋ | 26/30 [06:11<00:58, 14.72s/it]

Test Loss: 42.935851386615205
Epoch 27, Loss: 783.3600385504254


 90%|█████████ | 27/30 [06:27<00:45, 15.23s/it]

Test Loss: 45.63759851029941
Epoch 28, Loss: 584.7962400210106


 93%|█████████▎| 28/30 [06:45<00:31, 15.95s/it]

Test Loss: 41.27066947051457
Epoch 29, Loss: 623.5585474887137


 97%|█████████▋| 29/30 [07:01<00:15, 15.99s/it]

Test Loss: 43.14339837857655
Epoch 30, Loss: 518.1580571174621


100%|██████████| 30/30 [07:17<00:00, 14.60s/it]

Test Loss: 47.302770141192845



