# Training the Extended Mask2Former on the UAV-SOD Drone Dataset

In [None]:
# Import libraries
import pandas as pd
import os, json
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from src.data_set_up import SOD_Data
from models.extended_mask2former_model import ExtendedMask2Former
from models.efpn_backbone.anchors import Anchors
from src.helpers import train, validate, test


# Locations of the dataset root and of the project metadata files
base_dir = "data/uav_sod_data/"  # joined with the split names further down
data_info_path = "src/data_info/uav_data_preprocessing.json"  # JSON holding the mean / std values
map_path = "src/code_map.json"  # JSON holding the category id -> name map

### Set up the compute device

In [None]:
# Select the device the model and the data are loaded onto.
# Preference order: CUDA GPU, then Apple's Metal backend (MPS), then the CPU,
# so the notebook also uses the GPU on machines that are not Macs.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

### Set up basic static data

- Get the number of classes
- Get the mean and standard deviation 
- Create the data paths for the train, test, and validation splits

In [None]:
# Load the classes of the UAV-SOD Drone dataset.
# The context manager closes the file even when parsing or a key lookup fails,
# and the handle no longer shadows the built-in `map`.
with open(map_path, encoding="utf-8") as code_map_file:
    data = json.load(code_map_file)
classes = data["UAV_SOD_DRONE"]["CATEGORY_ID_TO_NAME"]

# The number of classes plus the background
number_classes = len(classes) + 1


# Load the mean and standard deviation for the train data
with open(data_info_path, encoding="utf-8") as data_info_file:
    data = json.load(data_info_file)
mean = data["uav_data"]["mean"]
standard_deviation = data["uav_data"]["std"]


# Define train, test and validation path
train_path = os.path.join(base_dir, "train")
test_path = os.path.join(base_dir, "test")
validation_path = os.path.join(base_dir, "validation")

### Dataset - Dataloader
- Collate function
- Data transformations
- DataLoader and Dataset

In [None]:
def collate_fn(batch):
    """Regroup a batch of per-sample tuples into one tuple per field.

    `batch` is the list of samples the DataLoader collected; `zip(*batch)`
    transposes it, so the result holds one tuple for each position of the
    sample tuple. A single named function replaces the three identical lambdas.
    """
    return tuple(zip(*batch))


def _build_transform(augment=False):
    """Return the image pipeline: optional colour jitter, ToTensor, Normalize.

    `augment=True` prepends the colour jitter used for the train split only.
    Normalisation uses the `mean` / `standard_deviation` loaded earlier.
    """
    steps = []
    if augment:
        steps.append(transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1))
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize(mean=mean, std=standard_deviation))
    return transforms.Compose(steps)


# Data transform function, one pipeline per split
data_transform = {
    "train": _build_transform(augment=True),
    "test": _build_transform(),
    "validation": _build_transform(),
}


# Dataset and DataLoader
# Paths are built with os.path.join, like the split paths themselves.
train_dataset = SOD_Data(
    os.path.join(train_path, "images"),
    os.path.join(train_path, "annotations"),
    data_transform["train"],
)
test_dataset = SOD_Data(
    os.path.join(test_path, "images"),
    os.path.join(test_path, "annotations"),
    data_transform["test"],
)
validation_dataset = SOD_Data(
    os.path.join(validation_path, "images"),
    os.path.join(validation_path, "annotations"),
    data_transform["validation"],
)

# NOTE(review): the loaders use batch_size=1; the `batch_size` hyperparameter
# defined in a later cell is not applied here - confirm which one is intended.
# Only the train split is shuffled.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)
validation_loader = DataLoader(validation_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

### Heuristics

Bounding-box statistics of the training set, used below to derive the anchor scales.

In [None]:
# Gather the bounding-box statistics of the training split
bbox_stats = train_dataset.analyze_bounding_boxes()

# Keep the width / height moments at hand; the anchor scales are derived from them
mean_width, mean_height = bbox_stats['mean_width'], bbox_stats['mean_height']
std_width, std_height = bbox_stats['std_width'], bbox_stats['std_height']

# Report the statistics (aspect ratios are deduplicated and sorted first)
unique_aspect_ratios = sorted(set(bbox_stats['aspect_ratios']))
print("Aspect Ratios:", unique_aspect_ratios)
print("Mean Width:", mean_width)
print("Mean Height:", mean_height)
print("Width Std Dev:", std_width)
print("Height Std Dev:", std_height)

In [None]:
# Feature-map shapes handed to the anchor generator
feature_map_shapes = [(150, 150), (75, 75), (38, 38), (19, 19), (10, 10)]

# Anchor scales: mean and mean +/- one standard deviation of the box width and
# height, truncated to int, floored at 1, deduplicated and sorted ascending.
scales = sorted({
    max(int(size), 1)
    for size in (
        mean_width - std_width,
        mean_width,
        mean_width + std_width,
        mean_height - std_height,
        mean_height,
        mean_height + std_height,
    )
})

aspect_ratios = [0.75, 1.0, 1.25]

# Generate the anchors and place them on the selected device as float32
anchors = torch.tensor(
    Anchors.generate_anchors(feature_map_shapes, scales, aspect_ratios),
    dtype=torch.float32,
).to(device)

In [None]:
# Hyperparameters
num_epochs = 1
learning_rate = 0.001
# NOTE(review): `batch_size` is not referenced by the DataLoaders, which are
# built with batch_size=1 - confirm which value is intended.
batch_size = 4

# Build the network for `number_classes` (classes plus background) and move it
# to the selected device
model = ExtendedMask2Former(num_classes=number_classes)
model = model.to(device)

# Adam over all model parameters; StepLR scales the learning rate by `gamma`
# every `step_size` calls to scheduler.step(). With num_epochs = 1 and one
# step per epoch the decay is never reached.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.1)

In [None]:
# Per-epoch metrics table. Rows are collected in a list and the DataFrame is
# rebuilt after every epoch, because DataFrame.append was removed in pandas 2.0.
metric_columns = ['epoch', 'train_loss', 'val_loss', 'precision', 'recall', 'AP', 'mAP']
metrics_df = pd.DataFrame(columns=metric_columns)
metric_rows = []


def _metric(metrics, key):
    """Return metrics[key], or None when that entry cannot be read.

    The layout of the metrics object returned by `train` is not visible here,
    so a missing key must not abort a run after an epoch has been trained.
    """
    try:
        return metrics[key]
    except (KeyError, IndexError, TypeError):
        return None


for epoch in range(num_epochs):
    train_loss, train_metrics, train_mAP = train(model, train_loader, optimizer, device, anchors, number_classes)

    # Validation is currently disabled, so val_loss is logged as NaN.
    # TODO(review): to enable it, call
    #   validate(model, validation_loader, device, anchors, number_classes)
    # here and store its loss - confirm the helper's return values first.
    val_loss = float("nan")

    # Log metrics to DataFrame
    # NOTE(review): the 'precision' / 'recall' / 'AP' keys are assumed from the
    # earlier logging draft - confirm against `train`.
    metric_rows.append({
        'epoch': epoch + 1,
        'train_loss': train_loss,
        'val_loss': val_loss,
        'precision': _metric(train_metrics, 'precision'),
        'recall': _metric(train_metrics, 'recall'),
        'AP': _metric(train_metrics, 'AP'),
        'mAP': train_mAP,
    })
    metrics_df = pd.DataFrame(metric_rows, columns=metric_columns)

    # Values are printed unformatted because their exact types are not visible here
    print(f'Epoch {epoch + 1}/{num_epochs}, Training Loss: {train_loss}, mAP: {train_mAP}')

    # Advance the learning-rate schedule once per epoch
    scheduler.step()

