# Transformer Model

## Imports

In [1]:
# Add Utils to path
import sys
sys.path.append('../Utils')  # Adds higher directory to python modules path.

# Utils
from image_enhancement_functions import histogram_equalization, clahe, color_balance_adjustment, min_max_contrast_enhancement
from custom_image_dataset import CustomImageDataset

# Pytorch
import torch
from torch import nn
from torchvision import models
from torchvision import transforms
from torch.utils.data import DataLoader

## Dataset

In [2]:
# Image directory
img_dir = '../ExDark/ExDark'

# Define transformations
transform = transforms.Compose([histogram_equalization,
                                clahe,
                                color_balance_adjustment,
                                min_max_contrast_enhancement,
                                transforms.ToPILImage(),
                                transforms.Resize((224, 224)),
                                transforms.ToTensor()])

# Create dataset
dataset = CustomImageDataset(img_dir=img_dir, transform=transform)

# Split
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

# Create dataloaders
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

## Model

In [3]:
class LocalizationTransformer(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.resnet = models.resnet50()
        self.resnet.fc = nn.Identity()  # Remove the final layer
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=2048, nhead=8), 
            num_layers=1
        )
        self.fc = nn.Linear(2048, num_classes * 4)

    def forward(self, x):
        x = self.resnet(x)
        x = self.transformer(x.unsqueeze(0))
        x = self.fc(x.squeeze(0))
        return x.view(x.size(0), -1, 4)  # Reshape to (batch_size, num_classes, 4)

## Train & Eval Functions

In [4]:
def train(model, dataloader, criterion, optimizer, num_epochs, lr_scheduler):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, (images, bounding_boxes) in enumerate(dataloader):
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, bounding_boxes)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            lr_scheduler.step()
        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(dataloader)}")

In [5]:
def evaluate(model, dataloader):
    model.eval()
    # Implement evaluation logic to assess the model's performance (e.g., using mean average precision or other relevant metrics).

## Training

In [6]:
# Parameters
lr = 0.001
gamma = 0.1
step_size = 3
num_epochs = 10
num_classes = len(dataset.classes)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Create the model
model = LocalizationTransformer(num_classes)
model.to(device)

# Define a loss function and an optimizer
criterion = nn.MS
optimizer = torch.optim.Adam(model.parameters(), lr)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size, gamma)

# Training loop
train(model, train_dataloader, criterion, optimizer, num_epochs, lr_scheduler)


  return F.smooth_l1_loss(input, target, reduction=self.reduction, beta=self.beta)


RuntimeError: The size of tensor a (4) must match the size of tensor b (32) at non-singleton dimension 2