In [None]:
!pip install fiftyone

In [None]:
# log in to Weights & Biases through its CLI; the experiments below log their runs to wandb
!wandb login

In [None]:
import fiftyone as fo
from fiftyone.utils.huggingface import load_from_hub
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
import torchvision.transforms.v2 as transforms
from torch.utils.data import DataLoader
import wandb

from src import datasets
from src import models
from src import training
from src import visualization

In [None]:
# run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

## Load Data 

In [None]:
# hyperparameters will be the same for all experiments to make them comparable
IMG_SIZE = 64  # size passed to transforms.Resize
BATCH_SIZE = 32  # batch size of the train and val dataloaders
EPOCHS = 30  # number of training epochs per experiment

START_LR = 1e-3  # initial learning rate handed to Adam
END_LR = 1e-6  # eta_min of the cosine annealing schedule

In [None]:
# Fetch the FiftyOne dataset from the Hugging Face Hub.
# A large batch size combined with few workers should hopefully keep us
# from getting rate limited.
dataset = load_from_hub(
    "MatthiasCr/multimodal-shapes-subset",
    name="multimodal-shapes-subset",
    batch_size=1000,
    num_workers=2,
)

Next, I convert the FiftyOne dataset into torch datasets, using the already existing tags for the train/val split. I also create dataloaders for training and validation, plus a separate dataloader that is used for sample predictions on the validation dataset. Every dataloader with `shuffle=True` receives a generator with a fixed seed so that the shuffling is deterministic.

In [None]:
# image preprocessing: resize, convert to an image tensor, cast to float32 with value scaling
img_transforms = transforms.Compose(
    [
        transforms.Resize(IMG_SIZE),
        transforms.ToImage(),
        transforms.ToDtype(torch.float32, scale=True),
    ]
)

# one torch dataset per split
train_dataset = datasets.MultimodalDataset(dataset, "train", img_transforms)
val_dataset = datasets.MultimodalDataset(dataset, "val", img_transforms)

# fixed-seed generator (shared by both shuffling loaders) for reproducible shuffling
generator = torch.Generator().manual_seed(51)

train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    generator=generator,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=True,
)

# small shuffled loader over the val split, used for the sample predictions
log_loader = DataLoader(
    val_dataset,
    batch_size=5,
    shuffle=True,
    num_workers=0,
    generator=generator,
)

# number of batches in one training epoch
steps_per_epoch = len(train_dataloader)

## Experiments

In [None]:
def log_experiment(model, best_model, fusion_type, device, output_name):
    """Train one fusion model, log the run to WandB and return its loss curves.

    Args:
        model: Network to train; must provide ``get_embedding_size()``.
        best_model: Second instance of the same architecture. The checkpoint
            stored under ``output_name`` is loaded into it and it is used for
            the sample predictions.
        fusion_type: Label of the fusion strategy, passed to
            ``training.initWandbRun``.
        device: Device used for training, for loading the checkpoint and for
            the sample predictions.
        output_name: Base name of the checkpoint; the weights are read from
            ``../checkpoints/{output_name}.pt`` (presumably written by
            ``training.train_model`` - confirm there).

    Returns:
        The ``(train_loss, val_loss)`` pair returned by
        ``training.train_model``.
    """
    num_params = sum(p.numel() for p in model.parameters())
    embedding_size = model.get_embedding_size()

    optim = Adam(model.parameters(), lr=START_LR)
    # T_max spans all batches of all epochs, i.e. the scheduler is presumably
    # stepped once per batch inside training.train_model - TODO confirm
    scheduler = CosineAnnealingLR(optim, T_max=EPOCHS * steps_per_epoch, eta_min=END_LR)
    loss_func = nn.BCEWithLogitsLoss()

    # init wandb run and log config hyperparameters
    run = training.initWandbRun(
        fusion_type, embedding_size, EPOCHS, BATCH_SIZE, num_params, "Adam", "Cosine Annealing", START_LR, END_LR
    )

    try:
        # train and log loss
        train_loss, val_loss = training.train_model(
            model, optim, loss_func, EPOCHS, train_dataloader, val_dataloader, device, run, scheduler=scheduler, output_name=output_name
        )

        # load best model onto the device requested by the caller
        model_save_path = f"../checkpoints/{output_name}.pt"
        best_model.load_state_dict(torch.load(model_save_path, map_location=device))
        best_model = best_model.to(device)

        # predict on 5 batches of 5 samples each = 25 predictions. Log predictions to wandb
        training.log_predictions(best_model, log_loader, device, run, num_batches=5)
    finally:
        # always close the wandb run, even when training or prediction fails
        run.finish()

    return train_loss, val_loss

### Late Fusion

In [None]:
# two instances of the architecture: one to train, one to load the saved checkpoint into
late_model = models.LateFusionNet().to(device)
late_model_best = models.LateFusionNet().to(device)

late_train_loss, late_val_loss = log_experiment(
    model=late_model,
    best_model=late_model_best,
    fusion_type="late",
    device=device,
    output_name="task3_late",
)

# train and validation loss of the late fusion run
visualization.plot_loss(
    EPOCHS,
    {"Late train Loss": late_train_loss, "Late Val Loss": late_val_loss},
)

### Intermediate Fusion

In [None]:
# Each variant needs two instances: one to train and one to load the saved checkpoint into.

# concatenation fusion
cat_model = models.IntermediateFusionNet(fusion_type="cat").to(device)
cat_model_best = models.IntermediateFusionNet(fusion_type="cat").to(device)
cat_train_loss, cat_val_loss = log_experiment(cat_model, cat_model_best, "intermediate (concatenation)", device, output_name="task3_cat")

# addition fusion
add_model = models.IntermediateFusionNet(fusion_type="add").to(device)
add_model_best = models.IntermediateFusionNet(fusion_type="add").to(device)
add_train_loss, add_val_loss = log_experiment(add_model, add_model_best, "intermediate (addition)", device, output_name="task3_add")

# Hadamard fusion
had_model = models.IntermediateFusionNet(fusion_type="had").to(device)
had_model_best = models.IntermediateFusionNet(fusion_type="had").to(device)
had_train_loss, had_val_loss = log_experiment(had_model, had_model_best, "intermediate (hadamard)", device, output_name="task3_had")

## Analysis

We can now compare how these models performed on the validation dataset:

In [None]:
# validation loss of all four fusion variants in a single plot
visualization.plot_loss(
    EPOCHS,
    {
        "Concat Valid Loss": cat_val_loss,
        "Addition Valid Loss": add_val_loss,
        "Hadamard Valid Loss": had_val_loss,
        "Late Valid Loss": late_val_loss,
    },
)

We can also visualize and analyze the experiments in WandB:

![](../results/wandb-t3-graphs.png)
![](../results/wandb-t3-table.png)
