The following notebook recreates some of our results on a tiny subset of the data. The full dataset is available on the DTU HPC at location /dtu/blackhole/12/145234

In [38]:
# Cell 1: Import Required Libraries
import os
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import transforms, models
from tqdm import tqdm
from sklearn.metrics import classification_report

from split import deterministic_split, random_split
from image_dataset import ImageDataset

# Set random seed for reproducibility
def set_seed(seed=42):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True

set_seed(42)

In [None]:
# Cell 2: Train Model Function
def train_model(datasets, variants, model_name, base_dir="small_dataset", num_epochs=10, batch_size=4, learning_rate=0.001):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Prepare datasets
    _datasets = []
    for dataset, variant in zip(datasets, variants):
        target_dir = os.path.join(base_dir, f"{dataset}/{variant}")
        
        if variant == "real-fewshot":
            target_dir = os.path.join(target_dir, "best" if dataset == "cars" else "seed0")
            train_files, _ = deterministic_split(target_dir, test_ratio=1/6)
        else:
            target_dir = os.path.join(target_dir, "train")
            train_files, _ = random_split(target_dir, train_count=25)
        
        dataset_obj = ImageDataset(file_list=train_files, transform=train_transform, synthetic_label=1 if variant != "real-fewshot" else 0)
        _datasets.append(dataset_obj)
    
    combined_dataset = ConcatDataset(_datasets)
    dataloader_train = DataLoader(combined_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    
    model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 2)
    model = model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_predictions = 0

        for inputs, labels in tqdm(dataloader_train, desc=f"Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            correct_predictions += torch.sum(preds == labels.data)
        
        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {running_loss / len(combined_dataset):.4f}, Accuracy: {correct_predictions.double() / len(combined_dataset):.4f}")
    
    os.makedirs("models", exist_ok=True)
    torch.save(model.state_dict(), f"models/{model_name}.pth")
    print(f"Model saved to models/{model_name}.pth")

In [None]:
# Cell 3: Evaluate Model Function
def evaluate_model(model_name, datasets, variants, base_dir="small_dataset", batch_size=32):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    test_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    _datasets = []
    for dataset, variant in zip(datasets, variants):
        target_dir = os.path.join(base_dir, f"{dataset}/{variant}")
        
        if variant == "real-fewshot":
            target_dir = os.path.join(target_dir, "best" if dataset == "cars" else "seed0")
            _, test_files = deterministic_split(target_dir, test_ratio=1/6)
        else:
            target_dir = os.path.join(target_dir, "train")
            _, test_files = random_split(target_dir, train_count=25)
        
        dataset_obj = ImageDataset(file_list=test_files, transform=test_transform, synthetic_label=1 if variant != "real-fewshot" else 0)
        _datasets.append(dataset_obj)
    
    combined_dataset = ConcatDataset(_datasets)
    dataloader_test = DataLoader(combined_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    
    model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    model.fc = nn.Linear(model.fc.in_features, 2)
    model.load_state_dict(torch.load(f"models/{model_name}.pth", map_location=device))
    model = model.to(device)
    model.eval()
    
    all_preds, all_labels = [], []
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader_test, desc="Inference"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    report = classification_report(all_labels, all_preds, output_dict=True, target_names=["Real", "Synthetic"])
    metrics = {
        "accuracy": report["accuracy"],
        "class_0": report["Real"],
        "class_1": report["Synthetic"]
    }
    return metrics

In [42]:
# Cell 4: Main Script to Train and Evaluate Models
train_datasets = [
    [("cars", "sd2.1"), ("cars", "real-fewshot")],
    [("pets", "sd2.1"), ("pets", "real-fewshot")],
    [("cars", "sd2.1"), ("pets", "sd2.1"), ("cars", "real-fewshot"), ("pets", "real-fewshot")]
]

eval_datasets = [
    [("cars", "dd-fewshot"), ("cars", "real-fewshot")],
    [("cars", "sd2.1"), ("cars", "real-fewshot")],
    [("pets", "sd2.1"), ("pets", "real-fewshot")],
    [("pets", "dd-fewshot"), ("pets", "real-fewshot")]
]

results = {}

for train_combination in train_datasets:
    combined_name = "__".join([f"{dataset}_{variant}" for dataset, variant in train_combination])
    model_name = f"combined_{combined_name}"
    
    if not os.path.exists(f"models/{model_name}.pth"):
        train_model(
            datasets=[dataset for dataset, variant in train_combination],
            variants=[variant for dataset, variant in train_combination],
            model_name=model_name
        )
    
    for eval_combination in eval_datasets:
        eval_name = "__".join([f"{dataset}_{variant}" for dataset, variant in eval_combination])
        metrics = evaluate_model(
            model_name=model_name,
            datasets=[dataset for dataset, variant in eval_combination],
            variants=[variant for dataset, variant in eval_combination]
        )
        results[(model_name, eval_name)] = metrics

# Save results to CSV
import csv
with open("results.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerow(["Trained_On", "Evaluated_On", "Accuracy", "Class", "Precision", "Recall", "F1-Score"])
    for (trained_on, evaluated_on), metrics in results.items():
        for cls, cls_metrics in metrics.items():
            writer.writerow([trained_on, evaluated_on, metrics["accuracy"], cls, cls_metrics["precision"], cls_metrics["recall"], cls_metrics["f1-score"]])


Dataset small_dataset\cars/sd2.1\train
Number of training files: 25
Number of test files: 5
Data dir: small_dataset\cars/real-fewshot\best
Number of training files: 24
Number of test files: 6


Epoch 1/10: 100%|██████████| 49/49 [00:15<00:00,  3.26it/s]


Epoch 1/10 - Loss: 0.9542, Accuracy: 0.3878


Epoch 2/10: 100%|██████████| 49/49 [00:14<00:00,  3.40it/s]


Epoch 2/10 - Loss: 0.7627, Accuracy: 0.4898


Epoch 3/10: 100%|██████████| 49/49 [00:13<00:00,  3.51it/s]


Epoch 3/10 - Loss: 0.8829, Accuracy: 0.5918


Epoch 4/10: 100%|██████████| 49/49 [00:14<00:00,  3.27it/s]


Epoch 4/10 - Loss: 0.8259, Accuracy: 0.4694


Epoch 5/10: 100%|██████████| 49/49 [00:15<00:00,  3.19it/s]


Epoch 5/10 - Loss: 0.7732, Accuracy: 0.5102


Epoch 6/10: 100%|██████████| 49/49 [00:17<00:00,  2.74it/s]


Epoch 6/10 - Loss: 0.8154, Accuracy: 0.3673


Epoch 7/10: 100%|██████████| 49/49 [00:13<00:00,  3.53it/s]


Epoch 7/10 - Loss: 0.7712, Accuracy: 0.4898


Epoch 8/10: 100%|██████████| 49/49 [00:14<00:00,  3.45it/s]


Epoch 8/10 - Loss: 0.8197, Accuracy: 0.4694


Epoch 9/10: 100%|██████████| 49/49 [00:13<00:00,  3.60it/s]


Epoch 9/10 - Loss: 0.8490, Accuracy: 0.4898


Epoch 10/10: 100%|██████████| 49/49 [00:14<00:00,  3.49it/s]


Epoch 10/10 - Loss: 0.7639, Accuracy: 0.5102
Model saved to models/combined_cars_sd2.1__cars_real-fewshot.pth
Dataset small_dataset\cars/dd-fewshot\train
Number of training files: 25
Number of test files: 5
Data dir: small_dataset\cars/real-fewshot\best
Number of training files: 24
Number of test files: 6


  model.load_state_dict(torch.load(f"models/{model_name}.pth", map_location=device))
Inference:   0%|          | 0/1 [00:11<?, ?it/s]


RuntimeError: DataLoader worker (pid(s) 25496, 26792, 2956, 6356) exited unexpectedly