# Baseline Evaluation (Multi-Task ResNet18)

This notebook trains a multi-task baseline model on the labeled dataset and exports:
- baseline_metrics.csv (per-epoch train loss + validation accuracies)
- baseline_loss_vs_valacc.png
- baseline_valacc_per_task.png
- baseline_final_val_acc_summary.csv

## Drive access

The dataset (images and labels) is stored on Google Drive. The next cell mounts Drive to access the files.

In [None]:
# Mount Google Drive at /content/drive so the paths used in the following
# cells (which all live under /content/drive/MyDrive) can be read and written.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
import os
import glob
import pandas as pd

# Locations on the mounted Drive. Images are read from the base folder itself;
# results are written to a "baseline_results" sub-folder created on demand.
BASE = "/content/drive/MyDrive/dataset_projet"
IMAGES_DIR = BASE
CSV_PATH = os.path.join(BASE, "labels.csv")
RESULTS_DIR = os.path.join(BASE, "baseline_results")
os.makedirs(RESULTS_DIR, exist_ok=True)

# Quick sanity checks before any heavy work starts.
print("BASE exists:", os.path.exists(BASE))
print("CSV exists:", os.path.exists(CSV_PATH))

# Count image files sitting directly in the image folder (non-recursive).
image_patterns = ("*.jpg", "*.jpeg", "*.png", "*.webp")
img_count = sum(
    len(glob.glob(os.path.join(IMAGES_DIR, pattern)))
    for pattern in image_patterns
)
print("Images in folder:", img_count)

# Load the label table and show its size and header.
df = pd.read_csv(CSV_PATH)
print("Rows in CSV:", len(df))
print("Columns:", df.columns.tolist())

## Data preparation

Starting from the labeling CSV loaded above, we remove unnamed helper columns, identify the filename column, and clean the task (label) columns, replacing missing values with "Unknown".

In [None]:
import numpy as np

# Drop columns whose header starts with "Unnamed".
unnamed_mask = df.columns.astype(str).str.match(r"^Unnamed")
df = df.loc[:, ~unnamed_mask].copy()

# Pick the first known filename header; fall back to the first column.
possible_filename_cols = ["filename", "image_filename", "image", "img"]
filename_col = df.columns[0]
for candidate in possible_filename_cols:
    if candidate in df.columns:
        filename_col = candidate
        break

# Every remaining column except bookkeeping ones is treated as a task label.
ignore_cols = {filename_col, "source_folder", "original_filename"}
label_cols = [name for name in df.columns if name not in ignore_cols]

df[filename_col] = df[filename_col].astype(str).str.strip()

# Normalise labels to stripped strings and map textual "missing" markers
# (and any remaining NaN) to the explicit category "Unknown".
missing_tokens = ["nan", "NaN", "None", ""]
for col in label_cols:
    cleaned = df[col].astype(str).str.strip()
    df[col] = cleaned.replace(missing_tokens, np.nan).fillna("Unknown")

# Keep only the filename column followed by the label columns.
df = df[[filename_col, *label_cols]].copy()

print("Filename column:", filename_col)
print("Label columns:", label_cols)
print(df.head(3))

## Dataset and transforms

Images are loaded from the dataset folder. Labels are encoded per task.
Binary tasks use a single output neuron; multi-class tasks use N outputs.

In [None]:
import torch
from torch.utils.data import Dataset
from PIL import Image
from sklearn.preprocessing import LabelEncoder

class ZoomBehaviorDataset(Dataset):
    """Image dataset with one integer-encoded label per task column.

    Each label column is encoded with its own ``LabelEncoder`` fitted on the
    rows passed in. ``tasks_config`` maps every task name to the number of
    model outputs it needs: 1 for a column with exactly two classes, otherwise
    the number of classes found in that column.

    Args:
        df: Table holding the filename column and the label columns.
        img_dir: Directory that the filenames are joined onto.
        filename_col: Name of the column containing image filenames.
        label_cols: Names of the columns to use as tasks.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, df, img_dir, filename_col, label_cols, transform=None):
        # Work on a private copy with a 0..n-1 index so ``.loc[idx, ...]``
        # in ``__getitem__`` addresses rows by position.
        self.df = df.reset_index(drop=True).copy()
        self.img_dir = img_dir
        self.filename_col = filename_col
        self.label_cols = list(label_cols)
        self.transform = transform

        self.tasks_config = {}
        self.label_encoders = {}

        for col in self.label_cols:
            le = LabelEncoder()
            self.df[col] = le.fit_transform(self.df[col].astype(str))
            self.label_encoders[col] = le
            num_classes = len(le.classes_)
            # Two-class columns get a single output; everything else gets
            # one output per class.
            self.tasks_config[col] = 1 if num_classes == 2 else num_classes

    def __len__(self):
        """Return the number of rows (samples) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for row ``idx``.

        ``labels`` maps each task name to a scalar tensor: ``float32`` for
        single-output tasks and ``long`` for the others.
        """
        fname = self.df.loc[idx, self.filename_col]
        img_path = os.path.join(self.img_dir, fname)

        # ``convert`` returns an independent image, so the source file can be
        # closed deterministically instead of waiting for garbage collection.
        with Image.open(img_path) as source:
            image = source.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        labels = {}
        for col in self.label_cols:
            v = int(self.df.loc[idx, col])
            if self.tasks_config[col] == 1:
                labels[col] = torch.tensor(float(v), dtype=torch.float32)
            else:
                labels[col] = torch.tensor(v, dtype=torch.long)

        return image, labels

## Model

We use a ResNet18 backbone with a separate linear head per task.

In [None]:
import torch.nn as nn
from torchvision import models

class MultiTaskResNet(nn.Module):
    """ResNet18 feature extractor followed by one linear head per task.

    Args:
        tasks_config: Mapping of task name to the number of outputs of its head.
        pretrained: When true, the backbone is created with the
            "IMAGENET1K_V1" weights; otherwise no weights are requested.
    """

    def __init__(self, tasks_config, pretrained=True):
        super().__init__()
        weights = "IMAGENET1K_V1" if pretrained else None
        resnet = models.resnet18(weights=weights)
        feature_dim = resnet.fc.in_features

        # Keep every child module except the last one (the original fc layer).
        backbone_layers = list(resnet.children())[:-1]
        self.features = nn.Sequential(*backbone_layers)

        self.heads = nn.ModuleDict(
            {
                name: nn.Linear(feature_dim, n_outputs)
                for name, n_outputs in tasks_config.items()
            }
        )

    def forward(self, x):
        """Return a dict mapping each task name to its head's output."""
        pooled = torch.flatten(self.features(x), 1)
        outputs = {}
        for name, head in self.heads.items():
            outputs[name] = head(pooled)
        return outputs

## Training and evaluation

We train on an 80/20 split and log:
- Train loss
- Validation accuracy per task
- Validation average accuracy across tasks

In [None]:
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
import torch.optim as optim

def train_one_epoch(model, dataloader, optimizer, device):
    """Run one optimisation pass over ``dataloader`` and return the mean batch loss.

    The batch loss is the sum of the per-task losses: tasks whose output has a
    single column use ``BCEWithLogitsLoss``; all others use ``CrossEntropyLoss``.

    Args:
        model: Module returning a dict of task name -> output tensor.
        dataloader: Iterable of ``(images, labels_dict)`` batches supporting ``len``.
        optimizer: Optimizer updating the model parameters.
        device: Device that inputs and targets are moved to.

    Returns:
        Sum of batch losses divided by the number of batches (0.0 when the
        dataloader has no batches).
    """
    model.train()
    binary_criterion = nn.BCEWithLogitsLoss()
    multiclass_criterion = nn.CrossEntropyLoss()
    running_loss = 0.0

    for batch_images, batch_targets in dataloader:
        batch_images = batch_images.to(device)
        batch_targets = {
            name: tensor.to(device) for name, tensor in batch_targets.items()
        }

        optimizer.zero_grad()
        predictions = model(batch_images)

        batch_loss = 0.0
        for name, logits in predictions.items():
            expected = batch_targets[name]
            if logits.shape[1] == 1:
                # Single-output head: drop the column axis to match the target.
                task_loss = binary_criterion(logits.squeeze(1), expected)
            else:
                task_loss = multiclass_criterion(logits, expected)
            batch_loss = batch_loss + task_loss

        batch_loss.backward()
        optimizer.step()
        running_loss += float(batch_loss.item())

    num_batches = len(dataloader)
    return running_loss / max(1, num_batches)

@torch.no_grad()
def evaluate(model, dataloader, device):
    """Compute per-task and task-averaged accuracy over ``dataloader``.

    Accuracy is computed per sample (total correct / total seen), not as a mean
    of per-batch accuracies, so a smaller final batch does not get extra weight.

    Args:
        model: Module returning a dict of task name -> output tensor.
        dataloader: Iterable of ``(images, labels_dict)`` batches.
        device: Device that inputs and targets are moved to.

    Returns:
        ``(avg, final)`` where ``final`` maps each task name to its accuracy and
        ``avg`` is the unweighted mean over tasks (0.0 when there are no tasks).
    """
    model.eval()
    correct_counts = {}
    sample_counts = {}

    for images, labels_dict in dataloader:
        images = images.to(device)
        labels_dict = {k: v.to(device) for k, v in labels_dict.items()}
        outputs = model(images)

        for task, out in outputs.items():
            target = labels_dict[task]
            if out.shape[1] == 1:
                # Single-output head: threshold the sigmoid probability at 0.5.
                preds = (torch.sigmoid(out.squeeze(1)) > 0.5).float()
            else:
                preds = torch.argmax(out, dim=1)

            n_correct = int((preds == target).sum().item())
            correct_counts[task] = correct_counts.get(task, 0) + n_correct
            sample_counts[task] = sample_counts.get(task, 0) + int(target.numel())

    final = {
        task: (correct_counts[task] / seen) if seen else 0.0
        for task, seen in sample_counts.items()
    }
    avg = float(np.mean(list(final.values()))) if final else 0.0
    return avg, final

## Run baseline training


In [None]:
# Use the GPU when the runtime provides one.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyper-parameters of the baseline run.
BATCH_SIZE = 32
LR = 1e-3
NUM_EPOCHS = 10
TRAIN_RATIO = 0.8
SEED = 42

# Seed the global RNG as well as the split generator below: the task heads'
# initial weights and the training shuffle order both draw from the global
# generator, so without this the run differs on every execution.
# NOTE: some GPU kernels may still be non-deterministic.
torch.manual_seed(SEED)

# Resize to the 224x224 input size and normalise with the standard ImageNet
# channel statistics, matching the pretrained backbone.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

full_dataset = ZoomBehaviorDataset(df, IMAGES_DIR, filename_col, label_cols, transform=transform)

# Train gets TRAIN_RATIO of the samples (rounded down); validation gets the rest.
train_size = int(TRAIN_RATIO * len(full_dataset))
val_size = len(full_dataset) - train_size

# Dedicated generator so the train/validation split is the same on every run.
g = torch.Generator().manual_seed(SEED)
train_ds, val_ds = random_split(full_dataset, [train_size, val_size], generator=g)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

model = MultiTaskResNet(full_dataset.tasks_config, pretrained=True).to(device)
optimizer = optim.Adam(model.parameters(), lr=LR)

print("Tasks config:", full_dataset.tasks_config)
print("Train size:", len(train_ds), "Val size:", len(val_ds))

In [None]:
# One record per epoch: train loss, average validation accuracy, and one
# "Val_Acc_<task>" entry per task.
history = []

for epoch in range(NUM_EPOCHS):
    train_loss = train_one_epoch(model, train_loader, optimizer, device)
    val_avg_acc, val_task_acc = evaluate(model, val_loader, device)

    per_task_entries = {
        f"Val_Acc_{task}": float(acc) for task, acc in val_task_acc.items()
    }
    row = {
        "Epoch": epoch + 1,  # report epochs 1-based
        "Train_Loss": float(train_loss),
        "Val_Avg_Accuracy": float(val_avg_acc),
        **per_task_entries,
    }

    history.append(row)
    print(row)

## Export results


In [None]:
# Write the per-epoch history to baseline_metrics.csv in the results folder.
metrics_path = os.path.join(RESULTS_DIR, "baseline_metrics.csv")
df_metrics = pd.DataFrame(history)
df_metrics.to_csv(metrics_path, index=False)
# Last expression of the cell: shows the output path in the notebook.
metrics_path

In [None]:
import matplotlib.pyplot as plt

# Reload the exported metrics so the plots reflect exactly what was saved.
dfm = pd.read_csv(metrics_path)

plt.figure(figsize=(10, 5))
ax1 = plt.gca()
ax2 = ax1.twinx()  # second y-axis sharing the epoch axis

# Each axes starts its own colour cycle, so without explicit colours both
# curves would use the same default colour. Set distinct colours and labels
# so the two series can be told apart.
(loss_line,) = ax1.plot(
    dfm["Epoch"], dfm["Train_Loss"],
    marker="o", linewidth=2, color="tab:blue", label="Train Loss",
)
(acc_line,) = ax2.plot(
    dfm["Epoch"], dfm["Val_Avg_Accuracy"],
    marker="s", linewidth=2, color="tab:orange", label="Validation Avg Accuracy",
)

ax1.set_xlabel("Epoch")
ax1.set_ylabel("Train Loss")
ax2.set_ylabel("Validation Avg Accuracy")
ax1.set_title("Baseline: Train Loss vs Validation Avg Accuracy")
ax1.grid(True, alpha=0.3)
# One legend for both axes, attached to the top axes so it is not drawn under
# the accuracy curve.
ax2.legend(handles=[loss_line, acc_line], loc="best")

plt.tight_layout()
png1 = os.path.join(RESULTS_DIR, "baseline_loss_vs_valacc.png")
plt.savefig(png1, dpi=300)
# Last expression of the cell: shows the output path in the notebook.
png1

In [None]:
# Per-task accuracy columns are the ones written as "Val_Acc_<task>".
task_cols = [name for name in dfm.columns if name.startswith("Val_Acc_")]

if not task_cols:
    print("No per-task columns found.")
else:
    fig, ax = plt.subplots(figsize=(10, 5))
    for column in task_cols:
        task_label = column.replace("Val_Acc_", "")
        ax.plot(dfm["Epoch"], dfm[column], marker="o", linewidth=2, label=task_label)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Validation Accuracy")
    ax.set_title("Baseline: Validation Accuracy per Task")
    ax.grid(True, alpha=0.3)
    ax.legend(loc="best", fontsize=8)
    fig.tight_layout()

    # Save next to the other baseline artefacts and report the path.
    png2 = os.path.join(RESULTS_DIR, "baseline_valacc_per_task.png")
    fig.savefig(png2, dpi=300)
    print(png2)

In [None]:
# Build a table of the last epoch's validation accuracy per task, best first.
task_cols = [c for c in dfm.columns if c.startswith("Val_Acc_")]

if task_cols and not dfm.empty:
    last = dfm.iloc[-1]
    summary_rows = [
        {"Task": c.replace("Val_Acc_", ""), "Final Val Acc": float(last[c])}
        for c in task_cols
    ]
else:
    # No per-task columns or no logged epochs: produce an empty summary
    # rather than failing on the missing sort column or the missing last row.
    summary_rows = []

# Naming the columns explicitly keeps "Final Val Acc" present even when there
# are no rows, so the sort below always has its key.
summary = pd.DataFrame(
    summary_rows, columns=["Task", "Final Val Acc"]
).sort_values("Final Val Acc", ascending=False).reset_index(drop=True)

summary_path = os.path.join(RESULTS_DIR, "baseline_final_val_acc_summary.csv")
summary.to_csv(summary_path, index=False)

# Last expression of the cell: renders the summary table in the notebook.
summary