In [2]:
import json
import random
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image, ImageDraw

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

import torchvision.transforms as T
import torchvision.models as models

from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import (
    accuracy_score, f1_score, confusion_matrix, classification_report,
    balanced_accuracy_score
)
from tqdm import tqdm

# ---------------------------------------------------------------------------
# Input files
# ---------------------------------------------------------------------------
# Annotation export (JSON list of tasks) and the label lookup table that
# provides the "label", "type" and "gender" columns read further below.
JSON_PATH = Path("LS_export_26.12.json")
LABELS_CSV = Path("label_type_gender.csv")

# ---------------------------------------------------------------------------
# Split / optimisation settings
# ---------------------------------------------------------------------------
TEST_SIZE = 0.2     # test_size handed to GroupShuffleSplit (validation share)
BATCH_SIZE = 16     # used for both the train and the validation loader
EPOCHS = 30         # upper bound; early stopping may end a run sooner
LR = 1e-4           # Adam learning rate
PATIENCE = 5        # epochs without macro-F1 improvement before stopping
MIN_DELTA = 1e-4    # macro-F1 must beat the best value by more than this

# ---------------------------------------------------------------------------
# Cropping
# ---------------------------------------------------------------------------
PAD = 0.0           # extra margin per side, as a fraction of box width/height
MIN_CROP_PX = 16    # the dataset raises ValueError for crops smaller than this

# Weight of the occluding-hair loss relative to the gender loss.
LAMBDA_OCC = 0.2

# ---------------------------------------------------------------------------
# Outputs and repetitions
# ---------------------------------------------------------------------------
# Every seed gets its own sub-directory below RUNS_DIR.
RUNS_DIR = Path("runs_gender_Bpp")
RUNS_DIR.mkdir(parents=True, exist_ok=True)

SEEDS = [42, 43, 44]

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print("Device:", device)

#

def seed_everything(seed: int):
    """Seed every RNG used by the pipeline and request deterministic kernels.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator plus all CUDA devices). It then switches cuDNN to
    deterministic, non-benchmarking mode and asks PyTorch to use
    deterministic algorithms.

    Enabling deterministic algorithms is best effort: if the call fails the
    function still returns normally, but a ``RuntimeWarning`` is emitted so
    that a run which may not be reproducible does not go unnoticed.

    Args:
        seed: Seed value applied to all generators.
    """
    import warnings

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN autotuning (benchmark) may pick different kernels between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    try:
        torch.use_deterministic_algorithms(True)
    except Exception as exc:
        # Keep the original "carry on anyway" behaviour, but report it
        # instead of swallowing the failure silently.
        warnings.warn(
            f"Could not enable deterministic algorithms: {exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )

def seed_worker(worker_id: int):
    """Seed ``random`` and NumPy from PyTorch's initial seed plus the worker id.

    Passed to the data loaders as ``worker_init_fn``. The derived seed is
    reduced modulo 2**32 before it is handed to both generators.

    Args:
        worker_id: Index of the worker being initialised.
    """
    derived_seed = torch.initial_seed() + worker_id
    derived_seed %= 2**32
    random.seed(derived_seed)
    np.random.seed(derived_seed)

#

def bbox_xywh_to_xyxy(b):
    """Convert a ``(x, y, width, height)`` box to ``(x1, y1, x2, y2)`` corners."""
    left, top, width, height = b
    right = left + width
    bottom = top + height
    return left, top, right, bottom

def inter_area_xyxy(a, b):
    """Return the overlap area of two ``(x1, y1, x2, y2)`` boxes.

    Boxes that do not overlap (or only touch) yield ``0.0``.
    """
    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b

    # The overlap spans from the larger of the two near edges to the smaller
    # of the two far edges; a negative extent means there is no overlap.
    overlap_w = max(0.0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0.0, min(a_bottom, b_bottom) - max(a_top, b_top))
    return overlap_w * overlap_h

def area_xyxy(b):
    """Return the area of an ``(x1, y1, x2, y2)`` box, or 0 for inverted boxes."""
    left, top, right, bottom = b
    width = max(0.0, right - left)
    height = max(0.0, bottom - top)
    return width * height

def ioh_xyxy(fig_xyxy, head_xyxy):
    """Return the fraction of the head box that lies inside the figure box.

    This is intersection area divided by the head box's own area. A head box
    with non-positive area yields ``0.0``.
    """
    head_area = area_xyxy(head_xyxy)
    return 0.0 if head_area <= 0 else inter_area_xyxy(fig_xyxy, head_xyxy) / head_area

def head_region_overlap(fig_xyxy, head_xyxy, head_top_frac=0.45):
    """Tell whether the head box is centred in the upper part of the figure.

    Only vertical positions are compared: the check passes when the vertical
    centre of ``head_xyxy`` is at or above the line located ``head_top_frac``
    of the figure height below the figure's top edge.

    Args:
        fig_xyxy: Figure box as ``(x1, y1, x2, y2)``.
        head_xyxy: Head box as ``(x1, y1, x2, y2)``.
        head_top_frac: Share of the figure height, measured from its top,
            that counts as the head region.

    Returns:
        ``True`` when the head centre falls inside that upper band.
    """
    _, fig_top, _, fig_bottom = fig_xyxy
    _, head_top, _, head_bottom = head_xyxy
    band_bottom = fig_top + (fig_bottom - fig_top) * head_top_frac
    head_center_y = (head_top + head_bottom) / 2.0
    return head_center_y <= band_bottom

#

# Build `df`: one row per gender-labelled figure box, with a flag telling
# whether an occluding-hair box covers the figure's head region.
print("\nLoading tasks + building df...")
tasks = json.loads(JSON_PATH.read_text(encoding="utf-8"))

label_df = pd.read_csv(LABELS_CSV, encoding="utf-8")
label2gender = {
    lbl: gender for lbl, gender in zip(label_df["label"], label_df["gender"])
}

# Labels whose "type" column mentions attribute_occluding_hair (any case).
is_occluding_type = label_df["type"].astype(str).str.contains(
    "attribute_occluding_hair", case=False, na=False
)
occ_labels = set(label_df.loc[is_occluding_type, "label"].tolist())
print("Occluding hair labels:", sorted(occ_labels))

rows = []
missing_images = 0

# An occluding box counts for a figure when at least IOH_THR of its area lies
# inside the figure box and, if REQUIRE_HEAD_REGION is set, its vertical
# centre sits within the top HEAD_TOP_FRAC of the figure.
IOH_THR = 0.30
REQUIRE_HEAD_REGION = True
HEAD_TOP_FRAC = 0.45

for task in tasks:
    task_id = task.get("id")
    data = task.get("data", {}) or {}
    image_path = data.get("image_local_path")
    museum_number = data.get("Museum number")

    # Tasks without a usable image on disk are counted and skipped.
    if not image_path or not Path(image_path).exists():
        missing_images += 1
        continue
    image_path = Path(image_path)

    figure_boxes = []
    headwear_boxes = []

    # Walk every result of every annotation attached to this task.
    all_results = (
        r
        for ann in task.get("annotations", [])
        for r in ann.get("result", [])
    )
    for r in all_results:
        if r.get("type") != "rectanglelabels":
            continue
        value = r.get("value", {}) or {}
        labels = value.get("rectanglelabels", [])
        if not labels:
            continue

        # Only the first label of a rectangle is considered.
        lbl = labels[0]
        box_xyxy = bbox_xywh_to_xyxy(
            tuple(float(value[key]) for key in ("x", "y", "width", "height"))
        )

        if lbl in occ_labels:
            headwear_boxes.append((lbl, box_xyxy))
        else:
            g = label2gender.get(lbl)
            if g in {"male", "female"}:
                figure_boxes.append((lbl, g, box_xyxy))

    # Fall back to the task id when the task carries no museum number.
    group_key = museum_number if museum_number is not None else str(task_id)

    for fig_label, fig_gender, fig_xyxy in figure_boxes:
        occ = any(
            ioh_xyxy(fig_xyxy, head_xyxy) >= IOH_THR
            for _head_label, head_xyxy in headwear_boxes
            if not REQUIRE_HEAD_REGION
            or head_region_overlap(fig_xyxy, head_xyxy, head_top_frac=HEAD_TOP_FRAC)
        )

        fx1, fy1, fx2, fy2 = fig_xyxy
        rows.append({
            "task_id": task_id,
            "museum_number": group_key,
            "image_path": str(image_path),
            "label": fig_label,
            "gender": fig_gender,
            "occluding_hair": int(occ),
            "x": float(fx1),
            "y": float(fy1),
            "w": float(fx2 - fx1),
            "h": float(fy2 - fy1),
        })

df = pd.DataFrame(rows)
print("Missing images:", missing_images)
print("Total figure bboxes:", len(df))
print("Gender counts:\n", df["gender"].value_counts())
print("Occluding hair counts:\n", df["occluding_hair"].value_counts())

#

class MultiTaskBboxDataset(Dataset):
    """Figure crops paired with a gender label and an occluding-hair flag.

    Every row of the dataframe describes one box (``x``, ``y``, ``w``, ``h``)
    that is scaled by 1/100 of the image width/height, i.e. treated as
    percentages of the image size, and cut out of ``image_path`` on access.

    Args:
        df: Table with the columns read in ``__getitem__``; its index is
            reset so that positional indices run from 0.
        transform: Optional callable applied to the cropped PIL image.
        pad: Margin added on every side, as a fraction of the box
            width/height; ``0`` leaves the box as annotated.
    """

    def __init__(self, df: pd.DataFrame, transform=None, pad: float = 0.0):
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.pad = pad

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return ``(crop, y_gender, y_occ, meta)`` for row ``idx``.

        ``y_gender`` is 1 for "male" and 0 otherwise; ``y_occ`` is the
        occluding-hair flag as a float; ``meta`` repeats the row's fields.

        Raises:
            ValueError: If the box is narrower or shorter than
                ``MIN_CROP_PX`` pixels.
        """
        rec = self.df.iloc[idx]
        image = Image.open(rec.image_path).convert("RGB")
        img_w, img_h = image.size

        # Percent coordinates -> pixel coordinates.
        left = rec.x / 100.0 * img_w
        top = rec.y / 100.0 * img_h
        right = (rec.x + rec.w) / 100.0 * img_w
        bottom = (rec.y + rec.h) / 100.0 * img_h

        box_w = right - left
        box_h = bottom - top
        if box_w < MIN_CROP_PX or box_h < MIN_CROP_PX:
            raise ValueError(
                f"Too-small crop at idx={idx}: "
                f"w={box_w:.2f}px h={box_h:.2f}px "
                f"task_id={rec.task_id} image={rec.image_path}"
            )

        if self.pad > 0:
            # Grow the box by a share of its own size, clamped to the image.
            margin_x = self.pad * box_w
            margin_y = self.pad * box_h
            left = max(0, left - margin_x)
            top = max(0, top - margin_y)
            right = min(img_w, right + margin_x)
            bottom = min(img_h, bottom + margin_y)

        patch = image.crop((left, top, right, bottom))
        if self.transform:
            patch = self.transform(patch)

        meta = {
            "task_id": rec.task_id,
            "museum_number": rec.museum_number,
            "image_path": rec.image_path,
            "label": rec.label,
            "gender": rec.gender,
            "occluding_hair": int(rec.occluding_hair),
            "x": float(rec.x),
            "y": float(rec.y),
            "w": float(rec.w),
            "h": float(rec.h),
        }

        return patch, int(rec.gender == "male"), float(rec.occluding_hair), meta

def collate_keep_meta(batch):
    """Collate ``(crop, y_gender, y_occ, meta)`` samples into one batch.

    Crops are stacked along a new first dimension, gender labels become a
    ``long`` tensor, occlusion labels a ``float32`` tensor, and the metadata
    dicts are returned unchanged as a plain list.
    """
    crops, genders, occlusions, metas = [], [], [], []
    for sample in batch:
        crops.append(sample[0])
        genders.append(sample[1])
        occlusions.append(sample[2])
        metas.append(sample[3])

    return (
        torch.stack(crops, dim=0),
        torch.tensor(genders, dtype=torch.long),
        torch.tensor(occlusions, dtype=torch.float32),
        metas,
    )

#

# Shared preprocessing constants. The mean/std values are the ones commonly
# used with ImageNet-pretrained torchvision models such as the ResNet-18
# backbone loaded in run_one_seed.
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, light geometric augmentation, then normalise.
transform_train = T.Compose([
    T.Resize(_INPUT_SIZE),
    T.RandomHorizontalFlip(),
    T.RandomAffine(degrees=5, translate=(0.03, 0.03), scale=(0.95, 1.05)),
    T.ToTensor(),
    T.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
])

# Validation pipeline: same resize and normalisation, no augmentation.
transform_val = T.Compose([
    T.Resize(_INPUT_SIZE),
    T.ToTensor(),
    T.Normalize(mean=list(_NORM_MEAN), std=list(_NORM_STD)),
])

#

def save_error_crop(meta, pred_label, prob_male, out_dir):
    """Save a captioned crop of a misclassified figure as a JPEG.

    The crop is taken with the same percent-to-pixel conversion and the same
    global ``PAD`` margin the dataset uses. A black strip with the true and
    predicted gender, the predicted male probability and the occlusion flag
    is drawn over the top of the crop.

    Args:
        meta: Sample metadata with the keys ``image_path``, ``x``, ``y``,
            ``w``, ``h``, ``gender``, ``occluding_hair``, ``task_id`` and
            ``label``.
        pred_label: Predicted class index; 1 is captioned as "male",
            anything else as "female".
        prob_male: Predicted probability of the male class.
        out_dir: Directory (``Path``) in which the file is written.
    """
    import re

    # Release the source file handle as soon as the pixels are copied.
    with Image.open(meta["image_path"]) as src:
        img = src.convert("RGB")
    W, H = img.size

    x1 = meta["x"] / 100.0 * W
    y1 = meta["y"] / 100.0 * H
    x2 = (meta["x"] + meta["w"]) / 100.0 * W
    y2 = (meta["y"] + meta["h"]) / 100.0 * H

    if PAD > 0:
        pad_x = PAD * (x2 - x1)
        pad_y = PAD * (y2 - y1)
        x1 = max(0, x1 - pad_x)
        y1 = max(0, y1 - pad_y)
        x2 = min(W, x2 + pad_x)
        y2 = min(H, y2 + pad_y)

    crop = img.crop((x1, y1, x2, y2))

    draw = ImageDraw.Draw(crop)
    caption = (
        f"true={meta['gender']} pred={'male' if pred_label==1 else 'female'} "
        f"p_male={prob_male:.2f} occ={meta['occluding_hair']}"
    )
    # 18 px tall black banner behind the white caption text.
    draw.rectangle([0, 0, crop.size[0], 18], fill=(0, 0, 0))
    draw.text((3, 2), caption, fill=(255, 255, 255))

    # Labels are free text: replace path separators and other characters
    # that are not valid in file names so the save cannot fail or land in
    # another directory. Labels without such characters are left untouched.
    safe_label = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", str(meta["label"]))

    fname = (
        f"task{meta['task_id']}_{Path(meta['image_path']).stem}_"
        f"{safe_label}_occ{meta['occluding_hair']}_"
        f"x{meta['x']:.1f}_y{meta['y']:.1f}_w{meta['w']:.1f}_h{meta['h']:.1f}.jpg"
    )
    crop.save(out_dir / fname)

#

def subgroup_metrics(y_true, y_pred, occ_flags, out_path_txt=None, title_prefix=""):
    """Report gender metrics separately for occluded and non-occluded samples.

    For each subgroup (``occ_flags == 0`` and ``occ_flags == 1``) the report
    contains the sample count, accuracy, balanced accuracy, macro F1, the
    confusion matrix and scikit-learn's classification report. The text is
    printed and, when ``out_path_txt`` is given, also written to that file.

    Args:
        y_true: True gender labels (0 = female, 1 = male).
        y_pred: Predicted gender labels, aligned with ``y_true``.
        occ_flags: Occluding-hair flag per sample, aligned with ``y_true``.
        out_path_txt: Optional path of a text file to write the report to.
        title_prefix: Optional first line of the report.
    """
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    occ_flags = np.array(occ_flags)

    lines = []

    def one(mask, name):
        yt = y_true[mask]
        yp = y_pred[mask]
        if len(yt) == 0:
            lines.append(f"\n=== {name}: no samples ===\n")
            return
        acc = accuracy_score(yt, yp)
        bal = balanced_accuracy_score(yt, yp)
        f1m = f1_score(yt, yp, average="macro")
        # labels=[0, 1] keeps the two target_names valid even when a small
        # subgroup contains a single class; without it classification_report
        # raises ValueError because only one class is detected.
        rep = classification_report(
            yt, yp,
            labels=[0, 1],
            target_names=["female", "male"],
            digits=3,
            zero_division=0,
        )
        cm = confusion_matrix(yt, yp, labels=[0, 1])

        lines.append(f"\n=== {name} (n={len(yt)}) ===")
        lines.append(f"accuracy: {acc:.3f}")
        lines.append(f"balanced_accuracy: {bal:.3f}")
        lines.append(f"macro_f1: {f1m:.3f}")
        lines.append("confusion:\n" + str(cm))
        lines.append(rep)

    one(occ_flags == 0, "NON-OCCLUDING hair (occ=0)")
    one(occ_flags == 1, "OCCLUDING hair (occ=1)")

    text = (title_prefix + "\n" if title_prefix else "") + "\n".join(lines)
    print(text)

    if out_path_txt is not None:
        Path(out_path_txt).write_text(text, encoding="utf-8")

#

def run_one_seed(seed: int):
    """Train and evaluate the two-head model once for the given seed.

    Steps:
      1. Seed all RNGs and create ``RUNS_DIR/seed_<seed>`` with its error
         sub-directories.
      2. Split the global ``df`` into train/validation with
         ``GroupShuffleSplit`` grouped by ``museum_number``.
      3. Train a ResNet-18 backbone with a gender head (2 logits) and an
         occluding-hair head (1 logit) using
         ``loss_gender + LAMBDA_OCC * loss_occ``.
      4. Stop early when validation macro F1 has not improved by more than
         ``MIN_DELTA`` for ``PATIENCE`` consecutive epochs; the best weights
         are saved to ``best_model.pt``.
      5. Reload the best weights, re-evaluate, save misclassified crops, and
         write ``history.csv``, ``report.txt`` and ``subgroup_report.txt``.

    Args:
        seed: Seed used for the RNGs and for the group split.

    Returns:
        Dict with the seed, best epoch, validation metrics of the best
        checkpoint, validation subgroup sizes and the run directory.
    """
    seed_everything(seed)
    gen = torch.Generator()
    gen.manual_seed(seed)

    run_dir = RUNS_DIR / f"seed_{seed}"
    run_dir.mkdir(parents=True, exist_ok=True)

    best_path = run_dir / "best_model.pt"
    err_dir = run_dir / "errors"
    err_fp = err_dir / "female_as_male"
    err_fm = err_dir / "male_as_female"
    err_fp.mkdir(parents=True, exist_ok=True)
    err_fm.mkdir(parents=True, exist_ok=True)

    # Grouped split: all boxes sharing a museum_number land on the same side.
    gss = GroupShuffleSplit(n_splits=1, test_size=TEST_SIZE, random_state=seed)
    train_idx, val_idx = next(gss.split(df, groups=df["museum_number"]))
    train_df = df.iloc[train_idx].reset_index(drop=True)
    val_df = df.iloc[val_idx].reset_index(drop=True)

    print(f"\n=== SEED {seed} ===")
    print("Train:", len(train_df), "Val:", len(val_df))
    print("Val occ counts:\n", val_df["occluding_hair"].value_counts())

    train_ds = MultiTaskBboxDataset(train_df, transform=transform_train, pad=PAD)
    val_ds = MultiTaskBboxDataset(val_df, transform=transform_val, pad=PAD)

    # Per-sample weight = 1 / (size of the sample's gender class), so the
    # sampler draws both genders with equal total probability.
    train_gender = train_df["gender"].map({"female": 0, "male": 1}).astype(int).to_numpy()
    class_counts = np.bincount(train_gender, minlength=2).astype(float)
    class_w = 1.0 / np.maximum(class_counts, 1.0)
    sample_w = class_w[train_gender]
    sample_w = torch.tensor(sample_w, dtype=torch.double)

    sampler = WeightedRandomSampler(
        weights=sample_w,
        num_samples=len(sample_w),
        replacement=True
    )

    train_loader = DataLoader(
        train_ds,
        batch_size=BATCH_SIZE,
        shuffle=False,
        sampler=sampler,
        num_workers=0,
        collate_fn=collate_keep_meta,
        worker_init_fn=seed_worker,
        generator=gen,
    )

    val_loader = DataLoader(
        val_ds,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_keep_meta,
        worker_init_fn=seed_worker,
        generator=gen,
    )

    # Gender loss: each class is weighted by the other class's share of the
    # training set.
    # NOTE(review): the sampler above already balances the genders, so this
    # weighting is applied on top of balanced batches - confirm the combined
    # emphasis on the minority class is intended.
    n_female = int((train_df["gender"] == "female").sum())
    n_male = int((train_df["gender"] == "male").sum())
    w_female = n_male / (n_female + n_male)
    w_male = n_female / (n_female + n_male)
    gender_weights = torch.tensor([w_female, w_male], dtype=torch.float32).to(device)
    loss_gender = nn.CrossEntropyLoss(weight=gender_weights)

    # Occlusion loss: positives are up-weighted by the negative/positive
    # ratio of the training set (guarded against zero positives).
    n_pos = int(train_df["occluding_hair"].sum())
    n_neg = int(len(train_df) - n_pos)
    pos_weight = torch.tensor([n_neg / max(1, n_pos)], dtype=torch.float32).to(device)
    loss_occ = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    # ImageNet-pretrained ResNet-18 used as a feature extractor: its final
    # fc layer is replaced by an identity so both heads see the features.
    backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    in_features = backbone.fc.in_features
    backbone.fc = nn.Identity()

    head_gender = nn.Linear(in_features, 2)
    head_occ = nn.Linear(in_features, 1)

    class MultiHead(nn.Module):
        """Shared backbone followed by a gender head and an occlusion head."""

        def __init__(self, backbone, head_gender, head_occ):
            super().__init__()
            self.backbone = backbone
            self.head_gender = head_gender
            self.head_occ = head_occ

        def forward(self, x):
            feats = self.backbone(x)
            lg = self.head_gender(feats)
            # Drop the singleton logit dimension of the occlusion head.
            lo = self.head_occ(feats).squeeze(1)
            return lg, lo

    model = MultiHead(backbone, head_gender, head_occ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    def train_one_epoch():
        """Run one pass over the train loader; return the mean sample loss."""
        model.train()
        total = 0.0
        for x, yg, yo, _ in tqdm(train_loader, desc="train", leave=False):
            x = x.to(device)
            yg = yg.to(device)
            yo = yo.to(device)

            optimizer.zero_grad(set_to_none=True)
            logits_g, logits_o = model(x)

            lg = loss_gender(logits_g, yg)
            lo = loss_occ(logits_o, yo)
            loss = lg + LAMBDA_OCC * lo

            loss.backward()
            optimizer.step()
            # Batch-mean loss times batch size, so the epoch figure is a
            # per-sample average.
            total += loss.item() * x.size(0)

        return total / len(train_loader.dataset)

    @torch.no_grad()
    def eval_epoch(save_errors=False):
        """Evaluate on the validation loader.

        Returns accuracy, balanced accuracy, macro F1, female recall and
        occlusion accuracy, followed by the true labels, predicted labels
        and occlusion flags. With ``save_errors`` set, every gender
        misclassification is written out as a captioned crop.
        """
        model.eval()
        y_true, y_pred = [], []
        occ_flags = []
        occ_true, occ_pred = [], []

        for x, yg, yo, metas in tqdm(val_loader, desc="val", leave=False):
            x = x.to(device)
            logits_g, logits_o = model(x)

            probs_g = torch.softmax(logits_g, dim=1).cpu().numpy()
            preds_g = probs_g.argmax(axis=1)

            preds_o = (torch.sigmoid(logits_o).cpu().numpy() >= 0.5).astype(int)

            for i in range(len(preds_g)):
                true_g = int(yg[i].item())
                pred_g = int(preds_g[i])
                pmale = float(probs_g[i][1])
                meta = metas[i]

                y_true.append(true_g)
                y_pred.append(pred_g)
                occ_flags.append(int(meta["occluding_hair"]))

                occ_true.append(int(yo[i].item()))
                occ_pred.append(int(preds_o[i]))

                if save_errors and true_g != pred_g:
                    if true_g == 0 and pred_g == 1:
                        save_error_crop(meta, pred_g, pmale, err_fp)
                    elif true_g == 1 and pred_g == 0:
                        save_error_crop(meta, pred_g, pmale, err_fm)

        acc = accuracy_score(y_true, y_pred)
        bal = balanced_accuracy_score(y_true, y_pred)
        f1m = f1_score(y_true, y_pred, average="macro")

        cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
        # Row 0 of the confusion matrix holds the true-female samples.
        female_recall = cm[0, 0] / max(1, cm[0, 0] + cm[0, 1])

        occ_acc = accuracy_score(occ_true, occ_pred)
        return acc, bal, f1m, female_recall, occ_acc, y_true, y_pred, occ_flags

    best_f1 = -1.0
    best_epoch = 0
    pat_left = PATIENCE
    hist = []

    for epoch in range(1, EPOCHS + 1):
        tr_loss = train_one_epoch()
        acc, bal, f1m, fem_rec, occ_acc, y_true, y_pred, occ_flags = eval_epoch(save_errors=False)

        hist.append({
            "epoch": epoch,
            "train_loss": float(tr_loss),
            "val_acc": float(acc),
            "val_bal_acc": float(bal),
            "val_f1_macro": float(f1m),
            "val_female_recall": float(fem_rec),
            "val_occ_acc": float(occ_acc),
        })

        print(
            f"Epoch {epoch:02d} | loss={tr_loss:.4f} | "
            f"acc={acc:.3f} | bal_acc={bal:.3f} | f1m={f1m:.3f} | fem_rec={fem_rec:.3f} | occ_acc={occ_acc:.3f}"
        )

        # Early stopping on validation macro F1.
        if f1m > best_f1 + MIN_DELTA:
            best_f1 = f1m
            best_epoch = epoch
            pat_left = PATIENCE
            torch.save(model.state_dict(), best_path)
        else:
            pat_left -= 1
            if pat_left <= 0:
                break

    pd.DataFrame(hist).to_csv(run_dir / "history.csv", index=False, encoding="utf-8")

    # Final pass with the best checkpoint; this one also writes error crops.
    model.load_state_dict(torch.load(best_path, map_location=device))
    acc, bal, f1m, fem_rec, occ_acc, y_true, y_pred, occ_flags = eval_epoch(save_errors=True)

    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    # labels=[0, 1] keeps the two target_names valid even if only one class
    # shows up in the validation labels and predictions; otherwise
    # classification_report raises ValueError after the whole training run.
    rep = classification_report(
        y_true, y_pred,
        labels=[0, 1],
        target_names=["female", "male"],
        digits=3,
        zero_division=0,
    )

    out_txt = run_dir / "report.txt"
    with open(out_txt, "w", encoding="utf-8") as f:
        f.write(f"SEED={seed}\n")
        f.write(f"BEST_EPOCH={best_epoch}\n")
        f.write(f"val_acc={acc:.3f}\n")
        f.write(f"val_bal_acc={bal:.3f}\n")
        f.write(f"val_macro_f1={f1m:.3f}\n")
        f.write(f"val_female_recall={fem_rec:.3f}\n")
        f.write(f"val_occ_acc={occ_acc:.3f}\n\n")
        f.write("CONFUSION MATRIX (gender)\n")
        f.write(str(cm) + "\n\n")
        f.write(rep + "\n")

    subgroup_metrics(
        y_true, y_pred, occ_flags,
        out_path_txt=run_dir / "subgroup_report.txt",
        title_prefix=f"SEED {seed} | BEST_EPOCH {best_epoch} | macroF1={f1m:.3f}"
    )

    return {
        "seed": seed,
        "best_epoch": best_epoch,
        "val_acc": acc,
        "val_bal_acc": bal,
        "val_f1_macro": f1m,
        "val_female_recall": fem_rec,
        "val_occ_acc": occ_acc,
        "val_occ_n": int(np.sum(np.array(occ_flags) == 1)),
        "val_total": len(occ_flags),
        "run_dir": str(run_dir),
    }

#

# Run the full train/evaluate cycle once per seed and collect the results.
all_res = [run_one_seed(s) for s in SEEDS]

res_df = pd.DataFrame(all_res)
res_df.to_csv(RUNS_DIR / "summary_runs.csv", index=False, encoding="utf-8")

# Metrics that are averaged across seeds.
metric_cols = ["val_acc", "val_bal_acc", "val_f1_macro", "val_female_recall", "val_occ_acc"]

# The seed count is taken from the results so the header stays correct when
# SEEDS is changed.
print(f"\n=== SUMMARY ({len(all_res)} seeds) ===")
print(res_df[["seed", "best_epoch", *metric_cols, "val_occ_n", "val_total"]])

print("\nAverages:")
print(res_df[metric_cols].mean(numeric_only=True))

print("\nStd:")
print(res_df[metric_cols].std(numeric_only=True))

print("\nOutputs in:", RUNS_DIR.resolve())


Device: cpu

Loading tasks + building df...
Occluding hair labels: ['Phrygian cap with lappets', 'ampyx', 'cap', 'opisthosphendone', 'petasos', 'pilos', 'saccos']
Missing images: 0
Total figure bboxes: 520
Gender counts:
 gender
male      359
female    161
Name: count, dtype: int64
Occluding hair counts:
 occluding_hair
0    476
1     44
Name: count, dtype: int64

=== SEED 42 ===
Train: 425 Val: 95
Val occ counts:
 occluding_hair
0    85
1    10
Name: count, dtype: int64


                                                      

Epoch 01 | loss=0.7210 | acc=0.642 | bal_acc=0.669 | f1m=0.635 | fem_rec=0.750 | occ_acc=0.484


                                                      

Epoch 02 | loss=0.4182 | acc=0.747 | bal_acc=0.748 | f1m=0.732 | fem_rec=0.750 | occ_acc=0.653


                                                      

Epoch 03 | loss=0.2861 | acc=0.789 | bal_acc=0.734 | f1m=0.747 | fem_rec=0.562 | occ_acc=0.905


                                                      

Epoch 04 | loss=0.2469 | acc=0.811 | bal_acc=0.719 | f1m=0.742 | fem_rec=0.438 | occ_acc=0.863


                                                      

Epoch 05 | loss=0.1502 | acc=0.758 | bal_acc=0.748 | f1m=0.738 | fem_rec=0.719 | occ_acc=0.842


                                                      

Epoch 06 | loss=0.1027 | acc=0.758 | bal_acc=0.756 | f1m=0.741 | fem_rec=0.750 | occ_acc=0.811


                                                      

Epoch 07 | loss=0.1375 | acc=0.789 | bal_acc=0.718 | f1m=0.735 | fem_rec=0.500 | occ_acc=0.863


                                                      

Epoch 08 | loss=0.1622 | acc=0.779 | bal_acc=0.695 | f1m=0.711 | fem_rec=0.438 | occ_acc=0.874


                                                  

SEED 42 | BEST_EPOCH 3 | macroF1=0.747

=== NON-OCCLUDING hair (occ=0) (n=85) ===
accuracy: 0.776
balanced_accuracy: 0.669
macro_f1: 0.684
confusion:
[[10 13]
 [ 6 56]]
              precision    recall  f1-score   support

      female      0.625     0.435     0.513        23
        male      0.812     0.903     0.855        62

    accuracy                          0.776        85
   macro avg      0.718     0.669     0.684        85
weighted avg      0.761     0.776     0.762        85


=== OCCLUDING hair (occ=1) (n=10) ===
accuracy: 0.900
balanced_accuracy: 0.944
macro_f1: 0.804
confusion:
[[8 1]
 [0 1]]
              precision    recall  f1-score   support

      female      1.000     0.889     0.941         9
        male      0.500     1.000     0.667         1

    accuracy                          0.900        10
   macro avg      0.750     0.944     0.804        10
weighted avg      0.950     0.900     0.914        10


=== SEED 43 ===
Train: 437 Val: 83
Val occ counts:
 oc

                                                      

Epoch 01 | loss=0.7245 | acc=0.735 | bal_acc=0.774 | f1m=0.730 | fem_rec=0.893 | occ_acc=0.892


                                                      

Epoch 02 | loss=0.4902 | acc=0.675 | bal_acc=0.737 | f1m=0.674 | fem_rec=0.929 | occ_acc=0.711


                                                      

Epoch 03 | loss=0.3320 | acc=0.699 | bal_acc=0.755 | f1m=0.697 | fem_rec=0.929 | occ_acc=0.819


                                                      

Epoch 04 | loss=0.1911 | acc=0.831 | bal_acc=0.820 | f1m=0.814 | fem_rec=0.786 | occ_acc=0.952


                                                      

Epoch 05 | loss=0.1370 | acc=0.819 | bal_acc=0.811 | f1m=0.803 | fem_rec=0.786 | occ_acc=0.916


                                                      

Epoch 06 | loss=0.0936 | acc=0.819 | bal_acc=0.837 | f1m=0.810 | fem_rec=0.893 | occ_acc=0.904


                                                      

Epoch 07 | loss=0.1269 | acc=0.807 | bal_acc=0.828 | f1m=0.799 | fem_rec=0.893 | occ_acc=0.916


                                                      

Epoch 08 | loss=0.1151 | acc=0.795 | bal_acc=0.802 | f1m=0.783 | fem_rec=0.821 | occ_acc=0.928


                                                      

Epoch 09 | loss=0.0796 | acc=0.819 | bal_acc=0.776 | f1m=0.788 | fem_rec=0.643 | occ_acc=0.916


                                                  

SEED 43 | BEST_EPOCH 4 | macroF1=0.814

=== NON-OCCLUDING hair (occ=0) (n=76) ===
accuracy: 0.829
balanced_accuracy: 0.808
macro_f1: 0.804
confusion:
[[18  6]
 [ 7 45]]
              precision    recall  f1-score   support

      female      0.720     0.750     0.735        24
        male      0.882     0.865     0.874        52

    accuracy                          0.829        76
   macro avg      0.801     0.808     0.804        76
weighted avg      0.831     0.829     0.830        76


=== OCCLUDING hair (occ=1) (n=7) ===
accuracy: 0.857
balanced_accuracy: 0.833
macro_f1: 0.844
confusion:
[[4 0]
 [1 2]]
              precision    recall  f1-score   support

      female      0.800     1.000     0.889         4
        male      1.000     0.667     0.800         3

    accuracy                          0.857         7
   macro avg      0.900     0.833     0.844         7
weighted avg      0.886     0.857     0.851         7


=== SEED 44 ===
Train: 409 Val: 111
Val occ counts:
 oc

                                                      

Epoch 01 | loss=0.6223 | acc=0.685 | bal_acc=0.700 | f1m=0.656 | fem_rec=0.733 | occ_acc=0.703


                                                      

Epoch 02 | loss=0.4071 | acc=0.658 | bal_acc=0.713 | f1m=0.642 | fem_rec=0.833 | occ_acc=0.712


                                                      

Epoch 03 | loss=0.2851 | acc=0.730 | bal_acc=0.699 | f1m=0.682 | fem_rec=0.633 | occ_acc=0.919


                                                      

Epoch 04 | loss=0.2262 | acc=0.739 | bal_acc=0.727 | f1m=0.700 | fem_rec=0.700 | occ_acc=0.883


                                                      

Epoch 05 | loss=0.1501 | acc=0.721 | bal_acc=0.725 | f1m=0.688 | fem_rec=0.733 | occ_acc=0.883


                                                      

Epoch 06 | loss=0.1120 | acc=0.766 | bal_acc=0.745 | f1m=0.724 | fem_rec=0.700 | occ_acc=0.937


                                                      

Epoch 07 | loss=0.0972 | acc=0.712 | bal_acc=0.719 | f1m=0.680 | fem_rec=0.733 | occ_acc=0.928


                                                      

Epoch 08 | loss=0.0743 | acc=0.811 | bal_acc=0.744 | f1m=0.752 | fem_rec=0.600 | occ_acc=0.892


                                                      

Epoch 09 | loss=0.0699 | acc=0.712 | bal_acc=0.740 | f1m=0.687 | fem_rec=0.800 | occ_acc=0.901


                                                      

Epoch 10 | loss=0.0521 | acc=0.775 | bal_acc=0.762 | f1m=0.737 | fem_rec=0.733 | occ_acc=0.910


                                                      

Epoch 11 | loss=0.0884 | acc=0.793 | bal_acc=0.701 | f1m=0.715 | fem_rec=0.500 | occ_acc=0.919


                                                      

Epoch 12 | loss=0.0651 | acc=0.748 | bal_acc=0.712 | f1m=0.698 | fem_rec=0.633 | occ_acc=0.928


                                                      

Epoch 13 | loss=0.0853 | acc=0.595 | bal_acc=0.701 | f1m=0.591 | fem_rec=0.933 | occ_acc=0.910


                                                  

SEED 44 | BEST_EPOCH 8 | macroF1=0.752

=== NON-OCCLUDING hair (occ=0) (n=103) ===
accuracy: 0.816
balanced_accuracy: 0.735
macro_f1: 0.738
confusion:
[[14 10]
 [ 9 70]]
              precision    recall  f1-score   support

      female      0.609     0.583     0.596        24
        male      0.875     0.886     0.881        79

    accuracy                          0.816       103
   macro avg      0.742     0.735     0.738       103
weighted avg      0.813     0.816     0.814       103


=== OCCLUDING hair (occ=1) (n=8) ===
accuracy: 0.750
balanced_accuracy: 0.833
macro_f1: 0.733
confusion:
[[4 2]
 [0 2]]
              precision    recall  f1-score   support

      female      1.000     0.667     0.800         6
        male      0.500     1.000     0.667         2

    accuracy                          0.750         8
   macro avg      0.750     0.833     0.733         8
weighted avg      0.875     0.750     0.767         8


=== SUMMARY (3 seeds) ===
   seed  best_epoch   val_ac

