In [28]:
# Make packages installed under ./local_lib importable, ahead of any
# same-named packages found elsewhere on sys.path.
import sys
import os

# Resolved against the working directory at the moment this cell runs.
LOCAL_LIB_DIR = os.path.join(os.getcwd(), 'local_lib')

# Drop earlier copies before re-inserting: re-running the cell keeps the entry
# at the front of sys.path without adding one more duplicate per execution.
sys.path[:] = [entry for entry in sys.path if entry != LOCAL_LIB_DIR]
sys.path.insert(0, LOCAL_LIB_DIR)

In [29]:
# Cell 1 - Imports, environment setup and choose experiment
#
# Defines the notebook-wide names used by later cells:
#   IN_COLAB, PROJECT_ROOT, device, EXP_NAME, cfg
# and imports the project helpers from the local "includes" package.

import copy
import os
import sys

import torch
from sklearn.metrics import classification_report

# --------- detect if running on Colab ---------
try:
    import google.colab  # type: ignore
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    from google.colab import drive  # type: ignore
    drive.mount("/content/drive")

    # Path to your project on Google Drive
    PROJECT_ROOT = "/content/drive/MyDrive/[2025-2026] AN2DL/challenge2"
else:
    # Local project root (adjust if needed)
    PROJECT_ROOT = os.getcwd()  # or a fixed path

# Change working directory to project root and make sure we can import "includes"
os.chdir(PROJECT_ROOT)
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

print("Project root:", PROJECT_ROOT)
print("Working dir:", os.getcwd())
# This checks the working directory listing (not sys.path), so the label says so.
print("Working dir contains 'includes'? ->", "includes" in os.listdir("."))

# --------- now we can import from includes ---------
from includes.config import EXPERIMENTS
from includes.data_utils import (
    load_labels_and_split,
    get_transforms,
    create_dataloaders,
)
from includes.model_utils import (
    build_model,
    create_criterion_optimizer_scheduler,
    train_model,
    evaluate,
)
from includes.inference_utils import (
    create_test_loader,
    run_inference_and_save,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# --------- choose which experiment to run ---------
# available:
#   "baseline"
#   "resnet50_big"
#   "resnet50_big_masks"
#   "challenge_2-2"
EXP_NAME = "resnet50_big_masks"

# Work on a copy so the overrides below (and any later tweaks to `cfg`) do not
# mutate the shared entry in EXPERIMENTS; re-running this cell then starts
# again from the registered settings instead of from leftover kernel state.
cfg = copy.copy(EXPERIMENTS[EXP_NAME])


# Override project_root and out_dir according to environment
cfg.project_root = PROJECT_ROOT

# Save submissions to:
#   local:  <PROJECT_ROOT>/out
#   Colab:  /content/drive/.../challenge2/out
cfg.out_dir = "out"

# Last expression: display the effective configuration.
cfg


Project root: c:\Users\danie\ANN
Working dir: c:\Users\danie\ANN
Python path contains 'includes'? -> True
Using device: cpu


TrainingConfig(exp_name='resnet50_big_masks', project_root='c:\\Users\\danie\\ANN', data_dir='data', out_dir='out', train_img_dir='train_data', test_img_dir='test_data', labels_csv='train_labels.csv', mask_dir=None, backbone='resnet50', img_size=384, batch_size=16, num_workers=4, val_size=0.2, random_seed=42, mask_mode='crop_bbox', lr=0.0001, weight_decay=0.0001, epochs=50, use_scheduler=True, use_masks=True)

In [30]:
# Cell 2 - Dataframes and loaders
#
# Reads the labels and splits them into train/validation frames, then builds
# the transforms and the two data loaders that the training cell consumes.

label_split = load_labels_and_split(cfg)
train_df, val_df, unique_labels, label_to_idx, idx_to_label = label_split
print("Labels:", unique_labels)

# One transform per split; each is handed to the loader of the same split.
transforms_pair = get_transforms(cfg)
train_t, val_t = transforms_pair

loaders_pair = create_dataloaders(cfg, train_df, val_df, train_t, val_t)
train_loader, val_loader = loaders_pair


Labels: ['HER2(+)', 'Luminal A', 'Luminal B', 'Triple negative']


In [31]:
# Cell 3 - Model, criterion, optimizer, scheduler
#
# The classifier head is sized from the number of distinct labels found in
# Cell 2; the training objects are then created for that model.

n_classes = len(unique_labels)
model = build_model(cfg, num_classes=n_classes, device=device)

training_objects = create_criterion_optimizer_scheduler(cfg, model, train_df, device)
criterion, optimizer, scheduler = training_objects


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\danie/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth


100.0%
100.0%


In [7]:
# Cell 4 - Training
#
# Trains the model, restores the best weights returned by train_model, and
# prints a final validation report.

best_state, history = train_model(
    cfg, model, train_loader, val_loader, criterion, optimizer, scheduler, device
)
# Evaluate the best checkpoint rather than the weights from the last epoch.
model.load_state_dict(best_state)

# Final report on validation
# NOTE(review): the second value returned by evaluate() appears to be macro F1,
# not accuracy. In the recorded run it equals the best validation F1 (0.3104)
# while the classification report shows accuracy 0.36. Confirm against
# includes.model_utils.evaluate.
val_loss, val_f1, y_true, y_pred = evaluate(model, val_loader, criterion, device)
val_acc = val_f1  # legacy alias for cells that still read `val_acc`; the value is F1
print("Val F1 (macro):", val_f1)
print(classification_report(y_true, y_pred, target_names=unique_labels))


Starting training - experiment: test
Backbone: resnet18  |  img_size: 224  |  epochs: 1

--------------------------------------------------------------
[Epoch 1/1]
    [Batch  71/71] loss=1.3981  f1=0.3082
  Train - loss: 1.3981  |  f1: 0.3082
  Val   - loss: 1.3969  |  f1: 0.3104
  >> New best model! val_f1 improved to 0.3104

Training finished for experiment: test
Best validation F1 (macro): 0.3104
Val acc: 0.3104267413794574
                 precision    recall  f1-score   support

        HER2(+)       0.46      0.24      0.31        80
      Luminal A       0.61      0.23      0.33        83
      Luminal B       0.36      0.66      0.46        89
Triple negative       0.11      0.16      0.13        31

       accuracy                           0.36       283
      macro avg       0.39      0.32      0.31       283
   weighted avg       0.43      0.36      0.35       283



In [None]:
# Cell 5 - Inference and submission
#
# The test loader is built with the validation transform (val_t), then the
# baseline `model` from the training cell is run on it.

test_loader_and_files = create_test_loader(cfg, val_t)
test_loader, test_files = test_loader_and_files

run_inference_and_save(
    cfg,
    model,
    test_loader,
    idx_to_label,
    device,
)

In [32]:
# Optional: Run augmented training experiment
#
# Reuses cfg, the train/val dataframes, the label mappings and val_t from the
# earlier cells; only the augmentation and run options below are specific to
# this experiment.
from includes.augmentation_pipeline import run_augmented_experiment

# Tunable options forwarded as keyword arguments to run_augmented_experiment
AUG_PARAMS = dict(
    flip_p=0.5,                # horizontal flip probability
    rotation_degrees=15,       # max rotation angle
    use_color_jitter=True,     # enable color jitter
    random_erasing_p=0.2,      # random erasing probability
    num_workers=0,             # 0 for Windows notebooks
    save_submission=True,      # save submission CSV
    submission_name="submission_augmented.csv",
)

# Run the complete augmented pipeline; returns the model and its history
augmented_run = run_augmented_experiment(
    cfg=cfg,
    train_df=train_df,
    val_df=val_df,
    unique_labels=unique_labels,
    idx_to_label=idx_to_label,
    device=device,
    val_t=val_t,
    **AUG_PARAMS,
)
aug_model, aug_history = augmented_run


STEP 1: Loading images into memory
Loading 1129 images...
  Loaded 100/1129 images
  Loaded 100/1129 images
  Loaded 200/1129 images
  Loaded 200/1129 images
  Loaded 300/1129 images
  Loaded 300/1129 images
  Loaded 400/1129 images
  Loaded 400/1129 images
  Loaded 500/1129 images
  Loaded 500/1129 images
  Loaded 600/1129 images
  Loaded 600/1129 images
  Loaded 700/1129 images
  Loaded 700/1129 images
  Loaded 800/1129 images
  Loaded 800/1129 images
  Loaded 900/1129 images
  Loaded 900/1129 images
  Loaded 1000/1129 images
  Loaded 1000/1129 images
  Loaded 1100/1129 images
  Loaded 1100/1129 images
  Loaded 1129/1129 images
  Loaded 1129/1129 images
Loaded 1129 images with shape (1129, 384, 384, 3)
Loading 283 images...
Loaded 1129 images with shape (1129, 384, 384, 3)
Loading 283 images...
  Loaded 100/283 images
  Loaded 100/283 images
  Loaded 200/283 images
  Loaded 200/283 images
  Loaded 283/283 images
Loaded 283 images with shape (283, 384, 384, 3)

Train set: (1129, 384,

KeyboardInterrupt: 

In [None]:
# Cell 5b - Inference and submission (augmented model)
#
# Requires the "Optional: Run augmented training experiment" cell to have
# completed, since that cell defines both `aug_model` and `AUG_PARAMS`.
# NOTE(review): AUG_PARAMS also sets save_submission=True, so
# run_augmented_experiment may already write this file - confirm whether this
# cell is still needed.

test_loader, test_files = create_test_loader(cfg, val_t)
run_inference_and_save(
    cfg,
    aug_model,
    test_loader,
    idx_to_label,
    device,
    # Single source of truth for the filename, so it cannot drift from the
    # name configured for the augmented experiment.
    output_csv=AUG_PARAMS["submission_name"],
)