## SETUP

In [1]:
# MUST be the first cell: choose the multiprocessing start method (needed for
# Windows) before anything else creates worker processes.
import torch.multiprocessing as mp

# force=True replaces a start method that was already selected instead of
# raising RuntimeError("context has already been set"), so no exception
# handler is required around this call.
mp.set_start_method("spawn", force=True)

In [2]:
import os
import sys
from pathlib import Path

# LOCAL RUN: use these lines.
# Prefer ./src when it exists under the current directory; otherwise fall back
# to the parent of the current directory.
current = Path.cwd()
if (current / "src").exists():
    src_path = current / "src"
else:
    src_path = current.parent

# COLAB RUN: use these lines instead.
# BRANCH_NAME = "main"  # Change this to switch branches
# !git clone -b {BRANCH_NAME} https://github.com/MatteoCamillo-code/GeoLoc-CVCS.git
# !cd /content/GeoLoc-CVCS && git pull origin {BRANCH_NAME} && cd ..
# src_path = Path("/content/GeoLoc-CVCS/src").resolve()

# Make the project packages importable before the first project import below.
sys.path.insert(0, str(src_path))

from utils.paths import find_project_root

# Resolve the project root, derive the data/output folders from it, then make
# it the working directory and put <root>/src at the front of sys.path.
project_root = find_project_root(src_path)
data_dir = project_root / "data"
history_dir = project_root / "outputs" / "history"
checkpoint_dir = project_root / "outputs" / "checkpoints"
os.chdir(project_root)
sys.path.insert(0, str(project_root / "src"))
print(f"CWD: {Path.cwd()}")

CWD: F:\InfTech\Prodotti\Python\GeoLocGit\GeoLoc-CVCS


In [3]:
import pandas as pd
import torch
import torch.nn as nn
from torch.optim.lr_scheduler import StepLR

from configs.baseline_multi_head import TrainConfig

cfg = TrainConfig()

# Import only what the configured backbone needs; any other backbone value
# imports nothing here.
if cfg.backbone == "inceptionv4":
    import timm
elif cfg.backbone == "resnet50":
    from torchvision.models import resnet50, ResNet50_Weights

from utils.seed import seed_everything
from utils.paths import get_current_version

from models.multi_head_classifier import MultiHeadClassifier


In [4]:
# Fresh config instance; seed the run from cfg.seed.
cfg = TrainConfig()
seed_everything(cfg.seed)

# Use the configured device only when CUDA is available; otherwise run on CPU.
if torch.cuda.is_available():
    device = cfg.device
else:
    device = "cpu"
print(f"Device: {device}")


Device: cuda


## DATA LOADER

In [5]:
import kagglehub

# Fetch the dataset through kagglehub and point at its "osv5m" sub-folder.
# os.path.join keeps the results as plain strings while using the platform's
# separator (the previous "+ '/...'" concatenation mixed "\" and "/" on Windows).
path = kagglehub.dataset_download("josht000/osv-mini-129k")
path = os.path.join(path, "osv5m")
print("Path to dataset files:", path)

# Folder passed to the dataloaders as the image root.
image_root = os.path.join(path, "train_images")

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: C:\Users\camil\.cache\kagglehub\datasets\josht000\osv-mini-129k\versions\1/osv5m


In [6]:
# Locations of the S2 geo-cell metadata tables.
train_val_path = data_dir / "metadata/s2-geo-cells/train_val_split_geocells.csv"
cell_centers_path = data_dir / "metadata/s2-geo-cells/cell_center_dataset.csv"
cells_hierarchy_path = data_dir / "metadata/s2-geo-cells/cell_hierarchy_dataset.csv"

train_val_meta = pd.read_csv(train_val_path)
cell_centers_df = pd.read_csv(cell_centers_path)
cells_hierarchy_df = pd.read_csv(cells_hierarchy_path)

# Resolve the cell-id column BEFORE using it: 'cell_id_token' when present,
# otherwise the first column.  (Previously the duplicates were dropped on
# 'cell_id_token' unconditionally, so the fallback could never be reached:
# a missing column raised KeyError first.)
if 'cell_id_token' in cell_centers_df.columns:
    cell_id_col = 'cell_id_token'
else:
    cell_id_col = cell_centers_df.columns[0]

# Keep one row per cell id and index the table by that id.
cell_centers_df = (
    cell_centers_df
    .drop_duplicates(subset=cell_id_col)
    .set_index(cell_id_col)
)

print("Train/val CSV:", train_val_path)
print("Cell centers CSV:", cell_centers_path)
print("Cells hierarchy CSV:", cells_hierarchy_path)

Train/val CSV: F:\InfTech\Prodotti\Python\GeoLocGit\GeoLoc-CVCS\data\metadata\s2-geo-cells\train_val_split_geocells.csv
Cell centers CSV: F:\InfTech\Prodotti\Python\GeoLocGit\GeoLoc-CVCS\data\metadata\s2-geo-cells\cell_center_dataset.csv
Cells hierarchy CSV: F:\InfTech\Prodotti\Python\GeoLocGit\GeoLoc-CVCS\data\metadata\s2-geo-cells\cell_hierarchy_dataset.csv


In [7]:
from dataset.dataloader_utils import create_dataloaders

IMG_SIZE = 224

# Create all dataloaders with a single function call.  The result is indexed by
# scene name; later cells read the "val_loader" and "label_maps" entries of
# each scene.
loader_dict = create_dataloaders(
    image_root=image_root,
    csv_path=train_val_path,
    batch_size=cfg.batch_size,
    num_workers=cfg.num_workers,
    img_size=IMG_SIZE,
    seed=cfg.seed,
    train_subset_pct=cfg.train_size_pct,
    val_subset_pct=cfg.val_size_pct,
    scenes=cfg.scenes,
    augment=True,
    prefetch_factor=4,
    # Persistent workers are only requested when worker processes are used.
    persistent_workers=cfg.num_workers > 0,
    coarse_label_idx=cfg.coarse_label_idx,
)

## LOAD MODEL

In [8]:
# LOAD MODEL FROM CHECKPOINT

from src.utils.checkpointing import load_checkpoint


# The checkpoint file name is built from the model name, the FIRST configured
# scene and the version returned by get_current_version.
scene = cfg.scenes[0]
base_name = f"{cfg.model_name}_{scene}"
version = get_current_version(history_dir, base_name)
ckpt_path = checkpoint_dir / f"{base_name}_v{version}.pt"
print("Checkpoint:", ckpt_path)

# Build the backbone selected in the config.  An unknown value now fails here
# with a clear message instead of later with an AttributeError on None.
if cfg.backbone == "inceptionv4":
    backbone = timm.create_model('inception_v4', pretrained=True)
    FEAT_DIM = 1536  # inception_v4 feature dimension
elif cfg.backbone == "resnet50":
    weights = ResNet50_Weights.IMAGENET1K_V2
    backbone = resnet50(weights=weights)
    FEAT_DIM = 2048  # resnet50 feature dimension
else:
    raise ValueError(
        f"Unsupported backbone {cfg.backbone!r}; expected 'inceptionv4' or 'resnet50'"
    )

# Drop the last child module (presumably the classifier layer - confirm per
# backbone) and flatten whatever remains to (batch, features).
backbone = nn.Sequential(*list(backbone.children())[:-1], nn.Flatten(1))

# One head per configured label level: head size = number of entries in the
# matching "label_config_<idx + 1>" label map of the selected scene.
scene_label_maps = loader_dict[scene]["label_maps"]
num_classes = [
    len(scene_label_maps[f"label_config_{idx + 1}"])
    for idx in cfg.coarse_label_idx
]
model = MultiHeadClassifier(
    backbone=backbone,
    feat_dim=FEAT_DIM,
    head_dims=num_classes,
    dropout=cfg.dropout,
    coarse_level_idx=cfg.coarse_label_idx,
).to(device)

# Load the checkpoint into the model and switch to evaluation mode.
ckpt = load_checkpoint(str(ckpt_path), model, map_location=device)
model.eval()
print("Loaded epoch:", ckpt.get("epoch"))

Checkpoint: F:\InfTech\Prodotti\Python\GeoLocGit\GeoLoc-CVCS\outputs\checkpoints\baseline_multi_head_total_v0.pt
Loaded epoch: 28


In [9]:
from src.training.losses import CrossEntropyWithLabelSmoothing

# Loss handed to the evaluation loop: smoothing strength comes from the config,
# and -1 is passed as the ignore index.
criterion = CrossEntropyWithLabelSmoothing(
    smoothing=cfg.label_smoothing,
    ignore_index=-1,
)

In [10]:
# start inference code here
from pprint import pprint

from src.training.engine import evaluate

# NOTE(review): the saved output of this cell is
# "TypeError: unhashable type: 'numpy.ndarray'".  The failing line is not
# visible in this notebook - inspect `evaluate` with the full traceback.
# NOTE(review): `model` was built and loaded for cfg.scenes[0] only, yet every
# scene is evaluated with it - confirm that this is intended.
evaluations = {}
for scene in cfg.scenes:
    evaluation = evaluate(
        model,
        loader_dict[scene]["val_loader"],
        cell_centers_df,
        cells_hierarchy_df,
        loader_dict[scene]["label_maps"],
        criterion,
        # Use the resolved `device` (CPU fallback applied) - the same one the
        # model and checkpoint were placed on - rather than raw cfg.device.
        device,
        gps_method=cfg.gps_method,
        amp=cfg.amp,
        use_tqdm=cfg.use_tqdm,
    )
    evaluations[scene] = evaluation

print("Evaluations:")
pprint(evaluations, width=120, sort_dicts=False)

                                            

TypeError: unhashable type: 'numpy.ndarray'