In [None]:
import os
import sys
from pathlib import Path

# Environment bootstrap: locate the repository's `src` directory, make it
# importable, and switch the working directory to the project root.

# FOR LOCAL RUNS, USE THESE LINES (and comment out the Colab section below)
# current = Path.cwd()
# src_path = current / "src" if (current / "src").exists() else current.parent

# FOR COLAB, USE THESE LINES INSTEAD
BRANCH_NAME = "classification_resnet_refactoring"  # Change this to switch branches
# Clone the selected branch, then pull so an already-existing checkout is refreshed.
# NOTE(review): the hard-coded /content/... paths assume the kernel's working
# directory is /content when the clone runs (Colab default) — confirm.
!git clone -b {BRANCH_NAME} https://github.com/MatteoCamillo-code/GeoLoc-CVCS.git
!cd /content/GeoLoc-CVCS && git pull origin {BRANCH_NAME} && cd ..
src_path = Path("/content/GeoLoc-CVCS/src").resolve()

# `src` must be on sys.path before the project import below can resolve.
sys.path.insert(0, str(src_path))

from utils.paths import find_project_root

# Set working directory and sys.path properly
project_root = find_project_root(src_path)
data_dir = project_root / "data"
history_dir = project_root / "outputs" / "history"
os.chdir(project_root)
# NOTE(review): the next cell's recorded output is
# "ModuleNotFoundError: No module named 'classifier_model'", so the directory
# inserted here presumably does not contain that module directly — verify
# where classifier_model / data_utils / merger_utils live on this branch.
sys.path.insert(0, str(project_root / "src"))
print("CWD:", Path.cwd())

In [2]:
import glob
import os
import time

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

from classifier_model import SceneClassifier
from data_utils import ProjectPaths, SceneDataset, collate_fn
from merger_utils import update_original_file

# Initialize paths and model.
# `paths` is read by later cells for base_image_path and the original
# train/test CSV locations; `classifier` provides the model, device,
# preprocessing transform, and label helpers used during inference.
paths = ProjectPaths()
classifier = SceneClassifier()

ModuleNotFoundError: No module named 'classifier_model'

In [None]:
# Configuration
batch_size = 64
num_workers = 2

# Recursively collect every image below paths.base_image_path. Results are
# grouped by extension in the order jpg, jpeg, png.
image_files = [
    image_path
    for extension in ("jpg", "jpeg", "png")
    for image_path in glob.glob(
        os.path.join(paths.base_image_path, "**", f"*.{extension}"),
        recursive=True,
    )
]

print(f"Found {len(image_files)} images.")

dataset = SceneDataset(image_files, transform=classifier.transform)
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=True,
    collate_fn=collate_fn,
    shuffle=False,
)

# One record per classified image: file stem, parent-directory name (used as
# the true label), and the predicted label string.
results = []
start_time = time.time()

with torch.inference_mode():
    for imgs, paths_list in tqdm(dataloader):
        # Skip batches that carry no image data.
        # NOTE(review): presumably collate_fn yields an empty tensor when no
        # image in the batch could be loaded — confirm in data_utils.
        if imgs.nelement() == 0:
            continue

        imgs = imgs.to(classifier.device)
        logits = classifier.model(imgs)
        probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy()

        # Project the class probabilities through the hierarchy matrix and
        # pick the highest-scoring column per image. Requires numpy as `np`,
        # which is imported in the notebook's import cell.
        places_probs = np.matmul(probs, classifier.hierarchy_places3)
        preds = np.argmax(places_probs, axis=1)

        for path, pred_idx in zip(paths_list, preds):
            results.append({
                'filename': os.path.splitext(os.path.basename(path))[0],
                'true_label': os.path.basename(os.path.dirname(path)),
                'predicted_label': classifier.label_int_to_str(pred_idx)
            })

df_results = pd.DataFrame(results)
print(f"Process completed in {time.time() - start_time:.2f}s")

In [None]:
# 1. Update CSV files with the predictions collected in df_results.
# NOTE(review): presumably update_original_file merges df_results into the
# given original CSV and writes the named output file — verify in merger_utils.
update_original_file(df_results, paths.original_train_csv, "train_with_predictions.csv")
update_original_file(df_results, paths.original_test_csv, "test_with_predictions.csv")

# 2. Example: Classify one image and get its label back.
# Guarded so an empty image search does not raise IndexError on image_files[0].
if image_files:
    sample_image = image_files[0]
    label = classifier.classify_one_image(sample_image)
    print(f"Single Image Classification: {sample_image} -> Label: {label}")
else:
    print("No images found; skipping the single-image classification example.")