In [16]:
from pathlib import Path 

CSV_PATH   = Path("../data/Data_Entry_2017.csv").resolve()
IMAGE_DIR  = Path("../images-224").resolve()
OUTPUT_DIR = Path("../output").resolve()

print(f"CSV:    {CSV_PATH}")
print(f"Images: {IMAGE_DIR}")
print(f"Output: {OUTPUT_DIR}")


CSV:    /data/Data_Entry_2017.csv
Images: /images-224
Output: /output


In [None]:
import os
import pandas as pd

csv_path = r"C:\Users\albaa\Desktop\X-Ray-Chest\Data_Entry_2017.csv"
dataset = pd.read_csv(csv_path)


print(dataset.shape)
print(dataset.head())



FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\albaa\\Desktop\\X-Ray-Chest\\Data_Entry_2017.csv'

In [14]:
import os

# entra esplicitamente nella cartella src
os.chdir("C://Users//albaa//Desktop//X-Ray-Chest//src")

# verifica
print("CWD:", os.getcwd())
print("Files:", os.listdir("."))



FileNotFoundError: [Errno 2] No such file or directory: 'C://Users//albaa//Desktop//X-Ray-Chest//src'

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

from dataloader import create_dataloader
from dataset import Dataset
from model import MultiLabelResNet


ModuleNotFoundError: No module named 'dataloader'

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# CELLA 3 — PIPELINE DATI
# ═══════════════════════════════════════════════════════════════════════════════

dataset = Dataset(CSV_PATH)
dataset.load_data()
dataset.clean_column_names()
dataset.clean_labels(column_name="findinglabels")
dataset.one_hot_encode_labels(column="label")
dataset.select_relevant_columns()

train_df, val_df, test_df = dataset.patient_split(test_size=0.15, val_size=0.15)

label_columns = [c for c in dataset.data.columns if c not in ["image", "patientid"]]
num_classes = len(label_columns)

print(f"Train: {len(train_df)} | Val: {len(val_df)} | Test: {len(test_df)}")
print(f"Classi: {num_classes}")

train_loader, val_loader, test_loader = create_dataloader(
    train_df, val_df, test_df,
    image_dir=IMAGE_DIR,
    batch_size=32,
    num_workers=2
)

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# CELLA 4 — MODELLO
# ═══════════════════════════════════════════════════════════════════════════════

model = MultiLabelResNet(num_classes=num_classes, pretrained=True)
model = model.to(device)

print(f"Modello: ResNet50 → {num_classes} classi")
print(f"Parametri: {sum(p.numel() for p in model.parameters()):,}")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# CELLA 5 — TRAINING
# ═══════════════════════════════════════════════════════════════════════════════

NUM_EPOCHS = 10
LR = 1e-4

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

best_val_loss = float('inf')
best_state = None

for epoch in range(NUM_EPOCHS):
    # Train
    model.train()
    train_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1} [Train]", leave=False):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss /= len(train_loader)
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1} [Val]", leave=False):
            images, labels = images.to(device), labels.to(device)
            val_loss += criterion(model(images), labels).item()
    val_loss /= len(val_loader)
    
    # Best model
    marker = ""
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_state = model.state_dict().copy()
        marker = " ★"
    
    print(f"Epoch {epoch+1:02d} | Train: {train_loss:.4f} | Val: {val_loss:.4f}{marker}")

print(f"\nBest val loss: {best_val_loss:.4f}")

In [None]:
# ═══════════════════════════════════════════════════════════════════════════════
# CELLA 6 — SALVATAGGIO
# ═══════════════════════════════════════════════════════════════════════════════

os.makedirs(OUTPUT_DIR, exist_ok=True)

model_path = os.path.join(OUTPUT_DIR, "best_model.pth")
torch.save({
    'model_state_dict': best_state,
    'num_classes': num_classes,
    'label_columns': label_columns,
    'val_loss': best_val_loss
}, model_path)

print(f"Modello salvato: {model_path}")