<a href="https://colab.research.google.com/github/Dylan-Geraci/neuroimaging-tumor-detector/blob/main/notebooks/02_model_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Training

## Notebook Setup

Import Libraries

In [8]:
# --- Standard ---
import os, json, math, time
from collections import Counter

# --- Numerical / data ---
import numpy as np
import pandas as pd

# --- Imaging & plotting ---
from PIL import Image

# --- Torch / ML ---
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# --- Metrics ---
from sklearn.metrics import f1_score, accuracy_score

# --- Vision ---
from torchvision import transforms

# --- Pretrained models ---
import timm

Mount Google Drive and Set Training Data Path

In [2]:
# Mount Google Drive at /content/drive; the paths below live under its MyDrive folder.
# NOTE(review): `drive` is not imported in the visible import cell — presumably
# `from google.colab import drive` must run before this line; confirm, otherwise a
# fresh-kernel run raises NameError here.
drive.mount('/content/drive')
# Training-image directory on the mounted Drive (assigned again, with the same value,
# in a later cell).
TRAIN_PATH = "/content/drive/MyDrive/neuro-imaging/data/Training"

Mounted at /content/drive


In [6]:
# NOTE(review): USE_SAVED_SPLITS is not read in any visible cell, and the split-loading
# cell reads train.txt / val.txt from SPLITS_DIR unconditionally — confirm whether this
# flag is still needed or should be True.
USE_SAVED_SPLITS = False
# Directory that holds class_to_idx.json, train.txt and val.txt (assigned again, with
# the same value, in a later cell).
SPLITS_DIR = "/content/drive/MyDrive/neuro-imaging/splits"

## Paths and Constants

In [9]:
# --- Run configuration -------------------------------------------------------
SEED = 42          # default seed used by set_seed()
IMG_SIZE = 224     # presumably the square input resolution fed to the model — confirm
BATCH_SIZE = 32
EPOCHS = 10
PATIENCE = 2       # presumably early-stopping patience in epochs — confirm against the training loop
LR = 3e-4

# --- Locations on the mounted Google Drive -----------------------------------
OUT_DIR    = "/content/drive/MyDrive/neuro-imaging/models"
SPLITS_DIR = "/content/drive/MyDrive/neuro-imaging/splits"
TRAIN_PATH = "/content/drive/MyDrive/neuro-imaging/data/Training"

# Create the output directory up front; exist_ok keeps re-running this cell harmless.
os.makedirs(OUT_DIR, exist_ok=True)

## Reproducibility

In [10]:
def set_seed(seed=SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG, PyTorch's CPU
    generator and the generators of all CUDA devices with the same value, and
    configures cuDNN for deterministic behavior.

    Args:
        seed: Integer seed applied to all generators. Defaults to ``SEED``.
    """
    import random

    # Ask cuDNN for deterministic kernels and turn off its autotuner, which
    # could otherwise pick different algorithms from run to run.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # Apply the same seed to each library's generator, in a fixed order.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Fix all RNG seeds before any model or data-loader object is created.
set_seed(SEED)

# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

Device: cpu


## Load Splits and Class Maps

In [11]:
def _read_paths(txt_path):
    with open(txt_path, "r") as f:
        return [ln.strip() for ln in f if ln.strip()]

# Class-name -> integer-label mapping stored alongside the split files.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the locale's default encoding.
with open(os.path.join(SPLITS_DIR, "class_to_idx.json"), "r", encoding="utf-8") as f:
    class_to_idx = json.load(f)
# Inverse lookup: integer label -> class name.
idx_to_class = {v:k for k,v in class_to_idx.items()}
num_classes = len(class_to_idx)
# Listing via range(num_classes) assumes the labels are exactly 0..num_classes-1;
# a gap in the mapping would raise KeyError here.
print("Classes:", [idx_to_class[i] for i in range(num_classes)])

# Each split file is read as a list of its non-blank, stripped lines.
train_paths = _read_paths(os.path.join(SPLITS_DIR, "train.txt"))
val_paths   = _read_paths(os.path.join(SPLITS_DIR, "val.txt"))
print(f"Loaded {len(train_paths)} train, {len(val_paths)} val files")


Classes: ['glioma', 'meningioma', 'notumor', 'pituitary']
Loaded 4855 train, 857 val files
