In [2]:
# ============================================================
# CELL X: NumPy compatibility patch for MAPS
# ============================================================
# MAPS uses np.Inf, which was removed in NumPy >= 2.0
# This patch restores it safely.

import numpy as np

if not hasattr(np, "Inf"):
    np.Inf = np.inf
    print("ü©π Patched np.Inf -> np.inf for MAPS compatibility")
else:
    print("‚úÖ np.Inf already exists")


ü©π Patched np.Inf -> np.inf for MAPS compatibility


In [3]:
# ============================================================
# CELL 0: Clone original MAPS repository
# ============================================================

MAPS_GITHUB_URL = "https://github.com/mahmoodlab/MAPS.git"
MAPS_DIR = "/content/MAPS"

# Clone only if not already present
if not os.path.exists(MAPS_DIR):
    !git clone {MAPS_GITHUB_URL} {MAPS_DIR}
else:
    print("üì¶ MAPS repo already exists")

# Add MAPS to Python path
import sys
if MAPS_DIR not in sys.path:
    sys.path.insert(0, MAPS_DIR)

print("‚úÖ MAPS repo ready and added to sys.path")


Cloning into '/content/MAPS'...
remote: Enumerating objects: 299, done.[K
remote: Counting objects: 100% (67/67), done.[K
remote: Compressing objects: 100% (45/45), done.[K
remote: Total 299 (delta 19), reused 43 (delta 7), pack-reused 232 (from 1)[K
Receiving objects: 100% (299/299), 157.54 MiB | 35.17 MiB/s, done.
Resolving deltas: 100% (95/95), done.
Updating files: 100% (137/137), done.
‚úÖ MAPS repo ready and added to sys.path


In [1]:
# ============================================================
# CELL 1: Environment Setup
# ============================================================
# Purpose:
# - Mount Google Drive
# - Define project paths
# - Import core libraries
# - Sanity-check environment
#
# NOTE:
# - Do NOT do any pip installs in later cells.
# - If you install dependencies here, expect ONE runtime restart.
# ============================================================

# ---------- 1. Mount Google Drive ----------
from google.colab import drive
drive.mount('/content/drive')

# ---------- 2. Define project root ----------
# Adjust this path ONLY if your repo lives elsewhere
PROJECT_ROOT = "/content/drive/MyDrive/MAPS-update-main"

import os
assert os.path.exists(PROJECT_ROOT), (
    f"‚ùå Project root not found at {PROJECT_ROOT}\n"
    "Check the path or move the repo into Drive."
)

print(f"‚úÖ Project root found: {PROJECT_ROOT}")

# ---------- 3. Define data paths ----------
DATA_PROCESSED = os.path.join(PROJECT_ROOT, "cHL_CODEX_processed")
DATA_SPATIAL   = os.path.join(PROJECT_ROOT, "cHL_CODEX_spatial_features")

assert os.path.exists(DATA_PROCESSED), "‚ùå cHL_CODEX_processed not found"
assert os.path.exists(DATA_SPATIAL),   "‚ùå cHL_CODEX_spatial_features not found"

print("‚úÖ Data directories found")

# ---------- 4. Core imports ----------
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# ---------- 5. Torch check (for later MAPS training) ----------
import torch

print("‚úÖ Core imports successful")
print(f"üñ•Ô∏è CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"üñ•Ô∏è GPU: {torch.cuda.get_device_name(0)}")

# ---------- 6. Global random seed (reproducibility) ----------
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

print("üîí Random seed set")
print("üöÄ Cell 1 complete")


Mounted at /content/drive
‚úÖ Project root found: /content/drive/MyDrive/MAPS-update-main
‚úÖ Data directories found
‚úÖ Core imports successful
üñ•Ô∏è CUDA available: True
üñ•Ô∏è GPU: Tesla T4
üîí Random seed set
üöÄ Cell 1 complete


In [5]:
# ============================================================
# CELL 2: Load datasets and sanity checks
# ============================================================
# Purpose:
# - Load processed spatial CSVs
# - Verify shapes and columns
# - Confirm required fields exist before KNN
# ============================================================

# ---------- 1. File paths ----------
TRAIN_PATH = os.path.join(DATA_SPATIAL, "train.csv")
VALID_PATH = os.path.join(DATA_SPATIAL, "valid.csv")
CLASS_NAMES_PATH = os.path.join(DATA_SPATIAL, "class_names.csv")

for p in [TRAIN_PATH, VALID_PATH, CLASS_NAMES_PATH]:
    assert os.path.exists(p), f"‚ùå Missing file: {p}"

print("‚úÖ All CSV files found")

# ---------- 2. Load CSVs ----------
train_df = pd.read_csv(TRAIN_PATH)
valid_df = pd.read_csv(VALID_PATH)
class_names = pd.read_csv(CLASS_NAMES_PATH)

print(f"üìä Train shape: {train_df.shape}")
print(f"üìä Valid shape: {valid_df.shape}")
print(f"üìä Num classes: {len(class_names)}")

# ---------- 3. Required columns ----------
REQUIRED_COLS = [
    "X_cent",
    "Y_cent",
    "cell_label",
    "cellSize"
]

missing_train = [c for c in REQUIRED_COLS if c not in train_df.columns]
missing_valid = [c for c in REQUIRED_COLS if c not in valid_df.columns]

assert len(missing_train) == 0, f"‚ùå Train missing columns: {missing_train}"
assert len(missing_valid) == 0, f"‚ùå Valid missing columns: {missing_valid}"

print("‚úÖ Required columns present")

# ---------- 4. Identify marker columns ----------
# Marker columns = everything except labels and spatial/meta fields
EXCLUDE_COLS = set([
    "X_cent",
    "Y_cent",
    "cell_label",
    "cellSize",
    "local_density",
    "distance_to_center"
])

marker_cols = [c for c in train_df.columns if c not in EXCLUDE_COLS]

print(f"üß¨ Number of marker columns: {len(marker_cols)}")
print("üß¨ Example markers:", marker_cols[:10])

# ---------- 5. Label sanity check ----------
num_unique_labels = train_df["cell_label"].nunique()
assert num_unique_labels == len(class_names), (
    f"‚ùå Label mismatch: train has {num_unique_labels}, "
    f"class_names has {len(class_names)}"
)

print("üè∑Ô∏è Label sanity check passed")

# ---------- 6. Coordinate sanity check ----------
print("üìç Coordinate ranges (train):")
print("  X_cent:", train_df["X_cent"].min(), "‚Üí", train_df["X_cent"].max())
print("  Y_cent:", train_df["Y_cent"].min(), "‚Üí", train_df["Y_cent"].max())

# ---------- 7. Final confirmation ----------
print("üöÄ Cell 2 complete: data loaded and validated")


‚úÖ All CSV files found
üìä Train shape: (114984, 55)
üìä Valid shape: (28746, 55)
üìä Num classes: 16
‚úÖ Required columns present
üß¨ Number of marker columns: 49
üß¨ Example markers: ['BCL.2', 'CCR6', 'CD11b', 'CD11c', 'CD15', 'CD16', 'CD162', 'CD163', 'CD2', 'CD20']
üè∑Ô∏è Label sanity check passed
üìç Coordinate ranges (train):
  X_cent: 3 ‚Üí 8082
  Y_cent: 2 ‚Üí 8006
üöÄ Cell 2 complete: data loaded and validated


In [6]:
# ============================================================
# CELL 3: Define feature groups
# ============================================================
# Purpose:
# - Explicitly define which features are used where
# - Freeze marker lists for reproducibility
# ============================================================

# ---------- 1. Core columns ----------
SPATIAL_COORDS = ["X_cent", "Y_cent"]
LABEL_COL = "cell_label"
META_COLS = ["cellSize"]

print("üìå Spatial coords:", SPATIAL_COORDS)
print("üìå Label column:", LABEL_COL)
print("üìå Meta columns:", META_COLS)

# ---------- 2. Marker columns (protein expression) ----------
# Exclude spatial, label, and engineered spatial features
EXCLUDE_COLS = set(SPATIAL_COORDS + META_COLS + [
    LABEL_COL,
    "local_density",
    "distance_to_center"
])

marker_cols = [c for c in train_df.columns if c not in EXCLUDE_COLS]

print(f"üß¨ Total protein markers available: {len(marker_cols)}")

# ---------- 3. Select markers for KNN neighborhood summaries ----------
# We intentionally keep this small and biologically meaningful

KNN_MARKERS = [
    "CD4",     # Helper T cells
    "CD8",     # Cytotoxic T cells
    "CD20",    # B cells
    "CD68",    # Macrophages
    "CD163"    # M2 macrophages
]

# Verify all selected markers exist in the dataset
missing_knn_markers = [m for m in KNN_MARKERS if m not in marker_cols]
assert len(missing_knn_markers) == 0, (
    f"‚ùå Missing KNN markers: {missing_knn_markers}"
)

print("üß¨ Markers used for KNN summaries:", KNN_MARKERS)

# ---------- 4. Feature accounting ----------
BASE_FEATURES = marker_cols + META_COLS + [
    "local_density",
    "distance_to_center"
]

print(f"üìê Baseline feature count (before KNN): {len(BASE_FEATURES)}")

# Planned KNN features
PLANNED_KNN_FEATURES = (
    ["knn_mean_dist", "knn_std_dist"] +
    [f"knn_mean_{m}" for m in KNN_MARKERS]
)

print("üß© Planned KNN features:")
for f in PLANNED_KNN_FEATURES:
    print("  -", f)

print(f"üìê KNN feature count: {len(PLANNED_KNN_FEATURES)}")
print(f"üìê Total features after KNN: {len(BASE_FEATURES) + len(PLANNED_KNN_FEATURES)}")

# ---------- 5. Final confirmation ----------
print("üöÄ Cell 3 complete: feature groups defined")


üìå Spatial coords: ['X_cent', 'Y_cent']
üìå Label column: cell_label
üìå Meta columns: ['cellSize']
üß¨ Total protein markers available: 49
üß¨ Markers used for KNN summaries: ['CD4', 'CD8', 'CD20', 'CD68', 'CD163']
üìê Baseline feature count (before KNN): 52
üß© Planned KNN features:
  - knn_mean_dist
  - knn_std_dist
  - knn_mean_CD4
  - knn_mean_CD8
  - knn_mean_CD20
  - knn_mean_CD68
  - knn_mean_CD163
üìê KNN feature count: 7
üìê Total features after KNN: 59
üöÄ Cell 3 complete: feature groups defined


In [7]:
# ============================================================
# CELL 4: Fit spatial KNN on TRAIN set
# ============================================================
# Purpose:
# - Build spatial neighborhoods using (X_cent, Y_cent)
# - TRAIN set only (no leakage)
# - Inspect distances for sanity before feature computation
# ============================================================

# ---------- 1. KNN parameters ----------
K = 5  # start simple, do NOT tune yet
print(f"üî¢ Using K = {K} spatial neighbors")

# ---------- 2. Extract spatial coordinates (TRAIN) ----------
train_coords = train_df[SPATIAL_COORDS].values

print("üìç Train coordinates shape:", train_coords.shape)

# ---------- 3. Fit KNN ----------
# n_neighbors = K + 1 because the closest neighbor is the cell itself
knn = NearestNeighbors(
    n_neighbors=K + 1,
    metric="euclidean",
    algorithm="ball_tree"
)

knn.fit(train_coords)

# ---------- 4. Query neighbors ----------
distances, indices = knn.kneighbors(train_coords)

print("üìê Raw distance matrix shape:", distances.shape)
print("üìê Raw index matrix shape:", indices.shape)

# ---------- 5. Drop self-neighbor ----------
# distances[:, 0] == 0 (distance to itself)
train_knn_distances = distances[:, 1:]
train_knn_indices = indices[:, 1:]

print("üìê KNN distances (no self):", train_knn_distances.shape)
print("üìê KNN indices (no self):", train_knn_indices.shape)

# ---------- 6. Sanity checks ----------
# Check that no self-indices remain
assert not np.any(train_knn_indices == np.arange(len(train_df))[:, None]), \
    "‚ùå Self-neighbors still present"

# Check distance statistics
print("üìä Distance statistics (TRAIN):")
print("  Mean:", np.mean(train_knn_distances))
print("  Std :", np.std(train_knn_distances))
print("  Min :", np.min(train_knn_distances))
print("  Max :", np.max(train_knn_distances))

# ---------- 7. Inspect a few neighborhoods ----------
for i in [0, 1, 2]:
    print(f"\nüîç Cell {i} neighbors:")
    print("  Indices :", train_knn_indices[i])
    print("  Distances:", np.round(train_knn_distances[i], 2))

# ---------- 8. Final confirmation ----------
print("üöÄ Cell 4 complete: spatial KNN fitted on TRAIN set")


üî¢ Using K = 5 spatial neighbors
üìç Train coordinates shape: (114984, 2)
üìê Raw distance matrix shape: (114984, 6)
üìê Raw index matrix shape: (114984, 6)
üìê KNN distances (no self): (114984, 5)
üìê KNN indices (no self): (114984, 5)
üìä Distance statistics (TRAIN):
  Mean: 22.714084337068496
  Std : 7.63694344975816
  Min : 5.0
  Max : 297.6390431378249

üîç Cell 0 neighbors:
  Indices : [176 213 466 663 451]
  Distances: [18.97 23.09 26.63 32.06 35.23]

üîç Cell 1 neighbors:
  Indices : [266 267 112 577 357]
  Distances: [11.66 14.42 23.35 27.66 29.68]

üîç Cell 2 neighbors:
  Indices : [239 537 594 807 334]
  Distances: [15.26 26.25 36.24 40.52 42.06]
üöÄ Cell 4 complete: spatial KNN fitted on TRAIN set


In [8]:
# ============================================================
# CELL 5: Compute label-free KNN features (TRAIN set)
# ============================================================
# Purpose:
# - Generate per-cell KNN spatial summary features
# - Label-free, safe for training
# - Append features to train_df
# ============================================================

# ---------- 1. Initialize feature containers ----------
n_cells = train_df.shape[0]

knn_mean_dist = np.zeros(n_cells)
knn_std_dist  = np.zeros(n_cells)

# One array per marker
knn_mean_marker = {
    m: np.zeros(n_cells) for m in KNN_MARKERS
}

# ---------- 2. Compute KNN distance features ----------
knn_mean_dist[:] = train_knn_distances.mean(axis=1)
knn_std_dist[:]  = train_knn_distances.std(axis=1)

# ---------- 3. Compute KNN marker-mean features ----------
for i in range(n_cells):
    neighbors = train_knn_indices[i]
    for m in KNN_MARKERS:
        knn_mean_marker[m][i] = train_df.loc[neighbors, m].mean()

# ---------- 4. Append features to DataFrame ----------
train_knn_df = train_df.copy()

train_knn_df["knn_mean_dist"] = knn_mean_dist
train_knn_df["knn_std_dist"]  = knn_std_dist

for m in KNN_MARKERS:
    train_knn_df[f"knn_mean_{m}"] = knn_mean_marker[m]

# ---------- 5. Sanity checks ----------
print("üìê Train shape before KNN:", train_df.shape)
print("üìê Train shape after  KNN:", train_knn_df.shape)

print("\nüß™ Example KNN features (first 5 rows):")
display(
    train_knn_df[
        ["knn_mean_dist", "knn_std_dist"] +
        [f"knn_mean_{m}" for m in KNN_MARKERS]
    ].head()
)

# ---------- 6. Final confirmation ----------
print("üöÄ Cell 5 complete: KNN features computed for TRAIN set")


üìê Train shape before KNN: (114984, 55)
üìê Train shape after  KNN: (114984, 62)

üß™ Example KNN features (first 5 rows):


Unnamed: 0,knn_mean_dist,knn_std_dist,knn_mean_CD4,knn_mean_CD8,knn_mean_CD20,knn_mean_CD68,knn_mean_CD163
0,27.195556,5.882322,0.012218,0.052724,0.768365,7.2e-05,0.01137
1,21.353924,7.142123,0.273338,0.14274,0.003212,0.027929,0.020681
2,32.065914,10.048739,0.191928,0.002662,0.037227,0.042715,0.017929
3,21.366459,4.987426,0.06455,0.14724,7e-06,0.46982,0.212313
4,27.775427,9.06232,0.354901,0.033046,0.150913,0.154144,0.015737


üöÄ Cell 5 complete: KNN features computed for TRAIN set


In [9]:
# ============================================================
# CELL 6: Compute label-free KNN features (VALID set)
# ============================================================
# Purpose:
# - Compute KNN features for validation data
# - KNN fit and query are done on VALID set only
# - Ensure column alignment with TRAIN KNN dataset
# ============================================================

# ---------- 1. Extract spatial coordinates (VALID) ----------
valid_coords = valid_df[SPATIAL_COORDS].values
print("üìç Valid coordinates shape:", valid_coords.shape)

# ---------- 2. Fit KNN on VALID set ----------
knn_valid = NearestNeighbors(
    n_neighbors=K + 1,   # self + K neighbors
    metric="euclidean",
    algorithm="ball_tree"
)

knn_valid.fit(valid_coords)

# ---------- 3. Query neighbors ----------
valid_distances, valid_indices = knn_valid.kneighbors(valid_coords)

# Drop self-neighbor
valid_knn_distances = valid_distances[:, 1:]
valid_knn_indices   = valid_indices[:, 1:]

print("üìê Valid KNN distances shape:", valid_knn_distances.shape)
print("üìê Valid KNN indices shape:", valid_knn_indices.shape)

# ---------- 4. Distance feature computation ----------
n_valid = valid_df.shape[0]

valid_knn_mean_dist = valid_knn_distances.mean(axis=1)
valid_knn_std_dist  = valid_knn_distances.std(axis=1)

# ---------- 5. Marker-mean feature computation ----------
valid_knn_mean_marker = {
    m: np.zeros(n_valid) for m in KNN_MARKERS
}

for i in range(n_valid):
    neighbors = valid_knn_indices[i]
    for m in KNN_MARKERS:
        valid_knn_mean_marker[m][i] = valid_df.loc[neighbors, m].mean()

# ---------- 6. Append features to VALID DataFrame ----------
valid_knn_df = valid_df.copy()

valid_knn_df["knn_mean_dist"] = valid_knn_mean_dist
valid_knn_df["knn_std_dist"]  = valid_knn_std_dist

for m in KNN_MARKERS:
    valid_knn_df[f"knn_mean_{m}"] = valid_knn_mean_marker[m]

# ---------- 7. Sanity checks ----------
print("üìê Valid shape before KNN:", valid_df.shape)
print("üìê Valid shape after  KNN:", valid_knn_df.shape)

print("\nüß™ Example VALID KNN features (first 5 rows):")
display(
    valid_knn_df[
        ["knn_mean_dist", "knn_std_dist"] +
        [f"knn_mean_{m}" for m in KNN_MARKERS]
    ].head()
)

# ---------- 8. Column alignment check ----------
assert list(train_knn_df.columns) == list(valid_knn_df.columns), (
    "‚ùå Train and Valid columns do not match after KNN feature generation"
)

print("‚úÖ Train/Valid column alignment confirmed")
print("üöÄ Cell 6 complete: KNN features computed for VALID set")


üìç Valid coordinates shape: (28746, 2)
üìê Valid KNN distances shape: (28746, 5)
üìê Valid KNN indices shape: (28746, 5)
üìê Valid shape before KNN: (28746, 55)
üìê Valid shape after  KNN: (28746, 62)

üß™ Example VALID KNN features (first 5 rows):


Unnamed: 0,knn_mean_dist,knn_std_dist,knn_mean_CD4,knn_mean_CD8,knn_mean_CD20,knn_mean_CD68,knn_mean_CD163
0,33.216251,20.636877,0.180109,0.01428,0.11658,0.105766,0.001879
1,42.482285,16.53649,0.165847,0.254602,0.191149,0.330677,0.170045
2,46.031657,3.080026,0.352765,0.074465,0.152807,0.244423,0.017929
3,25.853591,10.742058,0.326376,0.011663,0.329317,0.044188,0.021225
4,70.536415,3.288482,0.144776,0.163763,0.408329,0.167229,0.009162


‚úÖ Train/Valid column alignment confirmed
üöÄ Cell 6 complete: KNN features computed for VALID set


In [10]:
# ============================================================
# CELL 7: Save KNN-augmented datasets
# ============================================================
# Purpose:
# - Persist KNN-augmented TRAIN/VALID CSVs
# - Keep results reproducible and reusable
# ============================================================

# ---------- 1. Define output directory ----------
OUTPUT_DIR = os.path.join(PROJECT_ROOT, "cHL_CODEX_spatial_knn_features")
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"üìÅ Output directory: {OUTPUT_DIR}")

# ---------- 2. Define output paths ----------
TRAIN_OUT_PATH = os.path.join(OUTPUT_DIR, "train.csv")
VALID_OUT_PATH = os.path.join(OUTPUT_DIR, "valid.csv")
CLASS_OUT_PATH = os.path.join(OUTPUT_DIR, "class_names.csv")

# ---------- 3. Save CSVs ----------
train_knn_df.to_csv(TRAIN_OUT_PATH, index=False)
valid_knn_df.to_csv(VALID_OUT_PATH, index=False)

# Copy class names unchanged for consistency
class_names.to_csv(CLASS_OUT_PATH, index=False)

print("üíæ Saved files:")
print("  -", TRAIN_OUT_PATH)
print("  -", VALID_OUT_PATH)
print("  -", CLASS_OUT_PATH)

# ---------- 4. Verification ----------
# Reload and check shapes
_train_check = pd.read_csv(TRAIN_OUT_PATH)
_valid_check = pd.read_csv(VALID_OUT_PATH)

assert _train_check.shape == train_knn_df.shape, "‚ùå Train CSV shape mismatch after save"
assert _valid_check.shape == valid_knn_df.shape, "‚ùå Valid CSV shape mismatch after save"

print("‚úÖ Reload verification passed")

# ---------- 5. Summary ----------
print("\nüìê Feature summary:")
print("  Baseline features :", len(train_df.columns))
print("  KNN features added:", len(PLANNED_KNN_FEATURES))
print("  Total features    :", len(train_knn_df.columns))

print("üöÄ Cell 7 complete: KNN-augmented datasets saved")


üìÅ Output directory: /content/drive/MyDrive/MAPS-update-main/cHL_CODEX_spatial_knn_features
üíæ Saved files:
  - /content/drive/MyDrive/MAPS-update-main/cHL_CODEX_spatial_knn_features/train.csv
  - /content/drive/MyDrive/MAPS-update-main/cHL_CODEX_spatial_knn_features/valid.csv
  - /content/drive/MyDrive/MAPS-update-main/cHL_CODEX_spatial_knn_features/class_names.csv
‚úÖ Reload verification passed

üìê Feature summary:
  Baseline features : 55
  KNN features added: 7
  Total features    : 62
üöÄ Cell 7 complete: KNN-augmented datasets saved


In [11]:
# ============================================================
# CELL 7.5: Make MAPS importable in Colab
# ============================================================
# Purpose:
# - Ensure original MAPS repo is discoverable by Python
# - No pip install, no runtime restart
# ============================================================

import sys
import os

MAPS_REPO_PATH = "/content/MAPS"

assert os.path.exists(MAPS_REPO_PATH), (
    "‚ùå MAPS repo not found at /content/MAPS\n"
    "Run CELL 0 (git clone MAPS) first."
)

# Add MAPS repo to Python path
if MAPS_REPO_PATH not in sys.path:
    sys.path.insert(0, MAPS_REPO_PATH)

print("üì¶ MAPS repo added to sys.path")

# Test imports
try:
    from maps.cell_phenotyping.trainer import Trainer
    from maps.cell_phenotyping.datasets import CellExpressionCSV
    print("‚úÖ MAPS imports successful")
except Exception as e:
    print("‚ùå MAPS import failed")
    raise e


üì¶ MAPS repo added to sys.path
‚úÖ MAPS imports successful


In [12]:
# ============================================================
# CELL 8: Baseline MLP training (repo-aligned)
# ============================================================
# Uses:
# - CellExpressionCSV (dataset utility)
# - Custom MLP (same style as teammate)
# - Manual train / validate loop
# ============================================================

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm
import tempfile
import os # Import os module to set environment variable

# Disable torch.compile to avoid Triton-related compatibility issues
# This is a workaround for the 'AttributeError: module 'triton.backends' has no attribute 'compiler''
os.environ['TORCH_COMPILE_DISABLE'] = '1'

# ---------- 1. Paths ----------
TRAIN_CSV_ORIGINAL = os.path.join(DATA_SPATIAL, "train.csv")
VALID_CSV_ORIGINAL = os.path.join(DATA_SPATIAL, "valid.csv")

assert os.path.exists(TRAIN_CSV_ORIGINAL)
assert os.path.exists(VALID_CSV_ORIGINAL)

print("üìÇ Original Baseline CSVs:")
print(" ", TRAIN_CSV_ORIGINAL)
print(" ", VALID_CSV_ORIGINAL)

# ---------- 1.5. Filter DataFrames to use only BASE_FEATURES + LABEL_COL ----------
# The CellExpressionCSV class by default takes all columns except LABEL_COL as features.
# To ensure it only uses BASE_FEATURES, we create filtered temporary CSVs.

train_df_filtered = train_df[BASE_FEATURES + [LABEL_COL]]
valid_df_filtered = valid_df[BASE_FEATURES + [LABEL_COL]]

with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_train_file:
    train_df_filtered.to_csv(tmp_train_file.name, index=False)
    TRAIN_CSV = tmp_train_file.name

with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_valid_file:
    valid_df_filtered.to_csv(tmp_valid_file.name, index=False)
    VALID_CSV = tmp_valid_file.name

print("üìÇ Filtered Temp CSVs for dataset loading:")
print(" ", TRAIN_CSV)
print(" ", VALID_CSV)

# ---------- 2. Dataset + DataLoader ----------
from maps.cell_phenotyping.datasets import CellExpressionCSV

BATCH_SIZE = 256
NUM_WORKERS = 2

train_dataset = CellExpressionCSV(TRAIN_CSV, is_train=True)
valid_dataset = CellExpressionCSV(
    VALID_CSV,
    is_train=False,
    mean=train_dataset.mean,
    std=train_dataset.std
)

train_loader = CellExpressionCSV.get_data_loader(
    train_dataset,
    batch_size=BATCH_SIZE,
    is_train=True,
    num_workers=NUM_WORKERS
)

valid_loader = CellExpressionCSV.get_data_loader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    is_train=False,
    num_workers=NUM_WORKERS
)

print(f"üìä Train samples: {len(train_dataset)}")
print(f"üìä Valid samples: {len(valid_dataset)}")

# ---------- 3. Model ----------
from maps.cell_phenotyping.networks import MLP

# Manually define NUM_FEATURES and NUM_CLASSES based on previous cells
# This now correctly aligns with the filtered dataframes.
NUM_FEATURES = len(BASE_FEATURES)
NUM_CLASSES  = len(class_names)

model = MLP(
    input_dim=NUM_FEATURES,
    hidden_dim=512,
    num_classes=NUM_CLASSES,
    dropout=0.10
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print(f"üß† Model: MLP (6-layer implicit)")
print(f"üßÆ Input dim: {NUM_FEATURES}, Classes: {NUM_CLASSES}")

# ---------- 4. Optimizer & Loss ----------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# ---------- 5. Training loop ----------
MAX_EPOCHS = 50
PATIENCE = 5

best_val_loss = float("inf")
patience_counter = 0

for epoch in range(1, MAX_EPOCHS + 1):
    # ---- Train ----
    model.train()
    train_loss = 0.0

    for x, y in tqdm(train_loader, desc=f"Epoch {epoch} [Train]", leave=False):
        x, y = x.to(device), y.to(device)
        x = x.to(torch.float32) # Ensure input is float32

        optimizer.zero_grad()
        logits, _ = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    # ---- Validate ----
    model.eval()
    val_loss = 0.0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for x, y in tqdm(valid_loader, desc=f"Epoch {epoch} [Valid]", leave=False):
            x, y = x.to(device), y.to(device)
            x = x.to(torch.float32) # Ensure input is float32
            logits, _ = model(x)
            loss = criterion(logits, y)

            val_loss += loss.item()
            all_preds.append(logits.argmax(dim=1).cpu())
            all_labels.append(y.cpu())

    val_loss /= len(valid_loader)
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)

    val_acc = accuracy_score(all_labels, all_preds)
    val_f1  = f1_score(all_labels, all_preds, average="macro")

    print(
        f"Epoch {epoch:02d} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Loss: {val_loss:.4f} | "
        f"Val Acc: {val_acc:.4f} | "
        f"Val F1: {val_f1:.4f}"
    )

    # ---- Early stopping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        best_state = model.state_dict()
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print("‚èπÔ∏è Early stopping triggered")
            break

# ---------- 6. Load best model ----------
model.load_state_dict(best_state)
print("‚úÖ Best model loaded")

# ---------- 7. Final evaluation ----------
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for x, y in valid_loader:
        x, y = x.to(device), y.to(device)
        x = x.to(torch.float32) # Ensure input is float32
        logits, _ = model(x)
        all_preds.append(logits.argmax(dim=1).cpu())
        all_labels.append(y.cpu())

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

baseline_acc = accuracy_score(all_labels, all_preds)
baseline_f1  = f1_score(all_labels, all_preds, average="macro")

print("\nüìà BASELINE RESULTS")
print(f"Accuracy : {baseline_acc:.4f}")
print(f"Macro F1 : {baseline_f1:.4f}")

print("üöÄ Cell 8 complete: baseline MLP trained")

# ---------- 8. Clean up temporary CSVs ----------
os.remove(TRAIN_CSV)
os.remove(VALID_CSV)
print("üßπ Cleaned up temporary CSV files.")

üìÇ Original Baseline CSVs:
  /content/drive/MyDrive/MAPS-update-main/cHL_CODEX_spatial_features/train.csv
  /content/drive/MyDrive/MAPS-update-main/cHL_CODEX_spatial_features/valid.csv
üìÇ Filtered Temp CSVs for dataset loading:
  /tmp/tmpt7k32uis.csv
  /tmp/tmpza5p2y6l.csv
üìä Train samples: 114984
üìä Valid samples: 28746
üß† Model: MLP (6-layer implicit)
üßÆ Input dim: 52, Classes: 16




Epoch 01 | Train Loss: 2.6698 | Val Loss: 2.2550 | Val Acc: 0.2590 | Val F1: 0.2263




Epoch 02 | Train Loss: 1.9804 | Val Loss: 1.9638 | Val Acc: 0.3568 | Val F1: 0.3345




Epoch 03 | Train Loss: 1.7923 | Val Loss: 1.8624 | Val Acc: 0.3615 | Val F1: 0.3569




Epoch 04 | Train Loss: 1.5749 | Val Loss: 1.5394 | Val Acc: 0.4547 | Val F1: 0.4624




Epoch 05 | Train Loss: 1.3524 | Val Loss: 1.4546 | Val Acc: 0.4953 | Val F1: 0.5139




Epoch 06 | Train Loss: 1.2452 | Val Loss: 1.4157 | Val Acc: 0.5225 | Val F1: 0.5386




Epoch 07 | Train Loss: 1.1720 | Val Loss: 1.3168 | Val Acc: 0.5560 | Val F1: 0.5737




Epoch 08 | Train Loss: 1.0869 | Val Loss: 1.2118 | Val Acc: 0.5916 | Val F1: 0.6047




Epoch 09 | Train Loss: 1.0117 | Val Loss: 1.1434 | Val Acc: 0.6140 | Val F1: 0.6227




Epoch 10 | Train Loss: 0.9699 | Val Loss: 1.1259 | Val Acc: 0.6237 | Val F1: 0.6307




Epoch 11 | Train Loss: 0.9197 | Val Loss: 1.0633 | Val Acc: 0.6414 | Val F1: 0.6406




Epoch 12 | Train Loss: 0.8869 | Val Loss: 1.0158 | Val Acc: 0.6567 | Val F1: 0.6569




Epoch 13 | Train Loss: 0.8569 | Val Loss: 1.0168 | Val Acc: 0.6559 | Val F1: 0.6601




Epoch 14 | Train Loss: 0.8248 | Val Loss: 0.9487 | Val Acc: 0.6759 | Val F1: 0.6737




Epoch 15 | Train Loss: 0.7944 | Val Loss: 0.9280 | Val Acc: 0.6849 | Val F1: 0.6817




Epoch 16 | Train Loss: 0.7630 | Val Loss: 0.9506 | Val Acc: 0.6750 | Val F1: 0.6793




Epoch 17 | Train Loss: 0.7411 | Val Loss: 0.9383 | Val Acc: 0.6764 | Val F1: 0.6823




Epoch 18 | Train Loss: 0.7230 | Val Loss: 0.8925 | Val Acc: 0.6904 | Val F1: 0.6875




Epoch 19 | Train Loss: 0.7031 | Val Loss: 0.8524 | Val Acc: 0.7082 | Val F1: 0.7063




Epoch 20 | Train Loss: 0.6902 | Val Loss: 0.8479 | Val Acc: 0.7065 | Val F1: 0.7066




Epoch 21 | Train Loss: 0.6772 | Val Loss: 0.8584 | Val Acc: 0.7038 | Val F1: 0.7058




Epoch 22 | Train Loss: 0.6667 | Val Loss: 0.8494 | Val Acc: 0.7039 | Val F1: 0.7030




Epoch 23 | Train Loss: 0.6527 | Val Loss: 0.7950 | Val Acc: 0.7239 | Val F1: 0.7212




Epoch 24 | Train Loss: 0.6407 | Val Loss: 0.8175 | Val Acc: 0.7169 | Val F1: 0.7177




Epoch 25 | Train Loss: 0.6299 | Val Loss: 0.7878 | Val Acc: 0.7247 | Val F1: 0.7242




Epoch 26 | Train Loss: 0.6223 | Val Loss: 0.7794 | Val Acc: 0.7273 | Val F1: 0.7246




Epoch 27 | Train Loss: 0.6074 | Val Loss: 0.7515 | Val Acc: 0.7365 | Val F1: 0.7312




Epoch 28 | Train Loss: 0.5943 | Val Loss: 0.7724 | Val Acc: 0.7296 | Val F1: 0.7295




Epoch 29 | Train Loss: 0.5937 | Val Loss: 0.7421 | Val Acc: 0.7374 | Val F1: 0.7321




Epoch 30 | Train Loss: 0.5808 | Val Loss: 0.7146 | Val Acc: 0.7496 | Val F1: 0.7434




Epoch 31 | Train Loss: 0.5687 | Val Loss: 0.7252 | Val Acc: 0.7459 | Val F1: 0.7400




Epoch 32 | Train Loss: 0.5644 | Val Loss: 0.7030 | Val Acc: 0.7507 | Val F1: 0.7428




Epoch 33 | Train Loss: 0.5559 | Val Loss: 0.6886 | Val Acc: 0.7583 | Val F1: 0.7517




Epoch 34 | Train Loss: 0.5496 | Val Loss: 0.6893 | Val Acc: 0.7573 | Val F1: 0.7492




Epoch 35 | Train Loss: 0.5378 | Val Loss: 0.6692 | Val Acc: 0.7635 | Val F1: 0.7556




Epoch 36 | Train Loss: 0.5304 | Val Loss: 0.6761 | Val Acc: 0.7593 | Val F1: 0.7516




Epoch 37 | Train Loss: 0.5190 | Val Loss: 0.6766 | Val Acc: 0.7592 | Val F1: 0.7518




Epoch 38 | Train Loss: 0.5217 | Val Loss: 0.6809 | Val Acc: 0.7566 | Val F1: 0.7503




Epoch 39 | Train Loss: 0.5157 | Val Loss: 0.6555 | Val Acc: 0.7671 | Val F1: 0.7600




Epoch 40 | Train Loss: 0.5066 | Val Loss: 0.6327 | Val Acc: 0.7764 | Val F1: 0.7672




Epoch 41 | Train Loss: 0.4970 | Val Loss: 0.6307 | Val Acc: 0.7742 | Val F1: 0.7664




Epoch 42 | Train Loss: 0.4958 | Val Loss: 0.6200 | Val Acc: 0.7775 | Val F1: 0.7693




Epoch 43 | Train Loss: 0.4868 | Val Loss: 0.6274 | Val Acc: 0.7763 | Val F1: 0.7691




Epoch 44 | Train Loss: 0.4819 | Val Loss: 0.6171 | Val Acc: 0.7794 | Val F1: 0.7705




Epoch 45 | Train Loss: 0.4757 | Val Loss: 0.6241 | Val Acc: 0.7749 | Val F1: 0.7673




Epoch 46 | Train Loss: 0.4779 | Val Loss: 0.6524 | Val Acc: 0.7672 | Val F1: 0.7630




Epoch 47 | Train Loss: 0.4637 | Val Loss: 0.6303 | Val Acc: 0.7727 | Val F1: 0.7655




Epoch 48 | Train Loss: 0.4691 | Val Loss: 0.5912 | Val Acc: 0.7877 | Val F1: 0.7779




Epoch 49 | Train Loss: 0.4598 | Val Loss: 0.5878 | Val Acc: 0.7859 | Val F1: 0.7764


                                                                   

Epoch 50 | Train Loss: 0.4603 | Val Loss: 0.5827 | Val Acc: 0.7892 | Val F1: 0.7801
‚úÖ Best model loaded





üìà BASELINE RESULTS
Accuracy : 0.7892
Macro F1 : 0.7801
üöÄ Cell 8 complete: baseline MLP trained
üßπ Cleaned up temporary CSV files.


CELL 9 ‚Äî KNN-Augmented Baseline (Spatial Features ‚Üí MLP)

Goal:
Create KNN spatial summary features ‚Üí append to input ‚Üí retrain same MLP ‚Üí compare against baseline (0.7892 / 0.7801).

No architecture changes. No optimizer changes.

In [13]:
# ============================================================
# CELL 9.1: KNN feature configuration
# ============================================================

from sklearn.neighbors import NearestNeighbors
import numpy as np

K = 5  # spatial neighbors (keep fixed for first experiment)

# Choose a SMALL, biologically meaningful subset
KNN_MARKERS = [
    "CD4",
    "CD8",
    "CD20",
    "CD68",
    "CD45"
]

print("üî¢ KNN configuration")
print("K =", K)
print("Markers used for KNN summaries:", KNN_MARKERS)


üî¢ KNN configuration
K = 5
Markers used for KNN summaries: ['CD4', 'CD8', 'CD20', 'CD68', 'CD45']


In [14]:
# ============================================================
# CELL 9.2: Compute KNN spatial features
# ============================================================

def add_knn_features(df, k, marker_cols):
    coords = df[["X_cent", "Y_cent"]].values

    knn = NearestNeighbors(n_neighbors=k + 1, metric="euclidean")
    knn.fit(coords)

    distances, indices = knn.kneighbors(coords)

    # Drop self-neighbor
    distances = distances[:, 1:]
    indices = indices[:, 1:]

    # Distance features
    df["knn_mean_dist"] = distances.mean(axis=1)
    df["knn_std_dist"]  = distances.std(axis=1)

    # Marker neighborhood means
    for m in marker_cols:
        df[f"knn_mean_{m}"] = np.array([
            df.iloc[neighbors][m].mean()
            for neighbors in indices
        ])

    return df


In [15]:
# ============================================================
# CELL 9.3: Apply KNN features
# ============================================================

train_df_knn = add_knn_features(train_df.copy(), K, KNN_MARKERS)
valid_df_knn = add_knn_features(valid_df.copy(), K, KNN_MARKERS)

KNN_FEATURES = (
    ["knn_mean_dist", "knn_std_dist"] +
    [f"knn_mean_{m}" for m in KNN_MARKERS]
)

print("üß¨ KNN features added:")
for f in KNN_FEATURES:
    print(" ", f)

print("Total new features:", len(KNN_FEATURES))


üß¨ KNN features added:
  knn_mean_dist
  knn_std_dist
  knn_mean_CD4
  knn_mean_CD8
  knn_mean_CD20
  knn_mean_CD68
  knn_mean_CD45
Total new features: 7


In [16]:
# ============================================================
# CELL 9.4: Create KNN-augmented CSVs for training
# ============================================================

import tempfile

FEATURES_WITH_KNN = BASE_FEATURES + KNN_FEATURES

train_df_knn_filtered = train_df_knn[FEATURES_WITH_KNN + [LABEL_COL]]
valid_df_knn_filtered = valid_df_knn[FEATURES_WITH_KNN + [LABEL_COL]]

with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_train:
    train_df_knn_filtered.to_csv(tmp_train.name, index=False)
    TRAIN_CSV_KNN = tmp_train.name

with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_valid:
    valid_df_knn_filtered.to_csv(tmp_valid.name, index=False)
    VALID_CSV_KNN = tmp_valid.name

print("üìÇ KNN-augmented CSVs:")
print(" ", TRAIN_CSV_KNN)
print(" ", VALID_CSV_KNN)
print("üßÆ Total input features:", len(FEATURES_WITH_KNN))


üìÇ KNN-augmented CSVs:
  /tmp/tmpd6cawyw8.csv
  /tmp/tmpcjd29v1o.csv
üßÆ Total input features: 59


In [17]:
# ============================================================
# CELL 9.5: KNN-augmented MLP training (same as baseline)
# ============================================================

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score
from tqdm import tqdm
import tempfile
import os

# Disable torch.compile to avoid Triton-related issues (same as Cell 8)
os.environ['TORCH_COMPILE_DISABLE'] = '1'

# ---------- 1. Create filtered temp CSVs (BASE + KNN features) ----------
train_df_knn_filtered = train_df_knn[FEATURES_WITH_KNN + [LABEL_COL]]
valid_df_knn_filtered = valid_df_knn[FEATURES_WITH_KNN + [LABEL_COL]]

with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_train:
    train_df_knn_filtered.to_csv(tmp_train.name, index=False)
    TRAIN_CSV_KNN = tmp_train.name

with tempfile.NamedTemporaryFile(suffix=".csv", delete=False) as tmp_valid:
    valid_df_knn_filtered.to_csv(tmp_valid.name, index=False)
    VALID_CSV_KNN = tmp_valid.name

print("üìÇ KNN-augmented temp CSVs:")
print(" ", TRAIN_CSV_KNN)
print(" ", VALID_CSV_KNN)

# ---------- 2. Dataset + DataLoader ----------
from maps.cell_phenotyping.datasets import CellExpressionCSV

BATCH_SIZE = 256
NUM_WORKERS = 2

train_dataset = CellExpressionCSV(TRAIN_CSV_KNN, is_train=True)
valid_dataset = CellExpressionCSV(
    VALID_CSV_KNN,
    is_train=False,
    mean=train_dataset.mean,
    std=train_dataset.std
)

train_loader = CellExpressionCSV.get_data_loader(
    train_dataset,
    batch_size=BATCH_SIZE,
    is_train=True,
    num_workers=NUM_WORKERS
)

valid_loader = CellExpressionCSV.get_data_loader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    is_train=False,
    num_workers=NUM_WORKERS
)

print(f"üìä Train samples: {len(train_dataset)}")
print(f"üìä Valid samples: {len(valid_dataset)}")

# ---------- 3. Model ----------
from maps.cell_phenotyping.networks import MLP

NUM_FEATURES = len(FEATURES_WITH_KNN)
NUM_CLASSES  = len(class_names)

model = MLP(
    input_dim=NUM_FEATURES,
    hidden_dim=512,
    num_classes=NUM_CLASSES,
    dropout=0.10
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

print("üß† Model: MLP (KNN-augmented)")
print(f"üßÆ Input dim: {NUM_FEATURES}, Classes: {NUM_CLASSES}")

# ---------- 4. Optimizer & Loss ----------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# ---------- 5. Training loop ----------
MAX_EPOCHS = 50
PATIENCE = 5

best_val_loss = float("inf")
patience_counter = 0

for epoch in range(1, MAX_EPOCHS + 1):
    # ---- Train ----
    model.train()
    train_loss = 0.0

    for x, y in tqdm(train_loader, desc=f"Epoch {epoch} [Train]", leave=False):
        x, y = x.to(device), y.to(device)
        x = x.to(torch.float32)

        optimizer.zero_grad()
        logits, _ = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= len(train_loader)

    # ---- Validate ----
    model.eval()
    val_loss = 0.0
    all_preds, all_labels = [], []

    with torch.no_grad():
        for x, y in tqdm(valid_loader, desc=f"Epoch {epoch} [Valid]", leave=False):
            x, y = x.to(device), y.to(device)
            x = x.to(torch.float32)
            logits, _ = model(x)
            loss = criterion(logits, y)

            val_loss += loss.item()
            all_preds.append(logits.argmax(dim=1).cpu())
            all_labels.append(y.cpu())

    val_loss /= len(valid_loader)
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)

    val_acc = accuracy_score(all_labels, all_preds)
    val_f1  = f1_score(all_labels, all_preds, average="macro")

    print(
        f"Epoch {epoch:02d} | "
        f"Train Loss: {train_loss:.4f} | "
        f"Val Loss: {val_loss:.4f} | "
        f"Val Acc: {val_acc:.4f} | "
        f"Val F1: {val_f1:.4f}"
    )

    # ---- Early stopping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        best_state = model.state_dict()
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print("‚èπÔ∏è Early stopping triggered")
            break

# ---------- 6. Load best model ----------
model.load_state_dict(best_state)
print("‚úÖ Best KNN model loaded")

# ---------- 7. Final evaluation ----------
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for x, y in valid_loader:
        x, y = x.to(device), y.to(device)
        x = x.to(torch.float32)
        logits, _ = model(x)
        all_preds.append(logits.argmax(dim=1).cpu())
        all_labels.append(y.cpu())

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

knn_acc = accuracy_score(all_labels, all_preds)
knn_f1  = f1_score(all_labels, all_preds, average="macro")

print("\nüìà KNN-AUGMENTED RESULTS")
print(f"Accuracy : {knn_acc:.4f}")
print(f"Macro F1 : {knn_f1:.4f}")

# ---------- 8. Cleanup ----------
os.remove(TRAIN_CSV_KNN)
os.remove(VALID_CSV_KNN)
print("üßπ Temporary KNN CSV files removed")
print("üöÄ Cell 9.5 complete: KNN-augmented MLP trained")


üìÇ KNN-augmented temp CSVs:
  /tmp/tmpa_1mkkr6.csv
  /tmp/tmpj61yn2kh.csv
üìä Train samples: 114984
üìä Valid samples: 28746
üß† Model: MLP (KNN-augmented)
üßÆ Input dim: 59, Classes: 16




Epoch 01 | Train Loss: 2.6680 | Val Loss: 2.3418 | Val Acc: 0.1123 | Val F1: 0.0816




Epoch 02 | Train Loss: 2.1554 | Val Loss: 2.0069 | Val Acc: 0.3032 | Val F1: 0.2988




Epoch 03 | Train Loss: 1.8000 | Val Loss: 1.8242 | Val Acc: 0.3654 | Val F1: 0.3470




Epoch 04 | Train Loss: 1.5706 | Val Loss: 1.6109 | Val Acc: 0.4223 | Val F1: 0.4194




Epoch 05 | Train Loss: 1.3876 | Val Loss: 1.5510 | Val Acc: 0.4316 | Val F1: 0.4467




Epoch 06 | Train Loss: 1.2873 | Val Loss: 1.4755 | Val Acc: 0.4751 | Val F1: 0.4939




Epoch 07 | Train Loss: 1.2092 | Val Loss: 1.4209 | Val Acc: 0.4981 | Val F1: 0.5223




Epoch 08 | Train Loss: 1.1259 | Val Loss: 1.3085 | Val Acc: 0.5436 | Val F1: 0.5593




Epoch 09 | Train Loss: 1.0419 | Val Loss: 1.2216 | Val Acc: 0.5713 | Val F1: 0.5854




Epoch 10 | Train Loss: 0.9800 | Val Loss: 1.1720 | Val Acc: 0.5944 | Val F1: 0.6074




Epoch 11 | Train Loss: 0.9323 | Val Loss: 1.1115 | Val Acc: 0.6135 | Val F1: 0.6217




Epoch 12 | Train Loss: 0.8885 | Val Loss: 1.1311 | Val Acc: 0.6110 | Val F1: 0.6250




Epoch 13 | Train Loss: 0.8509 | Val Loss: 1.0622 | Val Acc: 0.6360 | Val F1: 0.6444




Epoch 14 | Train Loss: 0.8207 | Val Loss: 0.9735 | Val Acc: 0.6640 | Val F1: 0.6628




Epoch 15 | Train Loss: 0.7972 | Val Loss: 0.9958 | Val Acc: 0.6586 | Val F1: 0.6609




Epoch 16 | Train Loss: 0.7760 | Val Loss: 0.9349 | Val Acc: 0.6800 | Val F1: 0.6756




Epoch 17 | Train Loss: 0.7522 | Val Loss: 0.9330 | Val Acc: 0.6801 | Val F1: 0.6757




Epoch 18 | Train Loss: 0.7332 | Val Loss: 0.8899 | Val Acc: 0.6978 | Val F1: 0.6914




Epoch 19 | Train Loss: 0.7166 | Val Loss: 0.8740 | Val Acc: 0.7021 | Val F1: 0.6957




Epoch 20 | Train Loss: 0.7011 | Val Loss: 0.8579 | Val Acc: 0.7075 | Val F1: 0.6982




Epoch 21 | Train Loss: 0.6875 | Val Loss: 0.8219 | Val Acc: 0.7180 | Val F1: 0.7059




Epoch 22 | Train Loss: 0.6704 | Val Loss: 0.8269 | Val Acc: 0.7186 | Val F1: 0.7079




Epoch 23 | Train Loss: 0.6653 | Val Loss: 0.7992 | Val Acc: 0.7248 | Val F1: 0.7123




Epoch 24 | Train Loss: 0.6366 | Val Loss: 0.8222 | Val Acc: 0.7192 | Val F1: 0.7118




Epoch 25 | Train Loss: 0.6348 | Val Loss: 0.7802 | Val Acc: 0.7331 | Val F1: 0.7259




Epoch 26 | Train Loss: 0.6264 | Val Loss: 0.7510 | Val Acc: 0.7395 | Val F1: 0.7285




Epoch 27 | Train Loss: 0.6032 | Val Loss: 0.7433 | Val Acc: 0.7431 | Val F1: 0.7329




Epoch 28 | Train Loss: 0.5998 | Val Loss: 0.7592 | Val Acc: 0.7389 | Val F1: 0.7305




Epoch 29 | Train Loss: 0.5924 | Val Loss: 0.7270 | Val Acc: 0.7488 | Val F1: 0.7380




Epoch 30 | Train Loss: 0.5827 | Val Loss: 0.7276 | Val Acc: 0.7471 | Val F1: 0.7379




Epoch 31 | Train Loss: 0.5703 | Val Loss: 0.7110 | Val Acc: 0.7538 | Val F1: 0.7429




Epoch 32 | Train Loss: 0.5604 | Val Loss: 0.6888 | Val Acc: 0.7594 | Val F1: 0.7453




Epoch 33 | Train Loss: 0.5577 | Val Loss: 0.7226 | Val Acc: 0.7500 | Val F1: 0.7414




Epoch 34 | Train Loss: 0.5454 | Val Loss: 0.6665 | Val Acc: 0.7688 | Val F1: 0.7575




Epoch 35 | Train Loss: 0.5421 | Val Loss: 0.6919 | Val Acc: 0.7589 | Val F1: 0.7482




Epoch 36 | Train Loss: 0.5269 | Val Loss: 0.6943 | Val Acc: 0.7591 | Val F1: 0.7476




Epoch 37 | Train Loss: 0.5286 | Val Loss: 0.6717 | Val Acc: 0.7642 | Val F1: 0.7517




Epoch 38 | Train Loss: 0.5162 | Val Loss: 0.6330 | Val Acc: 0.7755 | Val F1: 0.7628




Epoch 39 | Train Loss: 0.5120 | Val Loss: 0.6505 | Val Acc: 0.7703 | Val F1: 0.7610




Epoch 40 | Train Loss: 0.5103 | Val Loss: 0.6514 | Val Acc: 0.7719 | Val F1: 0.7619




Epoch 41 | Train Loss: 0.5006 | Val Loss: 0.6408 | Val Acc: 0.7731 | Val F1: 0.7607




Epoch 42 | Train Loss: 0.4978 | Val Loss: 0.6573 | Val Acc: 0.7698 | Val F1: 0.7611


                                                                   

Epoch 43 | Train Loss: 0.4845 | Val Loss: 0.6400 | Val Acc: 0.7761 | Val F1: 0.7678
‚èπÔ∏è Early stopping triggered
‚úÖ Best KNN model loaded





üìà KNN-AUGMENTED RESULTS
Accuracy : 0.7761
Macro F1 : 0.7678
üßπ Temporary KNN CSV files removed
üöÄ Cell 9.5 complete: KNN-augmented MLP trained
