In [None]:
# + tags=["parameters"]
import os
import sys

# -------------------------------
# REPO-ROOT HANDLING (stable)
# -------------------------------

def find_repo_root(start, marker="src"):
    """Return the repository root for a notebook started in ``start``.

    Walks upward from ``start`` and returns the first directory that contains
    a ``marker`` sub-directory (``src`` is the package this notebook imports
    from). If no ancestor contains the marker, falls back to the parent of
    ``start``, which matches the fixed ``<repo>/notebooks`` layout.

    Args:
        start: Directory to begin the search from.
        marker: Name of a directory expected to exist only at the repo root.

    Returns:
        Absolute path of the detected repository root.
    """
    start = os.path.abspath(start)
    candidate = start
    while True:
        if os.path.isdir(os.path.join(candidate, marker)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:
            # Reached the filesystem root without finding the marker:
            # assume the notebook lives one level below the repo root.
            return os.path.abspath(os.path.join(start, ".."))
        candidate = parent


# `__file__` does not exist in a notebook, so the kernel's working directory
# is the only available anchor. Record it once per kernel: this cell changes
# the working directory below, and re-running it must not treat the repo root
# as if it were the notebooks folder.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()

REPO_ROOT = find_repo_root(NOTEBOOK_DIR)
os.chdir(REPO_ROOT)                 # relative paths now resolve from the repo root
if REPO_ROOT not in sys.path:
    sys.path.append(REPO_ROOT)      # makes the `src` package importable

print("Repo root:", REPO_ROOT)
print("Current working directory:", os.getcwd())

In [None]:
import pandas as pd
from src.utils import load_processed
from src.train import train_model, save_model
from src.eval import evaluate_model, update_registry
from sklearn.model_selection import train_test_split

In [None]:
# Read the processed feature table, then separate the label column
# ("target") from the remaining predictor columns.
df = load_processed(REPO_ROOT, "data/processed/features.csv")
y = df["target"]
X = df.drop(columns=["target"])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical from run to run.
splits = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = splits

In [None]:
# Fit on the training split and score on the held-out split.
model = train_model(X_train, y_train)
metrics = evaluate_model(model, X_test, y_test)

# Hand the model to save_model (tagged with its name and accuracy), then pass
# the value it returns, together with the metrics, to update_registry.
save_options = dict(
    model_name="random_forest",
    root=REPO_ROOT,
    accuracy=metrics["accuracy"],
)
model_file = save_model(model, **save_options)
update_registry(model_file, metrics, REPO_ROOT, "registry/models.json")

In [None]:
# Summarise the run: the value returned by save_model and the evaluation metrics.
saved_message = f"Model saved to {model_file}"
print(saved_message)
print("Metrics:", metrics)