# Layer Classifier Training

Run this notebook from the project root (`CAD AI`) to train the layer classifier and save the fitted model as `ml/artifacts/layer_clf.pkl`.


In [None]:

import pathlib, pandas as pd, joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Resolve the project root. `__file__` is not defined inside a notebook
# kernel, so fall back to the current working directory (this notebook is
# meant to be launched from the project root). When the same code runs as a
# plain script, the directory containing the file is still used.
try:
    repo_root = pathlib.Path(__file__).resolve().parent
except NameError:
    repo_root = pathlib.Path.cwd()

data_path = repo_root / "ml" / "datasets" / "labeled.csv"
if not data_path.exists():
    # Explicit raise rather than `assert`, which is stripped under `python -O`.
    raise FileNotFoundError(f"{data_path} not found")

# Malformed CSV lines are skipped; rows missing either the text ("Content")
# or the label ("Layer") are dropped before training.
df = pd.read_csv(data_path, on_bad_lines="skip").dropna(subset=["Content", "Layer"])

# TF-IDF features over tokens made of letters, digits and dots, fed to a
# linear SVM. CalibratedClassifierCV (5-fold) wraps the SVM to provide
# probability estimates, which LinearSVC alone does not offer.
pipe = Pipeline([
    ("tfidf", TfidfVectorizer(token_pattern=r"[A-Za-z0-9\.]+")),
    ("clf", CalibratedClassifierCV(LinearSVC(C=10.0), cv=5)),
])


In [None]:
# Hold out 20% of the labelled rows for evaluation, fit on the remainder.
features = df["Content"]
labels = df["Layer"]
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)

pipe.fit(X_train, y_train)

# Per-class metrics on the held-out split.
y_pred = pipe.predict(X_test)
print(classification_report(y_test, y_pred))

In [None]:

# Persist the fitted pipeline (vectorizer + calibrated classifier) to disk.
out_path = repo_root / "ml" / "artifacts" / "layer_clf.pkl"

# Guard against saving an unfitted pipeline: TfidfVectorizer only gains
# `vocabulary_` once it has been fitted. Explicit raise rather than `assert`,
# which is stripped under `python -O`.
if not hasattr(pipe.named_steps["tfidf"], "vocabulary_"):
    raise RuntimeError("Pipeline is not fitted; run the training cell before saving.")

# joblib.dump does not create missing directories, so make sure the
# artifacts folder exists before writing.
out_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipe, out_path)
print("✓ Model saved to", out_path)
