# Layer Classifier Training

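Run this notebook with the project root (`CAD AI`) as the working directory: all paths are resolved relative to it. It reads `ml/datasets/labeled.csv` and writes a fitted pipeline to `ml/artifacts/layer_clf.pkl`.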


In [None]:
import pathlib, joblib, pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# All paths are anchored on the current working directory, so the notebook
# must be launched from the project root.
root = pathlib.Path().resolve()

# Load the labelled examples.
#  - `on_bad_lines='skip'` drops rows the CSV parser cannot tokenize.
#  - `dtype={'Content': str}` keeps every Content cell as text. Without it,
#    pandas infers the column type and numeric-looking cells (e.g. "12.5")
#    can come back as floats, which TfidfVectorizer cannot tokenize.
#  - Rows missing either the text or the label are unusable and are removed.
df = pd.read_csv(
    root / 'ml' / 'datasets' / 'labeled.csv',
    on_bad_lines='skip',
    dtype={'Content': str},
).dropna(subset=['Content', 'Layer'])

# Text -> TF-IDF features -> linear SVM wrapped in 5-fold calibration.
# The token pattern treats any run of ASCII letters, digits and dots as one
# token, so strings such as "A1.2" stay intact instead of being split on '.'.
pipe = Pipeline([
    ('tfidf', TfidfVectorizer(token_pattern=r'[A-Za-z0-9\.]+')),
    ('clf',   CalibratedClassifierCV(LinearSVC(C=10.0), cv=5))
])

In [None]:
# Train / test split and fit
#
# Stratify on the label so every layer keeps the same share in the train and
# test partitions; a purely random split can leave a rare layer missing from
# one side, which distorts the report and can starve the 5-fold calibration.
# A stratified split needs at least two examples per class, so fall back to
# the plain random split when some layer has only one.
labels = df['Layer']
stratify_on = labels if labels.value_counts().min() >= 2 else None

X_train, X_test, y_train, y_test = train_test_split(
    df['Content'], labels, test_size=0.2, random_state=42, stratify=stratify_on
)

# Fit on the 80% training partition and report per-class metrics on the
# held-out 20%.
pipe.fit(X_train, y_train)
print(classification_report(y_test, pipe.predict(X_test)))

In [None]:
# Refuse to persist a pipeline whose vectorizer has never been fitted:
# a fitted TfidfVectorizer exposes `vocabulary_`.
tfidf_step = pipe.named_steps['tfidf']
assert hasattr(tfidf_step, 'vocabulary_'), 'Vectorizer not fitted!'

# Make sure the artifacts directory (and any missing parents) exists.
out = root / 'ml' / 'artifacts'
out.mkdir(parents=True, exist_ok=True)

# Serialize the whole pipeline (vectorizer + classifier) to one file.
model_path = out / 'layer_clf.pkl'
joblib.dump(pipe, model_path)
print('✓ Model saved to', model_path)