# Layer Classifier Training

Run this notebook from the project root (`CAD AI`) to build a fitted `layer_clf.pkl`.


In [1]:
import pathlib, pandas as pd, joblib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Use the notebook’s working directory to locate your repo
repo_root = pathlib.Path().resolve()
data_path = repo_root / "ml" / "datasets" / "labeled.csv"
assert data_path.exists(), f"{data_path} not found"

df = pd.read_csv(data_path, on_bad_lines="skip").dropna(subset=["Content", "Layer"])

# Pipeline with TF–IDF and a Logistic Regression classifier
pipe = Pipeline([
    ("tfidf", TfidfVectorizer(token_pattern=r"[A-Za-z0-9\\.]+")),
    ("clf", LogisticRegression(max_iter=1000, multi_class="auto"))
])


In [2]:
# Train/test split and fit
X_train, X_test, y_train, y_test = train_test_split(
    df['Content'], df['Layer'], test_size=0.2, random_state=42
)

pipe.fit(X_train, y_train)
print(classification_report(y_test, pipe.predict(X_test)))



                                 precision    recall  f1-score   support

                              0       0.56      0.75      0.64      1603
                             10       0.00      0.00      0.00         1
                             12       0.00      0.00      0.00         1
                             13       0.00      0.00      0.00         3
                             20       0.00      0.00      0.00         4
                             21       0.00      0.00      0.00        10
                             26       0.00      0.00      0.00         6
                             33       0.67      0.25      0.36         8
                             35       0.72      0.84      0.78        85
                             37       0.00      0.00      0.00         1
                             38       0.00      0.00      0.00         4
                             39       0.00      0.00      0.00         2
                             40       0.00      0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [3]:
out_path = repo_root / "ml" / "artifacts" / "layer_clf.pkl"
assert hasattr(pipe.named_steps["tfidf"], "vocabulary_")  # make sure fitted
out_path.parent.mkdir(parents=True, exist_ok=True)  # ensure the directory exists
joblib.dump(pipe, out_path)
print("✓ Model saved to", out_path)

✓ Model saved to C:\Users\Jesse 2025\Desktop\CAD AI\ml\artifacts\layer_clf.pkl
