# Layer Classification Model
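This notebook loads the feature CSVs generated from CAD drawings (`ml/data/raw/*.features.csv`), trains a simple TF-IDF + logistic-regression classifier that predicts the appropriate layer for each text entity from its text content (`TextString`), and saves the trained model to `ml/artifacts/layer_clf.pkl`.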

In [None]:
import glob
import pandas as pd
from pathlib import Path
# Load every per-drawing feature CSV from ml/data/raw into a single DataFrame.
data_dir = Path('ml/data/raw')
# Path.glob yields files in arbitrary, filesystem-dependent order; sorting keeps
# the row order (and therefore the seeded train/test split) reproducible.
csv_files = sorted(data_dir.glob('*.features.csv'))
if not csv_files:
    # Without this guard pd.concat fails with an opaque "No objects to concatenate".
    raise FileNotFoundError(
        f"No '*.features.csv' files found in {data_dir.resolve()}; "
        "check the working directory and that the feature CSVs exist."
    )
frames = []
for f in csv_files:
    df = pd.read_csv(f)
    # Record which file each row came from.
    df['source_file'] = f.name
    frames.append(df)
features = pd.concat(frames, ignore_index=True)
features.head()

In [None]:
# Example setup for the demo: no feature engineering beyond TF-IDF on the raw text.
# 'TextString' is the only input feature and 'Layer' is the prediction target.
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Rows without a target layer cannot be used for supervised training.
labeled = features.dropna(subset=['Layer'])

# TfidfVectorizer only accepts strings: empty CSV cells are read back as NaN and
# an all-numeric column is parsed as numbers, so normalise the text to str.
# Inputs passed to the fitted pipeline later must be strings as well.
X = labeled['TextString'].fillna('').astype(str)
y = labeled['Layer']

# TF-IDF bag-of-words over the entity text feeding a logistic-regression classifier.
model = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', LogisticRegression(max_iter=1000))
])

# Hold out 20% for evaluation; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)
preds = model.predict(X_test)
print(classification_report(y_test, preds))


In [None]:
# Save the trained model
import joblib
import os
# Define the artifact location once so the directory that is created, the file
# that is written and the message that is printed can never drift apart.
model_path = 'ml/artifacts/layer_clf.pkl'
# Create the parent directory if needed; exist_ok makes re-running the cell safe.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
joblib.dump(model, model_path)
print(f'Model saved to {model_path}')
