In [None]:
from pathlib import Path
import pandas as pd
from src.dataio import scan_pairs, load_image, load_mask
from src.features import masked_stats_rgb, shape_features, symmetry_features, texture_glcm, edge_density
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import joblib

# Reload the labeled training features and refit the best model on all of them.
train = pd.read_csv('outputs/features_train_labeled.csv')

# Every column except the identifier and the two label columns is a model input.
non_feature_cols = {'id', 'bug_type', 'species'}
feat_cols = [col for col in train.columns if col not in non_feature_cols]
X = train[feat_cols].values
y = train['bug_type'].astype(str).values

# Standardize the features; the fitted scaler is applied to the test features
# further down, so it is kept as its own object.
scaler = StandardScaler()
Xz = scaler.fit_transform(X)

# RBF-kernel SVM trained on the standardized features.
clf = SVC(kernel='rbf', probability=True)
clf.fit(Xz, y)

# Extract features for the test images (IDs 251-347), predict their bug type
# with the fitted scaler + classifier, and write the submission CSV.
TEST_ID_MIN, TEST_ID_MAX = 251, 347

pairs_test = scan_pairs(Path('data/images'), Path('data/masks'))
pairs_test = [t for t in pairs_test if TEST_ID_MIN <= t[0] <= TEST_ID_MAX]
if not pairs_test:
    # Without this guard the failure only shows up later as an opaque
    # KeyError: 'id' raised by sort_values on an empty DataFrame.
    raise ValueError(
        f"No image/mask pair with an ID in [{TEST_ID_MIN}, {TEST_ID_MAX}] "
        "was found under data/images and data/masks"
    )

rows = []
for _id, img_p, msk_p in pairs_test:
    img = load_image(img_p)
    msk = load_mask(msk_p)
    # One row per image: the result of each extractor is merged into the row
    # dict in turn (later extractors would overwrite clashing keys).
    f = {'id': _id}
    f |= masked_stats_rgb(img, msk)
    f |= shape_features(msk)
    f |= symmetry_features(msk)
    f |= texture_glcm(img, msk)
    f |= edge_density(img, msk)
    rows.append(f)

dfT = pd.DataFrame(rows).sort_values('id')

# The scaler and classifier were fitted on exactly feat_cols, in that order.
# Report the missing names explicitly instead of pandas' generic KeyError.
missing_cols = [col for col in feat_cols if col not in dfT.columns]
if missing_cols:
    raise KeyError(
        f"Test features are missing columns used for training: {missing_cols}"
    )

# Apply the training-time standardization before predicting.
Xtest = scaler.transform(dfT[feat_cols].values)
pred = clf.predict(Xtest)

# Submission format: one row per test ID with its predicted class.
sub = pd.DataFrame({'ID': dfT['id'].astype(int), 'bug type': pred})
sub.to_csv('outputs/submission.csv', index=False)
# Last expression of the notebook cell: displays the first rows as a sanity check.
sub.head()
