In [1]:
# 📁 notebooks/2_feature_engineering.ipynb

import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Feature-engineering step: fit a ColumnTransformer on the cleaned dataset,
# write the transformed feature matrix to CSV and persist the fitted
# preprocessor with joblib.

# Load cleaned data
df = pd.read_csv('../data/cleaned_data.csv')

# Encode target as 1 ('oui') / 0 ('non').
# NOTE: Series.map leaves NaN for any label that is not a key of the mapping.
df['mortalite'] = df['mortalite'].map({'oui': 1, 'non': 0})

# Define feature columns
categorical_cols = [
    'sexe',
    'type_sanguin',
    'maladie',
    'id_service',
    'medecin_traitant',
    'personnel',
    'id_medicament',
    'TrancheAge',
]
numerical_cols = ['age', 'readmission', 'numIntervention', 'duree_sejour']

# Define preprocessor: standardize numeric columns, one-hot encode categorical
# ones. handle_unknown='ignore' makes categories unseen during fit encode as
# all-zero rows at transform time instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
    ]
)

# Apply preprocessor
X = df[numerical_cols + categorical_cols]
X_transformed = preprocessor.fit_transform(X)

# ColumnTransformer only returns a sparse matrix when the stacked output's
# density is below its sparse_threshold; otherwise the result is already a
# dense ndarray, which has no .toarray(). Densify only when needed.
if hasattr(X_transformed, 'toarray'):
    X_dense = X_transformed.toarray()
else:
    X_dense = X_transformed

# Save transformed features
pd.DataFrame(X_dense).to_csv('../data/features.csv', index=False)

# Save preprocessor (create the target directory first so a fresh checkout
# does not fail with FileNotFoundError)
os.makedirs('../data/model', exist_ok=True)
joblib.dump(preprocessor, '../data/model/feature_encoder.pkl')

print("✅ Preprocessor saved to ../data/model/feature_encoder.pkl")
print("Expected feature columns:", numerical_cols + categorical_cols)

✅ Preprocessor saved to ../data/model/feature_encoder.pkl
Expected feature columns: ['age', 'readmission', 'numIntervention', 'duree_sejour', 'sexe', 'type_sanguin', 'maladie', 'id_service', 'medecin_traitant', 'personnel', 'id_medicament', 'TrancheAge']
