In [16]:
import pandas as pd
import numpy as np
from sklearn import svm
import tensorflow as tf
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

In [11]:
df = pd.read_parquet('datasets/seisme/datasets_seisme.parquet')
X = df.drop(columns=['catastrophe', 'date'])
y = df['catastrophe']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [12]:
df['catastrophe'].unique()

array(['aucun', 'seisme'], dtype=object)

In [13]:
# define and fit a linear SVM model
linear_svc_model = svm.SVC(
    kernel='linear',
    C=0.01,                  # valeur optimisée par GridSearchCV
    class_weight='balanced',
    probability=True,
    random_state=42
)
linear_svc_model.fit(X_train, y_train)

# evaluate
print(f"Train accuracy: {linear_svc_model.score(X_train, y_train):.3f}")
print(f"Test accuracy:  {linear_svc_model.score(X_test, y_test):.3f}")

# detailed classification report
y_pred_linear = linear_svc_model.predict(X_test)
print(classification_report(y_test, y_pred_linear))

Train accuracy: 0.755
Test accuracy:  0.755
              precision    recall  f1-score   support

       aucun       0.89      0.71      0.79       279
      seisme       0.60      0.83      0.70       145

    accuracy                           0.75       424
   macro avg       0.75      0.77      0.75       424
weighted avg       0.79      0.75      0.76       424



In [14]:
df = pd.read_parquet('datasets/inondation/datasets_inondation.parquet')
X = df.drop(columns=['catastrophe', 'date'])
y = df['catastrophe']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
# define and fit a linear SVM model
linear_svc_model = svm.SVC(
    kernel='linear',
    class_weight='balanced',
    probability=True,
    random_state=42
)
linear_svc_model.fit(X_train, y_train)

# evaluate
print(f"Train accuracy: {linear_svc_model.score(X_train, y_train):.3f}")
print(f"Test accuracy:  {linear_svc_model.score(X_test, y_test):.3f}")

# detailed classification report
y_pred_linear = linear_svc_model.predict(X_test)
print(classification_report(y_test, y_pred_linear))

Train accuracy: 0.659
Test accuracy:  0.659
              precision    recall  f1-score   support

       aucun       0.65      0.67      0.66       283
 innondation       0.67      0.64      0.66       289

    accuracy                           0.66       572
   macro avg       0.66      0.66      0.66       572
weighted avg       0.66      0.66      0.66       572



In [None]:
df = pd.read_csv('./dataset.csv')
X = df.drop(columns=['catastrophe', 'date', 'force_du_vecteur_de_vent_max'])
y = df['catastrophe']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# define and fit a linear SVM model
linear_svc_model = svm.SVC(
    kernel='linear',
    class_weight='balanced',
    C=0.01,                  # valeur optimisée par GridSearchCV
    probability=True,
    random_state=42
)
linear_svc_model.fit(X_train, y_train)

# evaluate
print(f"Train accuracy: {linear_svc_model.score(X_train, y_train):.3f}")
print(f"Test accuracy:  {linear_svc_model.score(X_test, y_test):.3f}")

# detailed classification report
y_pred_linear = linear_svc_model.predict(X_test)
print(classification_report(y_test, y_pred_linear))


In [35]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import accuracy_score, precision_score, recall_score
import joblib

# 1. Charger le dataset
df = pd.read_csv('./dataset.csv')

# 2. Encoder la cible en 4 classes multi-classe
mapping = {
    'aucun': 0,
    "['innondation']": 1,
    "['seisme']": 2,
    "['innondation', 'seisme']": 3
}
df['cat_code'] = df['catastrophe'].map(mapping)

y = df['cat_code']
X = df.drop(columns=['catastrophe', 'timestamp', 'date', 'cat_code','force_du_vecteur_de_vent_max','temperature','humidite','force_moyenne_du_vecteur_de_vent'])

# 3. Séparation des features catégorielles et numériques
cat_feats = ['quartier']
num_feats = [c for c in X.columns if c not in cat_feats]

# Préprocesseur
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), cat_feats),
    ('num', 'passthrough', num_feats)
])

# 4. Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# 5. Prétraitement
X_train_proc = preprocessor.fit_transform(X_train)
X_test_proc = preprocessor.transform(X_test)

# 6. XGBoost
xgb_clf = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=4,
    n_estimators=250,
    learning_rate=0.001,
    use_label_encoder=False,
    eval_metric='mlogloss',
    verbosity=1,
    random_state=42
)
print("=== Training XGBoost ===")
xgb_clf.fit(
    X_train_proc,
    y_train,
    eval_set=[(X_test_proc, y_test)],
)

# 7. LightGBM
lgb_clf = lgb.LGBMClassifier(
    objective='multiclass',
    num_class=4,
    n_estimators=250,
    learning_rate=0.001,
    random_state=42,
    verbosity=1,
)
print("=== Training LightGBM ===")
lgb_clf.fit(
    X_train_proc,
    y_train,
    eval_set=[(X_test_proc, y_test)],
    eval_metric='multi_logloss',
)

# 8. Sauvegarde des modèles et du préprocesseur
joblib.dump(preprocessor, 'preprocessor.pkl')
xgb_clf.save_model('xgb_model.json')
lgb_clf.booster_.save_model('lgbm_model.txt')
joblib.dump(lgb_clf, 'lgbm_pipeline.pkl')
print("\nSaved: preprocessor.pkl, xgb_model.json, lgbm_model.txt, lgbm_pipeline.pkl")

=== Training XGBoost ===
[0]	validation_0-mlogloss:1.38520
[1]	validation_0-mlogloss:1.38410
[2]	validation_0-mlogloss:1.38300
[3]	validation_0-mlogloss:1.38191
[4]	validation_0-mlogloss:1.38081
[5]	validation_0-mlogloss:1.37972
[6]	validation_0-mlogloss:1.37863
[7]	validation_0-mlogloss:1.37755
[8]	validation_0-mlogloss:1.37646
[9]	validation_0-mlogloss:1.37538
[10]	validation_0-mlogloss:1.37430
[11]	validation_0-mlogloss:1.37322
[12]	validation_0-mlogloss:1.37215
[13]	validation_0-mlogloss:1.37107
[14]	validation_0-mlogloss:1.37000
[15]	validation_0-mlogloss:1.36893
[16]	validation_0-mlogloss:1.36786
[17]	validation_0-mlogloss:1.36680
[18]	validation_0-mlogloss:1.36573
[19]	validation_0-mlogloss:1.36467
[20]	validation_0-mlogloss:1.36361
[21]	validation_0-mlogloss:1.36255


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[22]	validation_0-mlogloss:1.36149
[23]	validation_0-mlogloss:1.36044
[24]	validation_0-mlogloss:1.35938
[25]	validation_0-mlogloss:1.35833
[26]	validation_0-mlogloss:1.35728
[27]	validation_0-mlogloss:1.35624
[28]	validation_0-mlogloss:1.35519
[29]	validation_0-mlogloss:1.35414
[30]	validation_0-mlogloss:1.35310
[31]	validation_0-mlogloss:1.35206
[32]	validation_0-mlogloss:1.35103
[33]	validation_0-mlogloss:1.34999
[34]	validation_0-mlogloss:1.34896
[35]	validation_0-mlogloss:1.34793
[36]	validation_0-mlogloss:1.34690
[37]	validation_0-mlogloss:1.34587
[38]	validation_0-mlogloss:1.34484
[39]	validation_0-mlogloss:1.34382
[40]	validation_0-mlogloss:1.34280
[41]	validation_0-mlogloss:1.34178
[42]	validation_0-mlogloss:1.34076
[43]	validation_0-mlogloss:1.33975
[44]	validation_0-mlogloss:1.33873
[45]	validation_0-mlogloss:1.33772
[46]	validation_0-mlogloss:1.33671
[47]	validation_0-mlogloss:1.33570
[48]	validation_0-mlogloss:1.33469
[49]	validation_0-mlogloss:1.33368
[50]	validation_0-ml

In [36]:
# 9. Évaluation des performances
print("\n=== Évaluation XGBoost ===")
y_pred_xgb = xgb_clf.predict(X_test_proc)
print(f"Accuracy: {accuracy_score(y_test, y_pred_xgb):.4f}")
print(f"Precision (macro): {precision_score(y_test, y_pred_xgb, average='macro'):.4f}")
print(f"Recall (macro): {recall_score(y_test, y_pred_xgb, average='macro'):.4f}")

print("\n=== Évaluation LightGBM ===")
y_pred_lgb = lgb_clf.predict(X_test_proc)
print(f"Accuracy: {accuracy_score(y_test, y_pred_lgb):.4f}")
print(f"Precision (macro): {precision_score(y_test, y_pred_lgb, average='macro'):.4f}")
print(f"Recall (macro): {recall_score(y_test, y_pred_lgb, average='macro'):.4f}")


=== Évaluation XGBoost ===
Accuracy: 0.7629
Precision (macro): 0.7400
Recall (macro): 0.8279

=== Évaluation LightGBM ===
Accuracy: 0.6495
Precision (macro): 0.3278
Recall (macro): 0.4187


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
