In [17]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report
import mlflow
import mlflow.sklearn
import numpy as np

In [18]:
# Charger les données
dt = pd.read_csv("./dataCleaned.csv")

train = dt[(dt["season"] < 2022) & (dt["season"] >= 2015)]
test = dt[dt["season"] >= 2022]

x_train = train.drop(["Result_code", "Unnamed: 0"], axis=1)
y_train = train["Result_code"]
x_test = test.drop(["Result_code", "Unnamed: 0"], axis=1)
y_test = test["Result_code"]

In [19]:
# Entraînement et suivi de RandomForestClassifier avec MLflow
with mlflow.start_run():
    rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)
    rf.fit(x_train, y_train)
    predict_rf = rf.predict(x_test)
    acc_rf = accuracy_score(y_test, predict_rf)
    print(classification_report(y_test, predict_rf))

    mlflow.log_param("n_estimators", 50)
    mlflow.log_param("min_samples_split", 10)
    mlflow.log_metric("accuracy", acc_rf)
    # Enregistrez d'autres métriques ou paramètres que vous souhaitez suivre



              precision    recall  f1-score   support

           0       0.20      0.05      0.07       174
           1       0.48      0.58      0.53       293
           2       0.48      0.60      0.53       293

    accuracy                           0.47       760
   macro avg       0.39      0.41      0.38       760
weighted avg       0.42      0.47      0.43       760



In [20]:
# Entraînement et suivi de SVM avec MLflow
with mlflow.start_run():
    svm = SVC(kernel='linear', C=1.0)
    svm.fit(x_train, y_train)
    pred_svm = svm.predict(x_test)
    acc_svm = accuracy_score(y_test, pred_svm)
    print(classification_report(y_test, pred_svm))

    mlflow.log_metric("accuracy", acc_svm)
    # Enregistrez d'autres métriques ou paramètres que vous souhaitez suivre

  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00       174
           1       0.48      0.63      0.55       293
           2       0.48      0.63      0.55       293

    accuracy                           0.48       760
   macro avg       0.32      0.42      0.36       760
weighted avg       0.37      0.48      0.42       760



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [21]:


# Entraînement et suivi de XGBoost avec MLflow
with mlflow.start_run():
    model = XGBClassifier()
    param_grid = {
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 4, 5],
        'n_estimators': [50, 100, 200]
    }
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=3, verbose=1)
    grid_search.fit(x_train, y_train)
    best_model = grid_search.best_estimator_
    test_predictions = best_model.predict(x_test)
    print(classification_report(y_test, test_predictions))

    mlflow.log_params(grid_search.best_params_)
    # Enregistrez d'autres métriques ou paramètres que vous souhaitez suivre

# Entraînement et suivi de LightGBM avec MLflow






Fitting 3 folds for each of 27 candidates, totalling 81 fits


2024/01/01 21:47:43 INFO mlflow.sklearn.utils: Logging the 5 best runs, 22 runs will be omitted.


              precision    recall  f1-score   support

           0       0.18      0.01      0.02       174
           1       0.50      0.63      0.56       293
           2       0.49      0.64      0.56       293

    accuracy                           0.49       760
   macro avg       0.39      0.43      0.38       760
weighted avg       0.43      0.49      0.44       760



In [22]:
with mlflow.start_run():
    mlflow.autolog()
    train_data = lgb.Dataset(x_train, label=y_train)
    test_data = lgb.Dataset(x_test, label=y_test, reference=train_data)
    params = {
        'objective': 'multiclass',
        'num_class': 3,
        'metric': 'multi_logloss',
        'boosting_type': 'gbdt',
        'num_leaves': 31,
        'learning_rate': 0.05,
        'feature_fraction': 0.9
    }
    num_round = 100
    model = lgb.train(params, train_data, num_round, valid_sets=[test_data])
    y_pred_prob = model.predict(x_test, num_iteration=model.best_iteration)
    predGBM = np.argmax(y_pred_prob, axis=1)

    accuracy = accuracy_score(y_test, predGBM)
    print(classification_report(y_test, predGBM))

    mlflow.log_param("num_leaves", 31)
    mlflow.log_param("learning_rate", 0.05)
    mlflow.log_metric("accuracy", accuracy)
    # Enregistrez d'autres métriques ou paramètres que vous souhaitez suivre

2024/01/01 21:48:02 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/01/01 21:48:02 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2024/01/01 21:48:02 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000533 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 167
[LightGBM] [Info] Number of data points in the train set: 5320, number of used features: 8
[LightGBM] [Info] Start training from score -1.449931
[LightGBM] [Info] Start training from score -0.960486
[LightGBM] [Info] Start training from score -0.960486




              precision    recall  f1-score   support

           0       0.28      0.10      0.14       174
           1       0.50      0.61      0.55       293
           2       0.50      0.59      0.54       293

    accuracy                           0.49       760
   macro avg       0.43      0.43      0.41       760
weighted avg       0.45      0.49      0.46       760



In [None]:
with mlflow.start_run():
    mlflow.autolog()
    model = CatBoostClassifier(iterations=100, depth=6, learning_rate=0.1, loss_function='MultiClass')
    model.fit(x_train, y_train.astype(str), verbose=10)
    predCat = model.predict(x_test)

    accuracy = accuracy_score(y_test.astype(str), predCat)
    print(classification_report(y_test.astype(str), predCat))

    mlflow.log_metric("accuracy", accuracy)
    # Enregistrez d'autres métriques ou paramètres que vous souhaitez suivre