In [1]:
# Pinned environment for this notebook. `%pip` installs into the environment of the
# running kernel. Duplicate requirements were removed, and `datasets` / `tensorflow_hub`
# are pinned to the versions pip resolved in the recorded output of this cell.
# NOTE(review): the recorded output reports a typing-extensions conflict with ipython;
# consider a dedicated virtual environment for this project.
%pip install tensorflow==2.13.0 tensorflow-intel==2.13.0 transformers==4.52.4 mlflow==2.22.0 cloudpickle==3.1.1 jinja2==3.1.6 numpy==1.24.3 pandas==2.2.3 scikit-learn==1.6.1 pyyaml datasets==3.6.0 tensorflow_hub==0.16.1

Collecting tensorflow==2.13.0
  Using cached tensorflow-2.13.0-cp310-cp310-win_amd64.whl.metadata (2.6 kB)
Collecting tensorflow-intel==2.13.0
  Using cached tensorflow_intel-2.13.0-cp310-cp310-win_amd64.whl.metadata (4.1 kB)
Collecting transformers==4.52.4
  Using cached transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting mlflow==2.22.0
  Using cached mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting cloudpickle==3.1.1
  Using cached cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting numpy==1.24.3
  Using cached numpy-1.24.3-cp310-cp310-win_amd64.whl.metadata (5.6 kB)
Collecting pandas==2.2.3
  Using cached pandas-2.2.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn==1.6.1
  Using cached scikit_learn-1.6.1-cp310-cp310-win_amd64.whl.metadata (15 kB)
Collecting datasets
  Using cached datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting tensorflow_hub
  Using cached tensorflow_hub-0.16.1-py2.py3-none-any.whl.metadata (1.3 kB)

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython 8.30.0 requires typing-extensions>=4.6; python_version < "3.12", but you have typing-extensions 4.5.0 which is incompatible.


In [3]:
# Pinned to the version pip resolved in the recorded output of this cell.
%pip install lightgbm==4.6.0

Collecting lightgbm
  Using cached lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Using cached lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
Installing collected packages: lightgbm
Successfully installed lightgbm-4.6.0
Note: you may need to restart the kernel to use updated packages.


In [15]:
import os

import mlflow
import mlflow.lightgbm
import mlflow.sklearn
import mlflow.tensorflow
import yaml
from lightgbm import LGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, log_loss, roc_auc_score

from models.bert_model import build_and_train_bert
from models.lstm_model import train_lstm
from models.use_model import train_use
from utils import load_data_tfidf, load_data_use

# Load the YAML configuration file
def load_config(config_path="config.yml"):
    """Read the pipeline configuration from a YAML file.

    Args:
        config_path: Path of the YAML file. Defaults to ``"config.yml"``, which is
            resolved against the current working directory.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file content.
    """
    # Decode explicitly as UTF-8: without an explicit encoding, open() falls back to
    # the locale encoding, which garbles accented characters on non-UTF-8 locales.
    with open(config_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Generic training routine for scikit-learn-style classifiers
def train_model_sklearn(model, model_name, X_train, X_test, y_train, y_test):
    """Fit a classifier, evaluate it on the test split and record it in MLflow.

    All work happens inside a single MLflow run named ``model_name``. The run
    receives one parameter (``model``), four metrics (``accuracy``, ``f1_score``,
    ``log_loss``, ``roc_auc``) and the fitted estimator under the artifact path
    ``"model"``. A one-line summary is printed at the end.

    Args:
        model: Unfitted estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        model_name: Label used as the MLflow run name and in the printed summary.
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``predict`` / ``predict_proba``.
        y_train: Training labels.
        y_test: Test labels the predictions are scored against.

    Returns:
        None.
    """
    with mlflow.start_run(run_name=model_name):
        model.fit(X_train, y_train)

        # Predictions
        y_pred = model.predict(X_test)
        # Column 1 is used as the positive-class score — assumes a binary target;
        # TODO confirm against the data loaders.
        y_proba = model.predict_proba(X_test)[:, 1]

        # Metrics. Accuracy is derived from the predictions already computed above
        # rather than from model.score(), which would predict on X_test a second time.
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        logloss = log_loss(y_test, y_proba)
        roc_auc = roc_auc_score(y_test, y_proba)

        # Logging
        mlflow.log_param("model", model_name)
        for metric_name, metric_value in (
            ("accuracy", accuracy),
            ("f1_score", f1),
            ("log_loss", logloss),
            ("roc_auc", roc_auc),
        ):
            mlflow.log_metric(metric_name, metric_value)

        # Persist the fitted estimator with the run
        mlflow.sklearn.log_model(model, artifact_path="model")

        print(f"✅ {model_name} terminé avec précision={accuracy:.2f} | F1={f1:.2f} | AUC={roc_auc:.2f}")


# Main
def _first_grid_values(grid):
    """Pick one concrete value per hyperparameter from a grid-search section.

    Each entry of ``grid`` maps a hyperparameter name to its candidates. When the
    candidates are a list or tuple, the first one is used; any other value (a bare
    string or number written directly in the config) is used as-is instead of
    being indexed.
    """
    return {
        name: candidates[0] if isinstance(candidates, (list, tuple)) else candidates
        for name, candidates in grid.items()
    }


if __name__ == "__main__":
    config = load_config()

    # === 1. TF-IDF features for the classical models ===
    # The fifth value returned by load_data_tfidf() is not used here.
    X_train_tfidf, X_test_tfidf, y_train, y_test, _ = load_data_tfidf()

    # === 2. Random Forest ===
    rf_params = _first_grid_values(config["grid_search"]["random_forest"])
    rf_model = RandomForestClassifier(**rf_params)
    train_model_sklearn(rf_model, "RandomForest", X_train_tfidf, X_test_tfidf, y_train, y_test)

    # === 3. Logistic Regression ===
    logreg_params = _first_grid_values(config["grid_search"]["logistic_regression"])
    logreg_model = LogisticRegression(**logreg_params)
    train_model_sklearn(logreg_model, "LogisticRegression", X_train_tfidf, X_test_tfidf, y_train, y_test)

    # === 4. LightGBM ===
    lgbm_params = _first_grid_values(config["grid_search"]["lightgbm"])
    lgbm_model = LGBMClassifier(**lgbm_params)
    train_model_sklearn(lgbm_model, "LightGBM", X_train_tfidf, X_test_tfidf, y_train, y_test)

    # TODO: revisit the ordering of the steps below
    # === 5. Universal Sentence Encoder ===
    X_train_use, X_test_use, y_train_use, y_test_use = load_data_use()
    train_use(config["use"], X_train_use, X_test_use, y_train_use, y_test_use)

    # === 6. LSTM ===
    # NOTE(review): train_lstm() receives neither data nor config here — presumably it
    # loads its own; verify in models.lstm_model.
    train_lstm()


# === BERT training (currently disabled) ===

# print("📦 Modèle BERT")
# build_and_train_bert()





✅ RandomForest terminé avec précision=0.71 | F1=0.73 | AUC=0.79




✅ LogisticRegression terminé avec précision=0.71 | F1=0.70 | AUC=0.77
[LightGBM] [Info] Number of positive: 6405, number of negative: 6395
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.033470 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 31004
[LightGBM] [Info] Number of data points in the train set: 12800, number of used features: 817
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500391 -> initscore=0.001563
[LightGBM] [Info] Start training from score 0.001563




✅ LightGBM terminé avec précision=0.73 | F1=0.74 | AUC=0.81
🔄 Embedding avec Universal Sentence Encoder...




✅ USE + MLP terminé avec accuracy=0.74 | F1=0.74 | AUC=0.81
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5




INFO:tensorflow:Assets written to: C:\Users\Djamel\AppData\Local\Temp\tmprba3fqhe\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\Djamel\AppData\Local\Temp\tmprba3fqhe\model\data\model\assets


✅ Bidirectional LSTM terminé avec acc=0.72 | f1=0.73 | auc=0.78


In [None]:
# Scratch arithmetic expression (the cell has no execution count); it evaluates to 8
# and has no side effects. Candidate for removal before sharing the notebook.
4 + 4

## Azure MLflow

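Clé d'instrumentation : 6112851a-1746-4cbf-9136-a3c655f03b1b (⚠️ secret en clair : à déplacer dans une variable d'environnement et à régénérer avant tout partage de ce notebook)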

ID abonnement : b9f965a3-3799-47ab-8af7-5375b889c883 (⚠️ identifiant sensible : à ne pas publier avec ce notebook)

In [13]:
import os
from pathlib import Path

import mlflow

# Point MLflow at the local file store. An explicit MLFLOW_TRACKING_URI environment
# variable wins; otherwise the store is the `mlruns` folder of the current working
# directory, so no user-specific absolute path is baked into the notebook.
# NOTE(review): assumes the notebook is run from the project root (the folder that
# already holds `mlruns`) — confirm before relying on existing runs.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or (Path.cwd() / "mlruns").as_uri()
mlflow.set_tracking_uri(tracking_uri)

# Selects the "Default" experiment; as the recorded output of this cell shows, MLflow
# creates it when no experiment with that name exists in the store.
mlflow.set_experiment("Default")


2025/06/13 16:10:23 INFO mlflow.tracking.fluent: Experiment with name 'Default' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///C:/Users/Djamel/Desktop/Formation/module_7/mlflow_project/mlruns/763161070789444748', creation_time=1749823823034, experiment_id='763161070789444748', last_update_time=1749823823034, lifecycle_stage='active', name='Default', tags={}>