In [1]:
pip install tensorflow==2.13.0 tensorflow-intel==2.13.0 transformers==4.52.4 mlflow==2.22.0 cloudpickle==3.1.1 jinja2==3.1.6 numpy==1.24.3 pandas==2.2.3 scikit-learn==1.6.1 pyyaml transformers datasets tensorflow==2.13.0 tensorflow_hub

Collecting tensorflow==2.13.0
  Using cached tensorflow-2.13.0-cp310-cp310-win_amd64.whl.metadata (2.6 kB)
Collecting tensorflow-intel==2.13.0
  Using cached tensorflow_intel-2.13.0-cp310-cp310-win_amd64.whl.metadata (4.1 kB)
Collecting transformers==4.52.4
  Using cached transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting mlflow==2.22.0
  Using cached mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting cloudpickle==3.1.1
  Using cached cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting numpy==1.24.3
  Using cached numpy-1.24.3-cp310-cp310-win_amd64.whl.metadata (5.6 kB)
Collecting pandas==2.2.3
  Using cached pandas-2.2.3-cp310-cp310-win_amd64.whl.metadata (19 kB)
Collecting scikit-learn==1.6.1
  Using cached scikit_learn-1.6.1-cp310-cp310-win_amd64.whl.metadata (15 kB)
Collecting datasets
  Using cached datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting tensorflow_hub
  Using cached tensorflow_hub-0.16.1-py2.py3-none-any.whl.metadata (1.3 kB)

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
ipython 8.30.0 requires typing-extensions>=4.6; python_version < "3.12", but you have typing-extensions 4.5.0 which is incompatible.


In [1]:
pip install streamlit

Collecting streamlitNote: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.13.0 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.14.0 which is incompatible.



  Downloading streamlit-1.46.0-py3-none-any.whl.metadata (9.0 kB)
Collecting altair<6,>=4.0 (from streamlit)
  Downloading altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Downloading tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-win_amd64.whl.metadata (44 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting narwhals>=1.14.2 (from altair<6,>=4.0->streamlit)
  Downloading narwhals-1.44.0-py3-none-any.whl.metadata (11 kB)
Collecting typing-extensions<5,>=4.4.0 (from streamlit)
  Using cached typing_extensions-4.14.0-py3-none-any.whl.metadata (3.0 kB)
Downloading streamlit-1.46.0-py3-none-any.whl (10.1 MB)
   ---------------------------------------- 0.0/10.1 MB ? eta -:--:

In [3]:
import mlflow
import mlflow.sklearn
import mlflow.lightgbm
import mlflow.tensorflow
import yaml
import os

from utils import load_data_tfidf, load_data_use
from models.use_model import train_use
from models.lstm_model import train_lstm
from models.bert_model import build_and_train_bert


from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier

from sklearn.metrics import f1_score, log_loss, roc_auc_score

# Load the YAML configuration file
def load_config(config_path="config.yml"):
    """Read a YAML configuration file and return its parsed content.

    Args:
        config_path: Path of the YAML file to read. Defaults to "config.yml",
            resolved against the current working directory.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file content.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # Explicit UTF-8 so the result does not depend on the platform's locale encoding.
    with open(config_path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)

# Generic training routine for scikit-learn style models
def train_model_sklearn(model, model_name, X_train, X_test, y_train, y_test):
    """Fit a classifier inside an MLflow run and log its test metrics and the model.

    Args:
        model: Estimator exposing ``fit``, ``predict``, ``predict_proba`` and ``score``.
        model_name: Used as the MLflow run name, as the "model" parameter and in
            the printed summary.
        X_train, y_train: Data passed to ``model.fit``.
        X_test, y_test: Data used for every metric.

    Returns:
        None. Results are logged to MLflow and a summary line is printed.
    """
    with mlflow.start_run(run_name=model_name):
        model.fit(X_train, y_train)

        # Hard labels, plus column 1 of the probability matrix
        # (presumably the positive class of a binary problem — verify against the data).
        predicted_labels = model.predict(X_test)
        positive_scores = model.predict_proba(X_test)[:, 1]

        # Evaluated in this order; the dict keys are the MLflow metric names.
        metrics = {
            "accuracy": model.score(X_test, y_test),
            "f1_score": f1_score(y_test, predicted_labels),
            "log_loss": log_loss(y_test, positive_scores),
            "roc_auc": roc_auc_score(y_test, positive_scores),
        }

        mlflow.log_param("model", model_name)
        for metric_name, metric_value in metrics.items():
            mlflow.log_metric(metric_name, metric_value)

        # Store the fitted estimator under the "model" artifact path of the run.
        mlflow.sklearn.log_model(model, artifact_path="model")

        summary = "✅ {} terminé avec précision={:.2f} | F1={:.2f} | AUC={:.2f}".format(
            model_name, metrics["accuracy"], metrics["f1_score"], metrics["roc_auc"]
        )
        print(summary)


# Entry point: train and log every model, one after the other
if __name__ == "__main__":
    config = load_config()

    # --- 1. TF-IDF data shared by the classical models ---
    X_train_tfidf, X_test_tfidf, y_train, y_test, _ = load_data_tfidf()
    tfidf_split = (X_train_tfidf, X_test_tfidf, y_train, y_test)
    grid = config["grid_search"]

    # Each grid entry is indexed with [0]: only the first candidate value of
    # every hyperparameter is used (no actual grid search happens here).

    # --- 2. Random Forest ---
    rf_params = {name: candidates[0] for name, candidates in grid["random_forest"].items()}
    rf_model = RandomForestClassifier(**rf_params)
    train_model_sklearn(rf_model, "RandomForest", *tfidf_split)

    # --- 3. Logistic Regression ---
    logreg_params = {name: candidates[0] for name, candidates in grid["logistic_regression"].items()}
    logreg_model = LogisticRegression(**logreg_params)
    train_model_sklearn(logreg_model, "LogisticRegression", *tfidf_split)

    # --- 4. LightGBM ---
    lgbm_params = {name: candidates[0] for name, candidates in grid["lightgbm"].items()}
    lgbm_model = LGBMClassifier(**lgbm_params)
    train_model_sklearn(lgbm_model, "LightGBM", *tfidf_split)

    # TODO: revisit the ordering of the steps below
    # --- 5. Universal Sentence Encoder ---
    X_train_use, X_test_use, y_train_use, y_test_use = load_data_use()
    train_use(config["use"], X_train_use, X_test_use, y_train_use, y_test_use)

    # --- LSTM ---
    train_lstm()

    # --- BERT training ---
    print("📦 Modèle BERT")
    build_and_train_bert()



✅ RandomForest terminé avec précision=0.71 | F1=0.73 | AUC=0.79




✅ LogisticRegression terminé avec précision=0.71 | F1=0.70 | AUC=0.77




✅ LightGBM terminé avec précision=0.73 | F1=0.74 | AUC=0.81
🔄 Embedding avec Universal Sentence Encoder...




✅ USE + MLP terminé avec accuracy=0.74 | F1=0.75 | AUC=0.82
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: C:\Users\Djamel\AppData\Local\Temp\tmpni2cghbj\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\Djamel\AppData\Local\Temp\tmpni2cghbj\model\data\model\assets


✅ Bidirectional LSTM terminé avec acc=0.70 | f1=0.69 | auc=0.76
📦 Modèle BERT


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.


📥 Chargement des données BERT...


You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.


🧠 Initialisation du modèle BERT...


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForSequenceClassification: ['distilbert.embeddings.LayerNorm.weight', 'distilbert.transformer.layer.0.attention.q_lin.bias', 'distilbert.transformer.layer.0.attention.k_lin.bias', 'distilbert.transformer.layer.1.output_layer_norm.weight', 'distilbert.transformer.layer.2.attention.out_lin.bias', 'distilbert.transformer.layer.3.attention.k_lin.weight', 'distilbert.transformer.layer.5.attention.out_lin.bias', 'distilbert.transformer.layer.4.attention.k_lin.weight', 'distilbert.transformer.layer.0.sa_layer_norm.bias', 'distilbert.transformer.layer.1.attention.q_lin.weight', 'distilbert.transformer.layer.4.output_layer_norm.weight', 'distilbert.transformer.layer.3.sa_layer_norm.weight', 'distilbert.transformer.layer.1.attention.v_lin.bias', 'distilbert.transformer.layer.0.ffn.lin2.bias', 'distilbert.transformer.layer.2.attention.q_lin.weight', 'distilbert.transformer.layer.0.attention.out_lin.weight', '

🚀 Entraînement en cours...
Epoch 1/4
 62/400 [===>..........................] - ETA: 6:41:22 - loss: 1.2697 - accuracy: 0.4995

KeyboardInterrupt: 

In [11]:
# NOTE(review): looks like a leftover scratch cell (kernel sanity check?) — consider removing.
4+4

8

############## Azure mlflow 

Clé d'instrumentation : <SUPPRIMÉE — ne pas conserver cette clé dans le notebook ; la charger depuis une variable d'environnement>

ID abonnement : b9f965a3-3799-47ab-8af7-5375b889c883

In [None]:
Nom d'affichage : github-actions-deploy
ID d'application (client): dd109911-60b0-4dab-b040-9da107c2ad7b

ID de l'objet:e5abacb0-80d7-48dd-a0d0-5da5fc1f8b58
ID de l'annuaire (locataire):4bd8e696-75cb-4e5b-abd8-f703a2f73deb


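Valeur : <SUPPRIMÉE — secret client retiré du notebook ; à révoquer et régénérer dans Azure, puis à charger depuis une variable d'environnement ou un gestionnaire de secrets>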

Id secret : e75cac11-2684-4721-aac4-d3bc483bf3aa


In [7]:
import os
from pathlib import Path

import mlflow

# Resolve the tracking store without a machine-specific absolute path:
# use MLFLOW_TRACKING_URI when it is set, otherwise ./mlruns under the
# current working directory.
# NOTE(review): this assumes the notebook is launched from the project folder
# that contains mlruns/ — confirm before relying on it.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or (Path.cwd() / "mlruns").as_uri()
mlflow.set_tracking_uri(tracking_uri)

# Make "Default" the active experiment (it is created first if it does not exist).
mlflow.set_experiment("Default")


<Experiment: artifact_location='file:///C:/Users/Djamel/Desktop/Formation/module_7/mlflow_project/mlruns/0', creation_time=1750628584361, experiment_id='0', last_update_time=1750628584361, lifecycle_stage='active', name='Default', tags={}>