# Imports 

In [10]:
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import TextVectorization, Embedding, SimpleRNN, Dense, LSTM, Flatten
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

from sklearn.model_selection import train_test_split
import multiprocessing
from mlflow import MlflowClient
import mlflow
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from Source.preprocess_data import *  ## import all functions from preprocess_data.py
from Source.postprocess_data import * ## import all functions from postprocess_data.py
from Source.utils import *  ## import all functions from utils.py

# import nltk
# import optuna
# 🤗
from transformers import TFAutoModel, AutoTokenizer



from sklearn.metrics import confusion_matrix 
# from nltk.corpus import stopwords  
from tqdm import tqdm
# Register tqdm's pandas integration.
tqdm.pandas()

# Process-wide environment flags: expose only GPU 0, and set TF_KERAS=1.
os.environ.update({'CUDA_VISIBLE_DEVICES': "0", "TF_KERAS": '1'})

# Number of logical CPUs reported by the OS.
nw = multiprocessing.cpu_count()

# Low-level MLflow client bound to the local tracking server.
client = MlflowClient(tracking_uri="http://localhost:8080")

# Report the TensorFlow version and the GPUs it can see (queried once).
gpu_devices = tf.config.list_physical_devices('GPU')
print(tf.__version__)
print("Num GPUs Available: ", len(gpu_devices))
print("GPUs disponibles :", gpu_devices)
print("Version TF :", tf.__version__)

2.10.1
Num GPUs Available:  1
GPUs disponibles : [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Version TF : 2.10.1


# Préparation data

## Importation dataset

In [11]:
# Download the Sentiment140 archive (headerless CSV inside a zip, cp1252-encoded).
df = pd.read_csv('https://s3-eu-west-1.amazonaws.com/static.oc-static.com/prod/courses/files/AI+Engineer/Project+7%C2%A0-+D%C3%A9tectez+les+Bad+Buzz+gr%C3%A2ce+au+Deep+Learning/sentiment140.zip',
                header=None,
                compression='zip',
                encoding='cp1252')

df.columns = ['target', 'ids', 'date', 'flag', 'user', 'text']

# Keep a stratified 1% sample (the other 99% is discarded) so class balance is preserved.
sample_df, _ = train_test_split(df, test_size=0.99, random_state=42, stratify=df['target'])
sample_df = sample_df.reset_index(drop=True)
print(f"Sample size: {sample_df.shape[0]} rows")
# Keep only the 'target' and 'text' columns; the explicit copy makes it
# unambiguous that the assignment below writes to an independent frame.
sample_df = sample_df[['target', 'text']].copy()
# Binarise the label: 0 stays 0, every other value becomes 1 (vectorised).
sample_df["target"] = sample_df["target"].ne(0).astype("int64")
# Make sure the output directory exists so a fresh checkout can run this cell.
os.makedirs('Data', exist_ok=True)
sample_df.to_csv('Data/raw_data.csv', index=False)

Sample size: 16000 rows


## Train/Validation split

In [12]:
# Features are the raw tweet texts, labels the binarised sentiment column.
X_raw, y = sample_df['text'], sample_df['target']

# Stratified 80/20 split with a fixed seed so the validation set is reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_raw,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)


# Fonction centrale 

In [17]:
# Hyper-parameters read as module-level globals by test_bert_model:
#   rnn_size - number of units of the dense classification head
#   epochs   - maximum number of training epochs
#   lr       - initial Adam learning rate
rnn_size, epochs, lr = 128, 100, 1e-3


def test_bert_model(bert_model_name):
    """Train and evaluate a dense head on top of a frozen Hugging Face encoder.

    One MLflow run is opened per call. The run records the hyper-parameters,
    the validation metrics returned by ``postprocess_model_output``, a
    confusion-matrix figure, the training-history figure and the Keras model.

    The function reads the module-level globals ``X_train``, ``X_val``,
    ``y_train``, ``y_val``, ``rnn_size``, ``epochs`` and ``lr``.

    Args:
        bert_model_name: Hugging Face Hub identifier of the checkpoint to load
            (used for both the tokenizer and the encoder).

    Returns:
        None. Results are logged to MLflow; the best weights are written to
        ``./Models`` and two PNG files are written to the working directory.
    """
    with mlflow.start_run():
        mlflow.log_params(params={
            'rnn_size': rnn_size,
            'epochs': epochs,
            'learning_rate': lr,
            'bert_model_name': bert_model_name
        })

        # Tokenise both splits; sequences are truncated to 64 tokens and padded
        # to the longest sequence of each split.
        tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        encodings_train = tokenizer(X_train.to_list(),
                                    truncation=True,
                                    padding=True,
                                    max_length=64,
                                    return_tensors="tf")
        encodings_val = tokenizer(X_val.to_list(),
                                  truncation=True,
                                  padding=True,
                                  max_length=64,
                                  return_tensors="tf")

        # The datasets are batched here: this is the only place the batch size is set.
        dataset_train = tf.data.Dataset.from_tensor_slices(
            (
                {"input_ids": encodings_train["input_ids"],
                 "attention_mask": encodings_train["attention_mask"]
                 }, y_train
            )
        ).batch(32)

        dataset_val = tf.data.Dataset.from_tensor_slices(
            (
                {"input_ids": encodings_val["input_ids"],
                 "attention_mask": encodings_val["attention_mask"]
                 }, y_val
            )
        ).batch(32)

        # Load the pre-trained encoder and freeze it: no fine-tuning, the
        # available resources do not allow it.
        base_model = TFAutoModel.from_pretrained(bert_model_name, from_pt=True)
        base_model.trainable = False

        # Keras model: one input for the token ids, one for the attention mask.
        input_ids = tf.keras.Input(shape=(None,), dtype=tf.int32, name="input_ids")
        attention_mask = tf.keras.Input(shape=(None,), dtype=tf.int32, name="attention_mask")

        # training=False keeps the frozen encoder in inference mode (dropout off)
        # during fit(), so the extracted features are deterministic.
        outputs = base_model(input_ids, attention_mask=attention_mask, training=False)
        token_embeddings = outputs.last_hidden_state  # [batch, seq_len, hidden_size]

        # Sentence vector = first token ([CLS]) + max pooling + mean pooling.
        cls_token = token_embeddings[:, 0, :]

        # Pool over real tokens only: padded positions still produce embeddings
        # and would otherwise bias both the mean and the max.
        mask = tf.cast(tf.expand_dims(attention_mask, axis=-1), token_embeddings.dtype)  # [batch, seq_len, 1]
        max_tokens = tf.reduce_max(token_embeddings + (1.0 - mask) * -1e9, axis=1)
        token_count = tf.maximum(tf.reduce_sum(mask, axis=1), 1.0)
        mean_tokens = tf.reduce_sum(token_embeddings * mask, axis=1) / token_count

        all_tokens = tf.concat([cls_token, max_tokens, mean_tokens], axis=-1)  # [batch, hidden_size*3]

        x = tf.keras.layers.Dense(rnn_size, activation="relu",
                                  kernel_regularizer=regularizers.L2(1e-4),
                                  bias_regularizer=regularizers.L2(1e-4),
                                  )(all_tokens)
        x = tf.keras.layers.Dropout(0.5)(x)
        # Sigmoid output: this is a probability, not a logit.
        probabilities = tf.keras.layers.Dense(1, activation="sigmoid")(x)

        model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=probabilities)

        ## Callbacks
        model_savepath = f"./Models/MY_{'_'.join(bert_model_name.split('/'))}_dense{rnn_size}.h5"
        # The checkpoint callback does not need to create the folder itself.
        os.makedirs(os.path.dirname(model_savepath), exist_ok=True)
        checkpoint = ModelCheckpoint(model_savepath, monitor='val_accuracy', verbose=0, save_best_only=True, save_weights_only=True, mode='max')
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=20)
        lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=0, min_lr=1e-5)
        callbacks_list = [checkpoint, es, lr_scheduler]

        optimizer = Adam(learning_rate=lr)
        model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
        model.summary()

        # No batch_size here: the tf.data pipelines above are already batched.
        with tf.device("/GPU:0"):
            history = model.fit(dataset_train, epochs=epochs, validation_data=dataset_val, callbacks=callbacks_list, verbose=1)

        # Restore the best checkpoint before evaluating.
        model.load_weights(model_savepath)

        # Predictions on the validation set.
        y_pred_proba = model.predict(dataset_val)
        y_pred = (y_pred_proba > 0.5)

        output_dict = postprocess_model_output(y_val, y_pred, y_pred_proba)  # see postprocess_data.py

        # Log the metrics to MLflow.
        mlflow.log_metrics(output_dict)

        # Confusion matrix, normalised over the predicted labels (columns).
        cm = confusion_matrix(y_val, y_pred, normalize='pred')
        fig, ax = plt.subplots()
        sns.heatmap(cm, annot=True, fmt=".2f", cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
        ax.set_title("Confusion Matrix - Validation Set")
        fig.savefig("confusion_matrix.png")
        plt.close(fig)
        mlflow.log_artifact("confusion_matrix.png")

        # Training curves.
        fig2 = plot_training_history(history, show=False)
        fig2.savefig("learning_path.png")
        plt.close(fig2)
        mlflow.log_artifact("learning_path.png")

        # Store the trained model in MLflow.
        mlflow.tensorflow.log_model(model, "model")
    




# Experiment MLFlow

In [18]:
# Select (or create) the MLflow experiment that groups the BERT comparison runs.
print("Setting up MLflow experiment...")
experiment = mlflow.set_experiment("BERT_models_experiment")
exp_id = experiment.experiment_id

experiment_description = (
    "Comparaison de plusieurs modèles BERT"
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "Sentiment analysis modelling",
    "model_type": "BERT_pretrained",
    "team": "Ph. Constant",
    "project_quarter": "Q3-2025",
    "mlflow.note.content": experiment_description,
}

# The experiment id was resolved by the fluent API, so the tags must be written
# to the same tracking store; a client bound to another URI could target a
# different (or missing) experiment with that id.
tag_client = MlflowClient(tracking_uri=mlflow.get_tracking_uri())
for key, value in experiment_tags.items():
    tag_client.set_experiment_tag(exp_id, key, value)

Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 367, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 465, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 1635, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 1628, in _read_helper
    result = read_yaml(root, file_name)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\utils\yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' d

Setting up MLflow experiment...


In [19]:
# Hugging Face checkpoints benchmarked by the experiment loop, in run order.
_GENERIC_CHECKPOINTS = (
    "prajjwal1/bert-tiny",
    "prajjwal1/bert-small",
    "distilbert-base-uncased",
    "roberta-base",
    "distilroberta-base",
)
_BERTWEET_CHECKPOINTS = (
    "vinai/bertweet-base",
    "finiteautomata/bertweet-base-sentiment-analysis",
)
bert_model_list = [*_GENERIC_CHECKPOINTS, *_BERTWEET_CHECKPOINTS]

# Lancement experiment

In [20]:
# One MLflow run per checkpoint, executed sequentially.
for model_name in bert_model_list:
    print("Running test with BERT model : {}".format(model_name))
    test_bert_model(bert_model_name=model_name)

Running test with BERT model : prajjwal1/bert-tiny


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'bert.embeddings.position_ids', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the 

Model: "model_5"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_bert_model_4 (TFBertModel)  TFBaseModelOutputWi  4385920     ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, None                                         



INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmphf5yol8r\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmphf5yol8r\model\data\model\assets


Running test with BERT model : prajjwal1/bert-small


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.decoder.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'bert.embeddings.position_ids', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the 

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_bert_model_5 (TFBertModel)  TFBaseModelOutputWi  28763648    ['input_ids[0][0]',              
                                thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, None                                         



INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmp1ppckiv_\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmp1ppckiv_\model\data\model\assets


Running test with BERT model : distilbert-base-uncased


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDistilBertModel: ['vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing TFDistilBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_distil_bert_model_1 (TFDist  TFBaseModelOutput(l  66362880   ['input_ids[0][0]',              
 ilBertModel)                   ast_hidden_state=(N               'attention_mask[0][0]']         
                                one, None, 768),                                                  
                                 hidden_states=None                                         



























INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmpyg4wlsik\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmpyg4wlsik\model\data\model\assets


Running test with BERT model : roberta-base


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


Model: "model_8"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_roberta_model (TFRobertaMod  TFBaseModelOutputWi  124645632  ['input_ids[0][0]',              
 el)                            thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, None                                         



INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmp6wf12ns3\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmp6wf12ns3\model\data\model\assets


Running test with BERT model : distilroberta-base


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.bias']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


Model: "model_9"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_roberta_model_1 (TFRobertaM  TFBaseModelOutputWi  82118400   ['input_ids[0][0]',              
 odel)                          thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, None                                         



INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmppgvb3xrs\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmppgvb3xrs\model\data\model\assets


Running test with BERT model : vinai/bertweet-base


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['lm_head.decoder.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'roberta.embeddings.position_ids', 'lm_head.bias']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaModel were initialized from the PyTorch model.
If your task is similar to the task the model of the c

Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_roberta_model_2 (TFRobertaM  TFBaseModelOutputWi  134899968  ['input_ids[0][0]',              
 odel)                          thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, None                                        



INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmpj9wymnw0\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmpj9wymnw0\model\data\model\assets


Running test with BERT model : finiteautomata/bertweet-base-sentiment-analysis


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaModel were not initialized from the PyTorch model and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bia

Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_roberta_model_3 (TFRobertaM  TFBaseModelOutputWi  134899968  ['input_ids[0][0]',              
 odel)                          thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, None                                        



INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmp6apxj6iv\model\data\model\assets


INFO:tensorflow:Assets written to: C:\Users\bassm\AppData\Local\Temp\tmp6apxj6iv\model\data\model\assets


# Optimisation de la tête du modèle (plus petit dataset) 

## Réduction du dataset d'entrainement

In [21]:
# NOTE(review): this uses the same fraction (1%) and seed as the first sampling
# cell, so it yields the same 16000 rows; the section title announces a smaller
# training set - confirm the intended test_size.
sample_df, _ = train_test_split(df, test_size=0.99, random_state=42, stratify=df['target'])
sample_df = sample_df.reset_index(drop=True)
print(f"Sample size: {sample_df.shape[0]} rows")
# Keep only the 'target' and 'text' columns; the explicit copy makes it
# unambiguous that the assignment below writes to an independent frame.
sample_df = sample_df[['target', 'text']].copy()
# Binarise the label: 0 stays 0, every other value becomes 1 (vectorised).
sample_df["target"] = sample_df["target"].ne(0).astype("int64")
# Make sure the output directory exists so a fresh checkout can run this cell.
os.makedirs('Data', exist_ok=True)
sample_df.to_csv('Data/raw_data_mini.csv', index=False)

# Stratified 80/20 train/validation split with a fixed seed.
X_raw = sample_df['text']
y = sample_df['target']
X_train, X_val, y_train, y_val = train_test_split(X_raw, y, test_size=0.2, random_state=42, stratify=y)


Sample size: 16000 rows


## Fonction de base

In [None]:
# Training hyper-parameters read as module-level globals by test_bert_model_v2:
# maximum number of epochs and initial Adam learning rate.
epochs, lr = 100, 1e-3


def test_bert_model_v2(bert_model_name, rnn_size):
    """Train and evaluate a dense head of a given width on a frozen encoder.

    One MLflow run is opened per call. The run records the hyper-parameters,
    the validation metrics returned by ``postprocess_model_output``, a
    confusion-matrix figure, the training-history figure and the Keras model
    (saved in HDF5 format under the ``Bert_best_Model`` artifact path).

    The function reads the module-level globals ``X_train``, ``X_val``,
    ``y_train``, ``y_val``, ``epochs`` and ``lr``.

    Args:
        bert_model_name: Hugging Face Hub identifier of the checkpoint to load
            (used for both the tokenizer and the encoder).
        rnn_size: Number of units of the dense layer placed on top of the
            pooled encoder output.

    Returns:
        None. Results are logged to MLflow; the best weights are written to
        ``./Models`` and two PNG files are written to the working directory.
    """
    with mlflow.start_run():
        mlflow.log_params(params={
            'rnn_size': rnn_size,
            'epochs': epochs,
            'learning_rate': lr,
            'bert_model_name': bert_model_name
        })

        # Tokenise both splits; each split is padded to its longest sequence.
        tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
        encodings_train = tokenizer(X_train.to_list(), truncation=True, padding=True, return_tensors="tf")
        encodings_val = tokenizer(X_val.to_list(), truncation=True, padding=True, return_tensors="tf")

        # The datasets are batched here: this is the only place the batch size is set.
        dataset_train = tf.data.Dataset.from_tensor_slices(
            (
                {"input_ids": encodings_train["input_ids"],
                 "attention_mask": encodings_train["attention_mask"]
                 }, y_train
            )
        ).batch(32)

        dataset_val = tf.data.Dataset.from_tensor_slices(
            (
                {"input_ids": encodings_val["input_ids"],
                 "attention_mask": encodings_val["attention_mask"]
                 }, y_val
            )
        ).batch(32)

        # Load the pre-trained encoder and freeze it: no fine-tuning, the
        # available resources do not allow it.
        base_model = TFAutoModel.from_pretrained(bert_model_name, from_pt=True)
        base_model.trainable = False

        # Keras model: one input for the token ids, one for the attention mask.
        input_ids = tf.keras.Input(shape=(None,), dtype=tf.int32, name="input_ids")
        attention_mask = tf.keras.Input(shape=(None,), dtype=tf.int32, name="attention_mask")

        # training=False keeps the frozen encoder in inference mode (dropout off)
        # during fit(), so the extracted features are deterministic.
        outputs = base_model(input_ids, attention_mask=attention_mask, training=False)
        token_embeddings = outputs.last_hidden_state  # [batch, seq_len, hidden_size]

        # Sentence vector = first token ([CLS]) + max pooling + mean pooling.
        cls_token = token_embeddings[:, 0, :]

        # Pool over real tokens only: padded positions still produce embeddings
        # and would otherwise bias both the mean and the max.
        mask = tf.cast(tf.expand_dims(attention_mask, axis=-1), token_embeddings.dtype)  # [batch, seq_len, 1]
        max_tokens = tf.reduce_max(token_embeddings + (1.0 - mask) * -1e9, axis=1)
        token_count = tf.maximum(tf.reduce_sum(mask, axis=1), 1.0)
        mean_tokens = tf.reduce_sum(token_embeddings * mask, axis=1) / token_count

        all_tokens = tf.concat([cls_token, max_tokens, mean_tokens], axis=-1)  # [batch, hidden_size*3]

        x = tf.keras.layers.Dense(rnn_size, activation="relu",
                                  kernel_regularizer=regularizers.L2(1e-4),
                                  bias_regularizer=regularizers.L2(1e-4),
                                  )(all_tokens)
        x = tf.keras.layers.Dropout(0.5)(x)
        # Sigmoid output: this is a probability, not a logit.
        probabilities = tf.keras.layers.Dense(1, activation="sigmoid")(x)

        model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=probabilities)

        ## Callbacks
        model_savepath = f"./Models/MY_{'_'.join(bert_model_name.split('/'))}_dense{rnn_size}.h5"
        # The checkpoint callback does not need to create the folder itself.
        os.makedirs(os.path.dirname(model_savepath), exist_ok=True)
        checkpoint = ModelCheckpoint(model_savepath, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='min')
        es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
        lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=0, min_lr=1e-5)
        callbacks_list = [checkpoint, es, lr_scheduler]

        optimizer = Adam(learning_rate=lr)
        model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
        model.summary()

        # No batch_size here: the tf.data pipelines above are already batched.
        with tf.device("/GPU:0"):
            history = model.fit(dataset_train, epochs=epochs, validation_data=dataset_val, callbacks=callbacks_list, verbose=1)

        # Restore the best checkpoint before evaluating.
        model.load_weights(model_savepath)

        # Predictions on the validation set.
        y_pred_proba = model.predict(dataset_val)
        y_pred = (y_pred_proba > 0.5)

        output_dict = postprocess_model_output(y_val, y_pred, y_pred_proba)  # see postprocess_data.py

        # Log the metrics to MLflow.
        mlflow.log_metrics(output_dict)

        # Confusion matrix, normalised over the predicted labels (columns).
        cm = confusion_matrix(y_val, y_pred, normalize='pred')
        fig, ax = plt.subplots()
        sns.heatmap(cm, annot=True, fmt=".2f", cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
        ax.set_title("Confusion Matrix - Validation Set")
        fig.savefig("confusion_matrix.png")
        plt.close(fig)
        mlflow.log_artifact("confusion_matrix.png")

        # Training curves.
        fig2 = plot_training_history(history, show=False)
        fig2.savefig("learning_path.png")
        plt.close(fig2)
        mlflow.log_artifact("learning_path.png")

        # Store the trained model in MLflow (HDF5 format).
        mlflow.tensorflow.log_model(
            model,
            artifact_path="Bert_best_Model",
            keras_model_kwargs={"save_format": "h5"}
        )




In [23]:
# Select (or create) the MLflow experiment that groups the head-optimisation runs.
print("Setting up MLflow experiment...")
experiment = mlflow.set_experiment("BERT_models_experiment")
exp_id = experiment.experiment_id

experiment_description = (
    "Optimisation de la tête du modèle basé sur BERT."
)

# Provide searchable tags that define characteristics of the Runs that
# will be in this Experiment
experiment_tags = {
    "project_name": "Sentiment analysis modelling",
    "model_type": "BERT_pretrained",
    "team": "Ph. Constant",
    "project_quarter": "Q3-2025",
    "mlflow.note.content": experiment_description,
}

# The experiment id was resolved by the fluent API, so the tags must be written
# to the same tracking store; a client bound to another URI could target a
# different (or missing) experiment with that id.
tag_client = MlflowClient(tracking_uri=mlflow.get_tracking_uri())
for key, value in experiment_tags.items():
    tag_client.set_experiment_tag(exp_id, key, value)

Traceback (most recent call last):
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 367, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 465, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 1635, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\store\tracking\file_store.py", line 1628, in _read_helper
    result = read_yaml(root, file_name)
  File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\mlflow\utils\yaml_utils.py", line 107, in read_yaml
    raise MissingConfigException(f"Yaml file '{file_path}' d

Setting up MLflow experiment...


In [24]:
# Sweep the dense-head width over 64, 128 and 192 units (the stop value 254
# excludes 256). The loop variable is deliberately NOT named `rnn_size`: that
# would overwrite the module-level `rnn_size` that test_bert_model reads.
for head_size in range(64, 254, 64):
    print("Running test with BERT model finiteautomata/bertweet-base-sentiment-analysis")
    test_bert_model_v2(bert_model_name="finiteautomata/bertweet-base-sentiment-analysis", rnn_size=head_size)

Running test with BERT model finiteautomata/bertweet-base-sentiment-analysis


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.weight', 'classifier.dense.bias', 'roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaModel were not initialized from the PyTorch model and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bia

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_ids (InputLayer)         [(None, None)]       0           []                               
                                                                                                  
 attention_mask (InputLayer)    [(None, None)]       0           []                               
                                                                                                  
 tf_roberta_model_4 (TFRobertaM  TFBaseModelOutputWi  134899968  ['input_ids[0][0]',              
 odel)                          thPoolingAndCrossAt               'attention_mask[0][0]']         
                                tentions(last_hidde                                               
                                n_state=(None, None                                        

KeyboardInterrupt: 

# Modèle Bert automatique depuis Huggingface 


In [None]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Fine-tune only the classification head of a pretrained BERTweet sentiment
# checkpoint on the binary labels held in y_train / y_val.
#
# NOTE(review): the recorded run of this cell stops inside fit() with
# "AttributeError: module 'keras.utils' has no attribute
# 'unpack_x_y_sample_weight'" under TF 2.10.1. This looks like a
# transformers/Keras version mismatch -- confirm and pin compatible versions.

# Hugging Face checkpoint id, shared by the tokenizer, the model and the
# name of the weights file written by the checkpoint callback.
MODEL_NAME = "finiteautomata/bertweet-base-sentiment-analysis"
BATCH_SIZE = 32
# Upper bound on the number of examples held in the shuffle buffer.
SHUFFLE_BUFFER_SIZE = 10_000


def encodings_to_dataset(encodings, labels, batch_size=BATCH_SIZE, shuffle=False):
    """Wrap tokenizer encodings and labels into a batched tf.data.Dataset.

    Args:
        encodings: tokenizer output exposing "input_ids" and "attention_mask".
        labels: targets aligned with the encoded examples.
        batch_size: number of examples per batch.
        shuffle: when True, shuffle examples (bounded buffer) before batching.

    Returns:
        A dataset yielding ({"input_ids", "attention_mask"}, label) batches.
    """
    features = {
        "input_ids": encodings["input_ids"],
        "attention_mask": encodings["attention_mask"],
    }
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if shuffle:
        # Keras ignores fit(shuffle=...) for tf.data inputs, so the shuffling
        # has to happen in the pipeline itself.
        dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
    return dataset.batch(batch_size)


# Load the tokenizer and the model automatically (transformers picks the
# matching tokenizer / TFRobertaForSequenceClassification classes).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = TFAutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
    # The checkpoint head has 3 outputs; allow it to be re-initialised with 2.
    ignore_mismatched_sizes=True,
)

# Freeze everything except the final classification head.
for layer in model.layers:
    if layer.name != "classifier":   # name of the final layer
        layer.trainable = False

# Sanity check: only the classifier parameters should be listed as trainable.
model.summary()

# Tokenization
encodings_train = tokenizer(X_train.to_list(), truncation=True, padding=True, return_tensors="tf")
encodings_val = tokenizer(X_val.to_list(), truncation=True, padding=True, return_tensors="tf")

# Only the training set is shuffled; validation keeps a fixed order.
dataset_train = encodings_to_dataset(encodings_train, y_train, shuffle=True)
dataset_val = encodings_to_dataset(encodings_val, y_val)

# Higher learning rate because only a few layers are trained.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

metrics = ["accuracy"]

# The checkpoint id contains a "/", which cannot be used in a file name.
rename = MODEL_NAME.replace('/', '_')

## Callbacks
model_savepath = f"./Models/AUTO_{rename}.h5"
checkpoint = ModelCheckpoint(model_savepath, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=True, mode='min')
es = EarlyStopping(monitor='val_loss', mode='min', verbose=0, patience=10)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=0, min_lr=1e-5)
callbacks_list = [checkpoint, es, lr_scheduler]

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

# Training run
history = model.fit(dataset_train, validation_data=dataset_val, epochs=50, callbacks=callbacks_list, verbose=1)

print(model.config.id2label)


emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
All model checkpoint layers were used when initializing TFRobertaForSequenceClassification.

Some weights of TFRobertaForSequenceClassification were not initialized from the model checkpoint at finiteautomata/bertweet-base-sentiment-analysis and are newly initialized because the shapes did not match:
- classifier/out_proj/kernel:0: found shape (768, 3) in the checkpoint and (768, 2) in the model instantiated
- classifier/out_proj/bias:0: found shape (3,) in the checkpoint and (2,) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_roberta_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 roberta (TFRobertaMainLayer  multiple                 134309376 
 )                                                               
                                                                 
 classifier (TFRobertaClassi  multiple                 592130    
 ficationHead)                                                   
                                                                 
Total params: 134,901,506
Trainable params: 592,130
Non-trainable params: 134,309,376
_________________________________________________________________
Epoch 1/50


AttributeError: in user code:

    File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\ProgramData\anaconda3\envs\AI_env_P7_gpu\lib\site-packages\transformers\modeling_tf_utils.py", line 1614, in train_step
        x, y, sample_weight = keras.utils.unpack_x_y_sample_weight(data)

    AttributeError: module 'keras.utils' has no attribute 'unpack_x_y_sample_weight'
