# <a id='toc1_'></a>[Projet 7 : Réalisez une analyse de sentiments grâce au Deep Learning](#toc0_)
# <a id='toc2_'></a>[Modèle sur mesure avancé](#toc0_)

[Lien OpenClassroom](https://openclassrooms.com/fr/paths/795/projects/1516/1578-mission)

---

**Table of contents**<a id='toc0_'></a>    
- [Projet 7 : Réalisez une analyse de sentiments grâce au Deep Learning](#toc1_)    
- [Modèle sur mesure simple](#toc2_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

---
---

## <a id='toc2_1_'></a>[Imports](#toc0_)

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import (
    Embedding,
    LSTM,
    Dense,
    Dropout,
    Bidirectional,
    Input,
    SpatialDropout1D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix,
)
import mlflow
import mlflow.tensorflow  # Essential for autologging
import pickle  # For saving the tokenizer
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings("ignore")
tf.get_logger().setLevel("ERROR")

---
---

## <a id='toc2_2_'></a>[Chargement des données](#toc0_)

In [22]:
TRAIN_DATA_PATH = "./train_data.csv"
VAL_DATA_PATH = "./validation_data.csv"
TEST_DATA_PATH = "./test_data.csv"

train_df = pd.read_csv(TRAIN_DATA_PATH)
val_df = pd.read_csv(VAL_DATA_PATH)
test_df = pd.read_csv(TEST_DATA_PATH)

# Handle potential NaN values in 'cleaned_text' that might result from preprocessing
train_df["cleaned_text"].fillna("", inplace=True)
val_df["cleaned_text"].fillna("", inplace=True)
test_df["cleaned_text"].fillna("", inplace=True)


X_train = train_df["cleaned_text"]
y_train = train_df["sentiment"].replace({"negative": 0, "positive": 1}).astype(int)
X_val = val_df["cleaned_text"]
y_val = val_df["sentiment"].replace({"negative": 0, "positive": 1}).astype(int)
X_test = test_df["cleaned_text"]
y_test = test_df["sentiment"].replace({"negative": 0, "positive": 1}).astype(int)

print("Data loaded successfully:")
print(f"Train samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")
print(f"Test samples: {len(X_test)}")

Data loaded successfully:
Train samples: 1114106
Validation samples: 238737
Test samples: 238738


---
---

## Préparation pour Deep Leanring

---

### Création d'un Tokenizer

In [23]:
VOCAB_SIZE = 200
MAX_SEQUENCE_LENGTH = 20

tokenizer = Tokenizer(
    num_words=VOCAB_SIZE, oov_token="<OOV>"
)  # OOV token for out-of-vocabulary words

# Fit the tokenizer ONLY on the training data
tokenizer.fit_on_texts(X_train)

# Convert text data to sequences of integers
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_val_seq = tokenizer.texts_to_sequences(X_val)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Pad sequences to ensure uniform length
X_train_pad = pad_sequences(
    X_train_seq, maxlen=MAX_SEQUENCE_LENGTH, padding="post", truncating="post"
)
X_val_pad = pad_sequences(
    X_val_seq, maxlen=MAX_SEQUENCE_LENGTH, padding="post", truncating="post"
)
X_test_pad = pad_sequences(
    X_test_seq, maxlen=MAX_SEQUENCE_LENGTH, padding="post", truncating="post"
)

# Vocabulary size for the embedding layer (add 1 for the padding token 0)
# Use min to handle cases where actual vocab is smaller than VOCAB_SIZE
actual_vocab_size = min(VOCAB_SIZE, len(tokenizer.word_index) + 1)
print(f"Actual vocabulary size used: {actual_vocab_size}")
print(f"Shape of padded training sequences: {X_train_pad.shape}")
print(f"Shape of padded validation sequences: {X_val_pad.shape}")
print(f"Shape of padded test sequences: {X_test_pad.shape}")

Actual vocabulary size used: 200
Shape of padded training sequences: (1114106, 20)
Shape of padded validation sequences: (238737, 20)
Shape of padded test sequences: (238738, 20)


---

### Sauvegarde du Tokenizer

In [24]:
with open("keras_tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

---

### MLFlow Setup

In [25]:
EXPERIMENT_NAME = "Tweet Sentiment Analysis - Advanced DL"
mlflow.set_experiment(EXPERIMENT_NAME)
print(f"MLflow experiment set to: '{EXPERIMENT_NAME}'")

MLflow experiment set to: 'Tweet Sentiment Analysis - Advanced DL'


---
---

## Experiment 1: LSTM avec GloVe Embeddings

---

### Chargement de GloVe Embeddings

In [26]:
GLOVE_PATH = "./glove.6B.300d.txt"
EMBEDDING_DIM = 300

embeddings_index = {}
try:
    with open(GLOVE_PATH, encoding="utf-8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype="float32")
            embeddings_index[word] = coefs
    print(f"Found {len(embeddings_index)} word vectors in {GLOVE_PATH}.")
except FileNotFoundError:
    print(f"Error: GloVe file not found at {GLOVE_PATH}")
    print("Skipping GloVe experiment.")
    embeddings_index = None  # Ensure variable exists but is None
except Exception as e:
    print(f"An error occurred loading GloVe file: {e}")
    embeddings_index = None

embedding_matrix = None
if embeddings_index:
    print("Creating embedding matrix...")
    # Initialize matrix with zeros
    embedding_matrix = np.zeros((actual_vocab_size, EMBEDDING_DIM))
    hits = 0
    misses = 0
    # Populate the matrix with GloVe vectors for words in our tokenizer's vocabulary
    for word, i in tokenizer.word_index.items():
        if i >= actual_vocab_size:  # Skip words beyond our vocab size limit
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # Words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector
            hits += 1
        else:
            misses += 1
    print(f"Converted {hits} words ({misses} misses)")
    print(f"Shape of embedding matrix: {embedding_matrix.shape}")

Found 400001 word vectors in ./glove.6B.300d.txt.
Creating embedding matrix...
Converted 198 words (1 misses)
Shape of embedding matrix: (200, 300)


---

### Création du modèle

In [None]:
def build_lstm_model(
    vocab_size,
    embedding_dim,
    max_length,
    lstm_units,
    dropout_rate,
    spatial_dropout_rate,
    learning_rate,
    embedding_matrix=None,
    is_embedding_trainable=False,
):
    """Builds a Keras LSTM model, OPTIMIZED for speed."""
    model = Sequential()
    model.add(Input(shape=(max_length,)))

    model.add(
        Embedding(
            input_dim=vocab_size,
            output_dim=embedding_dim,
            weights=[embedding_matrix],
            input_length=max_length,
            trainable=is_embedding_trainable,  # Explicitly False for GloVe
        )
    )

    model.add(SpatialDropout1D(spatial_dropout_rate))
    model.add(LSTM(lstm_units, dropout=dropout_rate, recurrent_dropout=dropout_rate))

    model.add(Dense(1, activation="sigmoid"))

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    print("\nOptimized Model Summary:")
    model.summary()
    return model

---

### Entrainement du modèle avec MLFlow

In [None]:
LSTM_UNITS = 32
DROPOUT_RATE = 0.2
SPATIAL_DROPOUT_RATE = 0.2

EPOCHS = 10
BATCH_SIZE = 512
LEARNING_RATE = 0.001

TOKENIZER_ARTIFACT_PATH = "tokenizer"
MODEL_ARTIFACT_PATH = "model"

run_name_glove = "LSTM_GloVe_Embeddings"
print(f"\n--- Starting MLflow Run for: {run_name_glove} ---")


mlflow.tensorflow.autolog(
    log_models=True, disable=False, registered_model_name=None
)  # Disable registration via autolog for now

with mlflow.start_run(run_name=run_name_glove) as run_glove:
    run_id_glove = run_glove.info.run_id
    print(f"MLflow Run ID (GloVe): {run_id_glove}")

    # --- Log additional parameters manually (autolog might miss some) ---
    mlflow.log_param("embedding_type", "GloVe (Not Trainable)")
    mlflow.log_param("vocab_size", actual_vocab_size)
    mlflow.log_param("max_sequence_length", MAX_SEQUENCE_LENGTH)
    mlflow.log_param("embedding_dim", EMBEDDING_DIM)
    mlflow.log_param("lstm_units", LSTM_UNITS)
    mlflow.log_param("dropout_rate", DROPOUT_RATE)
    mlflow.log_param("spatial_dropout_rate", SPATIAL_DROPOUT_RATE)
    mlflow.log_param("learning_rate", LEARNING_RATE)
    mlflow.log_param("epochs", EPOCHS)
    mlflow.log_param("batch_size", BATCH_SIZE)
    mlflow.log_param("architecture", "Input-Embedding-SpatialDropout-BiLSTM-Dense")

    # --- Build the model ---
    model_glove = build_lstm_model(
        vocab_size=actual_vocab_size,
        embedding_dim=EMBEDDING_DIM,
        max_length=MAX_SEQUENCE_LENGTH,
        lstm_units=LSTM_UNITS,
        dropout_rate=DROPOUT_RATE,
        spatial_dropout_rate=SPATIAL_DROPOUT_RATE,
        learning_rate=LEARNING_RATE,
        embedding_matrix=embedding_matrix,
    )

    # --- Callbacks ---
    early_stopping = EarlyStopping(
        monitor="val_loss", patience=2, restore_best_weights=True
    )

    # --- Train the model ---
    print("\nTraining LSTM model with GloVe embeddings...")
    history_glove = model_glove.fit(
        X_train_pad,
        y_train,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(X_val_pad, y_val),
        callbacks=[early_stopping],
        verbose=1,
    )
    print("GloVe Model Training Finished.")

    if os.path.exists("keras_tokenizer.pkl"):
        mlflow.log_artifact(
            "keras_tokenizer.pkl", artifact_path=TOKENIZER_ARTIFACT_PATH
        )
        print(f"Tokenizer logged as artifact to MLflow run {run_id_glove}.")
    else:
        print(
            "Warning: Tokenizer file keras_tokenizer.pkl not found, could not log artifact."
        )

    print(f"--- MLflow Run {run_id_glove} finished ---")


--- Starting MLflow Run for: LSTM_GloVe_Embeddings ---
MLflow Run ID (GloVe): 0c17b15490fb4c54ba166550369a5e49
Using pre-trained embedding matrix (non-trainable).

Optimized Model Summary:





Training LSTM model with GloVe embeddings...


Epoch 1/10
[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.6779 - loss: 0.5900



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 42ms/step - accuracy: 0.6779 - loss: 0.5900 - val_accuracy: 0.7120 - val_loss: 0.5491
Epoch 2/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 34ms/step - accuracy: 0.7073 - loss: 0.5555



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7073 - loss: 0.5555 - val_accuracy: 0.7140 - val_loss: 0.5447
Epoch 3/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 34ms/step - accuracy: 0.7111 - loss: 0.5490



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7111 - loss: 0.5490 - val_accuracy: 0.7147 - val_loss: 0.5425
Epoch 4/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 34ms/step - accuracy: 0.7122 - loss: 0.5470



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7122 - loss: 0.5470 - val_accuracy: 0.7151 - val_loss: 0.5416
Epoch 5/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 34ms/step - accuracy: 0.7135 - loss: 0.5448



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7135 - loss: 0.5448 - val_accuracy: 0.7163 - val_loss: 0.5398
Epoch 6/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 33ms/step - accuracy: 0.7135 - loss: 0.5439



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7135 - loss: 0.5439 - val_accuracy: 0.7173 - val_loss: 0.5395
Epoch 7/10
[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.7139 - loss: 0.5425



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7139 - loss: 0.5425 - val_accuracy: 0.7170 - val_loss: 0.5387
Epoch 8/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 33ms/step - accuracy: 0.7151 - loss: 0.5417



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7151 - loss: 0.5417 - val_accuracy: 0.7173 - val_loss: 0.5385
Epoch 9/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 34ms/step - accuracy: 0.7141 - loss: 0.5412



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 36ms/step - accuracy: 0.7141 - loss: 0.5412 - val_accuracy: 0.7180 - val_loss: 0.5378
Epoch 10/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 34ms/step - accuracy: 0.7153 - loss: 0.5408



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 37ms/step - accuracy: 0.7153 - loss: 0.5408 - val_accuracy: 0.7182 - val_loss: 0.5374
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 198ms/step
GloVe Model Training Finished.
Tokenizer logged as artifact to MLflow run 0c17b15490fb4c54ba166550369a5e49.
--- MLflow Run 0c17b15490fb4c54ba166550369a5e49 finished ---


---
---

## Experiment 2: LSTM avec GloVe Embeddings (Entrainable)

In [None]:
run_name_trainable = "LSTM_Trainable_Embeddings"
print(f"\n--- Starting MLflow Run for: {run_name_trainable} ---")

# Re-enable autologging for the new run if it was disabled, ensure clean state
mlflow.tensorflow.autolog(log_models=True, disable=False, registered_model_name=None)

with mlflow.start_run(run_name=run_name_trainable) as run_trainable:
    run_id_trainable = run_trainable.info.run_id
    print(f"MLflow Run ID (Trainable): {run_id_trainable}")

    # --- Log additional parameters manually ---
    mlflow.log_param("embedding_type", "Trainable")
    mlflow.log_param("vocab_size", actual_vocab_size)
    mlflow.log_param("max_sequence_length", MAX_SEQUENCE_LENGTH)
    mlflow.log_param(
        "embedding_dim", EMBEDDING_DIM
    )  # Can be different from GloVe dim if desired
    mlflow.log_param("lstm_units", LSTM_UNITS)
    mlflow.log_param("dropout_rate", DROPOUT_RATE)
    mlflow.log_param("spatial_dropout_rate", SPATIAL_DROPOUT_RATE)
    mlflow.log_param("learning_rate", LEARNING_RATE)
    mlflow.log_param("epochs", EPOCHS)
    mlflow.log_param("batch_size", BATCH_SIZE)
    mlflow.log_param("architecture", "Input-Embedding-SpatialDropout-BiLSTM-Dense")

    # --- Build the model ---
    model_trainable = build_lstm_model(
        vocab_size=actual_vocab_size,
        embedding_dim=EMBEDDING_DIM,
        max_length=MAX_SEQUENCE_LENGTH,
        lstm_units=LSTM_UNITS,
        dropout_rate=DROPOUT_RATE,
        spatial_dropout_rate=SPATIAL_DROPOUT_RATE,
        learning_rate=LEARNING_RATE,
        embedding_matrix=embedding_matrix,
        is_embedding_trainable=True,
    )

    # --- Callbacks ---
    early_stopping = EarlyStopping(
        monitor="val_loss", patience=3, restore_best_weights=True
    )

    # --- Train the model ---
    print("\nTraining LSTM model with Trainable embeddings...")
    history_trainable = model_trainable.fit(
        X_train_pad,
        y_train,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data=(X_val_pad, y_val),
        callbacks=[early_stopping],
        verbose=1,
    )
    print("Trainable Model Training Finished.")

    # --- Manually log the tokenizer artifact ---
    if os.path.exists("keras_tokenizer.pkl"):
        mlflow.log_artifact(
            "keras_tokenizer.pkl", artifact_path=TOKENIZER_ARTIFACT_PATH
        )
        print(f"Tokenizer logged as artifact to MLflow run {run_id_glove}.")
    else:
        print(
            "Warning: Tokenizer file keras_tokenizer.pkl not found, could not log artifact."
        )

    print(f"--- MLflow Run {run_id_glove} finished ---")


--- Starting MLflow Run for: LSTM_Trainable_Embeddings ---
MLflow Run ID (Trainable): b72ca14fe64c4a549306aebe1ba4641c

Optimized Model Summary:





Training LSTM model with Trainable embeddings...


Epoch 1/10
[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.6866 - loss: 0.5778



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 42ms/step - accuracy: 0.6866 - loss: 0.5778 - val_accuracy: 0.7142 - val_loss: 0.5419
Epoch 2/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 38ms/step - accuracy: 0.7129 - loss: 0.5419



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 41ms/step - accuracy: 0.7129 - loss: 0.5419 - val_accuracy: 0.7167 - val_loss: 0.5371
Epoch 3/10
[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.7160 - loss: 0.5371



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 42ms/step - accuracy: 0.7160 - loss: 0.5371 - val_accuracy: 0.7182 - val_loss: 0.5347
Epoch 4/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 39ms/step - accuracy: 0.7177 - loss: 0.5348



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 41ms/step - accuracy: 0.7177 - loss: 0.5348 - val_accuracy: 0.7192 - val_loss: 0.5333
Epoch 5/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 39ms/step - accuracy: 0.7185 - loss: 0.5326



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 41ms/step - accuracy: 0.7185 - loss: 0.5326 - val_accuracy: 0.7199 - val_loss: 0.5329
Epoch 6/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 39ms/step - accuracy: 0.7192 - loss: 0.5321



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 41ms/step - accuracy: 0.7192 - loss: 0.5321 - val_accuracy: 0.7204 - val_loss: 0.5321
Epoch 7/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 39ms/step - accuracy: 0.7194 - loss: 0.5320



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 42ms/step - accuracy: 0.7194 - loss: 0.5320 - val_accuracy: 0.7208 - val_loss: 0.5317
Epoch 8/10
[1m2175/2176[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 39ms/step - accuracy: 0.7212 - loss: 0.5297



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 41ms/step - accuracy: 0.7212 - loss: 0.5297 - val_accuracy: 0.7202 - val_loss: 0.5312
Epoch 9/10
[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - accuracy: 0.7216 - loss: 0.5290



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 42ms/step - accuracy: 0.7216 - loss: 0.5290 - val_accuracy: 0.7208 - val_loss: 0.5308
Epoch 10/10
[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - accuracy: 0.7214 - loss: 0.5292



[1m2176/2176[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 45ms/step - accuracy: 0.7214 - loss: 0.5292 - val_accuracy: 0.7210 - val_loss: 0.5307
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
Trainable Model Training Finished.
Tokenizer logged as artifact to MLflow run 0c17b15490fb4c54ba166550369a5e49.
--- MLflow Run 0c17b15490fb4c54ba166550369a5e49 finished ---


## Evaluation et Selection