In [1]:
# Chargement des bibliothèques nécessaires
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from keras.models import Sequential
from keras.layers import LSTM, Bidirectional, Dense, Dropout


# Chargement des données et des modèles
with open("data.pkl", "rb") as f:
    questions_matrix, data, best_num_clusters, tfidf_vectorizer, tfidf_vectorizer1 = pickle.load(f)
    
with open("model.pkl", "rb") as f1:
    model, history = pickle.load(f1)


In [2]:
tfidf_matrix = tfidf_vectorizer1.fit_transform(data['title_x'])
tfidf_matrix = tfidf_matrix.toarray()

labels = data['cluster']

X_train, X_test, y_train, y_test = train_test_split(tfidf_matrix, labels, test_size=0.2, random_state=42)

X_train = X_train.reshape(X_train.shape[0],1, X_train.shape[1])
X_test = X_test.reshape(X_test.shape[0],1, X_test.shape[1])


In [3]:
lstm = Sequential()
lstm.add(LSTM(units=128, input_shape=(1, X_train.shape[2])))
lstm.add(Dropout(0.5))
lstm.add(Dense(units=best_num_clusters, activation='softmax'))

lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  super().__init__(**kwargs)


In [4]:
lstm.fit(X_train, y_train, epochs=3, batch_size=64)

Epoch 1/3
[1m744/744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 144ms/step - accuracy: 0.6199 - loss: 1.2189
Epoch 2/3
[1m744/744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 176ms/step - accuracy: 0.9811 - loss: 0.1006
Epoch 3/3
[1m744/744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 162ms/step - accuracy: 0.9954 - loss: 0.0272


<keras.src.callbacks.history.History at 0x2cbbc9ce4b0>

In [5]:
loss, accuracy = lstm.evaluate(X_test, y_test)
print("Perte sur l'ensemble de test:", loss)
print("Précision sur l'ensemble de test:", accuracy)

[1m372/372[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 20ms/step - accuracy: 0.9891 - loss: 0.0322
Perte sur l'ensemble de test: 0.03393689915537834
Précision sur l'ensemble de test: 0.9884874224662781


In [6]:
# Prédire les étiquettes sur l'ensemble de test
predictions = lstm.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Calculer l'accuracy
accuracy = accuracy_score(y_test, predicted_labels)

# Calculer le F-score
f1 = f1_score(y_test, predicted_labels, average='weighted')

print("Accuracy sur l'ensemble de test:", accuracy)
print("F-score sur l'ensemble de test:", f1)

[1m372/372[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step
Accuracy sur l'ensemble de test: 0.9884873949579832
F-score sur l'ensemble de test: 0.9885057505384695


### BLSTM

In [7]:
blstm = Sequential()
blstm.add(Bidirectional(LSTM(units=128, input_shape=(1, X_train.shape[2]))))
blstm.add(Dropout(0.5))
blstm.add(Dense(units=best_num_clusters, activation='softmax'))

blstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

  super().__init__(**kwargs)


In [8]:
blstm.fit(X_train, y_train, epochs=3, batch_size=64)

Epoch 1/3
[1m744/744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 291ms/step - accuracy: 0.6613 - loss: 1.0899
Epoch 2/3
[1m744/744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 283ms/step - accuracy: 0.9886 - loss: 0.0599
Epoch 3/3
[1m744/744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m219s[0m 224ms/step - accuracy: 0.9974 - loss: 0.0173


<keras.src.callbacks.history.History at 0x2cbca19e7e0>

In [9]:
loss, accuracy = blstm.evaluate(X_test, y_test)
print("Perte sur l'ensemble de test:", loss)

# Prédire les étiquettes sur l'ensemble de test
predictions = blstm.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Calculer l'accuracy
accuracy = accuracy_score(y_test, predicted_labels)

# Calculer le F-score
f1 = f1_score(y_test, predicted_labels, average='weighted')

print("Accuracy sur l'ensemble de test:", accuracy)
print("F-score sur l'ensemble de test:", f1)

[1m372/372[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 19ms/step - accuracy: 0.9905 - loss: 0.0290
Perte sur l'ensemble de test: 0.03030090406537056
[1m372/372[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 20ms/step
Accuracy sur l'ensemble de test: 0.9900840336134453
F-score sur l'ensemble de test: 0.9900940555089108
