In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report

In [None]:
# Load the raw sentiment dataset from a CSV file located in the current
# working directory.
sent_data = pd.read_csv(filepath_or_buffer='sentiment_data.csv')

In [None]:
# Split the dataset: a reproducible random 80 % sample becomes the training
# set, and every remaining row (selected by index label) becomes the test set.
# The frame is loaded under the name `sent_data`; this cell previously
# referenced an undefined `sentiment_data`, which raised NameError on a fresh
# kernel.
# NOTE(review): `drop(train_data.index)` removes rows by index label, so this
# assumes the frame's index is unique (true for the default RangeIndex
# produced by `read_csv`) - confirm if the index is ever changed upstream.
train_data = sent_data.sample(frac=0.8, random_state=42)
test_data = sent_data.drop(train_data.index)

In [None]:
# Features and labels: the 'text' column is the model input and the
# 'sentiment' column is the target, for both the train and the test split.
X_train = train_data['text']
y_train = train_data['sentiment']
X_test = test_data['text']
y_test = test_data['sentiment']

In [None]:
# encodgae de label
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [None]:
# Text vectorisation: build the vocabulary from the training texts only, then
# convert every text to a sequence of word indices padded/truncated to a
# fixed length of 200 (must match the Embedding layer's `input_length`).
# `num_words=5000` limits the emitted indices to the most frequent words.
#
# The raw texts are read from the source frames rather than from `X_train` /
# `X_test`, which this cell overwrites with integer arrays. That keeps the
# cell idempotent: re-running it no longer feeds padded integer arrays back
# into `fit_on_texts`, which would fail.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data['text'])

X_train = pad_sequences(tokenizer.texts_to_sequences(train_data['text']), maxlen=200)
X_test = pad_sequences(tokenizer.texts_to_sequences(test_data['text']), maxlen=200)

In [None]:
# Model parameters.
# Vocabulary size = number of distinct indices the embedding must cover.
# `word_index` holds every word seen during fitting (+1 because index 0 is
# never assigned to a word and is used for padding), but the tokenizer only
# emits indices strictly below `num_words`. Capping at `num_words` avoids
# allocating embedding rows that can never be looked up.
vocab_size = len(tokenizer.word_index) + 1
if tokenizer.num_words:
    vocab_size = min(vocab_size, tokenizer.num_words)
embedding_dim = 128  # size of each embedding vector
lstm_units = 128  # number of LSTM units

In [None]:
# Model construction: Embedding -> LSTM -> Dense output layer.
#
# The output layer and loss follow the number of classes found by the label
# encoder:
#   * up to 2 classes -> one sigmoid unit with binary cross-entropy
#     (identical to the previous hard-coded setup);
#   * more than 2 classes -> one softmax unit per class with sparse
#     categorical cross-entropy, which accepts the integer codes produced by
#     LabelEncoder. A single sigmoid unit would silently train on invalid
#     targets (codes 2, 3, ...) in that case.
num_classes = len(label_encoder.classes_)
if num_classes > 2:
    output_units = num_classes
    output_activation = 'softmax'
    loss_name = 'sparse_categorical_crossentropy'
else:
    output_units = 1
    output_activation = 'sigmoid'
    loss_name = 'binary_crossentropy'

model = Sequential()
# input_length=200 must match the `maxlen` used when padding the sequences.
model.add(Embedding(vocab_size, embedding_dim, input_length=200))
model.add(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(output_units, activation=output_activation))

model.compile(optimizer='adam', loss=loss_name, metrics=['accuracy'])


model.summary()
# Training hyper-parameters
batch_size = 64
epochs = 10

In [None]:
# Training.
# Early stopping watches the validation loss and stops after 3 epochs without
# an improvement of at least 1e-4; `restore_best_weights=True` rolls the model
# back to the best epoch instead of keeping the weights from the last
# (worse) epochs of the patience window.
#
# Validation uses a 10 % hold-out of the training data (`validation_split`
# takes the last 10 % of the arrays before shuffling) rather than the test
# set: letting the test set decide when to stop leaks it into model selection
# and makes the final evaluation optimistic.
# NOTE(review): this assumes the row order of X_train is already random (it
# comes from `DataFrame.sample` in this notebook) - confirm if that changes.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    min_delta=0.0001,
    restore_best_weights=True,
)
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    callbacks=[early_stopping],
)
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns

In [None]:
# Prediction: turn the model's output scores into integer class codes.
# A single-unit sigmoid output has shape (n_samples, 1); taking argmax over
# axis 1 of such an array always returns 0, so every sample was predicted as
# class 0. For that shape the probability is thresholded at 0.5 instead.
# A multi-unit (softmax) output still uses argmax over the class axis.
y_prob = model.predict(X_test)
if y_prob.shape[-1] == 1:
    y_pred = (y_prob > 0.5).astype(int).ravel()
else:
    y_pred = y_prob.argmax(axis=1)

In [None]:
# Evaluation: per-class precision / recall / F1 on the test set.
# `labels` pins the report to every class known to the encoder, so the rows
# always line up with `target_names` even when a class is absent from the
# test labels or the predictions. The names are converted to str because the
# report formats them as text and the encoder's classes may be numeric.
class_ids = list(range(len(label_encoder.classes_)))
class_names = [str(name) for name in label_encoder.classes_]
report = classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=class_names,
)
print(report)

In [None]:
# Confusion matrix heatmap (rows = true class, columns = predicted class).
# NOTE(review): the SimHei font override is presumably there so that Chinese
# class names render correctly; the axis titles themselves are French.
# Confirm SimHei is installed, otherwise matplotlib falls back to its default
# font with a warning.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Pin the matrix to every class known to the encoder so that its rows/columns
# always match the tick labels, and show class names instead of bare codes.
class_ids = list(range(len(label_encoder.classes_)))
class_names = [str(name) for name in label_encoder.classes_]
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_ids)
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.ylabel('catégorie réel')
plt.xlabel('catégorie prédite')
plt.show()