In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Load dataset
df = pd.read_csv("DataSetForModel.csv")  # Replace with your CSV path
df = df[['text', 'label']].dropna()




In [2]:
# Text preprocessing
max_words = 5000
max_len = 150
tokenizer = Tokenizer(num_words=max_words, lower=True)
tokenizer.fit_on_texts(df['text'].values)
X = tokenizer.texts_to_sequences(df['text'].values)
X = pad_sequences(X, maxlen=max_len)# Labels
y = df['label'].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:

# Build LSTM model
model = Sequential()
model.add(Embedding(max_words,64 ))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(32, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))  # For binary classification

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [4]:
# Train model
history = model.fit(X_train, y_train, epochs=4, batch_size=128, validation_split=0.2)

# Evaluate
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)




Epoch 1/4
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 102ms/step - accuracy: 0.8665 - loss: 0.3429 - val_accuracy: 0.9789 - val_loss: 0.0699
Epoch 2/4
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 100ms/step - accuracy: 0.9771 - loss: 0.0790 - val_accuracy: 0.9800 - val_loss: 0.0634
Epoch 3/4
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 106ms/step - accuracy: 0.9856 - loss: 0.0488 - val_accuracy: 0.9848 - val_loss: 0.0511
Epoch 4/4
[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 106ms/step - accuracy: 0.9895 - loss: 0.0390 - val_accuracy: 0.9790 - val_loss: 0.0637
[1m276/276[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step


In [6]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Accuracy
accuracy = accuracy_score(y_test, y_pred_classes)
print("Accuracy:", accuracy)

# Classification Report
classify_report = classification_report(y_test, y_pred_classes)
print("\nClassification Report:\n", classify_report)

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_classes)
print("\nConfusion Matrix:\n", conf_matrix)

Accuracy: 0.9821793416572078

Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.98      0.98      4536
           1       0.98      0.99      0.98      4274

    accuracy                           0.98      8810
   macro avg       0.98      0.98      0.98      8810
weighted avg       0.98      0.98      0.98      8810


Confusion Matrix:
 [[4433  103]
 [  54 4220]]


In [7]:
model.save("my_model.h5")  



In [9]:
#Store the results in a dictionary
results = {
    "accuracy": accuracy,
    "classification_report": classify_report,
    "confusion_matrix": conf_matrix.tolist()  # Convert to list for easier CSV export
}

#  Convert dictionary to DataFrame
results_df = pd.DataFrame([results])

#  Export the results to a CSV file
results_df.to_csv('LSTM_model_evaluation_results.csv', index=False)
