In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from keras.models import load_model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from joblib import load

In [2]:
# Load the cleaned chat dataset from disk.
df = pd.read_csv('dataset/cleaned_dota2_chat.csv')

# Split the data into training and testing sets (30% held out; the fixed seed
# makes the split repeatable across kernel restarts).
# NOTE(review): the saved models evaluated in the next cell were presumably
# trained on this same split -- confirm test_size/random_state match training.
train_data, test_data = train_test_split(df, test_size=0.3, random_state=42)

# Extract the text and labels.
# read_csv parses empty cells as NaN and TfidfVectorizer rejects NaN documents,
# so missing texts become '' and every entry is coerced to str before being
# turned into a plain Python list. For rows that already hold strings this is
# a no-op, so the resulting vocabulary is unchanged.
train_texts = train_data['cleaned_text'].fillna('').astype(str).tolist()
train_labels = train_data['label']
test_texts = test_data['cleaned_text'].fillna('').astype(str).tolist()
test_labels = test_data['label']

# Vectorize text using TF-IDF: the vocabulary (capped at 5000 terms) is learned
# from the training texts only and then applied unchanged to the test texts.
# NOTE(review): the vectorizer is refit here rather than loaded from disk; the
# features only line up with the saved models if training used the same data,
# split and settings -- confirm.
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
train_tfidf_vectors = tfidf_vectorizer.fit_transform(train_texts)
test_tfidf_vectors = tfidf_vectorizer.transform(test_texts)

In [3]:
# --- Load the saved ML models ------------------------------------------------
# NOTE(review): joblib.load unpickles arbitrary objects, so these artifacts
# must come from a trusted source.
(
    logistic_regression_model,
    svm_model,
    random_forest_model,
    gradient_boosting_model,
    mlp_model,
) = map(load, (
    'results/model_Logistic Regression.joblib',
    'results/model_Support Vector Machine.joblib',
    'results/model_Random Forest.joblib',
    'results/model_Gradient Boosting.joblib',
    'results/model_Neural Network.joblib',
))

# --- Load the saved DL models (Keras .h5 files) ------------------------------
loaded_cnn_model, loaded_rnn_model, loaded_lstm_model = map(load_model, (
    'results/cnn_model.h5',
    'results/rnn_model.h5',
    'results/lstm_model.h5',
))

# --- ML models: predict on the TF-IDF test matrix ----------------------------
(
    logistic_regression_predictions,
    svm_predictions,
    random_forest_predictions,
    gradient_boosting_predictions,
    mlp_predictions,
) = (
    estimator.predict(test_tfidf_vectors)
    for estimator in (
        logistic_regression_model,
        svm_model,
        random_forest_model,
        gradient_boosting_model,
        mlp_model,
    )
)

# The ML predictions are used directly as labels, so the *_pred_labels names
# are plain aliases of the prediction arrays.
(
    logistic_regression_pred_labels,
    svm_pred_labels,
    random_forest_pred_labels,
    gradient_boosting_pred_labels,
    mlp_pred_labels,
) = (
    logistic_regression_predictions,
    svm_predictions,
    random_forest_predictions,
    gradient_boosting_predictions,
    mlp_predictions,
)

# --- DL models: evaluate, then predict ---------------------------------------
# All three evaluate() calls run before any predict() call, CNN -> RNN -> LSTM.
loaded_cnn_results, loaded_rnn_results, loaded_lstm_results = (
    network.evaluate(test_tfidf_vectors, test_labels)
    for network in (loaded_cnn_model, loaded_rnn_model, loaded_lstm_model)
)

cnn_predictions, rnn_predictions, lstm_predictions = (
    network.predict(test_tfidf_vectors)
    for network in (loaded_cnn_model, loaded_rnn_model, loaded_lstm_model)
)

# Threshold the network outputs at 0.5 and flatten them to 1-D integer labels.
# NOTE(review): assumes each model emits one score per sample -- TODO confirm.
cnn_pred_labels, rnn_pred_labels, lstm_pred_labels = (
    (scores > 0.5).astype(int).flatten()
    for scores in (cnn_predictions, rnn_predictions, lstm_predictions)
)

# --- Reporting ---------------------------------------------------------------
# Display name -> predicted labels, in the order the results are printed.
pred_labels_by_model = {
    "Logistic Regression": logistic_regression_pred_labels,
    "SVM": svm_pred_labels,
    "Random Forest": random_forest_pred_labels,
    "Gradient Boosting": gradient_boosting_pred_labels,
    "MLP": mlp_pred_labels,
    "CNN": cnn_pred_labels,
    "RNN": rnn_pred_labels,
    "LSTM": lstm_pred_labels,
}

# Print classification reports
for report_name, report_labels in pred_labels_by_model.items():
    print(f"{report_name} Classification Report:")
    print(classification_report(test_labels, report_labels))

# Print accuracy scores for comparison
for report_name, report_labels in pred_labels_by_model.items():
    print(f"{report_name} Accuracy: {accuracy_score(test_labels, report_labels)}")



[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 59ms/step - accuracy: 0.8363 - loss: 0.4341
[1m167/167[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 293ms/step - accuracy: 0.8363 - loss: 0.4462
[1m110/167[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m28s[0m 498ms/step - accuracy: 0.8358 - loss: 0.4468