In [25]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
import matplotlib.pyplot as plt
import seaborn as sns

# Load the previously saved classifiers. joblib files are pickle-based, so
# only load artifacts that come from a trusted source.
NBB = joblib.load('NBBotML.joblib')
RFB = joblib.load('RFBotML.joblib')

# Read the test documents and their true labels into dataframes.
data = pd.read_csv('lemmatized_test.csv')
data2 = pd.read_csv('test_labels.csv')

# Documents to classify. A document that is blank in the CSV is read back as
# NaN, which scikit-learn text vectorizers reject, so treat it as an empty
# string instead of letting transform() fail.
x = data['tokens'].fillna('')
# True labels flattened to 1-D (assumes test_labels.csv holds a single label
# column -- TODO confirm).
y = data2.values.flatten()

# Fail early, before the predictions are computed, if the two files do not
# describe the same rows.
if len(x) != len(y):
    raise ValueError(
        f"lemmatized_test.csv has {len(x)} documents but test_labels.csv "
        f"yields {len(y)} labels"
    )

# Turn the documents into feature vectors with the saved vectorizer
# (presumably the one fitted alongside the models -- verify).
vectorizer = joblib.load('vectorizer.joblib')
x_transformed = vectorizer.transform(x)

# No training happens here: the loaded models only predict on the test set.
# NBB is given a dense array, RFB the transformed matrix as-is.
NB_predictions = NBB.predict(x_transformed.toarray())
RF_predictions = RFB.predict(x_transformed)

# Fraction of test documents each model labelled correctly.
accuracyNB = accuracy_score(y, NB_predictions)
accuracyRF = accuracy_score(y, RF_predictions)

# Keep the predictions of the more accurate model; a tie goes to the random
# forest. The printed flag says whether the random forest was selected.
rf_wins = accuracyRF >= accuracyNB
print('True' if rf_wins else 'False')
chosen_predictions = RF_predictions if rf_wins else NB_predictions

# Pair every document with the label predicted by the selected model.
results_df = pd.DataFrame({
    'category': chosen_predictions,
    'tokens': x
})

# Write the predictions to disk without the dataframe index.
results_df.to_csv('predicted_labels.csv', index=False)

# Print a per-class precision/recall/F1 report for each model by comparing
# the true labels (y) with that model's predictions.
for heading, predictions in (
    ("\nClassification Report NB:", NB_predictions),
    ("\nClassification Report RF:", RF_predictions),
):
    print(heading)
    print(classification_report(y, predictions))


True

Classification Report NB:
               precision    recall  f1-score   support

     business       0.89      0.80      0.84        95
entertainment       0.90      0.93      0.91        94
     politics       0.88      0.90      0.89       106
        sport       0.98      0.97      0.97       115
         tech       0.88      0.94      0.91        89

     accuracy                           0.91       499
    macro avg       0.91      0.91      0.91       499
 weighted avg       0.91      0.91      0.91       499


Classification Report RF:
               precision    recall  f1-score   support

     business       0.87      0.95      0.90        95
entertainment       0.96      0.93      0.94        94
     politics       0.96      0.91      0.93       106
        sport       0.93      0.99      0.96       115
         tech       0.98      0.90      0.94        89

     accuracy                           0.94       499
    macro avg       0.94      0.93      0.93       499
 