In [1]:
import pandas as pd
import numpy as np
import warnings
from pathlib import Path
warnings.simplefilter(action='ignore', category=FutureWarning)

This notebook can be used to evaluate the results from the BERT model. At the moment, the code is written to consider the results from a run conducted on 1/29/2023 (Data\Raw_BERT_results\testresults_1_29_23.csv). The next cell takes the CSV results file that was written by classifytweets.py, replaces the party with the proper coding {Democrat: 0, Republican: 1}, and writes the coded data to a CSV for use by the cell after it (user-level accuracy). It also reports chunk-wise accuracy.

In [None]:
def get_chunk_wise_accuracy(BERT_results, output_file_name):
    """Code the party labels, save the coded chunk-level results, and report accuracy.

    Parameters
    ----------
    BERT_results : pandas.DataFrame
        Chunk-level classifier output with at least the columns "UserID",
        "Party" and "Predictions". Any other column (e.g. "Text") is ignored.
        The caller's frame is not modified.
    output_file_name : str
        Name (without extension) of the CSV written to
        Data/Data_for_evaluation/.

    Returns
    -------
    str
        "Chunk level accuracy is <fraction>", where the fraction is the share
        of rows whose prediction equals the coded party (nan when there are
        no rows).
    """
    party_codes = {"Democratic Party": 0, "Republican Party": 1}

    # Keep only the columns needed downstream; .copy() avoids touching the caller's frame.
    coded_results = BERT_results[["UserID", "Party", "Predictions"]].copy()
    # Values that are not one of the two known party names are left unchanged
    # (they simply never match a prediction).
    coded_results["Party"] = coded_results["Party"].map(
        lambda party: party_codes.get(party, party)
    )

    pred_arr = coded_results["Predictions"].to_numpy()
    truth_arr = coded_results["Party"].to_numpy()
    n_chunks = len(pred_arr)
    if n_chunks:
        accuracy = int(np.sum(pred_arr == truth_arr)) / n_chunks
    else:
        accuracy = float("nan")

    # Same directory spelling as the cells that read this file back; a
    # differently-cased folder name breaks on case-sensitive filesystems.
    output_path = Path("Data/Data_for_evaluation/{}.csv".format(output_file_name))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    coded_results.to_csv(output_path, index=False)
    return "Chunk level accuracy is {}".format(accuracy)

# To evaluate a different run, point raw_results_path at that run's CSV
# (and pick a matching output name in the call below).
raw_results_path = Path('Data/Raw_BERT_results/testresults_1_29_23.csv')
results = pd.read_csv(raw_results_path, index_col=0)
chunk_accuracy_report = get_chunk_wise_accuracy(results, "testresults_1_29_23_chunk")
print(chunk_accuracy_report)

In [None]:
def get_user_accuracy(chunk_results, output_file_name):
    """Aggregate chunk-level results to one row per user and report accuracy.

    For every user, the party and the prediction are each taken as the most
    frequent value over that user's chunks (majority vote). When two values
    are tied, the smallest one is used, because ``Series.mode`` returns its
    modes in sorted order.

    Parameters
    ----------
    chunk_results : pandas.DataFrame
        Coded chunk-level results with the columns "UserID", "Party" and
        "Predictions" (party and predictions must be convertible to int).
    output_file_name : str
        Name (without extension) of the CSV written to
        Data/Data_for_evaluation/.

    Returns
    -------
    str
        "User level accuracy is: <fraction>" with three decimals (nan when
        there are no users).
    """
    # Collect plain records and build the frame once: DataFrame.append no
    # longer exists in pandas >= 2.0, and growing a frame row by row is quadratic.
    records = []
    # sort=False keeps users in first-seen order, so the output file is reproducible.
    for user, user_df in chunk_results.groupby("UserID", sort=False):
        party = int(user_df["Party"].mode().iloc[0])
        prediction = int(user_df["Predictions"].mode().iloc[0])
        records.append({"UserID": user, "Party": party, "Predictions": prediction})

    user_pred = pd.DataFrame(records, columns=["UserID", "Party", "Predictions"])

    n_users = len(records)
    if n_users:
        correct_pred = sum(1 for row in records if row["Party"] == row["Predictions"])
        accuracy = correct_pred / n_users
    else:
        accuracy = float("nan")

    output_path = Path("Data/Data_for_evaluation/{}.csv".format(output_file_name))
    output_path.parent.mkdir(parents=True, exist_ok=True)
    user_pred.to_csv(output_path, index=False)
    return "User level accuracy is: {0:.3f}".format(accuracy)

# Load the coded chunk-level results and report the per-user accuracy.
chunk_results_path = Path("Data/Data_for_evaluation/testresults_1_29_23_chunk.csv")
chunk_results = pd.read_csv(chunk_results_path)
user_accuracy_report = get_user_accuracy(chunk_results, "testresults_1_29_23_user")
print(user_accuracy_report)

The next cell calculates the confusion matrix counts for either user-level or chunk-level results. Note that the code counts label 1 (Republican) as the positive class and label 0 (Democrat) as the negative class.

In [None]:
def get_confusion_matrix(results, positive_label=1):
    """Count true/false positives and negatives for a set of predictions.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame with a "Party" column (true labels) and a "Predictions" column
        (predicted labels), compared row by row.
    positive_label : optional
        Label value counted as the positive class. Defaults to 1, which is
        Republican under the {Democrat: 0, Republican: 1} coding; pass 0 to
        treat Democrat as the positive class instead.

    Returns
    -------
    dict
        {"TP": int, "FP": int, "FN": int, "TN": int}. A row counts as TP/TN
        when the prediction equals the true label (TP if the true label is
        ``positive_label``, otherwise TN), and as FN/FP when it does not
        (FN if the true label is ``positive_label``, otherwise FP).
    """
    counts = {"TP": 0, "FP": 0, "FN": 0, "TN": 0}
    for predicted, actual in zip(results["Predictions"], results["Party"]):
        actual_is_positive = actual == positive_label
        if predicted == actual:
            counts["TP" if actual_is_positive else "TN"] += 1
        else:
            counts["FN" if actual_is_positive else "FP"] += 1
    return counts

# Load the per-user results and print their confusion-matrix counts.
user_results_path = Path("Data/Data_for_evaluation/testresults_1_29_23_user.csv")
user_results = pd.read_csv(user_results_path)
confusion_counts = get_confusion_matrix(user_results)
print(confusion_counts)