In [51]:
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

This notebook can be used to evaluate the results from the BERT model.  At the moment, the code is written to consider the results from "RUN1" (Data\Raw_BERT_results\sample_chunk_results1.csv).  I have included the raw results from another run, called "RUN2", at Data\Raw_BERT_results\sample_chunk_results2.csv.  Should you so desire, you could run everything for RUN2 by changing a couple of paths/names.  This first cell takes the csv file that was written by classifytweets.py, replaces the party with the proper coding {Democrat: 0, Republican: 1}, and writes the coded data to a csv for use by the second cell in this notebook. It also reports chunk-wise accuracy.

In [76]:
def get_chunk_wise_accuracy(BERT_results, output_file_name):
    """Code the party labels, save the coded chunks to csv, and report chunk-level accuracy.

    Parameters
    ----------
    BERT_results : pandas.DataFrame
        Chunk-level results with at least the columns "UserID", "Party" and
        "Predictions". "Party" values equal to "Democratic Party" are coded
        as 0 and "Republican Party" as 1; any other value is left unchanged.
        The input frame is not modified.
    output_file_name : str
        Prefix of the csv file that is written; the accuracy is appended to
        the file name.

    Returns
    -------
    str
        The message "Chunk level accuracy is <accuracy>", where accuracy is
        the fraction of rows whose prediction equals the coded party.
    """
    party_codes = {"Democratic Party": 0, "Republican Party": 1}

    # Build a new frame with only the columns that are evaluated. The labels are
    # mapped (rather than assigning ints into the string column in place) so the
    # coding does not depend on the Party column silently changing dtype.
    coded = BERT_results[["UserID", "Party", "Predictions"]].assign(
        Party=lambda frame: frame["Party"].map(
            lambda party: party_codes.get(party, party)
        )
    )

    pred_arr = coded["Predictions"].to_numpy()
    truth_arr = coded["Party"].to_numpy()
    accuracy = np.sum(pred_arr == truth_arr)/len(pred_arr)

    # The path uses Windows-style separators; the target folder must already exist.
    coded.to_csv("Data\\Data_for_Evaluation\\{}_chunk_accuracy={}.csv".format(output_file_name, accuracy), index=False)
    return "Chunk level accuracy is {}".format(accuracy)

# To evaluate a different run, replace the trailing 1 in the file name below with a 2
# (and pass a matching run name to get_chunk_wise_accuracy).
results = pd.read_csv('Data\\Raw_BERT_results\\sample_chunk_results1.csv', index_col=0)
chunk_accuracy_message = get_chunk_wise_accuracy(results, "RUN1")
print(chunk_accuracy_message)

Chunk level accuracy is 0.8


In [77]:
def get_user_accuracy(chunk_results, output_file_name):
    """Aggregate chunk-level results per user by majority vote and report user-level accuracy.

    For every distinct "UserID" the most frequent "Party" and the most
    frequent "Predictions" value across that user's chunks are taken as the
    user's label and prediction. The per-user table is written to csv with
    the accuracy embedded in the file name.

    Parameters
    ----------
    chunk_results : pandas.DataFrame
        Coded chunk-level results with the columns "UserID", "Party" and
        "Predictions" (integer-coded labels).
    output_file_name : str
        Prefix of the csv file that is written.

    Returns
    -------
    str
        The message "User level accuracy is: <accuracy>" with the accuracy
        rounded to three decimals.

    Raises
    ------
    ZeroDivisionError
        If chunk_results contains no users.
    """
    def _majority(values):
        # Series.mode() returns the modes in sorted order, so taking the first
        # one resolves ties deterministically in favour of the smallest label
        # instead of failing when a user has equally many chunks of each class.
        return int(values.mode().iloc[0])

    # Collect one record per user and build the frame once; DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic anyway.
    # groupby yields the users in sorted order, which keeps the output stable.
    records = [
        {
            "UserID": user,
            "Party": _majority(user_df["Party"]),
            "Predictions": _majority(user_df["Predictions"]),
        }
        for user, user_df in chunk_results.groupby("UserID")
    ]
    user_pred = pd.DataFrame(records, columns=["UserID", "Party", "Predictions"])

    correct_pred = sum(1 for record in records if record["Party"] == record["Predictions"])
    accuracy = correct_pred/len(records)

    # The path uses Windows-style separators; the target folder must already exist.
    user_pred.to_csv("Data\\Data_for_evaluation\\{}_user_accuracy={}.csv".format(output_file_name, accuracy))
    return "User level accuracy is: {0:.3f}".format(accuracy)

# Load the coded chunk-level csv (its file name embeds the chunk accuracy)
# and report the accuracy after aggregating the chunks per user.
chunk_results = pd.read_csv("Data\\Data_for_evaluation\\RUN1_chunk_accuracy=0.8.csv")
user_accuracy_message = get_user_accuracy(chunk_results, "RUN1")
print(user_accuracy_message)

User level accuracy is: 0.858


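The next cell will calculate the confusion matrix for either user-level or chunk-level results.  Note that the code counts label 1 as the positive class, so with the coding used in this notebook (Democrat = 0, Republican = 1) this means Republican -> Positive, Democrat -> Negative.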

In [79]:
def get_confusion_matrix(results, positive_label=1):
    """Count true/false positives and negatives for binary predictions.

    Parameters
    ----------
    results : pandas.DataFrame or mapping
        Must provide "Predictions" and "Party" entries of equal length; works
        for both chunk-level and user-level results.
    positive_label : optional
        Label value that is counted as the positive class. Defaults to 1;
        every other label value is counted as negative.

    Returns
    -------
    dict
        Counts under the keys "TP", "FP", "FN" and "TN".
    """
    counts = {"TP": 0, "FP": 0, "FN": 0, "TN": 0}
    predictions = list(results["Predictions"])
    labels = list(results["Party"])
    for prediction, label in zip(predictions, labels):
        is_positive = label == positive_label
        if prediction == label:
            # Correct prediction: the true label decides between TP and TN.
            counts["TP" if is_positive else "TN"] += 1
        else:
            # Wrong prediction: a missed positive is FN, a missed negative is FP.
            counts["FN" if is_positive else "FP"] += 1
    return counts

# Load the user-level csv (its file name embeds the user accuracy) and print its
# confusion matrix. The backslashes are doubled so that "\D" and "\R" are not
# parsed as (invalid) escape sequences; the resulting path is unchanged.
user_results = pd.read_csv("Data\\Data_for_evaluation\\RUN1_user_accuracy=0.8578947368421053.csv")
print(get_confusion_matrix(user_results))

{'TP': 77, 'FP': 13, 'FN': 14, 'TN': 86}
