In [460]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from csv import QUOTE_NONE
from pathlib import Path
import os
import sys
import csv
import numpy as np
sys.path.append(str(Path(os.path.abspath("")).parents[0]))
from utils.metrics import get_metrics

In [461]:
# Two directory levels above the current working directory (parents[1] of the absolute cwd path).
data_path = Path(os.path.abspath("")).parents[1]

# Raise the csv module's per-field size limit as high as it will accept:
# start from sys.maxsize and shrink the candidate tenfold each time
# csv.field_size_limit raises OverflowError.
max_int = sys.maxsize
limit_accepted = False
while not limit_accepted:
    try:
        csv.field_size_limit(max_int)
    except OverflowError:
        max_int = int(max_int/10)
    else:
        limit_accepted = True


In [462]:
# Directory "pred_values", located one level above the current working directory.
pred_path = Path(os.path.abspath("")).parents[0] / "pred_values"

# Vote weight per ensemble member; only the models listed here take part in the vote.
# Currently disabled entries: svm (0.5111), gaussian_process (0.7359), albert (0.8527).
model_vote_weighting = dict(
    bert=0.8981,
    distilbert=0.9077,
    roberta=0.9276,
    lstm=0.8816,
    cnn=0.8569,
)

In [463]:
# File name of the saved test-set predictions for each model, keyed by the model's short name.
_pred_file_names = {
    "gaussian_process": "classical_gp_bert_head_256_preds.csv",
    "svm": "classical_svm_liwc_2022_256_preprocessed_preds.csv",
    "cnn": "nn_cnn_preds.csv",
    "lstm": "nn_lstm_preds.csv",
    "albert": "llm_albert-base-v2_256_preds.csv",
    "bert": "llm_bert-base-uncased_256_preds.csv",
    "distilbert": "llm_distilbert-base-uncased_256_preds.csv",
    "roberta": "llm_roberta-base_256_preds.csv",
}

# Full path of each prediction file inside the prediction directory.
model_pred_paths = {
    model_name: pred_path / file_name
    for model_name, file_name in _pred_file_names.items()
}

def majority_vote_label_pred(models, weighted: bool = True, binary_input: bool = True,
                             pred_paths=None, vote_weights=None):

    """
    Makes all models deemed suited for classification cast a vote for what class each text belongs to

    Parameters
    models: names of the models that take part in the vote (must not be empty)
    weighted: if True each vote is scaled by the model's weight squared, otherwise every vote counts 1
    binary_input: if True the hard labels in column "pred_label" are used as votes; otherwise
        the scores in column "pred_val" are thresholded at 0.5 and the vote is scaled by the
        model's confidence in the class it voted for
    pred_paths: optional mapping model name -> prediction CSV; defaults to the notebook-level
        model_pred_paths
    vote_weights: optional mapping model name -> weight; defaults to the notebook-level
        model_vote_weighting (only consulted when weighted is True)

    Returns
    final_preds: list of final predictions (argmax of class 0 and 1 for each text)
    true_labels: Gold standard labels for comparison
    combined_preds: array of shape (n_texts, 2) with the accumulated vote mass per class

    Raises
    ValueError: if models is empty or the prediction files do not all have the same number of rows
    """

    models = list(models)
    if not models:
        raise ValueError("models must contain at least one model name")

    # Fall back to the notebook-level tables only when the caller did not supply their own.
    if pred_paths is None:
        pred_paths = model_pred_paths
    if weighted and vote_weights is None:
        vote_weights = model_vote_weighting

    all_model_preds = {model: pd.read_csv(pred_paths[model]) for model in models}

    # Gold labels are taken from one reference file. "roberta" stays the preferred reference
    # so existing results do not change; any other ensemble uses its first model instead of
    # failing with a KeyError.
    reference_model = "roberta" if "roberta" in all_model_preds else models[0]
    reference_preds = all_model_preds[reference_model]
    test_length = len(reference_preds.index)
    true_labels = reference_preds["label"].values

    combined_preds = np.zeros((test_length, 2))
    rows = np.arange(test_length)
    for model, preds in all_model_preds.items():
        if len(preds.index) != test_length:
            raise ValueError(
                f"Predictions of '{model}' have {len(preds.index)} rows, "
                f"expected {test_length} (as in '{reference_model}')"
            )

        weight = vote_weights[model] if weighted else 1
        vote_strength = weight**2  # **2 should punish weaker models more

        if binary_input:
            # One vote per text for the predicted class; every row index occurs exactly once,
            # so the in-place fancy-indexed addition is equivalent to a per-row loop.
            voted_class = preds["pred_label"].values.astype(int)
            combined_preds[rows, voted_class] += vote_strength

        else:
            # Assumes pred_val is the predicted probability of class 1 in [0, 1] (the 0.5
            # threshold suggests so) - TODO confirm. The vote is scaled by the confidence in
            # the class actually voted for, so a confident class-0 prediction (pred_val near 0)
            # counts strongly for class 0 instead of contributing almost nothing.
            pred_vals = preds["pred_val"].values.astype(float)
            voted_class = (pred_vals > 0.5).astype(int)
            confidence = np.where(voted_class == 1, pred_vals, 1.0 - pred_vals)
            combined_preds[rows, voted_class] += confidence * vote_strength

    # Most voted for class wins classification
    final_preds = list(np.argmax(combined_preds, axis=1))

    return final_preds, true_labels, combined_preds


In [464]:
# Every model that has a vote weight configured takes part in the ensemble.
models = [model_name for model_name in model_vote_weighting]

# Unweighted vote over the models' binary predicted labels.
voted_labels, true_labels, combined_preds = majority_vote_label_pred(
    models,
    weighted=False,
    binary_input=True,
)

In [465]:
# Score the ensemble's voted labels against the gold labels and print the resulting metrics.
# NOTE(review): get_metrics is imported from utils.metrics; the (predictions, gold) argument
# order used here is assumed to match its signature - confirm.
metrics = get_metrics(voted_labels, true_labels)
print(metrics)

{'tn': 2513, 'fp': 19, 'fn': 17, 'tp': 426, 'accuracy': 0.9878991596638655, 'precision': 0.9573033707865168, 'recall': 0.9616252821670429, 'specificity': 0.9924960505529226, 'f1_score': 0.9594594594594594, 'f05_score': 0.9581646423751686, 'f2_score': 0.9607577807848443, 'roc_auc': 0.9770606663599827}


In [466]:
def get_combined_score_for_preds(combined_preds):
    """
    Computes the vote margin for each entry of combined_preds.

    combined_preds: iterable of indexable pairs holding the accumulated votes for class 0 and class 1
    Returns
    total_scores: list with (class 1 votes - class 0 votes) per entry; negative values
        mean class 0 received more votes
    """
    return [class_votes[1] - class_votes[0] for class_votes in combined_preds]


In [467]:
def get_misclassified(pred_labels, true_labels, test_texts=None):
    """
    Collects every test sample whose predicted label differs from the gold label.

    Parameters
    pred_labels: predicted class per sample
    true_labels: gold class per sample, aligned with pred_labels
    test_texts: optional DataFrame with a "text" column whose row positions match the
        positions in pred_labels / true_labels. When omitted, the test split is read from disk.

    Returns
    indexes: positions of the misclassified samples
    misclassified: text of each misclassified sample (as str)
    preds: (pred, true) tuple for each misclassified sample
    """
    if test_texts is None:
        # The separator is a single invisible non-ASCII character. pandas cannot parse such a
        # separator with its default C engine and would fall back to the python engine with a
        # ParserWarning on every call; naming the engine explicitly gives the same parse
        # without the warning.
        test_texts = pd.read_csv(
            Path(os.path.abspath("")).parents[1] / "dataset_creation/data/train_test/new/test_sliced_stair_twitter_256.csv",
            sep="‎",
            quoting=QUOTE_NONE,
            engine="python",
        )

    texts = test_texts["text"]

    indexes = []
    misclassified = []
    preds = []
    for idx, (pred, true) in enumerate(zip(pred_labels, true_labels)):
        # Compare as ints so that e.g. 1 and 1.0 count as the same class.
        if int(pred) != int(true):
            indexes.append(idx)
            misclassified.append(str(texts.iloc[idx]))
            preds.append((pred, true))

    return indexes, misclassified, preds
    

In [470]:
# Gather the misclassified test samples together with their (pred, true) label pairs.
indexes, misclassified, preds = get_misclassified(voted_labels, true_labels)

# Vote margin (class 1 votes minus class 0 votes) of each misclassified sample.
misclassified_votes = [combined_preds[idx] for idx in indexes]
total_scores = get_combined_score_for_preds(misclassified_votes)

# Report in ascending order of margin, i.e. most negative score first.
total_scores_sorted_idx = np.argsort(total_scores)
for idx in total_scores_sorted_idx:
    print(f"idx {indexes[idx]} pred {preds[idx][0]} score {total_scores[idx]} true {preds[idx][1]}\n{misclassified[idx]}")

idx 9 pred 0 score -5.0 true 1
Yesssss!!!
idx 23 pred 0 score -3.0 true 1
I'm surprised most of America can dress their self.
idx 442 pred 0 score -3.0 true 1
:(
idx 91 pred 0 score -3.0 true 1
I'm getting over much of the problems I've had in the past, dacruz, the only reason I'm so worried about this whole thing is it seems to be rooted in truth. If a White Owl is a bad Omen, then I'd rather be prepared than unprepared.
idx 366 pred 0 score -3.0 true 1
Stickam Party!
idx 422 pred 0 score -1.0 true 1
Oh, dear... Is it wrong that I want to go to a hoedown now?
idx 408 pred 0 score -1.0 true 1
Tick tock NE...
idx 308 pred 0 score -1.0 true 1
Look up at the stars and you'll see the ones who have gone before you. A smile is always on their face. Why am I not in the stars?
idx 255 pred 0 score -1.0 true 1
I don't struggle with self worth. I struggle with patience. I have virtually zero patience for anyone.
idx 345 pred 0 score -1.0 true 1
matter, but it is simply not my fault. The Universi

  test_texts = pd.read_csv(Path(os.path.abspath("")).parents[1] / "dataset_creation/data/train_test/new/test_sliced_stair_twitter_256.csv", sep="‎", quoting=QUOTE_NONE)
