In [50]:
import pandas as pd
import json
import os
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import confusion_matrix, classification_report, multilabel_confusion_matrix
from sklearn.preprocessing import MultiLabelBinarizer
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
)
from typing import List, Dict
import seaborn as sns
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the dataset class

# Load data from json file
with open('../reports/VENEZUELA_201901_2_epochs_200_train_size_full.json') as f:
    data = json.load(f)

dfs = []
for k, v in data.items():
    valid_metrics = v['valid']
    valid_metrics['dataset'] = 'valid'
    valid_metrics['fold'] = int(k) + 1
    dfs.append(pd.DataFrame([valid_metrics]))
    
    test_metrics = v['test']
    test_metrics['dataset'] = 'test'
    test_metrics['fold'] = int(k) + 1
    dfs.append(pd.DataFrame([test_metrics]))

# Concatenate all dataframes together
df = pd.concat(dfs, ignore_index=True)

# Rename columns
df.columns = df.columns.str.replace('eval_', '')
df = df.rename(columns={'epoch': 'num_epochs'})

# Print the final dataframe
print(df)

       loss  accuracy  micro_precision  micro_recall  micro_f1  \
0  0.132741       0.0         0.823881      0.787072  0.805056   
1  0.184075       0.0         0.779221      0.703934  0.739666   
2  0.109380       0.0         0.830753      0.772553  0.800597   
3  0.153071       0.0         0.784553      0.665977  0.720418   
4  0.113060       0.0         0.816616      0.772031  0.793698   
5  0.157828       0.0         0.798791      0.638371  0.709628   
6  0.113176       0.0         0.824473      0.772556  0.797671   
7  0.152108       0.0         0.803723      0.655625  0.722159   
8  0.111680       0.0         0.827362      0.729187  0.775178   
9  0.154341       0.0         0.802740      0.606625  0.691038   

   macro_precision  macro_recall  macro_f1  runtime  samples_per_second  \
0         0.721881      0.660063  0.683039   3.3753             237.019   
1         0.707577      0.623297  0.637038   4.2092             237.577   
2         0.810919      0.593703  0.664724   3.3

In [53]:
df[df.dataset == "test"].mean()

  df[df.dataset == "test"].mean()


loss                    0.160285
accuracy                0.000000
micro_precision         0.793805
micro_recall            0.654106
micro_f1                0.716582
macro_precision         0.691064
macro_recall            0.525625
macro_f1                0.562369
runtime                 4.245200
samples_per_second    235.569600
steps_per_second       58.892200
num_epochs              5.800000
fold                    3.000000
dtype: float64

In [29]:
def model_summary(model):
    print("Model summary:")
    print("---------------------------")
    total_params = 0
    for name, param in model.named_parameters():
        param_count = param.numel()
        total_params += param_count
    print(f"Total parameters: {total_params}")
    
"""def print_report(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred, target_names=categories)
    print(report)
    sns.heatmap(cm, annot=True, xticklabels=categories, yticklabels=categories, fmt='g')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()"""

class TweetDataset(Dataset):
    def __init__(self, x, y, mlb, tokenizer):
        self.x = x
        self.y = y
        self.mlb = mlb
        self.tokenizer = tokenizer
        self.encoded_tweets = self.preprocess_text(self.x)
    
    def preprocess_text(self, text):
        return self.tokenizer(text, return_attention_mask=True, return_tensors='pt', padding=True)
        
    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        label = self.y[idx]
        return {'input_ids': self.encoded_tweets['input_ids'][idx],
                'attention_mask': self.encoded_tweets['attention_mask'][idx],
                'label': torch.tensor(label, dtype=torch.float32)}
        
class MultiLabelDataCollator(DataCollatorWithPadding):
    def __init__(self, tokenizer):
        super().__init__(tokenizer)

    def __call__(self, features: List[Dict[str, torch.Tensor]]):
        batch = super().__call__(features)
        batch["labels"] = torch.stack([feature["label"] for feature in features])
        return batch
    
def get_classification_report(data_loader, model, target_names, label_names):
    labels = []
    predictions = []
    for batch in data_loader:
        batch_inputs = {'input_ids': batch['input_ids'].to(device),
                        'attention_mask': batch['attention_mask'].to(device)}
        with torch.no_grad():
            logits = model(**batch_inputs).logits
        sigmoid = torch.nn.Sigmoid()
        probs = sigmoid(torch.Tensor(logits))
        batch_predictions = (probs >= 0.5).detach().cpu().numpy().astype(int)
        predictions.append(batch_predictions)
        labels.append(batch['labels'].detach().cpu().numpy().astype(int))

    predictions = np.concatenate(predictions, axis=0)
    labels = np.concatenate(labels, axis = 0)

    #cm = multilabel_confusion_matrix(labels, predictions)
    dict_report = classification_report(labels, predictions, target_names=target_names, labels=label_names, zero_division=0, output_dict=True)
    report = classification_report(labels, predictions, target_names=target_names, labels=label_names, zero_division=0)
    return dict_report, report
    
def calculate_average_report(reports):
    print(reports)
    print(len(reports))
    avg_report = {}
    for report in reports:
        for key, scores in report.items():
            if key not in avg_report:
                avg_report[key] = {}
                for score_key, score_value in scores.items():
                    avg_report[key][score_key] = score_value
            else:
                for score_key, score_value in scores.items():
                    avg_report[key][score_key] += score_value

    num_reports = len(reports)
    for key, scores in avg_report.items():
        for score_key in scores:
            avg_report[key][score_key] /= num_reports

    return avg_report

def average_report_to_dataframe(average_report):
    data = {
        "precision": [],
        "recall": [],
        "f1-score": [],
        "support": []
    }
    index = []

    for class_name, metrics in average_report.items():
        if class_name == 'accuracy':
            continue

        index.append(class_name)
        data["precision"].append(metrics["precision"])
        data["recall"].append(metrics["recall"])
        data["f1-score"].append(metrics["f1-score"])
        data["support"].append(metrics["support"])

    return pd.DataFrame(data, index=index)

def calculate_metrics(task):
    k = 5
    
    val_classification_reports = []
    test_classification_reports = []

    # Loop over each fold and load the corresponding model
    for fold in range(k):
        model_path = f"../models/{task}_epochs_200_train_size_full_fold_{fold}"
        # find the latest checkpoint file
        #checkpoint_files = [f for f in os.listdir(model_path) if f.startswith("checkpoint")]
        latest_checkpoint = os.path.join(model_path, "")  # use "" for models that were manually saved after training. use sorted(checkpoint_files)[0] for the first automatically saved checkpoint 
        print(latest_checkpoint)
        
        # Load the model and tokenizer
        model = AutoModelForSequenceClassification.from_pretrained(latest_checkpoint)
        model.to(device)
        tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-large")

        filename = f"../data/labeled_data/{task}_test_{fold}.json"
        with open(filename) as f:
            data = json.load(f)
        train_df = pd.DataFrame(data["train"])
        val_df = pd.DataFrame(data["valid"])
        test_df = pd.DataFrame(data["test"])
        
        train_annotations = train_df["annotations"].tolist()
        classes = set()
        for annotation in train_annotations:
            classes.update(annotation)
        classes = sorted(list(classes))
        
        checkpoint = torch.load(os.path.join(model_path, "pytorch_model.bin"))
        model.load_state_dict(checkpoint)
        
        mlb = MultiLabelBinarizer(classes=classes)
        
        train_labels = mlb.fit_transform(train_df["annotations"])
        val_labels = mlb.transform(val_df["annotations"])
        test_labels = mlb.transform(test_df["annotations"])
        
        train_dataset = TweetDataset(train_df['text'].to_list(), torch.tensor(train_labels), mlb, tokenizer)
        val_dataset = TweetDataset(val_df['text'].to_list(), torch.tensor(val_labels), mlb, tokenizer)
        test_dataset = TweetDataset(test_df['text'].to_list(), torch.tensor(test_labels), mlb, tokenizer)
        
        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=256, shuffle=False, collate_fn=MultiLabelDataCollator(tokenizer)
        )
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=256, shuffle=False, collate_fn=MultiLabelDataCollator(tokenizer)
        )
        
        model.eval()
        val_report_dict, val_report = get_classification_report(val_loader, model, classes, range(len(classes)))
        test_report_dict, test_report = get_classification_report(test_loader, model, classes, range(len(classes)))
        val_classification_reports.append(val_report_dict)
        test_classification_reports.append(test_report_dict)

    val_average_report = calculate_average_report(val_classification_reports)
    test_average_report = calculate_average_report(test_classification_reports)
    val_average_report_df = average_report_to_dataframe(val_average_report)
    test_average_report_df = average_report_to_dataframe(test_average_report)
    print("\nAverage Validation Classification Report In DataFrame Format:")
    print(val_average_report_df) 
    print("\nAverage Test Classification Report In DataFrame Format:")
    print(test_average_report_df) 
    return val_average_report_df, test_average_report_df

generic_val_average_report_df, generic_test_average_report_df = calculate_metrics("generic")
GRU_202012_val_average_report_df, GRU_202012_test_average_report_df = calculate_metrics("GRU_202012")
IRA_202012_val_average_report_df, IRA_202012_test_average_report_df = calculate_metrics("IRA_202012")
REA_0621_val_average_report_df, REA_0621_test_average_report_df = calculate_metrics("REA_0621")
UGANDA_0621_val_average_report_df, UGANDA_0621_test_average_report_df = calculate_metrics("UGANDA_0621")
VENEZUELA_201901_2_val_average_report_df, VENEZUELA_201901_2_test_average_report_df = calculate_metrics("VENEZUELA_201901_2")

../models/generic_epochs_200_train_size_full_fold_0/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_1/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_2/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_3/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_4/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


[{'Conspiracy Theory': {'precision': 0.6521739130434783, 'recall': 0.3191489361702128, 'f1-score': 0.4285714285714286, 'support': 47}, 'Education': {'precision': 0.875, 'recall': 0.5833333333333334, 'f1-score': 0.7000000000000001, 'support': 12}, 'Election Campaign': {'precision': 0.8076923076923077, 'recall': 0.8076923076923077, 'f1-score': 0.8076923076923077, 'support': 26}, 'Environment': {'precision': 1.0, 'recall': 0.5714285714285714, 'f1-score': 0.7272727272727273, 'support': 14}, 'Government/Public': {'precision': 0.7686832740213523, 'recall': 0.8571428571428571, 'f1-score': 0.8105065666041276, 'support': 252}, 'Health': {'precision': 0.8571428571428571, 'recall': 0.8, 'f1-score': 0.8275862068965518, 'support': 45}, 'Immigration/Integration': {'precision': 0.8275862068965517, 'recall': 0.6, 'f1-score': 0.6956521739130435, 'support': 40}, 'Justice/Crime': {'precision': 0.7669902912621359, 'recall': 0.7383177570093458, 'f1-score': 0.7523809523809523, 'support': 107}, 'Labor/Employ

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_1/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_2/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_3/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_4/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


[{'Conspiracy Theory': {'precision': 0.6, 'recall': 0.375, 'f1-score': 0.4615384615384615, 'support': 32}, 'Education': {'precision': 0.8333333333333334, 'recall': 0.5, 'f1-score': 0.625, 'support': 10}, 'Election Campaign': {'precision': 0.8148148148148148, 'recall': 0.7857142857142857, 'f1-score': 0.7999999999999999, 'support': 28}, 'Environment': {'precision': 1.0, 'recall': 0.46153846153846156, 'f1-score': 0.631578947368421, 'support': 13}, 'Government/Public': {'precision': 0.8508064516129032, 'recall': 0.7535714285714286, 'f1-score': 0.7992424242424243, 'support': 280}, 'Health': {'precision': 1.0, 'recall': 0.574468085106383, 'f1-score': 0.7297297297297298, 'support': 47}, 'Immigration/Integration': {'precision': 0.6538461538461539, 'recall': 0.7083333333333334, 'f1-score': 0.68, 'support': 24}, 'Justice/Crime': {'precision': 0.7357142857142858, 'recall': 0.8442622950819673, 'f1-score': 0.7862595419847328, 'support': 122}, 'Labor/Employment': {'precision': 0.6111111111111112, 'r

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_1/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_2/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_3/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_4/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


[{'Conspiracy Theory': {'precision': 0.8, 'recall': 0.4528301886792453, 'f1-score': 0.5783132530120482, 'support': 53}, 'Education': {'precision': 0.7272727272727273, 'recall': 0.4444444444444444, 'f1-score': 0.5517241379310345, 'support': 18}, 'Election Campaign': {'precision': 0.875, 'recall': 0.84, 'f1-score': 0.8571428571428572, 'support': 25}, 'Environment': {'precision': 1.0, 'recall': 0.29411764705882354, 'f1-score': 0.45454545454545453, 'support': 17}, 'Government/Public': {'precision': 0.7777777777777778, 'recall': 0.8064516129032258, 'f1-score': 0.7918552036199094, 'support': 217}, 'Health': {'precision': 0.8235294117647058, 'recall': 0.7368421052631579, 'f1-score': 0.7777777777777778, 'support': 38}, 'Immigration/Integration': {'precision': 0.7419354838709677, 'recall': 0.6388888888888888, 'f1-score': 0.6865671641791045, 'support': 36}, 'Justice/Crime': {'precision': 0.8888888888888888, 'recall': 0.8135593220338984, 'f1-score': 0.8495575221238938, 'support': 118}, 'Labor/Emp

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_1/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_2/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_3/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_4/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


[{'Conspiracy Theory': {'precision': 0.7906976744186046, 'recall': 0.5666666666666667, 'f1-score': 0.6601941747572816, 'support': 60}, 'Education': {'precision': 0.5, 'recall': 0.2, 'f1-score': 0.28571428571428575, 'support': 5}, 'Election Campaign': {'precision': 0.7142857142857143, 'recall': 0.6896551724137931, 'f1-score': 0.7017543859649122, 'support': 29}, 'Environment': {'precision': 0.5, 'recall': 0.2, 'f1-score': 0.28571428571428575, 'support': 10}, 'Government/Public': {'precision': 0.8022388059701493, 'recall': 0.8174904942965779, 'f1-score': 0.8097928436911488, 'support': 263}, 'Health': {'precision': 0.8387096774193549, 'recall': 0.6341463414634146, 'f1-score': 0.7222222222222222, 'support': 41}, 'Immigration/Integration': {'precision': 0.9666666666666667, 'recall': 0.6304347826086957, 'f1-score': 0.763157894736842, 'support': 46}, 'Justice/Crime': {'precision': 0.6818181818181818, 'recall': 0.6428571428571429, 'f1-score': 0.6617647058823529, 'support': 70}, 'Labor/Employmen

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_1/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_2/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_3/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_4/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


[{'Conspiracy Theory': {'precision': 0.7297297297297297, 'recall': 0.5, 'f1-score': 0.5934065934065933, 'support': 54}, 'Education': {'precision': 0.8181818181818182, 'recall': 0.6923076923076923, 'f1-score': 0.7500000000000001, 'support': 13}, 'Election Campaign': {'precision': 0.7037037037037037, 'recall': 0.8260869565217391, 'f1-score': 0.76, 'support': 23}, 'Environment': {'precision': 0.3333333333333333, 'recall': 0.5, 'f1-score': 0.4, 'support': 8}, 'Government/Public': {'precision': 0.7941176470588235, 'recall': 0.8586572438162544, 'f1-score': 0.8251273344651952, 'support': 283}, 'Health': {'precision': 0.9230769230769231, 'recall': 0.8571428571428571, 'f1-score': 0.888888888888889, 'support': 42}, 'Immigration/Integration': {'precision': 0.94, 'recall': 0.8103448275862069, 'f1-score': 0.8703703703703703, 'support': 58}, 'Justice/Crime': {'precision': 0.7686567164179104, 'recall': 0.8046875, 'f1-score': 0.7862595419847329, 'support': 128}, 'Labor/Employment': {'precision': 0.521

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_1/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_2/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_3/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_4/


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


[{'Conspiracy Theory': {'precision': 0.6216216216216216, 'recall': 0.5111111111111111, 'f1-score': 0.5609756097560976, 'support': 45}, 'Education': {'precision': 0.8571428571428571, 'recall': 0.5, 'f1-score': 0.631578947368421, 'support': 12}, 'Election Campaign': {'precision': 0.7241379310344828, 'recall': 0.84, 'f1-score': 0.7777777777777777, 'support': 25}, 'Environment': {'precision': 0.6666666666666666, 'recall': 0.5714285714285714, 'f1-score': 0.6153846153846153, 'support': 14}, 'Government/Public': {'precision': 0.7215909090909091, 'recall': 0.671957671957672, 'f1-score': 0.695890410958904, 'support': 189}, 'Health': {'precision': 0.851063829787234, 'recall': 0.7017543859649122, 'f1-score': 0.7692307692307693, 'support': 57}, 'Immigration/Integration': {'precision': 0.8095238095238095, 'recall': 0.6296296296296297, 'f1-score': 0.7083333333333334, 'support': 27}, 'Justice/Crime': {'precision': 0.803030303030303, 'recall': 0.8983050847457628, 'f1-score': 0.8480000000000001, 'suppo

In [30]:
import pandas as pd

dataframes = {
    "generic_val_average_report": generic_val_average_report_df,
    "generic_test_average_report": generic_test_average_report_df,
    "GRU_202012_val_average_report": GRU_202012_val_average_report_df,
    "GRU_202012_test_average_report": GRU_202012_test_average_report_df,
    "IRA_202012_val_average_report": IRA_202012_val_average_report_df,
    "IRA_202012_test_average_report": IRA_202012_test_average_report_df,
    "REA_0621_val_average_report": REA_0621_val_average_report_df,
    "REA_0621_test_average_report": REA_0621_test_average_report_df,
    "UGANDA_0621_val_average_report": UGANDA_0621_val_average_report_df,
    "UGANDA_0621_test_average_report": UGANDA_0621_test_average_report_df,
    "VENEZUELA_201901_2_val_average_report": VENEZUELA_201901_2_val_average_report_df,
    "VENEZUELA_201901_2_test_average_report": VENEZUELA_201901_2_test_average_report_df,
}

for name, df in dataframes.items():
    csv_filename = f"../reports/{name}_weight_decay.csv"
    df.to_csv(csv_filename, index=False)

# Macro Averages:

In [37]:
def extract_macro_avg_value(df):
    return df[df.index == "macro avg"]["f1-score"].values[0]

summary_data = []

for name, df in dataframes.items():
    micro_avg_value = round(extract_macro_avg_value(df), 2)
    train_data = name.split("_")[0]

    if "val" in name:
        validation_value = micro_avg_value
        test_value = None
    elif "test" in name:
        validation_value = None
        test_value = micro_avg_value

    test_data = train_data
    if train_data != "generic":
        train_data = "All but " + train_data

    summary_data.append({
        "Train Data": train_data,
        "Test Data": test_data,
        "Validation": validation_value,
        "Test": test_value,
    })

# Combine rows with the same "Train Data" and "Test Data" into one
macro_summary_df = pd.DataFrame(summary_data)
macro_summary_df = macro_summary_df.groupby(["Train Data", "Test Data"], as_index=False).first()

# Reorder columns
macro_summary_df = macro_summary_df[["Train Data", "Test Data", "Validation", "Test"]]
macro_summary_df = macro_summary_df.reindex([macro_summary_df.index[-1]] + list(macro_summary_df.index[:-1]))
macro_summary_df = macro_summary_df.reset_index(drop=True)

print(macro_summary_df)

          Train Data  Test Data  Validation  Test
0            generic    generic        0.69  0.68
1        All but GRU        GRU        0.68  0.42
2        All but IRA        IRA        0.70  0.56
3        All but REA        REA        0.66  0.54
4     All but UGANDA     UGANDA        0.69  0.48
5  All but VENEZUELA  VENEZUELA        0.65  0.55


# Micro Averages:

In [32]:
def extract_micro_avg_value(df):
    return df[df.index == "micro avg"]["f1-score"].values[0]

summary_data = []

for name, df in dataframes.items():
    micro_avg_value = round(extract_micro_avg_value(df), 2)
    train_data = name.split("_")[0]

    if "val" in name:
        validation_value = micro_avg_value
        test_value = None
    elif "test" in name:
        validation_value = None
        test_value = micro_avg_value

    test_data = train_data
    if train_data != "generic":
        train_data = "All but " + train_data

    summary_data.append({
        "Train Data": train_data,
        "Test Data": test_data,
        "Validation": validation_value,
        "Test": test_value,
    })

# Combine rows with the same "Train Data" and "Test Data" into one
micro_summary_df = pd.DataFrame(summary_data)
micro_summary_df = micro_summary_df.groupby(["Train Data", "Test Data"], as_index=False).first()

# Reorder columns
micro_summary_df = micro_summary_df[["Train Data", "Test Data", "Validation", "Test"]]
micro_summary_df = micro_summary_df.reindex([micro_summary_df.index[-1]] + list(micro_summary_df.index[:-1]))
micro_summary_df = micro_summary_df.reset_index(drop=True)

print(micro_summary_df)

          Train Data  Test Data  Validation  Test
0            generic    generic        0.80  0.78
1        All but GRU        GRU        0.78  0.77
2        All but IRA        IRA        0.81  0.63
3        All but REA        REA        0.79  0.72
4     All but UGANDA     UGANDA        0.80  0.76
5  All but VENEZUELA  VENEZUELA        0.79  0.70


In [7]:
def create_latex_table(val_average_report_df, test_average_report_df):
    train_data = "Generic"
    test_data = "Generic"
    
    val_micro_avg = round(val_average_report_df.loc["micro avg", "f1-score"], 2)
    test_micro_avg = round(test_average_report_df.loc["micro avg", "f1-score"], 2)
    
    data = [[train_data, test_data, val_micro_avg, test_micro_avg]]
    
    columns = pd.MultiIndex.from_tuples([
        ("Dataset", "Train Data"),
        ("Dataset", "Test Data"),
        ("BERTweet Large", "Validation"),
        ("BERTweet Large", "Test")
    ])
    
    f1_scores_df = pd.DataFrame(data, columns=columns)
    
    print("Human-readable table:")
    print(f1_scores_df.to_string(index=False))
    print("\n")
    
    latex_table = f1_scores_df.to_latex(index=False, bold_rows=True, multicolumn=True, multicolumn_format='c', decimal=',', column_format='|l|l|l|l|', header=True, escape=False)

    latex_table = latex_table.replace('\\toprule', '')
    latex_table = latex_table.replace('\\midrule', '')
    latex_table = latex_table.replace('\\bottomrule', '')

    # Resize the header and center it
    latex_table = latex_table.replace('{tabular}', '{tabular}{@{}c@{}}')
    latex_table = latex_table.replace('Dataset & BERTweet Large', '\\large{Dataset} & \\large{BERTweet Large}')
    
    latex_table = latex_table.replace("\\begin{tabular}", "\\begin{tabular}{|l|l|l|l|}\n\\hline \\hline")
    latex_table = latex_table.replace("\\end{tabular}", "\\\\ \\hline \\hline\n\\end{tabular}")

    # Add borders between the rows
    latex_table = latex_table.replace('\\\\\n', '\\\\ \\hline\n')

    return latex_table

latex_table = create_latex_table(val_average_report_df, test_average_report_df)
print("LaTeX table:")
print(latex_table)

Human-readable table:
   Dataset           BERTweet Large     
Train Data Test Data     Validation Test
   Generic   Generic            0.8 0.79


LaTeX table:
\begin{tabular}{|l|l|l|l|}
\hline \hline{@{}c@{}}{|l|l|l|l|}

   Dataset & \multicolumn{2}{c}{BERTweet Large} \\ \hline
Train Data & Test Data &     Validation & Test \\ \hline

   Generic &   Generic &            0,8 & 0,79 \\ \hline

\\ \hline \hline
\end{tabular}{@{}c@{}}



  latex_table = f1_scores_df.to_latex(index=False, bold_rows=True, multicolumn=True, multicolumn_format='c', decimal=',', column_format='|l|l|l|l|', header=True, escape=False)
