In [2]:
import pandas as pd
import json
import os
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import confusion_matrix, classification_report, multilabel_confusion_matrix
from sklearn.preprocessing import MultiLabelBinarizer
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
)
from typing import List, Dict
import seaborn as sns
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the dataset class

# Load data from json file
with open('../reports/generic_epochs_200_train_size_full.json') as f:
    data = json.load(f)

dfs = []
for k, v in data.items():
    valid_metrics = v['valid']
    valid_metrics['dataset'] = 'valid'
    valid_metrics['fold'] = int(k) + 1
    dfs.append(pd.DataFrame([valid_metrics]))
    
    test_metrics = v['test']
    test_metrics['dataset'] = 'test'
    test_metrics['fold'] = int(k) + 1
    dfs.append(pd.DataFrame([test_metrics]))

# Concatenate all dataframes together
df = pd.concat(dfs, ignore_index=True)

# Rename columns
df.columns = df.columns.str.replace('eval_', '')
df = df.rename(columns={'epoch': 'num_epochs'})

# Print the final dataframe
print(df)

  from .autonotebook import tqdm as notebook_tqdm


       loss  accuracy  micro_precision  micro_recall  micro_f1  \
0  0.131072  0.966333         0.825000      0.794444  0.809434   
1  0.140527  0.964000         0.803671      0.774020  0.788567   
2  0.129314  0.963500         0.823242      0.766364  0.793785   
3  0.127321  0.964267         0.814297      0.761722  0.787133   
4  0.110308  0.965333         0.851293      0.739700  0.791583   
5  0.117867  0.962333         0.825088      0.717909  0.767776   
6  0.150143  0.964667         0.808491      0.794991  0.801684   
7  0.152349  0.963667         0.798107      0.777863  0.787855   
8  0.131018  0.962417         0.809000      0.756782  0.782020   
9  0.129022  0.963200         0.804719      0.760184  0.781818   

   macro_precision  macro_recall  macro_f1  runtime  samples_per_second  \
0         0.783455      0.711673  0.739942    7.138             112.076   
1         0.711862      0.685473  0.688094    8.360             119.617   
2         0.771415      0.666642  0.705737    7.

In [2]:
df

Unnamed: 0,loss,accuracy,micro_precision,micro_recall,micro_f1,macro_precision,macro_recall,macro_f1,runtime,samples_per_second,steps_per_second,num_epochs,dataset,fold
0,0.131072,0.966333,0.825,0.794444,0.809434,0.783455,0.711673,0.739942,7.138,112.076,28.019,9.0,valid,1
1,0.140527,0.964,0.803671,0.77402,0.788567,0.711862,0.685473,0.688094,8.36,119.617,29.904,9.0,test,1
2,0.129314,0.9635,0.823242,0.766364,0.793785,0.771415,0.666642,0.705737,7.235,110.574,27.643,7.0,valid,2
3,0.127321,0.964267,0.814297,0.761722,0.787133,0.729128,0.672005,0.694474,9.069,110.266,27.566,7.0,test,2
4,0.110308,0.965333,0.851293,0.7397,0.791583,0.839969,0.584434,0.655538,6.944,115.207,28.802,5.0,valid,3
5,0.117867,0.962333,0.825088,0.717909,0.767776,0.787322,0.581559,0.636694,8.001,124.984,31.246,5.0,test,3
6,0.150143,0.964667,0.808491,0.794991,0.801684,0.731478,0.678978,0.697062,6.679,119.778,29.945,10.5,valid,4
7,0.152349,0.963667,0.798107,0.777863,0.787855,0.703339,0.7135,0.70218,7.999,125.016,31.254,10.5,test,4
8,0.131018,0.962417,0.809,0.756782,0.78202,0.75818,0.643461,0.671983,6.788,117.855,29.464,7.0,valid,5
9,0.129022,0.9632,0.804719,0.760184,0.781818,0.766275,0.631091,0.682364,8.396,119.104,29.776,7.0,test,5


In [4]:
def model_summary(model):
    print("Model summary:")
    print("---------------------------")
    total_params = 0
    for name, param in model.named_parameters():
        param_count = param.numel()
        total_params += param_count
    print(f"Total parameters: {total_params}")
    
"""def print_report(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    report = classification_report(y_true, y_pred, target_names=categories)
    print(report)
    sns.heatmap(cm, annot=True, xticklabels=categories, yticklabels=categories, fmt='g')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()"""

class TweetDataset(Dataset):
    def __init__(self, x, y, mlb, tokenizer):
        self.x = x
        self.y = y
        self.mlb = mlb
        self.tokenizer = tokenizer
        self.encoded_tweets = self.preprocess_text(self.x)
    
    def preprocess_text(self, text):
        return self.tokenizer(text, return_attention_mask=True, return_tensors='pt', padding=True)
        
    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        label = self.y[idx]
        return {'input_ids': self.encoded_tweets['input_ids'][idx],
                'attention_mask': self.encoded_tweets['attention_mask'][idx],
                'label': torch.tensor(label, dtype=torch.float32)}
        
class MultiLabelDataCollator(DataCollatorWithPadding):
    def __init__(self, tokenizer):
        super().__init__(tokenizer)

    def __call__(self, features: List[Dict[str, torch.Tensor]]):
        batch = super().__call__(features)
        batch["labels"] = torch.stack([feature["label"] for feature in features])
        return batch
    
def get_classification_report(data_loader, model, target_names, label_names):
    labels = []
    predictions = []
    for batch in data_loader:
        batch_inputs = {'input_ids': batch['input_ids'].to(device),
                        'attention_mask': batch['attention_mask'].to(device)}
        with torch.no_grad():
            logits = model(**batch_inputs).logits
        batch_predictions = (logits > 0.5).detach().cpu().numpy().astype(int)
        predictions.append(batch_predictions)
        labels.append(batch['labels'].detach().cpu().numpy().astype(int))

    predictions = np.concatenate(predictions, axis=0)
    labels = np.concatenate(labels, axis = 0)

    #cm = multilabel_confusion_matrix(labels, predictions)
    dict_report = classification_report(labels, predictions, target_names=target_names, labels=label_names, zero_division=0, output_dict=True)
    report = classification_report(labels, predictions, target_names=target_names, labels=label_names, zero_division=0)
    return dict_report, report
    
def calculate_average_report(reports):
    avg_report = {}
    for report in reports:
        for key, scores in report.items():
            if key not in avg_report:
                avg_report[key] = {}
                for score_key, score_value in scores.items():
                    avg_report[key][score_key] = score_value
            else:
                for score_key, score_value in scores.items():
                    avg_report[key][score_key] += score_value

    num_reports = len(reports)
    for key, scores in avg_report.items():
        for score_key in scores:
            avg_report[key][score_key] /= num_reports

    return avg_report

def average_report_to_dataframe(average_report):
    data = {
        "precision": [],
        "recall": [],
        "f1-score": [],
        "support": []
    }
    index = []

    for class_name, metrics in average_report.items():
        if class_name == 'accuracy':
            continue

        index.append(class_name)
        data["precision"].append(metrics["precision"])
        data["recall"].append(metrics["recall"])
        data["f1-score"].append(metrics["f1-score"])
        data["support"].append(metrics["support"])

    return pd.DataFrame(data, index=index)

def calculate_metrics(task):
    k = 5
    
    val_classification_reports = []
    test_classification_reports = []

    # Loop over each fold and load the corresponding model
    for fold in range(k):
        model_path = f"../models/{task}_epochs_200_train_size_full_fold_{fold}"
        # find the latest checkpoint file
        #checkpoint_files = [f for f in os.listdir(model_path) if f.startswith("checkpoint")]
        latest_checkpoint = os.path.join(model_path, "")  # use "" for models that were manually saved after training. use sorted(checkpoint_files)[0] for the first automatically saved checkpoint 
        print(latest_checkpoint)
        
        # Load the model and tokenizer
        model = AutoModelForSequenceClassification.from_pretrained(latest_checkpoint)
        model.to(device)
        tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-large")

        filename = f"../data/labeled_data/{task}_test_{fold}.json"
        with open(filename) as f:
            data = json.load(f)
        train_df = pd.DataFrame(data["train"])
        val_df = pd.DataFrame(data["valid"])
        test_df = pd.DataFrame(data["test"])
        
        train_annotations = train_df["annotations"].tolist()
        classes = set()
        for annotation in train_annotations:
            classes.update(annotation)
        classes = sorted(list(classes))
        
        checkpoint = torch.load(os.path.join(model_path, "pytorch_model.bin"))
        model.load_state_dict(checkpoint)
        
        mlb = MultiLabelBinarizer(classes=classes)
        
        train_labels = mlb.fit_transform(train_df["annotations"])
        val_labels = mlb.transform(val_df["annotations"])
        test_labels = mlb.transform(test_df["annotations"])
        
        train_dataset = TweetDataset(train_df['text'].to_list(), torch.tensor(train_labels), mlb, tokenizer)
        val_dataset = TweetDataset(val_df['text'].to_list(), torch.tensor(val_labels), mlb, tokenizer)
        test_dataset = TweetDataset(test_df['text'].to_list(), torch.tensor(test_labels), mlb, tokenizer)
        
        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=4, shuffle=False, collate_fn=MultiLabelDataCollator(tokenizer)
        )
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=4, shuffle=False, collate_fn=MultiLabelDataCollator(tokenizer)
        )
        
        model.eval()
        val_report_dict, val_report = get_classification_report(val_loader, model, classes, range(len(classes)))
        test_report_dict, test_report = get_classification_report(test_loader, model, classes, range(len(classes)))
        val_classification_reports.append(val_report_dict)
        test_classification_reports.append(test_report_dict)

    val_average_report = calculate_average_report(val_classification_reports)
    test_average_report = calculate_average_report(test_classification_reports)
    val_average_report_df = average_report_to_dataframe(val_average_report)
    test_average_report_df = average_report_to_dataframe(test_average_report)
    print("\nAverage Validation Classification Report In DataFrame Format:")
    print(val_average_report_df) 
    print("\nAverage Test Classification Report In DataFrame Format:")
    print(test_average_report_df) 
    return val_average_report_df, test_average_report_df

generic_val_average_report_df, generic_test_average_report_df = calculate_metrics("generic")
GRU_202012_val_average_report_df, GRU_202012_test_average_report_df = calculate_metrics("GRU_202012")
IRA_202012_val_average_report_df, IRA_202012_test_average_report_df = calculate_metrics("IRA_202012")
REA_0621_val_average_report_df, REA_0621_test_average_report_df = calculate_metrics("REA_0621")
UGANDA_0621_val_average_report_df, UGANDA_0621_test_average_report_df = calculate_metrics("UGANDA_0621")
VENEZUELA_201901_2_val_average_report_df, VENEZUELA_201901_2_test_average_report_df = calculate_metrics("VENEZUELA_201901_2")

../models/generic_epochs_200_train_size_full_fold_0\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_1\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_2\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_3\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/generic_epochs_200_train_size_full_fold_4\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}



Average Validation Classification Report In DataFrame Format:
                                    precision    recall  f1-score  support
Conspiracy Theory                    0.757218  0.405258  0.523622     50.8
Education                            0.781645  0.518074  0.611675     12.6
Election Campaign                    0.816610  0.723478  0.764618     26.6
Environment                          0.767778  0.623701  0.670835     11.6
Government/Public                    0.765498  0.835984  0.798719    249.6
Health                               0.828771  0.809300  0.814534     42.8
Immigration/Integration              0.833065  0.705780  0.758313     40.2
Justice/Crime                        0.807952  0.783177  0.790424    114.4
Labor/Employment                     0.802183  0.589868  0.658678     19.4
Macroeconomics/Economic Regulation   0.756704  0.688306  0.717625     50.0
Media/Journalism                     0.750921  0.677937  0.709312     36.8
Others                               

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_1\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_2\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_3\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/GRU_202012_epochs_200_train_size_full_fold_4\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}



Average Validation Classification Report In DataFrame Format:
                                    precision    recall  f1-score  support
Conspiracy Theory                    0.487619  0.185714  0.262914     27.6
Education                            0.878788  0.325315  0.443636     13.4
Election Campaign                    0.842761  0.718005  0.772715     32.4
Environment                          0.803077  0.545641  0.635353     13.4
Government/Public                    0.783813  0.831081  0.805855    285.0
Health                               0.862912  0.676321  0.757540     41.0
Immigration/Integration              0.761587  0.682992  0.716051     30.4
Justice/Crime                        0.827295  0.829153  0.826934    133.8
Labor/Employment                     0.738079  0.544762  0.620644     22.0
Macroeconomics/Economic Regulation   0.755060  0.684091  0.711724     58.4
Media/Journalism                     0.741435  0.732045  0.732626     40.4
Others                               

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_1\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_2\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_3\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/IRA_202012_epochs_200_train_size_full_fold_4\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}



Average Validation Classification Report In DataFrame Format:
                                    precision    recall  f1-score  support
Conspiracy Theory                    0.734867  0.554409  0.629351     53.0
Education                            0.655000  0.465812  0.542857     12.6
Election Campaign                    0.813182  0.669468  0.732557     24.6
Environment                          0.643810  0.463654  0.519456     11.2
Government/Public                    0.801787  0.815791  0.807746    219.8
Health                               0.852431  0.760095  0.801660     41.2
Immigration/Integration              0.753346  0.741867  0.741841     39.8
Justice/Crime                        0.851844  0.824038  0.836712    126.8
Labor/Employment                     0.511420  0.364228  0.414439     15.2
Macroeconomics/Economic Regulation   0.757594  0.529776  0.613402     25.0
Media/Journalism                     0.880761  0.640455  0.728272     26.2
Others                               

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_1\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_2\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_3\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/REA_0621_epochs_200_train_size_full_fold_4\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}



Average Validation Classification Report In DataFrame Format:
                                    precision    recall  f1-score  support
Conspiracy Theory                    0.709593  0.469116  0.550600     58.4
Education                            0.783333  0.361319  0.479754     10.4
Election Campaign                    0.860476  0.716264  0.777278     25.6
Environment                          0.780952  0.430000  0.509762      7.8
Government/Public                    0.779164  0.835641  0.806027    264.6
Health                               0.823932  0.804324  0.811148     38.0
Immigration/Integration              0.789080  0.730749  0.757779     45.0
Justice/Crime                        0.712772  0.622174  0.655140     61.2
Labor/Employment                     0.805848  0.555071  0.622296     19.0
Macroeconomics/Economic Regulation   0.815025  0.641988  0.715637     52.6
Media/Journalism                     0.808596  0.757246  0.780849     43.2
Others                               

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_1\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_2\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_3\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/UGANDA_0621_epochs_200_train_size_full_fold_4\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}



Average Validation Classification Report In DataFrame Format:
                                    precision    recall  f1-score  support
Conspiracy Theory                    0.716206  0.415223  0.518381     57.6
Education                            0.753788  0.443450  0.529921     12.2
Election Campaign                    0.850889  0.769755  0.801767     26.6
Environment                          0.821275  0.506161  0.607821     13.2
Government/Public                    0.806748  0.814413  0.810417    276.0
Health                               0.853277  0.785185  0.816062     42.4
Immigration/Integration              0.800115  0.746656  0.771080     46.6
Justice/Crime                        0.792340  0.816158  0.802951    137.2
Labor/Employment                     0.736447  0.584028  0.648187     21.4
Macroeconomics/Economic Regulation   0.816876  0.684122  0.741638     58.6
Media/Journalism                     0.785145  0.713565  0.747036     44.4
Others                               

  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_1\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_2\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_3\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}


../models/VENEZUELA_201901_2_epochs_200_train_size_full_fold_4\


  'label': torch.tensor(label, dtype=torch.float32)}
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  'label': torch.tensor(label, dtype=torch.float32)}



Average Validation Classification Report In DataFrame Format:
                                    precision    recall  f1-score  support
Conspiracy Theory                    0.690635  0.550844  0.608461     42.6
Education                            0.718462  0.546032  0.612222     12.2
Election Campaign                    0.816014  0.737045  0.773338     23.6
Environment                          0.801948  0.458095  0.572738     12.0
Government/Public                    0.727636  0.735022  0.729572    185.8
Health                               0.830982  0.770965  0.798733     45.4
Immigration/Integration              0.781185  0.696334  0.729669     27.8
Justice/Crime                        0.870626  0.817260  0.842467    108.2
Labor/Employment                     0.743788  0.580308  0.647341     22.4
Macroeconomics/Economic Regulation   0.768681  0.713211  0.735934     55.0
Media/Journalism                     0.725274  0.658272  0.686856     31.4
Others                               

In [5]:
import pandas as pd

dataframes = {
    "generic_val_average_report": generic_val_average_report_df,
    "generic_test_average_report": generic_test_average_report_df,
    "GRU_202012_val_average_report": GRU_202012_val_average_report_df,
    "GRU_202012_test_average_report": GRU_202012_test_average_report_df,
    "IRA_202012_val_average_report": IRA_202012_val_average_report_df,
    "IRA_202012_test_average_report": IRA_202012_test_average_report_df,
    "REA_0621_val_average_report": REA_0621_val_average_report_df,
    "REA_0621_test_average_report": REA_0621_test_average_report_df,
    "UGANDA_0621_val_average_report": UGANDA_0621_val_average_report_df,
    "UGANDA_0621_test_average_report": UGANDA_0621_test_average_report_df,
    "VENEZUELA_201901_2_val_average_report": VENEZUELA_201901_2_val_average_report_df,
    "VENEZUELA_201901_2_test_average_report": VENEZUELA_201901_2_test_average_report_df,
}

for name, df in dataframes.items():
    csv_filename = f"../reports/{name}.csv"
    df.to_csv(csv_filename, index=False)

# Macro Averages:

In [35]:
def extract_macro_avg_value(df):
    return df[df.index == "macro avg"]["f1-score"].values[0]

summary_data = []

for name, df in dataframes.items():
    micro_avg_value = round(extract_macro_avg_value(df), 2)
    train_data = name.split("_")[0]

    if "val" in name:
        validation_value = micro_avg_value
        test_value = None
    elif "test" in name:
        validation_value = None
        test_value = micro_avg_value

    test_data = train_data
    if train_data != "generic":
        train_data = "All but " + train_data

    summary_data.append({
        "Train Data": train_data,
        "Test Data": test_data,
        "Validation": validation_value,
        "Test": test_value,
    })

# Combine rows with the same "Train Data" and "Test Data" into one
macro_summary_df = pd.DataFrame(summary_data)
macro_summary_df = macro_summary_df.groupby(["Train Data", "Test Data"], as_index=False).first()

# Reorder columns
macro_summary_df = macro_summary_df[["Train Data", "Test Data", "Validation", "Test"]]
macro_summary_df = macro_summary_df.reindex([macro_summary_df.index[-1]] + list(macro_summary_df.index[:-1]))
macro_summary_df = macro_summary_df.reset_index(drop=True)

print(macro_summary_df)

          Train Data  Test Data  Validation  Test
0            generic    generic        0.71  0.69
1        All but GRU        GRU        0.64  0.43
2        All but IRA        IRA        0.68  0.55
3        All but REA        REA        0.68  0.55
4     All but UGANDA     UGANDA        0.69  0.46
5  All but VENEZUELA  VENEZUELA        0.69  0.62


# Micro Averages:

In [30]:
def extract_micro_avg_value(df):
    return df[df.index == "micro avg"]["f1-score"].values[0]

summary_data = []

for name, df in dataframes.items():
    micro_avg_value = round(extract_micro_avg_value(df), 2)
    train_data = name.split("_")[0]

    if "val" in name:
        validation_value = micro_avg_value
        test_value = None
    elif "test" in name:
        validation_value = None
        test_value = micro_avg_value

    test_data = train_data
    if train_data != "generic":
        train_data = "All but " + train_data

    summary_data.append({
        "Train Data": train_data,
        "Test Data": test_data,
        "Validation": validation_value,
        "Test": test_value,
    })

# Combine rows with the same "Train Data" and "Test Data" into one
micro_summary_df = pd.DataFrame(summary_data)
micro_summary_df = micro_summary_df.groupby(["Train Data", "Test Data"], as_index=False).first()

# Reorder columns
micro_summary_df = micro_summary_df[["Train Data", "Test Data", "Validation", "Test"]]
micro_summary_df = micro_summary_df.reindex([micro_summary_df.index[-1]] + list(micro_summary_df.index[:-1]))
micro_summary_df = micro_summary_df.reset_index(drop=True)

print(macro_summary_df)

          Train Data  Test Data  Validation  Test
0            generic    generic        0.80  0.79
1        All but GRU        GRU        0.77  0.77
2        All but IRA        IRA        0.82  0.61
3        All but REA        REA        0.80  0.71
4     All but UGANDA     UGANDA        0.80  0.76
5  All but VENEZUELA  VENEZUELA        0.80  0.74


In [7]:
def create_latex_table(val_average_report_df, test_average_report_df):
    train_data = "Generic"
    test_data = "Generic"
    
    val_micro_avg = round(val_average_report_df.loc["micro avg", "f1-score"], 2)
    test_micro_avg = round(test_average_report_df.loc["micro avg", "f1-score"], 2)
    
    data = [[train_data, test_data, val_micro_avg, test_micro_avg]]
    
    columns = pd.MultiIndex.from_tuples([
        ("Dataset", "Train Data"),
        ("Dataset", "Test Data"),
        ("BERTweet Large", "Validation"),
        ("BERTweet Large", "Test")
    ])
    
    f1_scores_df = pd.DataFrame(data, columns=columns)
    
    print("Human-readable table:")
    print(f1_scores_df.to_string(index=False))
    print("\n")
    
    latex_table = f1_scores_df.to_latex(index=False, bold_rows=True, multicolumn=True, multicolumn_format='c', decimal=',', column_format='|l|l|l|l|', header=True, escape=False)

    latex_table = latex_table.replace('\\toprule', '')
    latex_table = latex_table.replace('\\midrule', '')
    latex_table = latex_table.replace('\\bottomrule', '')

    # Resize the header and center it
    latex_table = latex_table.replace('{tabular}', '{tabular}{@{}c@{}}')
    latex_table = latex_table.replace('Dataset & BERTweet Large', '\\large{Dataset} & \\large{BERTweet Large}')
    
    latex_table = latex_table.replace("\\begin{tabular}", "\\begin{tabular}{|l|l|l|l|}\n\\hline \\hline")
    latex_table = latex_table.replace("\\end{tabular}", "\\\\ \\hline \\hline\n\\end{tabular}")

    # Add borders between the rows
    latex_table = latex_table.replace('\\\\\n', '\\\\ \\hline\n')

    return latex_table

latex_table = create_latex_table(val_average_report_df, test_average_report_df)
print("LaTeX table:")
print(latex_table)

Human-readable table:
   Dataset           BERTweet Large     
Train Data Test Data     Validation Test
   Generic   Generic            0.8 0.79


LaTeX table:
\begin{tabular}{|l|l|l|l|}
\hline \hline{@{}c@{}}{|l|l|l|l|}

   Dataset & \multicolumn{2}{c}{BERTweet Large} \\ \hline
Train Data & Test Data &     Validation & Test \\ \hline

   Generic &   Generic &            0,8 & 0,79 \\ \hline

\\ \hline \hline
\end{tabular}{@{}c@{}}



  latex_table = f1_scores_df.to_latex(index=False, bold_rows=True, multicolumn=True, multicolumn_format='c', decimal=',', column_format='|l|l|l|l|', header=True, escape=False)
