In [9]:
import torch, datasets, pandas
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from safetensors.torch import load_file
from sklearn.metrics import f1_score, classification_report
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

from utils.config import ID_TO_LABEL, LABEL_TO_ID

In [10]:
def collate_fn(batch):
    texts = [item["text"] for item in batch]
    labels = [torch.tensor(item["labels"], dtype=torch.float32) for item in batch]

    # Pad labels to the maximum label length in the dataset
    labels_padded = pad_sequence(labels, batch_first=True)

    return {"text": texts, "labels": labels_padded}

def to_one_hot(indices, num_classes):
    # if num_classes == 2:
    #     return indices.long().numpy()
    one_hot = torch.zeros((indices.shape[0], num_classes))
    
    one_hot.scatter_(1, indices.long(), 1)
    return one_hot.long().numpy()

def binarize(preds):
    # 1 is unfair, 0 is fair. Assuming the first element corresponds to the "fair" category
    return [1 if el[0] != 1 else 0 for el in preds]
    
def run_metrics(preds, labels, to_binarize, label_to_id):
    if to_binarize:
        labels = binarize(labels)
        preds = binarize(preds)

    micro_f1 = f1_score(labels, preds, average="micro", zero_division=0)
    macro_f1 = f1_score(labels, preds, average="macro", zero_division=0)

    # Display results
    print(f"Micro F1 Score: {micro_f1:.4f}")
    print(f"Macro F1 Score: {macro_f1:.4f}")
    report = classification_report(labels, preds, zero_division=0, output_dict=True)
    # report = classification_report(labels, preds, zero_division=0, target_names=label_to_id, output_dict=True)
    df = pandas.DataFrame(report).transpose()
    return df

In [14]:
def evaluate_model(model_name, seed, num_labels, is_binary, use_heads):
    if use_heads and not is_binary:
        print("Use a binary model for the multi-head approach")
        return 
    if is_binary:
        seed = str(seed)+"_b"
    safetensors_path = f"logs/unfair_tos/{model_name}/seed_{seed}/model.safetensors"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model and tokenizer
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
    model.load_state_dict(load_file(safetensors_path))
    model.to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    if use_heads:
        head_model_name = "prajjwal1/bert-mini"
        head_models = dict.fromkeys(LABEL_TO_ID.keys())
        for cat in head_models.keys():
            if cat != "fair":
                head = AutoModelForSequenceClassification.from_pretrained(head_model_name, num_labels=2)
                safetensors_path = f"logs/unfair_tos_head/{head_model_name}/{cat}/seed_1/model.safetensors"
                head.load_state_dict(load_file(safetensors_path))
                head.to(device)
                head_models[cat] = head


    # Load the test dataset
    full_dataset = datasets.load_from_disk("./142_dataset/tos.hf/")

    test_dataset = full_dataset["test"]

    test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

    model.eval()
    all_preds = []
    all_labels = []
    all_logits = []

    with torch.no_grad():
        for batch in test_loader:
            inputs = tokenizer(batch['text'], padding=True, truncation=True, return_tensors="pt").to(device)
            # outputs = model(**batch["inputs"].to(device))
            outputs = model(**inputs)
            all_logits.extend(outputs.logits)
            preds = (torch.sigmoid(outputs.logits) > 0.5).int()

            if is_binary and not use_heads:
                # aggregate unfair categories (e.g. [2, 5] --> [0, 1])
                batch["labels"] = (batch["labels"].sum(dim=1) > 0).float().unsqueeze(1)
            elif is_binary and use_heads:
                print(preds)
                pass

            all_preds.extend(preds.cpu().long().numpy())
            all_labels.extend(to_one_hot(batch["labels"], num_labels))

    return all_preds, all_labels, all_logits

In [11]:
model_name = "nlpaueb/legal-bert-base-uncased"  # (2, 2)
# model_name = "microsoft/deberta-base" # (5, 4)
# model_name = "zlucia/custom-legalbert"  # (1, 2)
# model_name = "roberta-large"
# model_name = "allenai/longformer-base-4096"
seed_multi = 2
seed_bin = 2

In [17]:
# Multi-head model
preds, labels, logits = evaluate_model(model_name, seed_bin, len(LABEL_TO_ID), is_binary=True, use_heads=True) 

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: Error(s) in loading state_dict for BertForSequenceClassification:
	size mismatch for classifier.weight: copying a param with shape torch.Size([2, 768]) from checkpoint, the shape in current model is torch.Size([10, 768]).
	size mismatch for classifier.bias: copying a param with shape torch.Size([2]) from checkpoint, the shape in current model is torch.Size([10]).

In [None]:
# Binary model
preds, labels, logits = evaluate_model(model_name, seed_bin, 2, is_binary=True, use_heads=True) 

In [26]:
run_metrics(preds, labels, False, {"fair":0, "unfair":1})

Micro F1 Score: 0.9617
Macro F1 Score: 0.8792


Unnamed: 0,precision,recall,f1-score,support
0,0.97619,0.98185,0.979012,3967.0
1,0.803815,0.75641,0.779392,390.0
micro avg,0.961671,0.961671,0.961671,4357.0
macro avg,0.890003,0.86913,0.879202,4357.0
weighted avg,0.960761,0.961671,0.961144,4357.0
samples avg,0.961671,0.961671,0.961671,4357.0


In [34]:
# Multi label model
preds, labels = evaluate_model(model_name, seed_multi, len(LABEL_TO_ID), is_binary=False)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at nlpaueb/legal-bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
# binarize label at test time
df_bin = run_metrics(preds, labels, True, {"fair":0, "unfair":1})
df_bin

Micro F1 Score: 0.9617
Macro F1 Score: 0.8762


Unnamed: 0,precision,recall,f1-score,support
fair,0.977227,0.980909,0.979065,3981.0
unfair,0.789474,0.757979,0.773406,376.0
accuracy,0.961671,0.961671,0.961671,0.961671
macro avg,0.88335,0.869444,0.876235,4357.0
weighted avg,0.961024,0.961671,0.961317,4357.0


In [30]:
df_classes = run_metrics(preds, labels, False, LABEL_TO_ID)
df_classes

Micro F1 Score: 0.9575
Macro F1 Score: 0.7779


Unnamed: 0,precision,recall,f1-score,support
fair,0.977227,0.980909,0.979065,3981.0
a,0.818182,0.5625,0.666667,16.0
ch,0.732394,0.8,0.764706,65.0
cr,0.677419,0.724138,0.7,29.0
j,0.956522,0.846154,0.897959,26.0
law,0.954545,0.913043,0.933333,23.0
ltd,0.772358,0.664336,0.714286,143.0
ter,0.692308,0.782609,0.734694,69.0
use,0.926829,0.730769,0.817204,52.0
pinc,0.75,0.461538,0.571429,13.0


In [35]:
# Write false positive and false negatives to files
full_dataset = datasets.load_from_disk("./142_dataset/tos.hf/")
test_dataset = full_dataset["test"]

false_positive_texts = []
false_pos_classes = {v: [] for k,v in ID_TO_LABEL.items()}
false_neg_classes = {v: [] for k,v in ID_TO_LABEL.items()}
for i, (label, pred, data) in enumerate(zip(labels, preds, test_dataset)):
    for j in range(len(label)):
        # False positive
        if pred[j] == 1 and label[j] == 0: 
            true_labels = [ID_TO_LABEL[el] for el in np.where(label == 1)[0]]
            false_pos_classes[ID_TO_LABEL[j]].append((data['text'], true_labels))
        
        # False negative
        if pred[j] == 0 and label[j] == 1:
            false_labels = [ID_TO_LABEL[el] for el in np.where(pred == 1)[0]]
            false_neg_classes[ID_TO_LABEL[j]].append((data['text'], false_labels))   

for cls in false_pos_classes.keys():
    with open(f"./out/fp_{cls}.txt", "w") as f:
        for el in false_pos_classes[cls]:
            f.write(f"clause: {el[0]}\t true: {el[1]}\n")
            # print(el)
    # print(false_pos_classes[cls])


for cls in false_neg_classes.keys():
    with open(f"./out/fn_{cls}.txt", "w") as f:
        for el in false_neg_classes[cls]:
            f.write(f"clause: {el[0]}\t prediction: {el[1]}\n")