In [1]:
from datasets import load_dataset

import torch
from transformers import AutoTokenizer, AutoModel

import numpy as np
from scipy import spatial
from sklearn.metrics import classification_report

In [2]:
# Load the CaseHOLD task of the LexGLUE benchmark from the Hugging Face hub
# (or from the local cache when it has already been downloaded).
# LexGLUE on GitHub: https://github.com/coastalcph/lex-glue
dataset = load_dataset(path="lex_glue", name="case_hold")

Found cached dataset lex_glue (/root/.cache/huggingface/datasets/lex_glue/case_hold/1.0.0/8a66420941bf6e77a7ddd4da4d3bfb7ba88ef48c1d55302a568ac650a095ca3a)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
# Display one CaseHOLD record: the citing context (with a <HOLDING> placeholder),
# the candidate holdings ("endings"), and the index of the correct candidate.
example = dataset["train"][0]

print("context\n---")
print(example["context"])
print("===")

print("endings\n---")
for candidate in example["endings"]:
    print(candidate)
print("===")

print("label", example["label"])

context
---
Drapeau’s cohorts, the cohort would be a “victim” of making the bomb. Further, firebombs are inherently dangerous. There is no peaceful purpose for making a bomb. Felony offenses that involve explosives qualify as “violent crimes” for purposes of enhancing the sentences of career offenders. See 18 U.S.C. § 924(e)(2)(B)(ii) (defining a “violent felony” as: “any crime punishable by imprisonment for a term exceeding one year ... that ... involves use of explosives”). Courts have found possession of a'bomb to be a crime of violence based on the lack of a nonviolent purpose for a bomb and the fact that, by its very nature, there is a substantial risk that the bomb would be used against the person or property of another. See United States v. Newman, 125 F.3d 863 (10th Cir.1997) (unpublished) (<HOLDING>); United States v. Dodge, 846 F.Supp. 181,
===
endings
---
holding that possession of a pipe bomb is a crime of violence for purposes of 18 usc  3142f1
holding that bank robbery by

In [4]:
# Device: use the first CUDA GPU when one is available; otherwise fall back to
# the CPU so the notebook still runs (much more slowly) on GPU-less machines.
device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Model: Official SimCSE checkpoint
# GitHub: https://github.com/princeton-nlp/SimCSE
# NOTE: the checkpoint loaded here is the *supervised* variant ("sup-simcse-...").
# Swap MODEL_NAME for "princeton-nlp/unsup-simcse-roberta-large" if the
# unsupervised model is the one intended for this experiment.
MODEL_NAME="princeton-nlp/sup-simcse-roberta-large"
tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME)
model=AutoModel.from_pretrained(MODEL_NAME).to(device)

In [5]:
# Evaluate Model on "CaseHOLD" Test Set
#
# Zero-shot multiple choice: embed the context and every candidate ending with
# the encoder, then predict the ending whose embedding has the highest cosine
# similarity to the context embedding. No training happens in this cell.
model.eval()


def _embed(text):
    """Return the first-token embedding of `text` as a 1-D NumPy array.

    The text is tokenized with the notebook-level `tokenizer`, run through
    `model` as a batch of one on `device`, and the hidden state at sequence
    position 0 of the last layer is returned.
    """
    # truncation=True caps the input at the tokenizer's configured maximum
    # length, so an over-long text cannot overflow the position embeddings.
    token_ids=tokenizer.encode(text, truncation=True)
    hidden=model(torch.tensor([token_ids]).to(device)).last_hidden_state
    # Index [0, 0, :] yields a 1-D vector: scipy's cosine() requires 1-D input
    # and newer SciPy releases raise on a (1, hidden) array.
    # NOTE(review): this uses the raw first-token state rather than
    # `pooler_output`; confirm which pooling this checkpoint expects.
    return hidden[0, 0, :].detach().cpu().numpy()


preds=[]
labels=[]
with torch.no_grad():
    for index, data in enumerate(dataset["test"]):
        # Lightweight progress indicator: one line every 900 examples.
        if (index+1)%900==0:
            print(index+1)

        # Context
        embd_context=_embed(data["context"])

        # Endings: keep the first candidate with the strictly highest similarity.
        # -inf (not -1) as the starting value so that a similarity of exactly -1
        # can still be selected; pred stays -1 only if no candidate is comparable.
        pred=-1
        sim_max=float("-inf")
        for idx, ending in enumerate(data["endings"]):
            sim=1-spatial.distance.cosine(embd_context, _embed(ending))
            if sim>sim_max:
                pred=idx
                sim_max=sim

        preds.append(pred)
        labels.append(data["label"])

900
1800
2700
3600


In [6]:
# Results: per-class precision / recall / F1 and overall accuracy of the
# similarity-based predictions against the gold labels.
report = classification_report(labels, preds)
print(report)

              precision    recall  f1-score   support

           0       0.49      0.47      0.48       721
           1       0.51      0.52      0.52       725
           2       0.50      0.51      0.51       727
           3       0.52      0.53      0.53       713
           4       0.51      0.50      0.50       714

    accuracy                           0.51      3600
   macro avg       0.51      0.51      0.51      3600
weighted avg       0.51      0.51      0.51      3600

