In [1]:
import spacy
from spacy.tokens import DocBin
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import json
# from datasets import load_dataset
from collections import defaultdict
from typing import List, Dict, Tuple
from datasets import Dataset, DatasetDict
import ast

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
categories = ['CASE_NUMBER', 'COURT', 'DATE', 'GPE', 'JUDGE', 'LAWYER', 'ORG', 'OTHER_PERSON', 'PETITIONER', 'PRECEDENT', 'PROVISION', 'RESPONDENT', 'STATUTE', 'WITNESS']

# Build the two lookup tables between BIO tag strings and integer ids.
# Id 0 is the outside tag; every category then takes two consecutive ids,
# B-<category> (odd) followed by I-<category> (even).
B_PREFIX = 'B-'
I_PREFIX = 'I-'
O_TAG = 'O'
label2id = {O_TAG: 0}
id2label = {0: O_TAG}
idx = 1
for category in categories:
    for prefix in (B_PREFIX, I_PREFIX):
        tag = prefix + category
        label2id[tag] = idx
        id2label[idx] = tag
        idx += 1

### Creating data [don't run if already created]

In [None]:
# Load the three fine-tuning splits from CSV (paths are relative to the
# current working directory).
train = pd.read_csv("./data/finetuning/train.csv")
dev = pd.read_csv("./data/finetuning/dev.csv")
test = pd.read_csv("./data/finetuning/test.csv")

In [4]:
# Display the test split read from ./data/finetuning/test.csv.
test

Unnamed: 0,sentence,raw_entities,entities_dict,text
0,$~40 * In The High Court Of Delhi At New Delhi...,"{""CASE_NUMBER"": ""[]"", ""COURT"": ""['High Court O...","{'CASE_NUMBER': '[]', 'COURT': ""['High Court O...",<s> [INST] You are solving the NER problem in ...
1,1 Reportable In The Supreme Court Of India Civ...,"{""CASE_NUMBER"": ""[]"", ""COURT"": ""['Supreme Cour...","{'CASE_NUMBER': '[]', 'COURT': ""['Supreme Cour...",<s> [INST] You are solving the NER problem in ...
2,R/Scr.A/9089/2017 Judgment In The High Court O...,"{""CASE_NUMBER"": ""[]"", ""COURT"": ""['High Court O...","{'CASE_NUMBER': '[]', 'COURT': ""['High Court O...",<s> [INST] You are solving the NER problem in ...
3,High Court Of Judicature For Rajasthan Bench A...,"{""CASE_NUMBER"": ""[]"", ""COURT"": ""['High Court O...","{'CASE_NUMBER': '[]', 'COURT': ""['High Court O...",<s> [INST] You are solving the NER problem in ...
4,1 In The High Court Of Judicature At Madras Da...,"{""CASE_NUMBER"": ""[]"", ""COURT"": ""['High Court O...","{'CASE_NUMBER': '[]', 'COURT': ""['High Court O...",<s> [INST] You are solving the NER problem in ...
...,...,...,...,...
1069,"Apparently, Channaraddi set up his daughters G...","{""CASE_NUMBER"": ""['O.S.No.31/2009']"", ""COURT"":...","{'CASE_NUMBER': ""['O.S.No.31/2009']"", 'COURT':...",<s> [INST] You are solving the NER problem in ...
1070,After the dismissal of the petition for annulm...,"{""CASE_NUMBER"": ""['F.C.O.P.No.41 of 2012']"", ""...","{'CASE_NUMBER': ""['F.C.O.P.No.41 of 2012']"", '...",<s> [INST] You are solving the NER problem in ...
1071,"On 12.07.2018, a letter was received from the ...","{""CASE_NUMBER"": ""['Special Case (NDPS) No.17 o...","{'CASE_NUMBER': ""['Special Case (NDPS) No.17 o...",<s> [INST] You are solving the NER problem in ...
1072,The date on which the measurements were record...,"{""CASE_NUMBER"": ""[]"", ""COURT"": ""[]"", ""DATE"": ""...","{'CASE_NUMBER': '[]', 'COURT': '[]', 'DATE': '...",<s> [INST] You are solving the NER problem in ...


In [5]:
nlp = spacy.load('en_core_web_sm')
def tokenize_and_tag(df: pd.DataFrame, categories: List[str]) -> pd.DataFrame:
    """Tokenize every sentence with spaCy and derive BIO tags from its entities.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide a ``'sentence'`` column (raw text) and an
        ``'entities_dict'`` column holding the string form of a dict that maps
        a category name to the string form of a list of entity strings (both
        levels are parsed with ``ast.literal_eval``).
    categories : List[str]
        Not used by the function; kept so existing calls keep working.

    Returns
    -------
    pd.DataFrame
        One row per input row with the columns ``tokens`` (spaCy token texts),
        ``ner_tags`` (tag ids looked up in the module-level ``label2id``) and
        ``ner_tags_str`` (the tag strings). An empty input yields an empty
        frame with the same three columns.

    Notes
    -----
    * Entities are matched as whitespace-split token sequences against the
      spaCy tokens, so an entity whose pieces spaCy splits differently is
      never tagged.
      NOTE(review): tokenizing the entity with the same tokenizer would
      likely match more mentions - confirm before changing the data.
    * Every occurrence of an entity is tagged; when matches overlap, the one
      processed last overwrites the earlier tags.
    * Tag strings absent from ``label2id`` map to ``None``.
    """
    # Define tag prefixes
    B_PREFIX = 'B-'
    I_PREFIX = 'I-'
    O_TAG = 'O'

    all_tokens = []
    all_tags = []

    for _, row in df.iterrows():
        # Tokenize the sentence
        doc = nlp(row['sentence'])
        tokens = [token.text for token in doc]

        # Initialize tags as 'Outside' for each token
        tags = [O_TAG] * len(tokens)

        # Outer literal: category -> string-encoded list of entities
        entities = ast.literal_eval(row['entities_dict'])

        # Update tags based on entities
        for category, entity_list in entities.items():
            for entity in ast.literal_eval(entity_list):
                entity_tokens = entity.split()
                span = len(entity_tokens)
                if span == 0:
                    # An empty entity would compare equal to the empty slice
                    # at every position and tag the whole sentence as B-.
                    continue
                # Windows that run past the end can never match, so stop early.
                for i in range(len(tokens) - span + 1):
                    if tokens[i:i + span] == entity_tokens:
                        tags[i] = B_PREFIX + category
                        tags[i + 1:i + span] = [I_PREFIX + category] * (span - 1)

        all_tokens.append(tokens)
        all_tags.append(tags)

    # Build the frame once, after the loop: rebuilding it per row is quadratic
    # and leaves `data` undefined when `df` has no rows.
    data = pd.DataFrame({'tokens': all_tokens, 'ner_tags': all_tags})
    data['ner_tags_str'] = data['ner_tags']
    data['ner_tags'] = data['ner_tags'].apply(lambda x: list(map(label2id.get, x)))

    return data


In [6]:
# Run tokenize_and_tag on each split to get its tokens / ner_tags frame.
train_data = tokenize_and_tag(train, categories)
dev_data = tokenize_and_tag(dev, categories)
test_data = tokenize_and_tag(test, categories)

In [8]:
# Save the tagged splits as CSV, omitting the DataFrame index.
# NOTE(review): the list-valued columns are presumably stored as their string
# form, which is why the reload step parses them with ast.literal_eval.
train_data.to_csv("./data/roberta/train.csv", index=False)
dev_data.to_csv("./data/roberta/dev.csv", index=False)
test_data.to_csv("./data/roberta/test.csv", index=False)

In [39]:
# Display the tagged training frame.
train_data

Unnamed: 0,tokens,ner_tags,ner_tags_str
0,"[(, 7, ), On, specific, query, by, the, Bench,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
1,"[He, was, also, asked, whether, Agya, <, span,...","[0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[O, O, O, O, O, B-OTHER_PERSON, O, O, O, O, O,..."
2,"[5.2, CW3, Mr, Vijay, Mishra, ,, Deputy, Manag...","[0, 0, 0, 27, 28, 0, 0, 0, 0, 13, 14, 0, 0, 0,...","[O, O, O, B-WITNESS, I-WITNESS, O, O, O, O, B-..."
3,"[The, pillion, rider, T.V., Satyanarayana, Mur...","[0, 0, 0, 15, 16, 16, 0, 0, 0, 0]","[O, O, O, B-OTHER_PERSON, I-OTHER_PERSON, I-OT..."
4,"[,, if, the, argument, of, the, learned, couns...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."
...,...,...,...
9890,"[1, ®, In, The, High, Court, Of, Karnataka, At...","[0, 0, 0, 0, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, ...","[O, O, O, O, B-COURT, I-COURT, I-COURT, I-COUR..."
9891,"[They, had, admittedly, left, India, after, th...","[0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[O, O, O, O, B-GPE, O, O, O, O, O, O, O, O, O,..."
9892,"[Non, -, applicant, produced, witnesses, NAW, ...","[0, 0, 0, 0, 0, 0, 0, 27, 28, 0, 0, 0, 0, 0, 2...","[O, O, O, O, O, O, O, B-WITNESS, I-WITNESS, O,..."
9893,"[No, doubt, ,, civil, and, criminal, jurisdict...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, ..."


### Model Building

In [3]:
def transform_columns_to_list(df):
    """Parse string-encoded Python literals in every column back into objects.

    Each string cell is passed through ``ast.literal_eval`` (e.g. the text
    ``"[0, 1]"`` becomes the list ``[0, 1]``). Cells that are not strings are
    left untouched, so the function can be re-run on an already-parsed frame
    and tolerates non-string columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns hold string-encoded literals. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with its columns replaced by parsed values.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when a string cell is not a
        valid Python literal.
    """
    def _parse(value):
        # Only strings need decoding; anything else is already a Python value.
        return ast.literal_eval(value) if isinstance(value, str) else value

    for categ in df.columns:
        df[categ] = df[categ].apply(_parse)
    return df

In [4]:
# Reload the tagged splits from CSV.
# NOTE(review): the data-creation cell writes to "./data/roberta/" while this
# cell reads from "../../Data/roberta/" - confirm both point at the same files.
train_data = pd.read_csv("../../Data/roberta/train.csv")
dev_data = pd.read_csv("../../Data/roberta/dev.csv")
test_data = pd.read_csv("../../Data/roberta/test.csv")

In [5]:
# Turn the string-encoded columns of each split back into Python objects.
train_data = transform_columns_to_list(train_data)
dev_data = transform_columns_to_list(dev_data)
test_data = transform_columns_to_list(test_data)

In [6]:
# Wrap each pandas split in a Hugging Face Dataset
train_dataset, dev_dataset, test_dataset = [
    Dataset.from_pandas(frame) for frame in (train_data, dev_data, test_data)
]

# Bundle the splits under the keys 'train', 'validation' and 'test'
data = DatasetDict({
    'train': train_dataset,
    'validation': dev_dataset,
    'test': test_dataset,
})


In [7]:
# Display the DatasetDict (features and row counts per split).
data

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags', 'ner_tags_str'],
        num_rows: 9895
    })
    validation: Dataset({
        features: ['tokens', 'ner_tags', 'ner_tags_str'],
        num_rows: 1100
    })
    test: Dataset({
        features: ['tokens', 'ner_tags', 'ner_tags_str'],
        num_rows: 1074
    })
})

#### Tokenization

In [8]:
from transformers import AutoTokenizer

# Name of the pretrained checkpoint whose tokenizer is loaded here.
model_checkpoint = "xlm-roberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

In [9]:
# Check whether the loaded tokenizer is a fast tokenizer.
tokenizer.is_fast

True

In [10]:
# Take the word list of the first training example ...
inputs = data['train'][0]['tokens']
# ... and encode it, telling the tokenizer the text is already split into words.
# Note that `inputs` is rebound from the word list to the tokenizer output.
inputs = tokenizer(inputs, is_split_into_words=True)
# Show the sub-word tokens produced for this example.
print(inputs.tokens())

['<s>', '▁(', '▁7', '▁)', '▁On', '▁specific', '▁que', 'ry', '▁by', '▁the', '▁Ben', 'ch', '▁about', '▁an', '▁entry', '▁of', '▁Rs', '▁', '.', '▁1,3', '1', ',', '37', ',', '500', '▁on', '▁deposit', '▁side', '▁of', '▁Hongkong', '▁Bank', '▁account', '▁of', '▁which', '▁a', '▁photo', '▁copy', '▁is', '▁appear', 'ing', '▁at', '▁p', '.', '▁40', '▁of', '▁assess', 'ee', "▁'", 's', '▁paper', '▁book', '▁', ',', '▁learned', '▁author', 'ised', '▁representativ', 'e', '▁submitted', '▁that', '▁it', '▁was', '▁related', '▁to', '▁loan', '▁from', '▁broker', '▁', ',', '▁Rahul', '▁&', '▁Co', '.', '▁on', '▁the', '▁basis', '▁of', '▁his', '▁sub', 'mission', '▁a', '▁necessary', '▁mark', '▁is', '▁put', '▁by', '▁us', '▁on', '▁that', '▁photo', '▁copy', '▁', '.', '</s>']


In [11]:
# Show, for every sub-word token, the index of the word it came from
# (None for the first and last token in the output below).
print(inputs.word_ids())

[None, 0, 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12, 13, 14, 14, 15, 15, 15, 15, 15, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 31, 32, 33, 34, 34, 35, 35, 36, 37, 38, 38, 39, 40, 40, 41, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 52, 53, 54, 54, 55, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, None]


In [12]:
def align_labels_with_tokens(labels, word_ids):
    """Expand word-level tag ids to one label per sub-word token.

    Args:
        labels: tag ids, one per word, indexable by word index.
        word_ids: for each sub-word token, the index of its source word, or
            ``None`` for tokens that belong to no word.

    Returns:
        A list with one entry per element of ``word_ids``:
        ``-100`` where the word id is ``None``; the word's own label for the
        first token of a word; and, for further tokens of the same word, the
        word's label with odd ids bumped by one (B-x -> I-x in this
        notebook's id scheme) and even ids kept as is.
    """
    aligned = []
    previous = None
    for wid in word_ids:
        if wid is None:
            # Token is not attached to any word
            aligned.append(-100)
        elif wid == previous:
            # Continuation piece of the word seen on the previous token
            tag = labels[wid]
            aligned.append(tag + 1 if tag % 2 == 1 else tag)
        else:
            # First piece of a new word keeps the word's label
            aligned.append(labels[wid])
        previous = wid
    return aligned

In [13]:
# Fetch the word-level tag ids of the first training example and the word ids
# of its encoded form, then print both for comparison.
labels = data['train'][0]['ner_tags']
word_ids = inputs.word_ids()
print(labels, word_ids)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [None, 0, 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12, 13, 14, 14, 15, 15, 15, 15, 15, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 29, 30, 31, 31, 32, 33, 34, 34, 35, 35, 36, 37, 38, 38, 39, 40, 40, 41, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 51, 52, 53, 54, 54, 55, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 72, None]


In [14]:
# Print the token-level labels produced for the example above.
print(align_labels_with_tokens(labels, word_ids))

[-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 14, 14, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100]


In [15]:
def tokenize_and_align_labels(examples):
    """Encode a batch of pre-split sentences and attach token-level labels.

    Args:
        examples: batch providing ``'tokens'`` (word lists) and ``'ner_tags'``
            (word-level tag ids), indexed in parallel.

    Returns:
        The tokenizer output for the batch (truncation enabled) with an added
        ``'labels'`` entry: one list per example, built by
        ``align_labels_with_tokens`` from that example's tags and word ids.
    """
    encoded = tokenizer(examples['tokens'], truncation=True, is_split_into_words=True)

    # word_ids(row) gives the token -> word mapping for the row-th example
    encoded['labels'] = [
        align_labels_with_tokens(tags, encoded.word_ids(row))
        for row, tags in enumerate(examples['ner_tags'])
    ]

    return encoded

In [16]:
# Encode every split in batches; the original columns are removed so only the
# fields returned by tokenize_and_align_labels remain.
tokenized_datasets = data.map(tokenize_and_align_labels, batched=True, remove_columns=data['train'].column_names)

Map: 100%|██████████| 9895/9895 [00:00<00:00, 13007.66 examples/s]
Map: 100%|██████████| 1100/1100 [00:00<00:00, 12734.78 examples/s]
Map: 100%|██████████| 1074/1074 [00:00<00:00, 13031.21 examples/s]


In [17]:
# Display the encoded splits (features and row counts).
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 9895
    })
    validation: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 1100
    })
    test: Dataset({
        features: ['input_ids', 'attention_mask', 'labels'],
        num_rows: 1074
    })
})

#### Data collation

In [18]:
from transformers import DataCollatorForTokenClassification

# Collator for token-classification batches, built around the tokenizer above.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [19]:
# Collate the first two encoded training examples into one batch and print it
# to inspect what the collator produces.
batch = data_collator([tokenized_datasets['train'][i] for i in range(2)])
print(batch)

You're using a XLMRobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'input_ids': tensor([[     0,     15,    361,   1388,   2161,  29458,     41,   1294,    390,
             70,   3419,    206,   1672,    142,  42805,    111, 115034,      6,
              5,  46963,    418,      4,  10945,      4,   4283,     98,  40370,
           5609,    111, 185934,   4932,  15426,    111,   3129,     10,  16186,
          43658,     83, 108975,    214,     99,    915,      5,   1112,    111,
         202120,   7039,    242,      7,  15122,  12877,      6,      4,  97384,
          42179,  52021,  99638,     13, 230121,    450,    442,    509,  62548,
             47, 111628,   1295, 115835,      6,      4, 191367,    619,   1311,
              5,     98,     70,  18231,    111,   1919,   1614,  21150,     10,
          63559,  16188,     83,   3884,    390,   1821,     98,    450,  16186,
          43658,      6,      5,      2],
        [     0,   1529,    509,   2843,  37170,  36766,  12342,    395,   4426,
          27734,  18507,  22422,  15080,    555,    4

#### Metrics

In [20]:
import evaluate
# NOTE(review): IOB2 is imported but not referenced in the visible cells
# (compute_metrics passes the scheme as the string "IOB2") - confirm it is needed.
from seqeval.scheme import IOB2

# Load the seqeval metric used for entity-level scores.
metric = evaluate.load('seqeval')

In [21]:
# Display the metric's description and accepted arguments.
metric

EvaluationModule(name: "seqeval", module_type: "metric", features: {'predictions': Sequence(feature=Value(dtype='string', id='label'), length=-1, id='sequence'), 'references': Sequence(feature=Value(dtype='string', id='label'), length=-1, id='sequence')}, usage: """
Produces labelling scores along with its sufficient statistics
from a source against one or more references.

Args:
    predictions: List of List of predicted labels (Estimated targets as returned by a tagger)
    references: List of List of reference labels (Ground truth (correct) target values)
    suffix: True if the IOB prefix is after type, False otherwise. default: False
    scheme: Specify target tagging scheme. Should be one of ["IOB1", "IOB2", "IOE1", "IOE2", "IOBES", "BILOU"].
        default: None
    mode: Whether to count correct entity labels with incorrect I/B tags as true positives or not.
        If you want to only count exact matches, pass mode="strict". default: None.
    sample_weight: Array-like of sha

In [22]:
import numpy as np

def compute_metrics(eval_preds):
    """Compute overall seqeval scores from evaluation logits and label ids.

    Args:
        eval_preds: pair ``(logits, labels)``. The predicted id for a position
            is the argmax of ``logits`` over its last axis; positions whose
            label is ``-100`` are excluded from scoring.

    Returns:
        Dict with the keys ``precision``, ``recall``, ``f1`` and ``accuracy``
        taken from the metric's overall results (IOB2 scheme, strict mode,
        ``zero_division=0``).
    """
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Reference tags: every label id except the ignore value
    true_labels = []
    for label_row in labels:
        true_labels.append([id2label[tag_id] for tag_id in label_row if tag_id != -100])

    # Predicted tags at exactly the positions that carry a real label
    true_predictions = []
    for pred_row, label_row in zip(predictions, labels):
        kept = []
        for pred_id, tag_id in zip(pred_row, label_row):
            if tag_id != -100:
                kept.append(id2label[pred_id])
        true_predictions.append(kept)

    all_metrics = metric.compute(
        predictions=true_predictions,
        references=true_labels,
        scheme="IOB2",
        mode="strict",
        zero_division=0,
    )

    precision = all_metrics['overall_precision']
    recall = all_metrics['overall_recall']
    f1 = all_metrics['overall_f1']
    accuracy = all_metrics['overall_accuracy']
    return {"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy}

### Model training

In [23]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

In [24]:
# Alternative kept for reference: start from the base checkpoint named by
# `model_checkpoint` instead of a saved checkpoint directory.
#model = AutoModelForTokenClassification.from_pretrained(
#                                                     model_checkpoint,
#                                                     id2label=id2label,
#                                                     label2id=label2id)
# Load the token-classification model from a saved checkpoint directory,
# attaching the notebook's label maps to its config.
# NOTE(review): the path is absolute and machine-specific. The training log
# further down (first logged loss 0.3888) looks like a run that did not start
# from this checkpoint - confirm which weights the reported scores belong to.
model = AutoModelForTokenClassification.from_pretrained(
                                                    "/media/axelrom16/Axel/Master/3rd_Semester/HLE/LegalNER/Models/xlm-roberta-base/checkpoint-61850/",
                                                    id2label=id2label,
                                                    label2id=label2id)

In [25]:
# Check the number of labels in the model config (29 = 'O' plus B-/I- for 14 categories).
model.config.num_labels

29

In [27]:
# Training configuration: evaluate and save once per epoch, 3 epochs,
# learning rate 2e-5, weight decay 0.01.
# NOTE(review): the run name is spelled "xml-..." while the checkpoint is
# "xlm-..." - confirm the name is intentional before relying on it.
args = TrainingArguments("xml-roberta-legal-ner",
                         evaluation_strategy = "epoch",
                         save_strategy="epoch",
                         learning_rate = 2e-5,
                         num_train_epochs=3,
                         weight_decay=0.01)

In [28]:
# Assemble the Trainer: train on the 'train' split, evaluate on 'validation',
# batch with data_collator and score with compute_metrics.
trainer = Trainer(model=model,
                  args=args,
                  train_dataset = tokenized_datasets['train'],
                  eval_dataset = tokenized_datasets['validation'],
                  data_collator=data_collator,
                  compute_metrics=compute_metrics,
                  tokenizer=tokenizer)

In [27]:
# Run training; the log below reports loss every 500 steps and validation
# metrics at the end of each epoch.
trainer.train()

 14%|█▎        | 501/3711 [02:24<17:17,  3.09it/s]

{'loss': 0.3888, 'learning_rate': 1.7305308542171922e-05, 'epoch': 0.4}


 27%|██▋       | 1000/3711 [04:45<15:11,  2.97it/s]

{'loss': 0.1448, 'learning_rate': 1.4610617084343843e-05, 'epoch': 0.81}


                                                   
 33%|███▎      | 1237/3711 [06:07<09:39,  4.27it/s]

{'eval_loss': 0.09700857847929001, 'eval_precision': 0.7925084175084175, 'eval_recall': 0.8144463667820069, 'eval_f1': 0.8033276450511946, 'eval_accuracy': 0.972731768196482, 'eval_runtime': 10.1166, 'eval_samples_per_second': 108.732, 'eval_steps_per_second': 13.641, 'epoch': 1.0}


 40%|████      | 1501/3711 [07:28<10:02,  3.67it/s]  

{'loss': 0.0999, 'learning_rate': 1.1915925626515765e-05, 'epoch': 1.21}


 54%|█████▍    | 2001/3711 [10:04<08:03,  3.54it/s]

{'loss': 0.0768, 'learning_rate': 9.221234168687686e-06, 'epoch': 1.62}


                                                   
 67%|██████▋   | 2474/3711 [12:44<06:18,  3.27it/s]

{'eval_loss': 0.07688409090042114, 'eval_precision': 0.8522483940042827, 'eval_recall': 0.860726643598616, 'eval_f1': 0.8564665375511082, 'eval_accuracy': 0.9797607554100831, 'eval_runtime': 10.6256, 'eval_samples_per_second': 103.524, 'eval_steps_per_second': 12.988, 'epoch': 2.0}


 67%|██████▋   | 2501/3711 [12:57<06:25,  3.14it/s]  

{'loss': 0.0704, 'learning_rate': 6.5265427108596065e-06, 'epoch': 2.02}


 81%|████████  | 3001/3711 [15:34<02:32,  4.65it/s]

{'loss': 0.052, 'learning_rate': 3.831851253031528e-06, 'epoch': 2.43}


 94%|█████████▍| 3501/3711 [18:00<00:45,  4.57it/s]

{'loss': 0.0496, 'learning_rate': 1.1371597952034493e-06, 'epoch': 2.83}


                                                   
100%|██████████| 3711/3711 [19:11<00:00,  5.58it/s]

{'eval_loss': 0.07549183070659637, 'eval_precision': 0.8709953011533532, 'eval_recall': 0.8819204152249135, 'eval_f1': 0.8764238125940254, 'eval_accuracy': 0.981609249534933, 'eval_runtime': 10.1082, 'eval_samples_per_second': 108.823, 'eval_steps_per_second': 13.652, 'epoch': 3.0}


100%|██████████| 3711/3711 [19:16<00:00,  3.21it/s]

{'train_runtime': 1156.0786, 'train_samples_per_second': 25.677, 'train_steps_per_second': 3.21, 'train_loss': 0.12144086046766252, 'epoch': 3.0}





TrainOutput(global_step=3711, training_loss=0.12144086046766252, metrics={'train_runtime': 1156.0786, 'train_samples_per_second': 25.677, 'train_steps_per_second': 3.21, 'train_loss': 0.12144086046766252, 'epoch': 3.0})

In [30]:
# Evaluate on the Trainer's eval_dataset (the 'validation' split).
trainer.evaluate()

100%|██████████| 138/138 [00:09<00:00, 14.31it/s]


{'eval_loss': 0.07905382663011551,
 'eval_precision': 0.8680942184154176,
 'eval_recall': 0.8767301038062284,
 'eval_f1': 0.8723907897568324,
 'eval_accuracy': 0.9800668754562366,
 'eval_runtime': 9.9979,
 'eval_samples_per_second': 110.023,
 'eval_steps_per_second': 13.803}

## Inference

In [38]:
# Run the model on the encoded test split; the third return value is discarded.
predictions, labels, _ = trainer.predict(tokenized_datasets["test"])
# Reduce the raw scores to one predicted id per position by taking the argmax
# over axis 2 (presumably the label dimension).
predictions = np.argmax(predictions, axis=2)

100%|██████████| 135/135 [00:06<00:00, 22.30it/s]


In [39]:
# Tag names in label2id insertion order, so position i holds the tag with id i
# (label2id was filled with ids 0, 1, 2, ... in order).
label_list = list(label2id.keys())
print(len(label_list))

29


In [40]:
# Convert ids to tag names, keeping only positions whose reference label is
# not the ignore value -100.
true_predictions = []
true_labels = []
for prediction, label in zip(predictions, labels):
    kept = [(p, l) for (p, l) in zip(prediction, label) if l != -100]
    true_predictions.append([label_list[p] for (p, _) in kept])
    true_labels.append([label_list[l] for (_, l) in kept])

In [41]:
# Score the test predictions; the result holds per-category and overall values.
# NOTE(review): compute_metrics passes scheme="IOB2", mode="strict" and
# zero_division=0, while this call uses the metric defaults - confirm the test
# numbers are meant to be computed differently from the validation numbers.
results = metric.compute(predictions=true_predictions, references=true_labels)
results

{'CASE_NUMBER': {'precision': 0.7846153846153846,
  'recall': 0.85,
  'f1': 0.816,
  'number': 60},
 'COURT': {'precision': 0.8961538461538462,
  'recall': 0.9031007751937985,
  'f1': 0.8996138996138996,
  'number': 258},
 'DATE': {'precision': 0.9545454545454546,
  'recall': 1.0,
  'f1': 0.9767441860465117,
  'number': 168},
 'GPE': {'precision': 0.7589743589743589,
  'recall': 0.8222222222222222,
  'f1': 0.7893333333333332,
  'number': 180},
 'JUDGE': {'precision': 0.8525641025641025,
  'recall': 0.95,
  'f1': 0.8986486486486486,
  'number': 140},
 'LAWYER': {'precision': 0.823170731707317,
  'recall': 0.9854014598540146,
  'f1': 0.8970099667774087,
  'number': 411},
 'ORG': {'precision': 0.6871165644171779,
  'recall': 0.8421052631578947,
  'f1': 0.7567567567567567,
  'number': 133},
 'OTHER_PERSON': {'precision': 0.88,
  'recall': 0.9272030651340997,
  'f1': 0.9029850746268656,
  'number': 261},
 'PETITIONER': {'precision': 0.7555555555555555,
  'recall': 0.912751677852349,
  'f1':

In [42]:
# Convert any int64 values to Python native integers for JSON serialization
def convert_int64(obj):
    """Recursively replace NumPy integer scalars with built-in ``int``.

    Intended to make a nested result structure serialisable with ``json``.
    Any NumPy integer width (``np.int64``, ``np.int32``, ...) is converted,
    not just ``np.int64``.

    Parameters
    ----------
    obj : Any
        A dict, list, tuple, NumPy integer scalar, or any other value.

    Returns
    -------
    Any
        * dict - the same dict object, with its values converted in place;
        * list or tuple - a new list of converted items;
        * NumPy integer scalar - the equivalent ``int``;
        * anything else - returned unchanged.
    """
    if isinstance(obj, dict):
        # Only existing keys are reassigned, so the dict's size does not
        # change while it is being iterated.
        for key, value in obj.items():
            obj[key] = convert_int64(value)
        return obj
    if isinstance(obj, (list, tuple)):
        return [convert_int64(item) for item in obj]
    if isinstance(obj, np.integer):
        return int(obj)
    return obj

# Apply the conversion to the results dictionary. convert_int64 rewrites dict
# values in place, so `results` and `convertible_results` are the same object.
convertible_results = convert_int64(results)

In [43]:
# Wrap the converted results as {"data": {"text/plain": [results]}}
json_structure = {
    "data": {
        "text/plain": [convertible_results]
    }
}

# Destination file for the test-set metrics.
# NOTE(review): absolute, machine-specific path - will not exist on other machines.
file_path = '/media/axelrom16/Axel/Master/3rd_Semester/HLE/LegalNER/results/xlm_roberta_test.json'

# Write the structure as JSON indented by 4 spaces; the with-block closes the file.
with open(file_path, 'w') as file:
    json.dump(json_structure, file, indent=4)