#Setup

In [1]:
!pip install accelerate
!pip install transformers[torch]



In [2]:
!pip3 install transformers
!pip3 install datasets
!pip3 install sentencepiece
!pip3 install seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m642.9 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25ldone
[?25h  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=44357c1d57fd33ebabccfe7326d1711efdc42bea923e1c28b98a92f931fe97d7
  Stored in directory: /root/.cache/pip/wheels/1a/67/4a/ad4082dd7dfc30f2abfe4d80a2ed5926a506eb8a972b4767fa
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


# Running the IndicNER Model

Let's try annotating some Indian language sentences and get the named entities

In [6]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
def get_predictions( sentence, tokenizer, model ):
  # Let us first tokenize the sentence - split words into subwords
  tok_sentence = tokenizer(sentence, return_tensors='pt')

  with torch.no_grad():
    # we will send the tokenized sentence to the model to get predictions
    logits = model(**tok_sentence).logits.argmax(-1)

    # We will map the maximum predicted class id with the class label
    predicted_tokens_classes = [model.config.id2label[t.item()] for t in logits[0]]

    predicted_labels = []

    previous_token_id = 0
    # we need to assign the named entity label to the head word and not the following sub-words
    word_ids = tok_sentence.word_ids()
    for word_index in range(len(word_ids)):
        if word_ids[word_index] == None:
            previous_token_id = word_ids[word_index]
        elif word_ids[word_index] == previous_token_id:
            previous_token_id = word_ids[word_index]
        else:
            predicted_labels.append( predicted_tokens_classes[ word_index ] )
            previous_token_id = word_ids[word_index]

    return predicted_labels

#Naampadam Dataset

The _Naampadam_ Dataset is a large dataset for Named Entity Recognition in 11 Indian languages.  _Naampadam_ means "named entity" in Sanskrit.

In [5]:
# Let's download the Naampadam (Indic NER) dataset
from datasets import ClassLabel, load_dataset, load_metric, DownloadMode

lang='hi'

raw_datasets = load_dataset('ai4bharat/naamapadam', lang)

Downloading builder script:   0%|          | 0.00/2.86k [00:00<?, ?B/s]

Downloading and preparing dataset naamapadam_pr/hi to /root/.cache/huggingface/datasets/ai4bharat___naamapadam_pr/hi/1.0.0/99b5ec77eabfaa3fbff510d8cf70d7c34519486cb7dbee99ede19474ddff9b20...


Downloading data:   0%|          | 0.00/82.3M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Dataset naamapadam_pr downloaded and prepared to /root/.cache/huggingface/datasets/ai4bharat___naamapadam_pr/hi/1.0.0/99b5ec77eabfaa3fbff510d8cf70d7c34519486cb7dbee99ede19474ddff9b20. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [7]:
# let's now print how the Dataset looks like
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 985787
    })
    test: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 867
    })
    validation: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 13460
    })
})

In [8]:
raw_datasets.column_names

{'train': ['tokens', 'ner_tags'],
 'test': ['tokens', 'ner_tags'],
 'validation': ['tokens', 'ner_tags']}

In [9]:
# # let's print an instance of dataset
# idx=1000
# rec=raw_datasets['train'][idx]
# for w, t in zip(rec['tokens'],rec['ner_tags']):
#   print('{}\t{}'.format(w,t))


In [10]:
from datasets import Dataset
reduced_train_data = raw_datasets['train'].select(range(20000))

In [11]:
column_names = reduced_train_data.column_names
print(column_names)

features = reduced_train_data.features
print(features)

['tokens', 'ner_tags']
{'tokens': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None), 'ner_tags': Sequence(feature=ClassLabel(num_classes=7, names=['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC'], id=None), length=-1, id=None)}


In [12]:
text_column_name = "tokens"
label_column_name = "ner_tags"

In [13]:
# If the labels are of type ClassLabel, they are already integers and we have the map stored somewhere.

label_list = features[label_column_name].feature.names

label_to_id = {label_list[i]: features[label_column_name].feature.str2int( label_list[i] ) for i in range(len(label_list))}

print(label_to_id)

num_labels = len(label_list)


{'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6}


# Training an NER Model with the dataset

We have already seen how to get predictions from fine-tuned NER model. We will now use the pre-trained IndicBERT model and fine-tune it for NER task.

Let us download a pre-trained model and fine-tune it for the task of NER. We will have to use the `AutoModelForTokenClassification` class to fine-tune the model

**Load Pre-trained Model**

In [16]:
from transformers import AutoModelForTokenClassification, AutoConfig, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForTokenClassification, EarlyStoppingCallback, IntervalStrategy
import numpy as np

config = AutoConfig.from_pretrained('ai4bharat/IndicNER', num_labels=num_labels, finetuning_task='ner')
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicNER")
model = AutoModelForTokenClassification.from_pretrained('ai4bharat/IndicNER', num_labels=num_labels )

# Run the next cell if you want to use a GPU. Make sure that the Colab runtime is set accordingly
# model=model.to("cuda")

**Tokenize all texts and align the labels with them**

In [17]:
# Tokenize all texts and align the labels with them.
padding = "max_length"
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples[text_column_name],
        padding=padding,
        truncation=True,
        max_length=512,
        # We use this argument because the texts in our dataset are lists of words (with a label for each word).
        is_split_into_words=True,
    )
    labels = []
    for i, label in enumerate(examples[label_column_name]):
        # print('=====')
        # print('{} {}'.format(i,label)) #ak
        word_ids = tokenized_inputs.word_ids(batch_index=i)

        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            # Special tokens have a word id that is None. We set the label to -100 so they are automatically
            # ignored in the loss function.
            if word_idx is None:
                label_ids.append(-100)
            # We set the label for the first token of each word.
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            # For the other tokens in a word, we set the label to either the current label or -100, depending on
            # the label_all_tokens flag.
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx

        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs

In [18]:
train_dataset = reduced_train_data
train_dataset = train_dataset.map(
    tokenize_and_align_labels,
    batched=True,
    num_proc=4,
    load_from_cache_file=True,
    desc="Running tokenizer on train dataset",
)

        

Running tokenizer on train dataset #0:   0%|          | 0/5 [00:00<?, ?ba/s]

Running tokenizer on train dataset #1:   0%|          | 0/5 [00:00<?, ?ba/s]

Running tokenizer on train dataset #3:   0%|          | 0/5 [00:00<?, ?ba/s]

Running tokenizer on train dataset #2:   0%|          | 0/5 [00:00<?, ?ba/s]

In [19]:
eval_dataset = raw_datasets["validation"]
eval_dataset = eval_dataset.map(
    tokenize_and_align_labels,
    batched=True,
    num_proc=4,
    load_from_cache_file=True,
    desc="Running tokenizer on Validation dataset",
)

       

Running tokenizer on Validation dataset #0:   0%|          | 0/4 [00:00<?, ?ba/s]

 

Running tokenizer on Validation dataset #1:   0%|          | 0/4 [00:00<?, ?ba/s]

Running tokenizer on Validation dataset #2:   0%|          | 0/4 [00:00<?, ?ba/s]

Running tokenizer on Validation dataset #3:   0%|          | 0/4 [00:00<?, ?ba/s]

**Create Data Collator, Metrics**

In [20]:
data_collator = DataCollatorForTokenClassification(tokenizer)

In [21]:
# Metrics
metric = load_metric("seqeval")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    # Remove ignored index (special tokens)
    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = metric.compute(predictions=true_predictions, references=true_labels)
    # Unpack nested dictionaries
    final_results = {}
    for key, value in results.items():
        if isinstance(value, dict):
            for n, v in value.items():
                final_results[f"{key}_{n}"] = v
        else:
            final_results[key] = value
    return final_results

Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

**Set Training Arguments**

In [22]:
!pip install accelerate
!pip install transformers[torch]



In [20]:
# args=TrainingArguments(output_dir='output_dir',max_steps=5)

args=TrainingArguments(
    output_dir='/content/drive/MyDrive/ModelNER',
    per_device_train_batch_size=16,
    # metric_for_best_model = "f1",
    # evaluation_strategy = "steps",
    # load_best_model_at_end = True,
    per_device_eval_batch_size=8,
    num_train_epochs = 3,
    save_steps = 1875,
    save_safetensors = False

    )


**Training**

In [21]:
# Initialize our Trainer
# early_stopping_callback = EarlyStoppingCallback(early_stopping_patience=2)
# args.metric_for_best_model = "f1"
# args.load_best_model_at_end = True
# args.evaluation_strategy = IntervalStrategy.STEPS
# args.eval_steps = args.save_steps
# args.greater_is_better = True

trainer = Trainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    # callbacks=[early_stopping_callback],
    args=args,
)


dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)



In [22]:
trainer.args

TrainingArguments(
_n_gpu=0,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_steps=None,
evaluation_strategy=no,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_la

In [22]:
train_result = trainer.train()
metrics = train_result.metrics

Step,Training Loss
500,0.3262
1000,0.1541
1500,0.1331
2000,0.1096
2500,0.1023
3000,0.073
3500,0.074




---
**Getting the predicted Values**

In [24]:
true_tags = raw_datasets['test']['ner_tags']

In [26]:
def get_predictions( sentence, tokenizer, model ):
  # Let us first tokenize the sentence - split words into subwords
  tok_sentence = tokenizer(sentence, return_tensors='pt',padding=True,truncation=True,max_length=128)

  with torch.no_grad():
    # we will send the tokenized sentence to the model to get predictions
    logits = model(**tok_sentence).logits.argmax(-1)

    # We will map the maximum predicted class id with the class label
    predicted_tokens_classes = [model.config.id2label[t.item()] for t in logits[0]]

    predicted_labels = []

    previous_token_id = 0
    # we need to assign the named entity label to the head word and not the following sub-words
    word_ids = tok_sentence.word_ids()
    for word_index in range(len(word_ids)):
        if word_ids[word_index] == None:
            previous_token_id = word_ids[word_index]
        elif word_ids[word_index] == previous_token_id:
            previous_token_id = word_ids[word_index]
        else:
            predicted_labels.append( predicted_tokens_classes[ word_index ] )
            previous_token_id = word_ids[word_index]

    return predicted_labels

In [27]:
def remove_imbalance_sentences(true_tags,predicted_tags):
      true_balance_tags = []
      predicted_balance_tags = []
      print(type(predicted_tags), type(true_tags))
      for i in range(len(true_tags)):
          if(len(true_tags[i])==len(predicted_tags[i])):
                true_balance_tags.append(true_tags[i])
                predicted_balance_tags.append(predicted_tags[i])
      return true_balance_tags,predicted_balance_tags




In [28]:
check = None

In [29]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score

def calculate_scores(true_tags, predicted_tags):
    # remove tags having  different 
    
#     global check
    true_tags,predicted_tags = remove_imbalance_sentences(true_tags,predicted_tags)

    true_tags_flat = np.concatenate(true_tags)
    predicted_tags_flat = np.concatenate(predicted_tags)

    # Find unique classes
    classes = np.unique(true_tags_flat)

    # Initialize lists to store precision, recall, and F1 scores for each class
    precisions = []
    recalls = []
    f1_scores = []
    
#     check = (true_tags_flat, predicted_tags_flat)
    predicted_tags_flat = [label_to_tags[int_id_to_label[ele]] for ele in predicted_tags_flat]

    # Calculate precision, recall, and F1 score for each class
    for class_id in classes:
#         print(np.array(predicted_tags_flat), class_id)
        true_class = np.array(true_tags_flat) == class_id
        pred_class = np.array(predicted_tags_flat) == class_id

        precision = precision_score(true_class, pred_class, zero_division=1)
        recall = recall_score(true_class, pred_class, zero_division=1)
        f1 = f1_score(true_class, pred_class, zero_division=1)

        # Append scores to respective lists
        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)

#         print(f"Class {label_to_tags[int_id_to_label[class_id]]}:")
        print(f"Class {class_id}:")
        print(f"  Precision: {precision}")
        print(f"  Recall: {recall}")
        print(f"  F1 Score: {f1}")

    # Calculate macro F1 score
    macro_f1 = sum(f1_scores) / len(f1_scores)

    print("----------------------------")
    print(f"Macro F1 Score: {macro_f1}")

In [30]:
last_part = "/kaggle/input/indicner/pytorch/indicner2/1"

In [31]:
import torch
def  prediction_test(path,data):
      model,tokenizer  =  load_model(path)
      predicted_tags = []
      for input_text in data:
          sentence = " ".join(input_text)
          prediction = get_predictions(sentence=sentence,
                                    tokenizer=tokenizer,
                                    model=model
                                    )
         
          last_part = path.split('/')[-1]
          if last_part != "my_model_filtrered_Indic_NEr":
              predicted_tags.append([label_to_int_id[tag] for tag in prediction])
          else:
              print("here")
              new_prediction = []
              for x in prediction:
                  new_prediction.append(x)
              # print(type(new_prediction))

              predicted_tags.append(list(label_to_int_id[label_to_tags_swapped[tag]] for tag in new_prediction))

      return predicted_tags

In [32]:
label_to_tags = {'LABEL_0':'O', 'LABEL_1':'B-PER', 'LABEL_2':'I-PER', 'LABEL_3':'B-ORG', 'LABEL_4':'I-ORG', 'LABEL_5':'B-LOC', 'LABEL_6':'I-LOC', 'LABEL_7':'B-MISC', 'LABEL_8':'I-MISC'}
label_to_int_id = {'LABEL_0':0, 'LABEL_1':1, 'LABEL_2':2, 'LABEL_3':3, 'LABEL_4':4, 'LABEL_5':5, 'LABEL_6':6, 'LABEL_7':7, 'LABEL_8':8}
int_id_to_label = {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2', 3: 'LABEL_3', 4: 'LABEL_4', 5: 'LABEL_5', 6: 'LABEL_6', 7: 'LABEL_7', 8: 'LABEL_8'}
label_to_tags_swapped = {value: key for key, value in label_to_tags.items()}
# label_to_tags_swapped


In [34]:
Indic_NER = "/kaggle/input/indicner/pytorch/indicner2/1"

all_models_path = {0:Indic_NER
            }

all_models_names = {0:"Indic_NER"
              }


In [35]:
# load saved fine-tuned model
import numpy as np
from transformers import AutoModelForTokenClassification, AutoConfig, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForTokenClassification, EarlyStoppingCallback, IntervalStrategy
def load_model(path):
    tokenizer = AutoTokenizer.from_pretrained(path)
    model = AutoModelForTokenClassification.from_pretrained(path)
    return model,tokenizer


In [36]:
def each_model_score(data,true_tags_1):
    for path in all_models_path.values():
#         print(f"\n\nModel : {str(last_part)}\n")
        predicted_tags = prediction_test(path,data)
        if last_part!="my_model_filtrered_Indic_NEr":
            calculate_scores(true_tags_1,predicted_tags)
#         return predicted_tags

In [37]:
test_data = raw_datasets['test']['tokens']
test_true_tags = raw_datasets['test']['ner_tags']
len(test_data) , len(test_true_tags)

(867, 867)

In [None]:
each_model_score(ground_truth_1_path['train']['tokens'],ground_truth_1_path['train']['ner_tags'])
# print(ground_truth_1_path)

**Manually Annotated sentences**

In [40]:
import numpy as np
from datasets import load_dataset, ClassLabel, load_metric, DownloadMode


# Path to your JSON file in Google Drive
file_path = '/kaggle/input/ground-truth-path/Ground Truth Value of Ques 1.json'

# Load dataset from the local JSON file
ground_truth_1_path = load_dataset('json', data_files=file_path)

# Print the information about the loaded dataset
print(ground_truth_1_path)

# import json
# # Load the JSON file
# with open('/content/Ground Truth Value of Ques 1.json', 'r') as file:
#     data = json.load(file)
# ground_truth_1 = []
# for x in data:
#   ground_truth_1.append(x['ner'])

# print(ground_truth_1)

Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-43751b87d5a5a8b0/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/default-43751b87d5a5a8b0/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 25
    })
})


In [41]:
true_tags_1 = ground_truth_1_path['train']['ner_tags']

In [42]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
model_path = "/kaggle/input/indicner/pytorch/indicner2/1"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/indicner/pytorch/indicner2/1 and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [43]:
train_dataset = ground_truth_1_path
train_dataset = train_dataset.map(
    batched=True,
    num_proc=4,
    load_from_cache_file=True,
    desc="Running tokenizer on train dataset",
)

        

Running tokenizer on train dataset #0:   0%|          | 0/1 [00:00<?, ?ba/s]

Running tokenizer on train dataset #1:   0%|          | 0/1 [00:00<?, ?ba/s]

Running tokenizer on train dataset #2:   0%|          | 0/1 [00:00<?, ?ba/s]

Running tokenizer on train dataset #3:   0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
each_model_score(ground_truth_1_path['train']['tokens'],ground_truth_1_path['train']['ner_tags'])
# print(ground_truth_1_path)

**This is for the ChatGPT's response**

In [45]:
import numpy as np
from datasets import load_dataset, ClassLabel, load_metric, DownloadMode


# Path to your JSON file in Google Drive
file_path = '/kaggle/input/ground-truth-path/Ground Truth Value of Ques 3.json'

# Load dataset from the local JSON file
ground_truth_2_path = load_dataset('json', data_files=file_path)

# Print the information about the loaded dataset
print(ground_truth_2_path)

# import json
# # Load the JSON file
# with open('/content/Ground Truth Value of Ques 3.json', 'r') as file:
#     data = json.load(file)
# ground_truth_2 = []
# for x in data:
#   ground_truth_2.append(x['ner'])

# print(ground_truth_2)


Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-5a0e1f2afe4232db/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/default-5a0e1f2afe4232db/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['tokens', 'ner_tags'],
        num_rows: 25
    })
})


In [46]:
true_tags_2 = ground_truth_2_path['train']['ner_tags']
# print(true_tags_2)

In [47]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
model_path = "/kaggle/input/indicner/pytorch/indicner2/1"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/indicner/pytorch/indicner2/1 and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [48]:
train_dataset = ground_truth_2_path
train_dataset = train_dataset.map(
    batched=True,
    num_proc=4,
    load_from_cache_file=True,
    desc="Running tokenizer on train dataset",
)

        

Running tokenizer on train dataset #0:   0%|          | 0/1 [00:00<?, ?ba/s]

Running tokenizer on train dataset #1:   0%|          | 0/1 [00:00<?, ?ba/s]

Running tokenizer on train dataset #3:   0%|          | 0/1 [00:00<?, ?ba/s]

Running tokenizer on train dataset #2:   0%|          | 0/1 [00:00<?, ?ba/s]

In [None]:
each_model_score(ground_truth_2_path['train']['tokens'],ground_truth_1_path['train']['ner_tags'])
