In [None]:
# Cell 1: Mount Google Drive to access data files.
# The model, label-mapping, test-dataset and prediction paths used later in
# this notebook all live under /content/drive.
from google.colab import drive
# force_remount=True presumably re-mounts even when the drive is already
# mounted (e.g. when this cell is re-run) — confirm against the Colab docs.
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:

!pip install transformers[torch]
!pip install datasets
!pip install tqdm
!pip install seqeval


Collecting transformers[torch]
  Downloading transformers-4.32.0-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers[torch])
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers[torch])
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m87.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers[torch])
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7

In [None]:

# Cell 3: Import necessary libraries and silence known-noisy warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", category=UndefinedMetricWarning, module="seqeval")
from transformers import  Trainer, TrainingArguments
from datasets import Dataset
import numpy as np
import torch
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report
import pickle
import os
from torch.utils.data import DataLoader




In [None]:
# General entity categories and the fine-grained entity types folded into each.
# The nesting order below fixes the insertion order of `labels`.
_GENERAL_CATEGORY_MEMBERS = {
    'Medical': (
        'Disease', 'Symptom', 'AnatomicalStructure', 'MedicalProcedure', 'Medication/Vaccine',
    ),
    'Product': (
        'OtherPROD', 'Drink', 'Food', 'Vehicle', 'Clothing',
    ),
    'Person': (
        'OtherPER', 'SportsManager', 'Cleric', 'Politician', 'Athlete', 'Artist', 'Scientist',
    ),
    'Group': (
        'MusicalGRP', 'PublicCorp', 'PrivateCorp', 'AerospaceManufacturer', 'SportsGRP',
        'CarManufacturer', 'ORG',
    ),
    'CW': (
        'VisualWork', 'MusicalWork', 'WrittenWork', 'ArtWork', 'Software',
    ),
    'Location': (
        'Facility', 'OtherLOC', 'HumanSettlement', 'Station',
    ),
}

# Label grouping dictionary: fine-grained BIO tag -> general BIO tag.
# Every fine-grained type contributes a 'B-' and an 'I-' entry that keep their
# prefix and swap the type for its general category.
labels = {
    f'{bio_prefix}-{fine_type}': f'{bio_prefix}-{general_category}'
    for general_category, fine_types in _GENERAL_CATEGORY_MEMBERS.items()
    for fine_type in fine_types
    for bio_prefix in ('B', 'I')
}
# The outside tag maps to itself.
labels['O'] = 'O'




def convert_to_general_label(label):
    """Translate a fine-grained tag into its general-category tag.

    The lookup table is the notebook-level `labels` dictionary.

    Args:
        label: Tag to translate (a key of `labels`).

    Returns:
        The general tag stored in `labels` for `label`, or None when `label`
        is not a key of `labels`.
    """
    if label in labels:
        return labels[label]
    return None


In [None]:

def compute_metrics(p):
    """Score predictions with seqeval after collapsing labels to general categories.

    Args:
        p: Evaluation output exposing `predictions` (arg-maxed over axis 2 to
            get one label index per position) and `label_ids` (gold label
            indices; positions equal to -100 are skipped).

    Returns:
        dict with the seqeval "precision", "recall" and "f1" scores.

    Notes:
        Label indices are decoded with the notebook-level `label_to_index`
        mapping (label string -> index), so that name must be bound before
        this function runs. The seqeval classification report is printed as a
        side effect.
    """
    predicted_ids = np.argmax(p.predictions, axis=2)
    gold_ids = p.label_ids

    # Invert the label -> index mapping so indices can be decoded to strings
    id_to_label = {idx: name for name, idx in label_to_index.items()}

    gold_sequences = []
    predicted_sequences = []
    for row, gold_row in enumerate(gold_ids):
        # Positions labelled -100 (padding and other special tokens) are not scored
        scored_positions = np.flatnonzero(gold_row != -100)

        # Decode each kept index and map it straight to its general category
        gold_sequences.append([
            convert_to_general_label(id_to_label[label_id])
            for label_id in gold_row[scored_positions]
        ])
        predicted_sequences.append([
            convert_to_general_label(id_to_label[label_id])
            for label_id in predicted_ids[row, scored_positions]
        ])

    # Entity-level scores from seqeval
    results = {
        "precision": precision_score(gold_sequences, predicted_sequences),
        "recall": recall_score(gold_sequences, predicted_sequences),
        "f1": f1_score(gold_sequences, predicted_sequences),
    }

    # Per-category breakdown, printed for inspection only
    print(classification_report(gold_sequences, predicted_sequences))

    return results


In [None]:


# Evaluate the fine-tuned XLM-R models: for each language, load the pickled
# model, label mapping and tokenized test set from Google Drive, run prediction,
# and pickle the raw predictions together with the gold label IDs.

# Define the base paths to the models, label mappings, test datasets, and prediction files on Google Drive
models_base_path = '/content/drive/MyDrive/xlm_fin_models'
label_mappings_base_path = '/content/drive/MyDrive/xlm_fin_models'
test_datasets_base_path = '/content/drive/MyDrive/tokenized_test_datasets/XLMR'
predictions_base_path = '/content/drive/MyDrive/predictions'

# Create the predictions directory if necessary. exist_ok=True replaces the
# separate os.path.exists() check, which could race with another writer.
os.makedirs(predictions_base_path, exist_ok=True)

# Kept for backward compatibility with any cell that references these names;
# nothing in this cell populates them.
models = {}
label_mappings = {}
test_datasets = {}

# Define the languages for which you have saved models, label mappings, and test datasets
languages = ['EN-English', 'ES-Spanish', 'HI-Hindi', 'ZH-Chinese', 'SV-Swedish', 'FA-Farsi', 'FR-French', 'IT-Italian', 'PT-Portuguese', 'UK-Ukrainian', 'DE-German', 'BN-Bangla']

# Load the models, label mappings, and test datasets, evaluate the models on the corresponding test datasets,
# and store the predictions and label IDs for each language
for lang in languages:
    print(f'Processing language: {lang}')

    # Define the paths to the saved model, label mapping, and test dataset for this language
    model_path = os.path.join(models_base_path, lang, 'model', 'model.pkl')
    label_mapping_path = os.path.join(label_mappings_base_path, lang, 'label_mapping', 'label_mapping.pkl')
    test_dataset_path = os.path.join(test_datasets_base_path, f'XLMR_{lang}_test_dataset.pkl')

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load pickles that you produced yourself and that nobody else can modify.

    # Load the model
    print(f'Loading model from {model_path}')
    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    # Load the label mapping.
    # NOTE: `label_to_index` must stay a notebook-level name: compute_metrics
    # reads it as a global to turn label indices back into label strings.
    print(f'Loading label mapping from {label_mapping_path}')
    with open(label_mapping_path, 'rb') as f:
        label_to_index = pickle.load(f)

    # Load the test dataset
    print(f'Loading test dataset from {test_dataset_path}')
    with open(test_dataset_path, 'rb') as f:
        test_dataset = pickle.load(f)

    # Evaluation-only trainer: no training data is supplied
    trainer = Trainer(
        model=model,
        args=TrainingArguments(
            output_dir='/content/drive/MyDrive/results',
            do_train=False,
            do_eval=True,
            per_device_eval_batch_size=16,
        ),
        train_dataset=None,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
    )

    # Get the predictions and label IDs for the test dataset.
    # The third element (the metrics) is bound to a descriptive name instead of
    # `_`, which IPython uses for the last cell output; it is not persisted.
    print('Evaluating model on test dataset')
    predictions, label_ids, test_metrics = trainer.predict(test_dataset)

    # Define the path to the prediction file for this language
    prediction_file_path = os.path.join(predictions_base_path, f'xlmr_{lang}_predictions.pkl')

    # Store the predictions and label IDs in a file using the pickle library
    print(f'Storing predictions and label IDs in {prediction_file_path}')
    with open(prediction_file_path, 'wb') as f:
        pickle.dump((predictions, label_ids), f)


Processing language: EN-English
Loading model from /content/drive/MyDrive/xlm_fin_models/EN-English/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/EN-English/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_EN-English_test_dataset.pkl
Evaluating model on test dataset


              precision    recall  f1-score   support

          CW       0.86      0.86      0.86     62126
       Group       0.86      0.85      0.85     60026
    Location       0.90      0.91      0.91     67901
     Medical       0.81      0.81      0.81     22491
      Person       0.96      0.96      0.96    137681
     Product       0.74      0.71      0.73     27580

   micro avg       0.89      0.89      0.89    377805
   macro avg       0.85      0.85      0.85    377805
weighted avg       0.89      0.89      0.89    377805

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_EN-English_predictions.pkl
Processing language: ES-Spanish
Loading model from /content/drive/MyDrive/xlm_fin_models/ES-Spanish/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/ES-Spanish/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_ES-Spanish_test_dataset.pkl
Evaluating model o

              precision    recall  f1-score   support

          CW       0.87      0.87      0.87     55460
       Group       0.89      0.87      0.88     50259
    Location       0.90      0.90      0.90     72996
     Medical       0.82      0.85      0.84     23812
      Person       0.96      0.96      0.96    125379
     Product       0.78      0.78      0.78     28469

   micro avg       0.90      0.90      0.90    356375
   macro avg       0.87      0.87      0.87    356375
weighted avg       0.90      0.90      0.90    356375

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_ES-Spanish_predictions.pkl
Processing language: HI-Hindi
Loading model from /content/drive/MyDrive/xlm_fin_models/HI-Hindi/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/HI-Hindi/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_HI-Hindi_test_dataset.pkl
Evaluating model on test d

              precision    recall  f1-score   support

          CW       0.78      0.77      0.78      2804
       Group       0.88      0.88      0.88      3897
    Location       0.91      0.92      0.91      7172
     Medical       0.84      0.83      0.83      1979
      Person       0.90      0.91      0.90      5736
     Product       0.70      0.65      0.67      1611

   micro avg       0.87      0.87      0.87     23199
   macro avg       0.83      0.83      0.83     23199
weighted avg       0.87      0.87      0.87     23199

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_HI-Hindi_predictions.pkl
Processing language: ZH-Chinese
Loading model from /content/drive/MyDrive/xlm_fin_models/ZH-Chinese/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/ZH-Chinese/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_ZH-Chinese_test_dataset.pkl
Evaluating model on 

              precision    recall  f1-score   support

          CW       0.67      0.70      0.68      4369
       Group       0.73      0.73      0.73      4715
    Location       0.78      0.82      0.80      6170
     Medical       0.66      0.67      0.66      1781
      Person       0.86      0.89      0.88      9095
     Product       0.55      0.56      0.55      1869

   micro avg       0.76      0.78      0.77     27999
   macro avg       0.71      0.73      0.72     27999
weighted avg       0.76      0.78      0.77     27999

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_ZH-Chinese_predictions.pkl
Processing language: SV-Swedish
Loading model from /content/drive/MyDrive/xlm_fin_models/SV-Swedish/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/SV-Swedish/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_SV-Swedish_test_dataset.pkl
Evaluating model o

              precision    recall  f1-score   support

          CW       0.89      0.89      0.89     54806
       Group       0.92      0.91      0.92     46929
    Location       0.96      0.96      0.96    111879
     Medical       0.89      0.89      0.89     13702
      Person       0.97      0.97      0.97    111157
     Product       0.86      0.85      0.86     22686

   micro avg       0.94      0.94      0.94    361159
   macro avg       0.92      0.91      0.92    361159
weighted avg       0.94      0.94      0.94    361159

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_SV-Swedish_predictions.pkl
Processing language: FA-Farsi
Loading model from /content/drive/MyDrive/xlm_fin_models/FA-Farsi/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/FA-Farsi/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_FA-Farsi_test_dataset.pkl
Evaluating model on test d

              precision    recall  f1-score   support

          CW       0.82      0.84      0.83     53034
       Group       0.81      0.82      0.82     38807
    Location       0.81      0.84      0.83     70907
     Medical       0.75      0.79      0.77     15287
      Person       0.87      0.89      0.88    115868
     Product       0.75      0.77      0.76     18212

   micro avg       0.83      0.85      0.84    312115
   macro avg       0.80      0.83      0.81    312115
weighted avg       0.83      0.85      0.84    312115

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_FA-Farsi_predictions.pkl
Processing language: FR-French
Loading model from /content/drive/MyDrive/xlm_fin_models/FR-French/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/FR-French/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_FR-French_test_dataset.pkl
Evaluating model on test

              precision    recall  f1-score   support

          CW       0.88      0.88      0.88     84952
       Group       0.87      0.86      0.87     52989
    Location       0.89      0.89      0.89     73373
     Medical       0.82      0.82      0.82     17208
      Person       0.96      0.96      0.96    141401
     Product       0.79      0.77      0.78     28274

   micro avg       0.90      0.90      0.90    398197
   macro avg       0.87      0.86      0.87    398197
weighted avg       0.90      0.90      0.90    398197

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_FR-French_predictions.pkl
Processing language: IT-Italian
Loading model from /content/drive/MyDrive/xlm_fin_models/IT-Italian/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/IT-Italian/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_IT-Italian_test_dataset.pkl
Evaluating model on

              precision    recall  f1-score   support

          CW       0.93      0.93      0.93     79873
       Group       0.90      0.90      0.90     46271
    Location       0.91      0.91      0.91     68564
     Medical       0.83      0.84      0.84     19029
      Person       0.97      0.97      0.97    160598
     Product       0.80      0.79      0.79     22887

   micro avg       0.93      0.93      0.93    397222
   macro avg       0.89      0.89      0.89    397222
weighted avg       0.93      0.93      0.93    397222

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_IT-Italian_predictions.pkl
Processing language: PT-Portuguese
Loading model from /content/drive/MyDrive/xlm_fin_models/PT-Portuguese/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/PT-Portuguese/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_PT-Portuguese_test_dataset.pkl
Evalua

              precision    recall  f1-score   support

          CW       0.90      0.89      0.89     58246
       Group       0.90      0.89      0.90     48994
    Location       0.92      0.92      0.92     70923
     Medical       0.83      0.84      0.83     21062
      Person       0.97      0.96      0.97    120413
     Product       0.82      0.81      0.81     21115

   micro avg       0.92      0.91      0.92    340753
   macro avg       0.89      0.88      0.89    340753
weighted avg       0.92      0.91      0.92    340753

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_PT-Portuguese_predictions.pkl
Processing language: UK-Ukrainian
Loading model from /content/drive/MyDrive/xlm_fin_models/UK-Ukrainian/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/UK-Ukrainian/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_UK-Ukrainian_test_dataset.pkl
Evaluat

              precision    recall  f1-score   support

          CW       0.87      0.86      0.86     43291
       Group       0.91      0.90      0.91     39709
    Location       0.92      0.92      0.92     84643
     Medical       0.87      0.88      0.88     20796
      Person       0.97      0.96      0.97     96864
     Product       0.85      0.83      0.84     30071

   micro avg       0.92      0.91      0.92    315374
   macro avg       0.90      0.89      0.90    315374
weighted avg       0.92      0.91      0.92    315374

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_UK-Ukrainian_predictions.pkl
Processing language: DE-German
Loading model from /content/drive/MyDrive/xlm_fin_models/DE-German/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/DE-German/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_DE-German_test_dataset.pkl
Evaluating model on 

              precision    recall  f1-score   support

          CW       0.84      0.85      0.85      4777
       Group       0.84      0.84      0.84      4418
    Location       0.88      0.91      0.89      5306
     Medical       0.81      0.83      0.82      1434
      Person       0.93      0.94      0.94     11299
     Product       0.75      0.70      0.72      1643

   micro avg       0.88      0.89      0.88     28877
   macro avg       0.84      0.85      0.84     28877
weighted avg       0.88      0.89      0.88     28877

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_DE-German_predictions.pkl
Processing language: BN-Bangla
Loading model from /content/drive/MyDrive/xlm_fin_models/BN-Bangla/model/model.pkl
Loading label mapping from /content/drive/MyDrive/xlm_fin_models/BN-Bangla/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/XLMR/XLMR_BN-Bangla_test_dataset.pkl
Evaluating model on tes

              precision    recall  f1-score   support

          CW       0.78      0.75      0.77      3640
       Group       0.88      0.84      0.86      3651
    Location       0.90      0.92      0.91      7375
     Medical       0.80      0.81      0.81      1919
      Person       0.90      0.90      0.90      6935
     Product       0.66      0.65      0.66      1493

   micro avg       0.86      0.85      0.85     25013
   macro avg       0.82      0.81      0.82     25013
weighted avg       0.86      0.85      0.85     25013

Storing predictions and label IDs in /content/drive/MyDrive/predictions/xlmr_BN-Bangla_predictions.pkl


In [None]:
# Evaluate the fine-tuned mBERT models: for each language, load the pickled
# model, label mapping and tokenized test set from Google Drive, run prediction,
# and pickle the raw predictions together with the gold label IDs.

# Define the base paths to the models, label mappings, test datasets, and prediction files on Google Drive
models_base_path = '/content/drive/MyDrive/mbert_fin_models'
label_mappings_base_path = '/content/drive/MyDrive/mbert_fin_models'
test_datasets_base_path = '/content/drive/MyDrive/tokenized_test_datasets/mbert'
predictions_base_path = '/content/drive/MyDrive/predictions'

# Create the predictions directory if necessary. exist_ok=True replaces the
# separate os.path.exists() check, which could race with another writer.
os.makedirs(predictions_base_path, exist_ok=True)

# Kept for backward compatibility with any cell that references these names;
# nothing in this cell populates them.
models = {}
label_mappings = {}
test_datasets = {}

# Define the languages for which you have saved models, label mappings, and test datasets
languages = ['EN-English', 'ES-Spanish', 'HI-Hindi', 'ZH-Chinese', 'SV-Swedish', 'FA-Farsi', 'FR-French', 'IT-Italian', 'PT-Portuguese', 'UK-Ukrainian', 'DE-German', 'BN-Bangla']

# Load the models, label mappings, and test datasets, evaluate the models on the corresponding test datasets,
# and store the predictions and label IDs for each language
for lang in languages:
    print(f'Processing language: {lang}')

    # Define the paths to the saved model, label mapping, and test dataset for this language
    model_path = os.path.join(models_base_path, lang, 'model', 'model.pkl')
    label_mapping_path = os.path.join(label_mappings_base_path, lang, 'label_mapping', 'label_mapping.pkl')
    test_dataset_path = os.path.join(test_datasets_base_path, f'mbert_{lang}_test_dataset.pkl')

    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only load pickles that you produced yourself and that nobody else can modify.

    # Load the model
    print(f'Loading model from {model_path}')
    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    # Load the label mapping.
    # NOTE: `label_to_index` must stay a notebook-level name: compute_metrics
    # reads it as a global to turn label indices back into label strings.
    print(f'Loading label mapping from {label_mapping_path}')
    with open(label_mapping_path, 'rb') as f:
        label_to_index = pickle.load(f)

    # Load the test dataset
    print(f'Loading test dataset from {test_dataset_path}')
    with open(test_dataset_path, 'rb') as f:
        test_dataset = pickle.load(f)

    # Evaluation-only trainer: no training data is supplied
    trainer = Trainer(
        model=model,
        args=TrainingArguments(
            output_dir='/content/drive/MyDrive/results',
            do_train=False,
            do_eval=True,
            per_device_eval_batch_size=16,
        ),
        train_dataset=None,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
    )

    # Get the predictions and label IDs for the test dataset.
    # The third element (the metrics) is bound to a descriptive name instead of
    # `_`, which IPython uses for the last cell output; it is not persisted.
    print('Evaluating model on test dataset')
    predictions, label_ids, test_metrics = trainer.predict(test_dataset)

    # Define the path to the prediction file for this language
    prediction_file_path = os.path.join(predictions_base_path, f'mbert_{lang}_predictions.pkl')

    # Store the predictions and label IDs in a file using the pickle library
    print(f'Storing predictions and label IDs in {prediction_file_path}')
    with open(prediction_file_path, 'wb') as f:
        pickle.dump((predictions, label_ids), f)


Processing language: EN-English
Loading model from /content/drive/MyDrive/mbert_fin_models/EN-English/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/EN-English/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_EN-English_test_dataset.pkl
Evaluating model on test dataset


              precision    recall  f1-score   support

          CW       0.89      0.89      0.89     62126
       Group       0.89      0.88      0.89     60026
    Location       0.93      0.93      0.93     67901
     Medical       0.84      0.85      0.84     22491
      Person       0.97      0.97      0.97    137681
     Product       0.79      0.76      0.77     27580

   micro avg       0.92      0.91      0.91    377805
   macro avg       0.88      0.88      0.88    377805
weighted avg       0.91      0.91      0.91    377805

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_EN-English_predictions.pkl
Processing language: ES-Spanish
Loading model from /content/drive/MyDrive/mbert_fin_models/ES-Spanish/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/ES-Spanish/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_ES-Spanish_test_dataset.pkl
Evaluating 

              precision    recall  f1-score   support

          CW       0.90      0.91      0.90     55460
       Group       0.92      0.91      0.91     50259
    Location       0.93      0.92      0.92     72996
     Medical       0.85      0.88      0.86     23812
      Person       0.97      0.97      0.97    125379
     Product       0.82      0.81      0.82     28469

   micro avg       0.92      0.92      0.92    356375
   macro avg       0.90      0.90      0.90    356375
weighted avg       0.92      0.92      0.92    356375

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_ES-Spanish_predictions.pkl
Processing language: HI-Hindi
Loading model from /content/drive/MyDrive/mbert_fin_models/HI-Hindi/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/HI-Hindi/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_HI-Hindi_test_dataset.pkl
Evaluating model on

              precision    recall  f1-score   support

          CW       0.80      0.80      0.80      2804
       Group       0.89      0.89      0.89      3897
    Location       0.92      0.93      0.92      7172
     Medical       0.85      0.86      0.85      1979
      Person       0.91      0.91      0.91      5736
     Product       0.72      0.70      0.71      1611

   micro avg       0.88      0.88      0.88     23199
   macro avg       0.85      0.85      0.85     23199
weighted avg       0.88      0.88      0.88     23199

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_HI-Hindi_predictions.pkl
Processing language: ZH-Chinese
Loading model from /content/drive/MyDrive/mbert_fin_models/ZH-Chinese/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/ZH-Chinese/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_ZH-Chinese_test_dataset.pkl
Evaluating mo

              precision    recall  f1-score   support

          CW       0.81      0.84      0.82      4369
       Group       0.84      0.85      0.85      4715
    Location       0.89      0.91      0.90      6170
     Medical       0.79      0.80      0.79      1781
      Person       0.93      0.94      0.94      9095
     Product       0.69      0.71      0.70      1869

   micro avg       0.86      0.88      0.87     27999
   macro avg       0.82      0.84      0.83     27999
weighted avg       0.86      0.88      0.87     27999

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_ZH-Chinese_predictions.pkl
Processing language: SV-Swedish
Loading model from /content/drive/MyDrive/mbert_fin_models/SV-Swedish/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/SV-Swedish/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_SV-Swedish_test_dataset.pkl
Evaluating 

              precision    recall  f1-score   support

          CW       0.91      0.91      0.91     54806
       Group       0.94      0.93      0.93     46929
    Location       0.97      0.97      0.97    111879
     Medical       0.90      0.90      0.90     13702
      Person       0.98      0.98      0.98    111157
     Product       0.88      0.87      0.87     22686

   micro avg       0.95      0.95      0.95    361159
   macro avg       0.93      0.93      0.93    361159
weighted avg       0.95      0.95      0.95    361159

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_SV-Swedish_predictions.pkl
Processing language: FA-Farsi
Loading model from /content/drive/MyDrive/mbert_fin_models/FA-Farsi/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/FA-Farsi/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_FA-Farsi_test_dataset.pkl
Evaluating model on

              precision    recall  f1-score   support

          CW       0.85      0.87      0.86     53034
       Group       0.84      0.86      0.85     38807
    Location       0.83      0.85      0.84     70907
     Medical       0.77      0.80      0.78     15287
      Person       0.89      0.91      0.90    115868
     Product       0.78      0.79      0.79     18212

   micro avg       0.85      0.87      0.86    312115
   macro avg       0.83      0.84      0.84    312115
weighted avg       0.85      0.87      0.86    312115

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_FA-Farsi_predictions.pkl
Processing language: FR-French
Loading model from /content/drive/MyDrive/mbert_fin_models/FR-French/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/FR-French/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_FR-French_test_dataset.pkl
Evaluating model 

              precision    recall  f1-score   support

          CW       0.90      0.90      0.90     84952
       Group       0.91      0.89      0.90     52989
    Location       0.92      0.91      0.91     73373
     Medical       0.85      0.86      0.85     17208
      Person       0.97      0.97      0.97    141401
     Product       0.83      0.81      0.82     28274

   micro avg       0.92      0.92      0.92    398197
   macro avg       0.90      0.89      0.89    398197
weighted avg       0.92      0.92      0.92    398197

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_FR-French_predictions.pkl
Processing language: IT-Italian
Loading model from /content/drive/MyDrive/mbert_fin_models/IT-Italian/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/IT-Italian/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_IT-Italian_test_dataset.pkl
Evaluating m

              precision    recall  f1-score   support

          CW       0.94      0.95      0.95     79873
       Group       0.93      0.92      0.92     46271
    Location       0.93      0.93      0.93     68564
     Medical       0.86      0.88      0.87     19029
      Person       0.98      0.98      0.98    160598
     Product       0.84      0.83      0.83     22887

   micro avg       0.94      0.94      0.94    397222
   macro avg       0.91      0.91      0.91    397222
weighted avg       0.94      0.94      0.94    397222

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_IT-Italian_predictions.pkl
Processing language: PT-Portuguese
Loading model from /content/drive/MyDrive/mbert_fin_models/PT-Portuguese/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/PT-Portuguese/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_PT-Portuguese_test_dataset.pkl

              precision    recall  f1-score   support

          CW       0.92      0.91      0.91     58246
       Group       0.93      0.92      0.92     48994
    Location       0.94      0.93      0.94     70923
     Medical       0.85      0.87      0.86     21062
      Person       0.97      0.97      0.97    120413
     Product       0.85      0.84      0.85     21115

   micro avg       0.94      0.93      0.93    340753
   macro avg       0.91      0.91      0.91    340753
weighted avg       0.94      0.93      0.93    340753

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_PT-Portuguese_predictions.pkl
Processing language: UK-Ukrainian
Loading model from /content/drive/MyDrive/mbert_fin_models/UK-Ukrainian/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/UK-Ukrainian/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_UK-Ukrainian_test_dataset.pkl


              precision    recall  f1-score   support

          CW       0.88      0.85      0.86     43291
       Group       0.92      0.91      0.91     39709
    Location       0.94      0.93      0.93     84643
     Medical       0.88      0.89      0.89     20796
      Person       0.97      0.97      0.97     96864
     Product       0.85      0.84      0.85     30071

   micro avg       0.93      0.92      0.92    315374
   macro avg       0.91      0.90      0.90    315374
weighted avg       0.93      0.92      0.92    315374

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_UK-Ukrainian_predictions.pkl
Processing language: DE-German
Loading model from /content/drive/MyDrive/mbert_fin_models/DE-German/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/DE-German/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_DE-German_test_dataset.pkl
Evaluating mo

              precision    recall  f1-score   support

          CW       0.86      0.87      0.87      4777
       Group       0.86      0.87      0.87      4418
    Location       0.92      0.92      0.92      5306
     Medical       0.83      0.87      0.85      1434
      Person       0.94      0.95      0.95     11299
     Product       0.77      0.73      0.75      1643

   micro avg       0.90      0.90      0.90     28877
   macro avg       0.87      0.87      0.87     28877
weighted avg       0.90      0.90      0.90     28877

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_DE-German_predictions.pkl
Processing language: BN-Bangla
Loading model from /content/drive/MyDrive/mbert_fin_models/BN-Bangla/model/model.pkl
Loading label mapping from /content/drive/MyDrive/mbert_fin_models/BN-Bangla/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/mbert/mbert_BN-Bangla_test_dataset.pkl
Evaluating model

              precision    recall  f1-score   support

          CW       0.79      0.79      0.79      3640
       Group       0.89      0.87      0.88      3651
    Location       0.92      0.93      0.93      7375
     Medical       0.80      0.83      0.82      1919
      Person       0.92      0.92      0.92      6935
     Product       0.66      0.64      0.65      1493

   micro avg       0.87      0.87      0.87     25013
   macro avg       0.83      0.83      0.83     25013
weighted avg       0.87      0.87      0.87     25013

Storing predictions and label IDs in /content/drive/MyDrive/predictions/mbert_BN-Bangla_predictions.pkl


In [None]:
# Evaluate the fine-tuned RemBERT checkpoints. For every language this cell loads the pickled
# model, label mapping and tokenized test set from Google Drive, runs prediction through a
# Hugging Face Trainer, and pickles the (predictions, label_ids) pair to the predictions directory.

# Base paths to the models, label mappings, test datasets, and prediction files on Google Drive
models_base_path = '/content/drive/MyDrive/RemBert_fin_models'
label_mappings_base_path = '/content/drive/MyDrive/RemBert_fin_models'
test_datasets_base_path = '/content/drive/MyDrive/tokenized_test_datasets/rembert'
predictions_base_path = '/content/drive/MyDrive/predictions'

# Create the predictions directory on first use. exist_ok=True keeps this safe if the
# directory appears between the check and the creation (e.g. another session on the same Drive).
if not os.path.exists(predictions_base_path):
    os.makedirs(predictions_base_path, exist_ok=True)
    print('Prediction directory created')

# These dictionaries are reset here but never populated by this cell.
# NOTE(review): kept in case other cells read them; remove once confirmed unused.
models = {}
label_mappings = {}
test_datasets = {}

# Languages for which saved models, label mappings, and test datasets exist
languages = ['EN-English', 'ES-Spanish', 'HI-Hindi', 'ZH-Chinese', 'SV-Swedish', 'FA-Farsi', 'FR-French', 'IT-Italian', 'PT-Portuguese', 'UK-Ukrainian', 'DE-German', 'BN-Bangla']

# The evaluation settings are identical for every language, so build them once
# instead of on every loop iteration.
eval_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/results',
    do_train=False,
    do_eval=True,
    per_device_eval_batch_size=8,
)

# Load each language's artifacts, run prediction on its test dataset,
# and store the predictions and label IDs.
for lang in languages:
    print(f'Processing language: {lang}')

    # Drop the references to the previous language's model and trainer before unpickling
    # the next model, so the old weights can be reclaimed rather than sitting next to the
    # new ones. The objects from the last language remain available after the loop.
    model = None
    trainer = None
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # Paths to the saved model, label mapping, and test dataset for this language.
    # The test-dataset prefix ('ReMBERT_') and the prediction prefix ('RemBert_') differ in
    # casing; they are file names on disk and must not be normalized.
    model_path = os.path.join(models_base_path, lang, 'model', 'model.pkl')
    label_mapping_path = os.path.join(label_mappings_base_path, lang, 'label_mapping', 'label_mapping.pkl')
    test_dataset_path = os.path.join(test_datasets_base_path, f'ReMBERT_{lang}_test_dataset.pkl')

    # SECURITY: pickle.load can execute arbitrary code contained in the file.
    # Only load pickles that were produced by this project.
    print(f'Loading model from {model_path}')
    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    # label_to_index is not used directly in this cell.
    # NOTE(review): compute_metrics (defined in another cell) presumably reads it as a
    # global; confirm before renaming or removing it.
    print(f'Loading label mapping from {label_mapping_path}')
    with open(label_mapping_path, 'rb') as f:
        label_to_index = pickle.load(f)

    print(f'Loading test dataset from {test_dataset_path}')
    with open(test_dataset_path, 'rb') as f:
        test_dataset = pickle.load(f)

    # Evaluation-only trainer: no training data, metrics computed by compute_metrics.
    trainer = Trainer(
        model=model,
        args=eval_args,
        train_dataset=None,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
    )

    # Get the predictions and label IDs for the test dataset. The named fields are read
    # instead of unpacking into `_`, which would overwrite IPython's last-output variable.
    print('Evaluating model on test dataset')
    prediction_output = trainer.predict(test_dataset)
    predictions = prediction_output.predictions
    label_ids = prediction_output.label_ids

    # Path to the prediction file for this language
    prediction_file_path = os.path.join(predictions_base_path, f'RemBert_{lang}_predictions.pkl')

    # Store the predictions and label IDs as a single pickled tuple
    print(f'Storing predictions and label IDs in {prediction_file_path}')
    with open(prediction_file_path, 'wb') as f:
        pickle.dump((predictions, label_ids), f)


Processing language: EN-English
Loading model from /content/drive/MyDrive/RemBert_fin_models/EN-English/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/EN-English/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_EN-English_test_dataset.pkl
Evaluating model on test dataset


              precision    recall  f1-score   support

          CW       0.93      0.93      0.93     62126
       Group       0.93      0.93      0.93     60026
    Location       0.95      0.95      0.95     67901
     Medical       0.91      0.90      0.90     22491
      Person       0.98      0.98      0.98    137681
     Product       0.87      0.84      0.86     27580

   micro avg       0.95      0.94      0.95    377805
   macro avg       0.93      0.92      0.93    377805
weighted avg       0.95      0.94      0.95    377805

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_EN-English_predictions.pkl
Processing language: ES-Spanish
Loading model from /content/drive/MyDrive/RemBert_fin_models/ES-Spanish/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/ES-Spanish/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_ES-Spanish_test_dataset.pkl
E

              precision    recall  f1-score   support

          CW       0.95      0.94      0.94     55460
       Group       0.96      0.95      0.96     50259
    Location       0.96      0.95      0.95     72996
     Medical       0.94      0.93      0.93     23812
      Person       0.99      0.98      0.98    125379
     Product       0.92      0.90      0.91     28469

   micro avg       0.96      0.95      0.96    356375
   macro avg       0.95      0.94      0.95    356375
weighted avg       0.96      0.95      0.96    356375

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_ES-Spanish_predictions.pkl
Processing language: HI-Hindi
Loading model from /content/drive/MyDrive/RemBert_fin_models/HI-Hindi/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/HI-Hindi/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_HI-Hindi_test_dataset.pkl
Evaluatin

              precision    recall  f1-score   support

          CW       0.99      0.99      0.99      2804
       Group       0.99      0.99      0.99      3897
    Location       0.99      0.99      0.99      7172
     Medical       1.00      1.00      1.00      1979
      Person       0.99      0.99      0.99      5736
     Product       0.99      0.99      0.99      1611

   micro avg       0.99      0.99      0.99     23199
   macro avg       0.99      0.99      0.99     23199
weighted avg       0.99      0.99      0.99     23199

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_HI-Hindi_predictions.pkl
Processing language: ZH-Chinese
Loading model from /content/drive/MyDrive/RemBert_fin_models/ZH-Chinese/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/ZH-Chinese/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_ZH-Chinese_test_dataset.pkl
Eva

              precision    recall  f1-score   support

          CW       0.94      0.95      0.94      4369
       Group       0.94      0.95      0.94      4715
    Location       0.95      0.97      0.96      6170
     Medical       0.96      0.95      0.96      1781
      Person       0.96      0.97      0.97      9095
     Product       0.92      0.93      0.92      1869

   micro avg       0.95      0.96      0.95     27999
   macro avg       0.94      0.95      0.95     27999
weighted avg       0.95      0.96      0.95     27999

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_ZH-Chinese_predictions.pkl
Processing language: SV-Swedish
Loading model from /content/drive/MyDrive/RemBert_fin_models/SV-Swedish/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/SV-Swedish/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_SV-Swedish_test_dataset.pkl
E

              precision    recall  f1-score   support

          CW       0.95      0.93      0.94     54806
       Group       0.97      0.96      0.97     46929
    Location       0.98      0.98      0.98    111879
     Medical       0.96      0.95      0.96     13702
      Person       0.99      0.99      0.99    111157
     Product       0.95      0.94      0.94     22686

   micro avg       0.97      0.97      0.97    361159
   macro avg       0.97      0.96      0.96    361159
weighted avg       0.97      0.97      0.97    361159

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_SV-Swedish_predictions.pkl
Processing language: FA-Farsi
Loading model from /content/drive/MyDrive/RemBert_fin_models/FA-Farsi/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/FA-Farsi/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_FA-Farsi_test_dataset.pkl
Evaluatin

              precision    recall  f1-score   support

          CW       0.92      0.92      0.92     53034
       Group       0.92      0.91      0.91     38807
    Location       0.90      0.89      0.90     70907
     Medical       0.89      0.89      0.89     15287
      Person       0.93      0.94      0.93    115868
     Product       0.90      0.89      0.89     18212

   micro avg       0.92      0.92      0.92    312115
   macro avg       0.91      0.91      0.91    312115
weighted avg       0.92      0.92      0.92    312115

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_FA-Farsi_predictions.pkl
Processing language: FR-French
Loading model from /content/drive/MyDrive/RemBert_fin_models/FR-French/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/FR-French/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_FR-French_test_dataset.pkl
Evaluat

              precision    recall  f1-score   support

          CW       0.93      0.92      0.92     84952
       Group       0.94      0.94      0.94     52989
    Location       0.95      0.93      0.94     73373
     Medical       0.92      0.91      0.92     17208
      Person       0.98      0.98      0.98    141401
     Product       0.90      0.88      0.89     28274

   micro avg       0.95      0.94      0.95    398197
   macro avg       0.94      0.93      0.93    398197
weighted avg       0.95      0.94      0.95    398197

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_FR-French_predictions.pkl
Processing language: IT-Italian
Loading model from /content/drive/MyDrive/RemBert_fin_models/IT-Italian/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/IT-Italian/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_IT-Italian_test_dataset.pkl
Ev

              precision    recall  f1-score   support

          CW       0.97      0.97      0.97     79873
       Group       0.96      0.95      0.96     46271
    Location       0.96      0.95      0.95     68564
     Medical       0.93      0.93      0.93     19029
      Person       0.99      0.99      0.99    160598
     Product       0.90      0.89      0.90     22887

   micro avg       0.97      0.97      0.97    397222
   macro avg       0.95      0.95      0.95    397222
weighted avg       0.97      0.97      0.97    397222

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_IT-Italian_predictions.pkl
Processing language: PT-Portuguese
Loading model from /content/drive/MyDrive/RemBert_fin_models/PT-Portuguese/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/PT-Portuguese/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_PT-Portuguese_test_d

              precision    recall  f1-score   support

          CW       0.95      0.94      0.95     58246
       Group       0.96      0.95      0.96     48994
    Location       0.96      0.95      0.96     70923
     Medical       0.92      0.92      0.92     21062
      Person       0.99      0.98      0.98    120413
     Product       0.92      0.90      0.91     21115

   micro avg       0.96      0.96      0.96    340753
   macro avg       0.95      0.94      0.95    340753
weighted avg       0.96      0.96      0.96    340753

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_PT-Portuguese_predictions.pkl
Processing language: UK-Ukrainian
Loading model from /content/drive/MyDrive/RemBert_fin_models/UK-Ukrainian/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/UK-Ukrainian/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_UK-Ukrainian_test_da

              precision    recall  f1-score   support

          CW       0.94      0.91      0.92     43291
       Group       0.96      0.95      0.96     39709
    Location       0.97      0.96      0.96     84643
     Medical       0.95      0.94      0.95     20796
      Person       0.98      0.98      0.98     96864
     Product       0.94      0.91      0.92     30071

   micro avg       0.96      0.95      0.96    315374
   macro avg       0.96      0.94      0.95    315374
weighted avg       0.96      0.95      0.96    315374

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_UK-Ukrainian_predictions.pkl
Processing language: DE-German
Loading model from /content/drive/MyDrive/RemBert_fin_models/DE-German/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/DE-German/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_DE-German_test_dataset.pkl
Eva

              precision    recall  f1-score   support

          CW       0.98      0.98      0.98      4777
       Group       0.98      0.98      0.98      4418
    Location       0.99      0.99      0.99      5306
     Medical       0.99      0.99      0.99      1434
      Person       0.99      0.99      0.99     11299
     Product       0.98      0.98      0.98      1643

   micro avg       0.98      0.99      0.99     28877
   macro avg       0.98      0.98      0.98     28877
weighted avg       0.98      0.99      0.99     28877

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_DE-German_predictions.pkl
Processing language: BN-Bangla
Loading model from /content/drive/MyDrive/RemBert_fin_models/BN-Bangla/model/model.pkl
Loading label mapping from /content/drive/MyDrive/RemBert_fin_models/BN-Bangla/label_mapping/label_mapping.pkl
Loading test dataset from /content/drive/MyDrive/tokenized_test_datasets/rembert/ReMBERT_BN-Bangla_test_dataset.pkl
Evalua

              precision    recall  f1-score   support

          CW       0.98      0.98      0.98      3640
       Group       0.99      0.99      0.99      3651
    Location       0.99      0.99      0.99      7375
     Medical       0.99      0.99      0.99      1919
      Person       0.99      0.99      0.99      6935
     Product       0.97      0.97      0.97      1493

   micro avg       0.99      0.99      0.99     25013
   macro avg       0.99      0.98      0.98     25013
weighted avg       0.99      0.99      0.99     25013

Storing predictions and label IDs in /content/drive/MyDrive/predictions/RemBert_BN-Bangla_predictions.pkl
