Finetune IndicBERT model using WikiAnn dataset for Kannada

#### Import dataset and libraries

In [1]:
!pip3 install datasets
!pip3 install tokenizers
!pip3 install transformers[sentencepiece]
#!pip3 install wandb
!pip3 install seqeval
from torch.utils.data import DataLoader

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.3.2-py3-none-any.whl (362 kB)
[K     |████████████████████████████████| 362 kB 22.3 MB/s 
Collecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 61.4 MB/s 
Collecting xxhash
  Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 76.0 MB/s 
[?25hCollecting fsspec[http]>=2021.05.0
  Downloading fsspec-2022.5.0-py3-none-any.whl (140 kB)
[K     |████████████████████████████████| 140 kB 66.1 MB/s 
Collecting huggingface-hub<1.0.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 14.8 MB/s 
[?25hCollecting responses<0.19
  Downloading responses-

In [2]:
from datasets import load_dataset

dataset = load_dataset("wikiann", "kn")

Downloading builder script:   0%|          | 0.00/3.94k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/12.6k [00:00<?, ?B/s]

Downloading and preparing dataset wikiann/kn (download: 223.17 MiB, generated: 101.05 KiB, post-processed: Unknown size, total: 223.27 MiB) to /root/.cache/huggingface/datasets/wikiann/kn/1.1.0/4bfd4fe4468ab78bb6e096968f61fab7a888f44f9d3371c2f3fea7e74a5a354e...


Downloading data:   0%|          | 0.00/234M [00:00<?, ?B/s]

Generating validation split:   0%|          | 0/100 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/100 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/100 [00:00<?, ? examples/s]

Dataset wikiann downloaded and prepared to /root/.cache/huggingface/datasets/wikiann/kn/1.1.0/4bfd4fe4468ab78bb6e096968f61fab7a888f44f9d3371c2f3fea7e74a5a354e. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
labels = dataset["train"].features["ner_tags"].feature.names
labels

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']

In [4]:
from transformers import AutoTokenizer, AutoConfig

config = AutoConfig.from_pretrained("ai4bharat/indic-bert")
label2id = {"O": 0,
"B-PER": 1,
"I-PER": 2,
"B-ORG": 3,
"I-ORG": 4,
"B-LOC": 5,
"I-LOC": 6
}
id2label = {y:x for x,y in label2id.items()}
config.id2label = id2label
config.label2id = label2id
config.num_labels = len(id2label.keys())
tokenizer = AutoTokenizer.from_pretrained('ai4bharat/indic-bert', keep_accents=True)
tokenizer.config = config

Downloading:   0%|          | 0.00/507 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.38M [00:00<?, ?B/s]

#### Preprocessing

In [5]:
#Get the values for input_ids, token_type_ids, attention_mask
def tokenize_adjust_labels(all_samples_per_split):
  tokenized_samples = tokenizer.batch_encode_plus(all_samples_per_split["tokens"], is_split_into_words=True, truncation=True)  
  total_adjusted_labels = []

  for k, label in enumerate(all_samples_per_split[f"ner_tags"]):
    prev_wid = None
    word_ids_list = tokenized_samples.word_ids(batch_index=k)
    #existing_label_ids = all_samples_per_split["ner_tags"][k]    #i = None
    label_ids = []

    for wid in word_ids_list:
      if(wid is None): #special tokens
        label_ids.append(-100)
      elif(wid!=prev_wid): #single word in a single token
        label_ids.append(label[wid])
      else: #word split in different tokens
        label_ids.append(-100)
      prev_wid = wid
    total_adjusted_labels.append(label_ids)
  tokenized_samples["labels"] = total_adjusted_labels
  return tokenized_samples

tokenized_dataset = dataset.map(tokenize_adjust_labels, batched=True, remove_columns=['tokens', 'ner_tags', 'langs', 'spans'])



  0%|          | 0/1 [00:00<?, ?ba/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [6]:
from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(tokenizer)
data_collator

DataCollatorForTokenClassification(tokenizer=PreTrainedTokenizerFast(name_or_path='ai4bharat/indic-bert', vocab_size=200000, model_max_len=1000000000000000019884624838656, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '[CLS]', 'eos_token': '[SEP]', 'unk_token': '<unk>', 'sep_token': '[SEP]', 'pad_token': '<pad>', 'cls_token': '[CLS]', 'mask_token': AddedToken("[MASK]", rstrip=False, lstrip=True, single_word=False, normalized=False)}), padding=True, max_length=None, pad_to_multiple_of=None, label_pad_token_id=-100, return_tensors='pt')

#### Finetuning

In [7]:
import torch
from transformers import AutoModelForTokenClassification, AdamW

In [8]:
#check for gpu
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [9]:
from transformers import TrainingArguments, Trainer
import numpy as np
from datasets import load_metric
metric = load_metric("seqeval")
def compute_metrics(p):
    prediction, label = p
    prediction = np.argmax(prediction, axis=-1) #axis=2

    # Remove ignored index (special tokens)
    true_predictions = [
        [labels[p] for (p, l) in zip(pred, lab) if l != -100]
        for pred, lab in zip(prediction, label)
    ]
  
    true_labels = [
                   [labels[l] for l in lab if l!=-100]
                   for lab in label
    ]
    results = metric.compute(predictions=true_predictions, references=true_labels)
    
    flattened_results = {
        "overall_precision": results["overall_precision"],
        "overall_recall": results["overall_recall"],
        "overall_f1": results["overall_f1"],
        "overall_accuracy": results["overall_accuracy"],}
    return flattened_results

Downloading builder script:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

In [10]:
fine_tuned_model = './output_indic_bert/'
model = AutoModelForTokenClassification.from_pretrained("ai4bharat/indic-bert", num_labels=len(labels))
model.config = config
model.to(device)

Downloading:   0%|          | 0.00/129M [00:00<?, ?B/s]

Some weights of the model checkpoint at ai4bharat/indic-bert were not used when initializing AlbertForTokenClassification: ['predictions.LayerNorm.bias', 'predictions.dense.weight', 'predictions.decoder.bias', 'predictions.dense.bias', 'predictions.decoder.weight', 'predictions.LayerNorm.weight', 'predictions.bias', 'sop_classifier.classifier.bias', 'sop_classifier.classifier.weight']
- This IS expected if you are initializing AlbertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForTokenClassification were not initialized from the model checkpoint at ai4bharat/indic-bert and a

AlbertForTokenClassification(
  (albert): AlbertModel(
    (embeddings): AlbertEmbeddings(
      (word_embeddings): Embedding(200000, 128, padding_idx=0)
      (position_embeddings): Embedding(512, 128)
      (token_type_embeddings): Embedding(2, 128)
      (LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0, inplace=False)
    )
    (encoder): AlbertTransformer(
      (embedding_hidden_mapping_in): Linear(in_features=128, out_features=768, bias=True)
      (albert_layer_groups): ModuleList(
        (0): AlbertLayerGroup(
          (albert_layers): ModuleList(
            (0): AlbertLayer(
              (full_layer_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (attention): AlbertAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, b

In [11]:
#reuse saved model
#model = AutoModelForTokenClassification.from_pretrained(fine_tuned_model, num_labels=len(labels))
#model.to(device)

In [12]:
training_args = TrainingArguments(
    output_dir='./output_indic_bert/', 
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16, 
    evaluation_strategy="epoch",
    num_train_epochs=100,
    save_strategy="no",
    report_to="none"
    )

In [13]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics)

trainer.train()
#wandb.finish()

***** Running training *****
  Num examples = 100
  Num Epochs = 100
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 700


Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,No log,1.033587,0.0,0.0,0.0,0.728834
2,No log,0.968691,0.0,0.0,0.0,0.728834
3,No log,0.875115,0.0,0.0,0.0,0.728834
4,No log,0.818974,0.0,0.0,0.0,0.730061
5,No log,0.752941,0.228571,0.071429,0.108844,0.776687
6,No log,0.710159,0.282609,0.116071,0.164557,0.795092
7,No log,0.706265,0.327273,0.160714,0.215569,0.798773
8,No log,0.667233,0.337838,0.223214,0.268817,0.815951
9,No log,0.667827,0.275641,0.383929,0.320896,0.793865
10,No log,0.706667,0.302632,0.205357,0.244681,0.808589


***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 100
  Batch size = 16
***** Running Evaluation *****
  Num examples = 10

TrainOutput(global_step=700, training_loss=0.0914642448084695, metrics={'train_runtime': 86.7643, 'train_samples_per_second': 115.255, 'train_steps_per_second': 8.068, 'total_flos': 14153104502088.0, 'train_loss': 0.0914642448084695, 'epoch': 100.0})

In [14]:
#fine_tuned_model = './output_indic_bert/'
tokenizer.save_pretrained(fine_tuned_model)
model.save_pretrained(fine_tuned_model)

tokenizer config file saved in ./output_indic_bert/tokenizer_config.json
Special tokens file saved in ./output_indic_bert/special_tokens_map.json
Configuration saved in ./output_indic_bert/config.json
Model weights saved in ./output_indic_bert/pytorch_model.bin


In [15]:
trainer.evaluate()

***** Running Evaluation *****
  Num examples = 100
  Batch size = 16


{'epoch': 100.0,
 'eval_loss': 1.0944163799285889,
 'eval_overall_accuracy': 0.8184049079754602,
 'eval_overall_f1': 0.4173913043478261,
 'eval_overall_precision': 0.4067796610169492,
 'eval_overall_recall': 0.42857142857142855,
 'eval_runtime': 0.3308,
 'eval_samples_per_second': 302.333,
 'eval_steps_per_second': 21.163}

In [16]:
prediction, label, _ = trainer.predict(tokenized_dataset["test"])
prediction = np.argmax(prediction, axis=-1)

true_predictions = [
        [labels[p] for (p, l) in zip(pred, lab) if l != -100]
        for pred, lab in zip(prediction, label)
    ]
true_labels = [
                   [labels[l] for l in lab if l!=-100]
                   for lab in label
    ]
results = metric.compute(predictions=true_predictions, references=true_labels)
results

***** Running Prediction *****
  Num examples = 100
  Batch size = 16


{'LOC': {'f1': 0.46017699115044247,
  'number': 47,
  'precision': 0.3939393939393939,
  'recall': 0.5531914893617021},
 'ORG': {'f1': 0.375, 'number': 32, 'precision': 0.375, 'recall': 0.375},
 'PER': {'f1': 0.41666666666666663,
  'number': 33,
  'precision': 0.38461538461538464,
  'recall': 0.45454545454545453},
 'overall_accuracy': 0.7959459459459459,
 'overall_f1': 0.42570281124497994,
 'overall_precision': 0.38686131386861317,
 'overall_recall': 0.4732142857142857}

### Huggingface pipeline

In [17]:
from transformers import pipeline
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_model)
model = AutoModelForTokenClassification.from_pretrained(fine_tuned_model)

nlp = pipeline("ner", model=model, tokenizer=tokenizer)

Didn't find file ./output_indic_bert/added_tokens.json. We won't load it.
loading file ./output_indic_bert/spiece.model
loading file ./output_indic_bert/tokenizer.json
loading file None
loading file ./output_indic_bert/special_tokens_map.json
loading file ./output_indic_bert/tokenizer_config.json
loading configuration file ./output_indic_bert/config.json
Model config AlbertConfig {
  "_name_or_path": "./output_indic_bert/",
  "architectures": [
    "AlbertForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0,
  "bos_token_id": 2,
  "classifier_dropout_prob": 0.1,
  "down_scale_factor": 1,
  "embedding_size": 128,
  "eos_token_id": 3,
  "gap_size": 0,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0,
  "hidden_size": 768,
  "id2label": {
    "0": "O",
    "1": "B-PER",
    "2": "I-PER",
    "3": "B-ORG",
    "4": "I-ORG",
    "5": "B-LOC",
    "6": "I-LOC"
  },
  "initializer_range": 0.02,
  "inner_group_num": 1,
  "intermediate_size": 3072,
  "label2id": {
    "B-LOC": 5,


In [18]:
example = "ಭಾರತ ನನ್ನ ದೇಶ"
for entity in nlp(example):
  print(entity)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


{'entity': 'B-ORG', 'score': 0.6418692, 'index': 1, 'word': '▁ಭಾರತ', 'start': 0, 'end': 4}
{'entity': 'I-ORG', 'score': 0.97633547, 'index': 2, 'word': '▁ನನ್ನ', 'start': 4, 'end': 9}
{'entity': 'I-ORG', 'score': 0.9838991, 'index': 3, 'word': '▁ದೇಶ', 'start': 9, 'end': 13}


In [19]:
example = "ರಷ್ಯಾ ಮತ್ತು ಉಕ್ರೇನ್ ಯುದ್ಧದಲ್ಲಿದೆ"
for entity in nlp(example):
  print(entity)

{'entity': 'B-LOC', 'score': 0.9109797, 'index': 1, 'word': '▁ರಷ್ಯಾ', 'start': 0, 'end': 5}
{'entity': 'I-ORG', 'score': 0.7892665, 'index': 2, 'word': '▁ಮತ್ತು', 'start': 5, 'end': 11}
{'entity': 'I-ORG', 'score': 0.7633666, 'index': 3, 'word': '▁ಉಕ್ರೇನ್', 'start': 11, 'end': 19}
{'entity': 'I-ORG', 'score': 0.9295051, 'index': 4, 'word': '▁ಯುದ್ಧದಲ್ಲಿ', 'start': 19, 'end': 30}
{'entity': 'I-ORG', 'score': 0.92649204, 'index': 5, 'word': 'ದೆ', 'start': 30, 'end': 32}


In [20]:
example = " ಎಲ್ಲರಿಗು ನಮಸ್ತೆ ಇದು ನಮ್ಮ ಹಳ್ಳಿ ರೇಡಿಯೋ ನಿಮ್ಮ ಜೊತೆ ನಾನು ಅನು ಕೊರೋನಾ ವಿರುದ್ಧ ಹೋರಾಡುತ್ತಿರುವ ಸೇನೆಯಲ್ಲಿ ಸೇವೆ ಸಲ್ಲಿಸುತ್ತಿರುವ  ಕೊರೋನಾ ಸೈನಿಕಲು\
 ಇತ್ತೀಚಿಗೆ ಎಲ್ಲಿ ನೋಡಿದ್ರು ಕೋರೋಣ ಬಗ್ಗೆಯೇ ಗುಣಗಾನ ನಾಮೇಲರಿಗೂ ಅನಿಸಿರಬೊಹುದು ಏನಪ್ಪಾ ಇದು ಎಲ್ಲಿ ನೋಡಿದರೇನು ಕೊರೋನಾ ಕೊರೋನಾ ಅಂತ  ನೇ ಮಾತಾಡ್ತಾರೆ   "
for entity in nlp(example):
  print(entity)

{'entity': 'B-ORG', 'score': 0.9164086, 'index': 52, 'word': '▁ಕೊರ', 'start': 229, 'end': 233}


In [21]:
example = "ಎಲ್ಲರಿಗೂ ನಮಸ್ಕಾರ ನಮ್ಮ ಹಳ್ಳಿ ರೇಡಿಯೋ ವಾಹಿನಿ ಮುಖಾಂತರ ನಾನುಡಿ ನಾನು ಡಾಕ್ಟರ್ ಜೆ ಅಚ್ಚುತರಾ ಜನಾಭಿವೃದ್ಧಿ ಮಾಲಿಕೆಯಲ್ಲಿ ಕರೋನವೈರಸ್ ಕುರಿತು\
 ಮೂಲ ಮಾಹಿತಿಗಳನ್ನು ಪ್ರಸ್ತುತಪಡಿಸಲು ನಾವು ಪ್ರಯತ್ನ ಮಾಡ್ತಾಯಿದೀವಿ ನಂಜೊತೆಗೆ ಡಾಕ್ಟರ್ ಶೀಲ ಕರೆಯದಿದ್ದರೆ ಹಾಗೆನೇ "
for entity in nlp(example):
  print(entity)

{'entity': 'B-ORG', 'score': 0.9794591, 'index': 2, 'word': '▁ನಮಸ್ಕಾರ', 'start': 8, 'end': 16}
{'entity': 'I-ORG', 'score': 0.93944985, 'index': 3, 'word': '▁ನಮ್ಮ', 'start': 16, 'end': 21}
{'entity': 'I-ORG', 'score': 0.80985487, 'index': 4, 'word': '▁ಹಳ್ಳಿ', 'start': 21, 'end': 27}
{'entity': 'B-ORG', 'score': 0.97427326, 'index': 20, 'word': '▁ಕರ', 'start': 105, 'end': 108}
{'entity': 'I-ORG', 'score': 0.72641456, 'index': 21, 'word': 'ೋನ', 'start': 108, 'end': 110}


In [22]:
example = "ನನ್ನ ಹೆಸರು ದೇವರಾಜ್ ಅಂತ ತುರುವೇಕೆರೆ ತಾಲೂಕು ಸಂಘದ ಅಧ್ಯಕ್ಷರು ನಾನು ಈ ಕೊರೋನಾ ಬಗ್ಗೆ ಜನಗಳಿಗೆ ಮಾಹಿತಿ ನೆಡುವುದು ಏನು ಅಂತ ಅಂದರೆ \
 ಈಗ ಮಾಸ್ಕ ಎಲ್ಲಾ ಹಳ್ಳಿ ಕಡೆ ಹೋದರೆ ರಾಜಕಾರಣಿಗಳನ್ನ ಕೇಳುತ್ತಾರೆ ನಮಗೇನು ಸೌಲತ್ತು ಬರಲಿಲ್ಲ ನಮಗಿಂದ ಅಧಿಕಾರಿಗಳು ಎಲ್ಲಾ ಅಧಿಕಾರಿಗಳು ಏನು ಕೊಡುತ್ತ ಇಲ್ಲ \
 ಆಮೇಲೆ ರಾಜಕಾರಣಿಗಳು ಏನು ಕೊಡುತ್ತಿಲ್ಲ ಅಂತ"
for entity in nlp(example):
  print(entity)

{'entity': 'B-LOC', 'score': 0.9355152, 'index': 5, 'word': '▁ತು', 'start': 22, 'end': 25}
{'entity': 'I-LOC', 'score': 0.8075414, 'index': 7, 'word': 'ವೇ', 'start': 27, 'end': 29}
{'entity': 'I-LOC', 'score': 0.8902915, 'index': 8, 'word': 'ಕೆರೆ', 'start': 29, 'end': 33}


In [23]:
example = "ಚಿಕ್ಕನಾಯಕನಹಳ್ಳಿ ತಾಲೂಕಿನ ಹಾಗೂ ತುಮಕೂರು ಜಿಲ್ಲೆಯೆಲ್ಲಾ ಸಾರ್ವಜನಿಕ ಬಂಧುಗಳೇ ಇತ್ತೀಚಿನ ದಿನಗಳಲ್ಲಿ ಕೋವಿಡ್ 19 ಕರೋನ ವೈರಸ್ \
ಹರಡಿಕ್ಕೆ ಆರಂಭವಾಗಿದ್ದು ಈ ಬಗ್ಗೆ ಬಹಳ ರೀತಿಯ ತೊಂದರೆಗಳು ಮಾತುಕತೆಗಳು ತರದಿಕ್ಕೆ ಅನಿಸುತ್ತ ಇದ್ದೀರಿ ವೈರಸ್ಸು ಹೊರಡೋದು "
for entity in nlp(example):
  print(entity)

{'entity': 'B-LOC', 'score': 0.98895335, 'index': 6, 'word': '▁ತುಮಕೂರು', 'start': 28, 'end': 36}


In [24]:
example = "ಎಲ್ಲರಿಗೂ ನಮಸ್ಕಾರ ನನ್ ಹೆಸರು ಸಾಗರ್ ನಲ್ಲಿ ಪ್ರಭು ಅಂತ ಹೇಳಿ ಭಾರತೀಯ ರೆಡ್ ಕ್ರಾಸ್ ಸಂಸ್ಥೆ ತುಮಕೂರು ಶಾಖೆ ಸ್ಕಿಲ್ ದೆವಲಪ್ಮೆಂಟ್ ಚೇರ್ಮನ್ \
ಆಗಿ ಕೆಲಸ ಮಾಡ್ತಾ ಇದ್ದೀನಿ ಈಗ ಎಲ್ಲರೂ ಕರೋನ ಬಗ್ಗೆ ಮಾತಾಡ್ತಾರೆ ಇಡೀ ವಿಶ್ವ ಬಿಡಿ ಪ್ರಪಂಚ ಕರೋನ ಬಗ್ಗೆ ಮಾತಾಡುತ್ತಿದೆ "
for entity in nlp(example):
  print(entity)

{'entity': 'B-ORG', 'score': 0.9897398, 'index': 11, 'word': '▁ಭಾರತೀಯ', 'start': 53, 'end': 60}
{'entity': 'I-ORG', 'score': 0.985305, 'index': 12, 'word': '▁ರೆಡ್', 'start': 60, 'end': 65}
{'entity': 'I-ORG', 'score': 0.9839917, 'index': 13, 'word': '▁ಕ್ರಾಸ್', 'start': 65, 'end': 72}
{'entity': 'B-ORG', 'score': 0.6725203, 'index': 33, 'word': '▁ಕರ', 'start': 153, 'end': 156}
