In [None]:
!pip install transformers



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:


# Python version of the evaluation script from CoNLL'00 (conlleval).

# Intentional differences:
# - accept any space as delimiter by default
# - optional file argument (default STDIN)
# - option to set boundary (-b argument)
# - LaTeX output (-l argument) not supported
# - raw tags (-r argument) not supported

import sys
import re

from collections import defaultdict, namedtuple

ANY_SPACE = '<SPACE>'

class FormatError(Exception):
    """Raised by `evaluate` when an input line has an unexpected number of columns."""
    pass

Metrics = namedtuple('Metrics', 'tp fp fn prec rec fscore')

class EvalCounts:
    """Mutable tally of the counters filled in while scoring a tagged corpus.

    Scalar counters start at zero; the ``t_*`` attributes are per-type
    counters that default missing keys to zero.
    """

    def __init__(self):
        # Overall counters, in order:
        #   correct_chunk  - number of correctly identified chunks
        #   correct_tags   - number of correct chunk tags
        #   found_correct  - number of chunks in corpus
        #   found_guessed  - number of identified chunks
        #   token_counter  - token counter (ignores sentence breaks)
        for scalar_name in ('correct_chunk', 'correct_tags', 'found_correct',
                            'found_guessed', 'token_counter'):
            setattr(self, scalar_name, 0)

        # The same chunk counters, broken down by type.
        for typed_name in ('t_correct_chunk', 't_found_correct',
                           't_found_guessed'):
            setattr(self, typed_name, defaultdict(int))

def parse_args(argv):
    """Parse conlleval-style command-line options.

    Args:
        argv: list of argument strings (pass ``[]`` to obtain the defaults).

    Returns:
        The ``argparse.Namespace`` with ``boundary``, ``delimiter``, ``otag``
        and ``file`` attributes.
    """
    from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

    parser = ArgumentParser(
        description='evaluate tagging results using CoNLL criteria',
        formatter_class=ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('-b', '--boundary', metavar='STR', default='-X-',
                        help='sentence boundary')
    parser.add_argument('-d', '--delimiter', metavar='CHAR', default=ANY_SPACE,
                        help='character delimiting items in input')
    parser.add_argument('-o', '--otag', metavar='CHAR', default='O',
                        help='alternative outside tag')
    parser.add_argument('file', nargs='?', default=None)
    return parser.parse_args(argv)

def parse_tag(t):
    """Split a tag of the form ``PREFIX-TYPE`` at its first hyphen.

    Returns a ``(prefix, type)`` tuple; when the tag does not match the
    pattern the whole tag is the prefix and the type is the empty string.
    """
    matched = re.match(r'^([^-]*)-(.*)$', t)
    if not matched:
        return (t, '')
    return matched.groups()

# ... [rest of the script remains unchanged up to the evaluate function]

def evaluate(iterable, options=None):
    """Score tagged lines and return the accumulated `EvalCounts`.

    Each non-empty line is split into columns; the second-to-last column is
    taken as the guessed tag and the last column as the correct tag. Unlike
    span-based conlleval, this variant scores every token independently:
    any token whose tag differs from ``options.otag`` counts as one "chunk".

    Args:
        iterable: iterable of text lines (e.g. an open file).
        options: namespace with ``delimiter``, ``boundary`` and ``otag``
            attributes; defaults to ``parse_args([])``.

    Returns:
        EvalCounts with overall and per-tag counters filled in.

    Raises:
        FormatError: if a line has fewer than two columns, or a different
            number of columns than the first non-empty line.
    """
    if options is None:
        options = parse_args([])    # use defaults

    counts = EvalCounts()
    num_features = None       # number of features per line

    for line in iterable:
        line = line.rstrip('\r\n')

        if options.delimiter == ANY_SPACE:
            features = line.split()
        else:
            features = line.split(options.delimiter)

        # Only non-empty lines take part in the column-count check. Fixing
        # the expected width from a leading blank line (width 0) would make
        # every following data line fail the check.
        if features:
            if num_features is None:
                num_features = len(features)
            elif num_features != len(features):
                raise FormatError('unexpected number of features: %d (%d)' %
                                  (len(features), num_features))

        if not features or features[0] == options.boundary:
            continue
        if len(features) < 2:
            raise FormatError('unexpected number of features in line %s' % line)

        guessed, correct = features[-2], features[-1]  # Modify this line if the order is different

        # In this format a chunk is a single token: it exists whenever the
        # tag is not the outside tag.
        is_correct_chunk = correct != options.otag
        is_guessed_chunk = guessed != options.otag

        if is_correct_chunk:
            counts.found_correct += 1
            counts.t_found_correct[correct] += 1
        if is_guessed_chunk:
            counts.found_guessed += 1
            counts.t_found_guessed[guessed] += 1
        if correct == guessed:
            counts.correct_tags += 1
            if is_guessed_chunk:
                counts.correct_chunk += 1
                counts.t_correct_chunk[guessed] += 1

        counts.token_counter += 1

    return counts

# ... [rest of the script remains unchanged]


def uniq(iterable):
    """Return the items of `iterable` as a list, keeping only first occurrences."""
    seen = set()
    ordered = []
    for item in iterable:
        if item in seen:
            continue
        seen.add(item)
        ordered.append(item)
    return ordered

def calculate_metrics(correct, guessed, total):
    """Build a `Metrics` tuple (tp, fp, fn, precision, recall, F-score).

    Args:
        correct: number of correct predictions (true positives).
        guessed: number of predictions made.
        total: number of gold items.

    A ratio whose denominator is zero is reported as 0.
    """
    tp = correct
    fp = guessed - correct
    fn = total - correct

    if tp + fp == 0:
        p = 0
    else:
        p = 1. * tp / (tp + fp)

    if tp + fn == 0:
        r = 0
    else:
        r = 1. * tp / (tp + fn)

    if p + r == 0:
        f = 0
    else:
        f = 2 * p * r / (p + r)

    return Metrics(tp, fp, fn, p, r, f)

def metrics(counts):
    """Compute overall and per-type metrics from an `EvalCounts`.

    Returns:
        ``(overall, by_type)`` where ``overall`` is a `Metrics` tuple and
        ``by_type`` maps every type seen in the gold or guessed counters to
        its own `Metrics` tuple.
    """
    overall = calculate_metrics(
        counts.correct_chunk, counts.found_guessed, counts.found_correct
    )

    # Every type that occurs in either the gold or the guessed counters.
    list_keys = [*counts.t_found_correct.keys(), *counts.t_found_guessed.keys()]

    by_type = {}
    for chunk_type in set(list_keys):
        by_type[chunk_type] = calculate_metrics(
            counts.t_correct_chunk[chunk_type],
            counts.t_found_guessed[chunk_type],
            counts.t_found_correct[chunk_type],
        )
    return overall, by_type

def report(counts, out=None):
    """Write a conlleval-style report and return the F-scores.

    Args:
        counts: the `EvalCounts` to summarise.
        out: writable text stream; ``sys.stdout`` when omitted.

    Returns:
        ``(overall_fscore, results_arr)`` where ``overall_fscore`` is the
        overall F-score as a fraction and ``results_arr`` lists the per-type
        F-scores as percentages (in sorted type order) followed by the
        overall F-score as a percentage.
    """
    stream = sys.stdout if out is None else out

    overall, by_type = metrics(counts)

    stream.write('processed %d tokens with %d phrases; ' %
                 (counts.token_counter, counts.found_correct))
    stream.write('found: %d phrases; correct: %d.\n' %
                 (counts.found_guessed, counts.correct_chunk))

    # The overall line is only printed when at least one token was seen.
    if counts.token_counter > 0:
        token_accuracy = 100. * counts.correct_tags / counts.token_counter
        stream.write('accuracy: %6.2f%%; ' % token_accuracy)
        stream.write('precision: %6.2f%%; ' % (100. * overall.prec))
        stream.write('recall: %6.2f%%; ' % (100. * overall.rec))
        stream.write('FB1: %6.2f\n' % (100. * overall.fscore))

    results_arr = []
    for chunk_type, type_metrics in sorted(by_type.items()):
        type_f1 = 100. * type_metrics.fscore
        stream.write('%17s: ' % chunk_type)
        stream.write('precision: %6.2f%%; ' % (100. * type_metrics.prec))
        stream.write('recall: %6.2f%%; ' % (100. * type_metrics.rec))
        stream.write('FB1: %6.2f  %d\n' %
                     (type_f1, counts.t_found_guessed[chunk_type]))
        results_arr.append(type_f1)

    results_arr.append(100. * overall.fscore)
    return overall.fscore, results_arr

def end_of_chunk(prev_tag, tag, prev_type, type_):
    """Tell whether a chunk ended between the previous and the current word.

    Args:
        prev_tag, tag: previous and current chunk tags.
        prev_type, type_: previous and current chunk types.
    """
    # 'E' / 'S' always close a chunk; bracket chunks are assumed to have length 1.
    if prev_tag in ('E', 'S', ']', '['):
        return True

    # An open chunk ('B' / 'I') ends when a new one begins or we step outside.
    if prev_tag in ('B', 'I') and tag in ('B', 'S', 'O'):
        return True

    # A type change ends the chunk unless the previous word was outside one.
    if prev_tag not in ('O', '.') and prev_type != type_:
        return True

    return False

def start_of_chunk(prev_tag, tag, prev_type, type_):
    """Tell whether a chunk started between the previous and the current word.

    Args:
        prev_tag, tag: previous and current chunk tags.
        prev_type, type_: previous and current chunk types.
    """
    # 'B' / 'S' always open a chunk; bracket chunks are assumed to have length 1.
    if tag in ('B', 'S', '[', ']'):
        return True

    # An 'E' / 'I' tag with no open chunk before it starts a new chunk.
    if prev_tag in ('E', 'S', 'O') and tag in ('E', 'I'):
        return True

    # A type change starts a chunk unless the current word is outside one.
    if tag not in ('O', '.') and prev_type != type_:
        return True

    return False

def eval_f1score(file_):
    """Evaluate a tagged file, print the report and return its F-scores.

    Args:
        file_: path of a text file with one token per line whose last two
            columns are the guessed and the correct tag.

    Returns:
        ``(f1score, fscore_arr)`` exactly as returned by `report`.
    """
    # Read as UTF-8 explicitly (as read_dataset does) instead of relying on
    # the platform's default encoding, which may not decode non-ASCII tokens.
    with open(file_, encoding='utf-8') as f:
        counts = evaluate(f)
    f1score, fscore_arr = report(counts)
    print(fscore_arr)

    return f1score, fscore_arr

# def main():
#     print(eval_f1score('test.txt'))
# if __name__ == '__main__':
#     main()

In [None]:
from collections import namedtuple
import torch
import pandas as pd
import torch.nn as nn
import numpy as np
# from conlleval import eval_f1score
from tqdm import tqdm
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AutoTokenizer, AutoModel, BertPreTrainedModel, BertModel, AdamW

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.is_available()

True

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Paths for the dataset and the split files
dataset_path = '/content/drive/My Drive/mk-pucit.txt'
train_path = '/content/drive/My Drive/Dataset/train_dataset.txt'
val_path = '/content/drive/My Drive/Dataset/val_dataset.txt'
test_path = '/content/drive/My Drive/Dataset/test_dataset.txt'


def _read_sentences(path):
    """Return the file's sentences as lists of raw (non-blank) lines.

    Sentences are taken to be runs of non-blank lines separated by blank
    lines, the same convention `read_dataset` uses when loading the splits.
    """
    sentences, current = [], []
    with open(path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            line = raw_line.rstrip('\r\n')
            if line.strip():
                current.append(line)
            elif current:
                sentences.append(current)
                current = []
    if current:
        sentences.append(current)
    return sentences


def _write_sentences(path, sentences):
    """Write sentences back out, one line per token, blank line after each."""
    with open(path, 'w', encoding='utf-8') as handle:
        for sentence_lines in sentences:
            handle.write('\n'.join(sentence_lines))
            handle.write('\n\n')


# Split whole sentences rather than individual token rows. Loading the file
# with pandas drops the blank separator lines, and train_test_split shuffles
# its input, so a row-level split scatters the tokens of every sentence
# across the three files and erases the sentence boundaries.
all_sentences = _read_sentences(dataset_path)
if len(all_sentences) < 3:
    # NOTE(review): assumes sentences are separated by blank lines - confirm
    # against the raw dataset file.
    raise ValueError(
        'expected blank-line separated sentences in %s, found %d block(s)'
        % (dataset_path, len(all_sentences))
    )

# Split the dataset into training and temporary dataset
train_sentences, temp_sentences = train_test_split(
    all_sentences, test_size=0.3, random_state=42
)

# Split the temporary dataset into validation and test dataset
val_sentences, test_sentences = train_test_split(
    temp_sentences, test_size=0.5, random_state=42
)

# Save the datasets to Google Drive
_write_sentences(train_path, train_sentences)
_write_sentences(val_path, val_sentences)
_write_sentences(test_path, test_sentences)

In [None]:
PATH_TRAIN = '/content/drive/My Drive/Dataset/train_dataset.txt'
PATH_VAL = '/content/drive/My Drive/Dataset/val_dataset.txt'
PATH_TEST = '/content/drive/My Drive/Dataset/test_dataset.txt'
# Define a function to read the data from a given path
def read_data(path):
    """Load a header-less, tab-separated file into a pandas DataFrame."""
    return pd.read_csv(path, sep='\t', header=None)

# Read the datasets
train_data = read_data(PATH_TRAIN)
val_data = read_data(PATH_VAL)
test_data = read_data(PATH_TEST)

# Check the first few entries of the training data
print(train_data.head())

      0      1
0    سے  Other
1    پر  Other
2    جی  Other
3   ٹیم  Other
4  ایسے  Other


In [None]:
# Entity label -> integer class id, in class-id order.
label_to_id = {
    name: index
    for index, name in enumerate(("Other", "Organization", "Person", "Location"))
}

# Inverse lookup: integer class id -> entity label.
id_to_label = {index: name for name, index in label_to_id.items()}

In [None]:
urdu_tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased", do_lower_case=False)

In [None]:
def clean_label(label):
    """Map a raw label string onto one of the four canonical entity names.

    The first of "Organization", "Person", "Location" that occurs as a
    substring of `label` wins; anything else is reported as "Other".
    """
    for entity in ("Organization", "Person", "Location"):
        if entity in label:
            return entity
    return "Other"


In [None]:
import pandas as pd
from collections import namedtuple
from transformers import AutoTokenizer

# `Dataset` is the base class below; the notebook's other imports from
# torch.utils.data do not include it.
from torch.utils.data import Dataset


class UrduNERDataset(Dataset):
    """Torch dataset that tokenizes pre-split sentences and aligns word labels.

    Args:
        texts: list of sentences, each a list of word strings.
        labels: list of label-name lists, parallel to `texts`.
        tokenizer: tokenizer providing ``encode_plus`` and, on its result,
            ``word_ids()`` (as the Hugging Face "fast" tokenizers do).
        label_to_id: mapping from label name to integer class id.
        max_length: length every encoded sequence is padded/truncated to.
    """

    # Label id given to positions that carry no word label of their own.
    IGNORE_INDEX = -100

    def __init__(self, texts, labels, tokenizer, label_to_id, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.label_to_id = label_to_id
        self.max_length = max_length

    def __len__(self):
        """Return the number of sentences."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Encode sentence `idx`.

        Returns:
            dict with ``input_ids``, ``attention_mask`` and ``labels``. In
            ``labels`` only the first sub-word of each word carries the
            word's class id; every other position holds ``IGNORE_INDEX``.

        Raises:
            KeyError: if a label name is missing from ``label_to_id``.
        """
        text = self.texts[idx]
        labels = self.labels[idx]
        # Tokenize the text and map labels to token ids
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            is_split_into_words=True,  # Important for word-level tasks
            return_tensors='pt'
        )
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        label_ids = []
        last_word_id = None
        for word_id in encoding.word_ids():
            if word_id is None:
                # Positions with no source word (special tokens / padding).
                label_ids.append(self.IGNORE_INDEX)
            elif word_id != last_word_id:
                # First sub-word of a word. Use the mapping this dataset was
                # constructed with, not a module-level global.
                label_ids.append(self.label_to_id[labels[word_id]])
            else:
                # Continuation sub-word of the same word.
                label_ids.append(self.IGNORE_INDEX)
            last_word_id = word_id

        label_ids = label_ids[:self.max_length]  # Truncate labels to `max_length`
        label_ids += [self.IGNORE_INDEX] * (self.max_length - len(label_ids))  # Pad label_ids

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(label_ids, dtype=torch.long)
        }

def read_dataset(path):
    """Read a ``token<TAB>label`` file into parallel sentence lists.

    Blank (whitespace-only) lines separate sentences.

    Returns:
        ``(texts, label_sequences)``: a list of token lists and the matching
        list of label lists.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            tab-separated fields.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        rows = handle.read().splitlines()

    texts, label_sequences = [], []
    current_tokens, current_labels = [], []

    for row in rows:
        if row.strip() != "":
            token, label = row.split('\t')
            current_tokens.append(token)
            current_labels.append(label)
            continue
        # Blank line: close the sentence in progress, if there is one.
        if current_tokens:
            texts.append(current_tokens)
            label_sequences.append(current_labels)
            current_tokens, current_labels = [], []

    # The file may end without a trailing blank line.
    if current_tokens:
        texts.append(current_tokens)
        label_sequences.append(current_labels)

    return texts, label_sequences

def preprocess_data(filepath, tokenizer, label_to_id, max_length):
    """Load a ``token<TAB>label`` file and wrap it in an `UrduNERDataset`.

    Tokenization and label alignment are performed by the dataset itself
    when items are accessed.
    """
    sentences, tag_sequences = read_dataset(filepath)
    return UrduNERDataset(sentences, tag_sequences, tokenizer, label_to_id, max_length)

In [None]:
def transform_to_tensors(dataset):
    """Stack per-example feature lists into three tensors.

    Each item of `dataset` must expose ``input_ids``, ``input_mask`` and
    ``label_ids`` attributes.

    Returns:
        ``(input_ids, input_mask, label_ids)`` tensors, one row per item.
    """
    rows = [
        (feature.input_ids, feature.input_mask, feature.label_ids)
        for feature in dataset
    ]
    all_input_ids = torch.tensor([row[0] for row in rows])
    all_input_mask = torch.tensor([row[1] for row in rows])
    all_label_ids = torch.tensor([row[2] for row in rows])
    return all_input_ids, all_input_mask, all_label_ids

In [None]:
class ModifiedBertForTokenClassification(BertPreTrainedModel):
    """BERT encoder followed by dropout and a per-token linear classifier.

    Note that the classifier width comes from the `num_labels` constructor
    argument (default 4), not from ``config.num_labels``.
    """

    def __init__(self, config, num_labels=4):  # Set num_labels to the number of entity types you have
        super().__init__(config)
        self.num_labels = num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)

        # Called after all sub-modules have been created.
        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
    ):
        """Run the encoder and classifier; compute the loss when `labels` is given.

        Returns:
            A tuple ``(loss), logits, (hidden_states), (attentions)``; the
            loss is present only when `labels` is not None.
        """
        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
        )

        # First element of the encoder output, used as the per-token features.
        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        outputs = (logits,) + outputs[2:]  # Add hidden states and attention if they are here

        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Only keep active parts of the loss
            if attention_mask is not None:
                # Positions whose attention mask is not 1 get their label
                # replaced by the loss's ignore_index, so they are excluded.
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, self.num_labels)
                active_labels = torch.where(
                    active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
                )
                loss = loss_fct(active_logits, active_labels)
            else:
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            outputs = (loss,) + outputs

        return outputs  # (loss), logits, (hidden_states), (attentions)

In [None]:
from transformers import BertConfig

# Load the configuration from the pre-trained multilingual BERT model
config = BertConfig.from_pretrained("bert-base-multilingual-cased")
config.num_labels = 4  # Update the number of labels to the number of unique labels in your dataset

# Instantiate the custom model WITH the pre-trained encoder weights.
# Building it as `ModifiedBertForTokenClassification(config)` only applies the
# architecture and leaves every weight randomly initialised, so fine-tuning
# would start from scratch. `from_pretrained` loads the checkpoint into
# `model.bert`; only the new `classifier` layer starts untrained.
model = ModifiedBertForTokenClassification.from_pretrained(
    "bert-base-multilingual-cased", config=config
)

In [None]:
from tqdm import tqdm

def train(model, optimizer, train_dataloader, val_dataloader, epochs=5, device="cpu"):
    """Fine-tune `model` and return a snapshot from its best validation epoch.

    After every epoch the average training and validation losses are printed
    and the validation F1 is computed with `evaluatemodel` (using the
    module-level `label_to_id`).

    Args:
        model: model whose forward accepts ``input_ids``, ``attention_mask``
            and ``labels`` and returns the loss first (tuple) or as ``.loss``.
        optimizer: optimizer already bound to the model's parameters.
        train_dataloader: yields dict batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        val_dataloader: same batch format, used for validation.
        epochs: number of passes over the training data.
        device: device the model and batches are moved to.

    Returns:
        A deep copy of the model taken at the epoch with the highest
        validation F1, or None when ``epochs`` is 0. `model` itself is left
        in its final-epoch state.
    """
    import copy  # local import: only needed for the best-epoch snapshot

    model.to(device)
    best_f1_score = 0
    best_model = None

    for epoch in range(epochs):
        model.train()  # Set the model to training mode
        total_train_loss = 0

        for batch in train_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs[0] if isinstance(outputs, tuple) else outputs.loss
            loss.backward()
            optimizer.step()

            total_train_loss += loss.item()

        average_train_loss = total_train_loss / len(train_dataloader)

        # Validation loop
        model.eval()  # Set the model to evaluation mode
        total_val_loss = 0
        with torch.no_grad():
            for batch in val_dataloader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)

                outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                loss = outputs[0] if isinstance(outputs, tuple) else outputs.loss
                total_val_loss += loss.item()

        average_val_loss = total_val_loss / len(val_dataloader)
        print(f"Epoch {epoch}: Average training loss: {average_train_loss}, Average validation loss: {average_val_loss}")

        f1_score_val = evaluatemodel(model, val_dataloader, device, label_to_id)
        print("Validation F1 Score:", f1_score_val)

        # Keep a snapshot of the best epoch so far. The first epoch is always
        # kept, so a model is returned even if the F1 never rises above 0.
        if best_model is None or f1_score_val > best_f1_score:
            best_f1_score = f1_score_val
            best_model = None  # release the previous snapshot before copying
            best_model = copy.deepcopy(model)

    return best_model

In [None]:
from sklearn.metrics import f1_score
from tqdm import tqdm
import numpy as np
import torch

from sklearn.metrics import f1_score
from tqdm import tqdm
import torch
import numpy as np

def evaluatemodel(model, dataloader, device, label_to_id):
    """Return the weighted F1 of `model` over all labelled tokens in `dataloader`.

    Positions whose label is -100 are left out of the score. `label_to_id`
    is accepted but not used.

    Args:
        model: model returning logits first (tuple) or as ``.logits``.
        dataloader: yields dict batches with ``input_ids``,
            ``attention_mask`` and ``labels`` tensors.
        device: device the batches are moved to.
        label_to_id: unused.
    """
    model.eval()
    gold_ids = []
    predicted_ids = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs[0] if isinstance(outputs, tuple) else outputs.logits

            batch_predictions = np.argmax(logits.detach().cpu().numpy(), axis=2)
            batch_labels = labels.detach().cpu().numpy()

            # Boolean-mask selection walks the arrays row by row, so gold and
            # predicted ids stay aligned position for position.
            scored = batch_labels != -100
            gold_ids.extend(batch_labels[scored])
            predicted_ids.extend(batch_predictions[scored])

    # Calculate F1 score using sklearn's utility
    return f1_score(gold_ids, predicted_ids, average='weighted')


In [None]:
# Build the three splits the same way: each becomes an UrduNERDataset whose
# sequences are padded/truncated to 128 positions.
dataset_train, dataset_val, dataset_test = (
    preprocess_data(split_path, urdu_tokenizer, label_to_id, max_length=128)
    for split_path in (PATH_TRAIN, PATH_VAL, PATH_TEST)
)

In [None]:
dataset_train

<__main__.UrduNERDataset at 0x7cff90112ad0>

In [None]:
from torch.utils.data import DataLoader

# Assume dataset_train, dataset_val, and dataset_test are instances of UrduNERDataset
# Batches of 32; only the training loader shuffles, validation and test keep
# the dataset order.
train_dataloader = DataLoader(dataset_train, batch_size=32, shuffle=True)
val_dataloader = DataLoader(dataset_val, batch_size=32, shuffle=False)
test_dataloader = DataLoader(dataset_test, batch_size=32, shuffle=False)

In [None]:
# NOTE(review): the *_tensors_input_ids / *_tensors_input_mask /
# *_tensors_label_ids names used here are not assigned anywhere in the visible
# notebook, so this cell raises NameError on a fresh run. It looks like a
# leftover from an earlier pipeline built on transform_to_tensors - confirm
# and drop it if so.
train_tensor_dataset = TensorDataset(train_tensors_input_ids, train_tensors_input_mask, train_tensors_label_ids)
val_tensor_dataset = TensorDataset(val_tensors_input_ids, val_tensors_input_mask, val_tensors_label_ids)
test_tensor_dataset = TensorDataset(test_tensors_input_ids, test_tensors_input_mask, test_tensors_label_ids)


In [None]:
# NOTE(review): if executed, this rebinds train/val/test_dataloader to loaders
# over TensorDataset, whose batches are tuples. train() and evaluatemodel()
# index batches by key (batch['input_ids']), so they need the UrduNERDataset
# loaders instead - confirm whether this cell should be removed.
train_dataloader = DataLoader(train_tensor_dataset, batch_size=1)
val_dataloader = DataLoader(val_tensor_dataset, batch_size=1)
test_dataloader = DataLoader(test_tensor_dataset, batch_size=1)

In [None]:
from transformers import AdamW

# True: fine-tune every parameter of `model`; False: train the classifier only.
FULL_FINETUNE = True

param_optimizer = list(model.named_parameters())
# Parameters whose name contains one of these substrings get no weight decay.
no_decay = ['bias', 'LayerNorm.weight']

if not FULL_FINETUNE:
    print('NO ALL FINETUNE')
    # Only the classifier layer is handed to the optimizer.
    optimizer_grouped_parameters = [
        {'params': model.classifier.parameters(), 'weight_decay': 0.01},
    ]
else:
    print('ALL FINETUNE')
    # Sort every parameter into the decayed or the decay-exempt group.
    _decayed_params, _exempt_params = [], []
    for _param_name, _param in param_optimizer:
        if any(nd in _param_name for nd in no_decay):
            _exempt_params.append(_param)
        else:
            _decayed_params.append(_param)
    optimizer_grouped_parameters = [
        {'params': _decayed_params, 'weight_decay': 0.01},
        {'params': _exempt_params, 'weight_decay': 0.0},
    ]

# Initialize the optimizer
optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5)

ALL FINETUNE




In [None]:
trained_model = train(model, optimizer, train_dataloader, val_dataloader, epochs=5, device=device)

Epoch 0: Average training loss: 0.4511543810367584, Average validation loss: 0.5202686190605164


Evaluating: 100%|██████████| 1/1 [00:07<00:00,  7.63s/it]


Validation F1 Score: 0.8658008658008658
Epoch 1: Average training loss: 0.3854222893714905, Average validation loss: 0.4530276954174042


Evaluating: 100%|██████████| 1/1 [00:07<00:00,  7.65s/it]


Validation F1 Score: 0.8658008658008658
Epoch 2: Average training loss: 0.3312543034553528, Average validation loss: 0.4104750156402588


Evaluating: 100%|██████████| 1/1 [00:07<00:00,  7.73s/it]


Validation F1 Score: 0.8658008658008658
Epoch 3: Average training loss: 0.29968294501304626, Average validation loss: 0.3889240622520447


Evaluating: 100%|██████████| 1/1 [00:07<00:00,  7.79s/it]


Validation F1 Score: 0.8658008658008658
Epoch 4: Average training loss: 0.3086167275905609, Average validation loss: 0.3887269198894501


Evaluating: 100%|██████████| 1/1 [00:07<00:00,  7.86s/it]

Validation F1 Score: 0.8658008658008658





In [None]:
def predict_entities(sentence, model, tokenizer, label_to_id, id_to_label, device, max_length=128):
    """Predict one entity label per whitespace-separated word of `sentence`.

    The sentence is split into words and tokenized with
    ``is_split_into_words=True``, the same way `UrduNERDataset` encodes
    training data. Each word receives the prediction made for its first
    sub-word, which is the only position that carries a label in training.

    Args:
        sentence: raw text; words are separated by whitespace.
        model: model returning logits first (tuple) or as ``.logits``.
        tokenizer: tokenizer whose ``encode_plus`` result provides
            ``word_ids()`` (as the Hugging Face "fast" tokenizers do).
        label_to_id: unused; kept for interface compatibility.
        id_to_label: mapping from class id to label name.
        device: device the inputs are moved to.
        max_length: padded/truncated sequence length; words that do not fit
            are left out of the result.

    Returns:
        List of ``(word, label)`` tuples in sentence order.
    """
    words = sentence.split()
    if not words:
        return []

    model.eval()

    encoded_sentence = tokenizer.encode_plus(
        words,
        is_split_into_words=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

    input_ids = encoded_sentence['input_ids'].to(device)
    attention_mask = encoded_sentence['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs[0] if isinstance(outputs, tuple) else outputs.logits
        # Most likely class for every position of the single sequence.
        predicted_label_ids = torch.argmax(logits, dim=2)[0].tolist()

    # Slicing the predictions by word count would mix up words and
    # sub-words. Walk the word ids instead and keep the prediction at the
    # first sub-word of each word, skipping positions with no source word.
    results = []
    previous_word_id = None
    for position, word_id in enumerate(encoded_sentence.word_ids()):
        if word_id is not None and word_id != previous_word_id:
            label = id_to_label[predicted_label_ids[position]]
            results.append((words[word_id], label))
        previous_word_id = word_id
    return results

# Usage
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sentence = "اسلام باد عالمی بینک خیبرپختونخوا کے قبائلی اضلاع میں عسکریت پسندی سے پیدا ہونے والے بحران سے متاثرہ خاندانوں کی جلد بحالی بچوں کی صحت کی بہتری اور شہری مراکز ترسیل میں معاونت کے لیے فنڈز فراہم کرے گااس منصوبے کے لیے عالمی بینک ملٹی ڈونر ٹرسٹ کے تحت ایک کروڑ 20 لاکھ ڈالر فراہم کرے گا جس کے تحت شہریوں کی سہولت کے مراکز قائم کیے جائیں گے جو پوری قبائلی بادی اور اس سے منسلک اضلاع کو خدمات فراہم کرے گاڈان اخبار کی رپورٹ کے مطابق یہ جولائی 2019 میں ایک کروڑ 50 لاکھ ڈالر کی منظوری کے بعد سے منصوبے کی تیسری فنانسنگ ہوگییہ بھی پڑھیں ئندہ سال تک پاکستان میں غربت میں اضافے کا امکان ہے عالمی بینکان سہولیات کے مراکز کے ذریعے منتخب خدمات فراہم کی جائیں گی جس میں وائٹل رجسٹریشن سروس وی ایس سول رجسٹریشن منیجمنٹ سسٹم سی ایم ایس اور نادرا ای سہولت شامل ہے"
# Pass `urdu_tokenizer`, the tokenizer created earlier in this notebook; no
# variable named `tokenizer` is defined at module level.
entities = predict_entities(sentence, model, urdu_tokenizer, label_to_id, id_to_label, device)

# Print the tokens with their predicted entity labels
for token, label in entities:
    print(f"{token}: {label}")

اسلام: Other
با: Other
##د: Other
عالمی: Other
بین: Other
##ک: Other
خ: Other
##یب: Other
##ر: Other
##پ: Other
##خت: Other
##ون: Other
##خ: Other
##وا: Other
کے: Other
ق: Other
##با: Other
##ئل: Other
##ی: Other
ا: Other
##ضل: Other
##اع: Other
میں: Other
ع: Other
##س: Other
##کری: Other
##ت: Other
پس: Other
##ندی: Other
سے: Other
پیدا: Other
ہونے: Other
والے: Other
ب: Other
##حر: Other
##ان: Other
سے: Other
م: Other
##تا: Other
##ثر: Other
##ہ: Other
خاندان: Other
##وں: Other
کی: Other
جلد: Other
ب: Other
##حال: Other
##ی: Other
ب: Other
##چ: Other
##وں: Other
کی: Other
ص: Other
##حت: Other
کی: Other
بہت: Other
##ری: Other
اور: Other
شہر: Other
##ی: Other
م: Other
##را: Other
##کز: Other
تر: Other
##سی: Other
##ل: Other
میں: Other
مع: Other
##اون: Other
##ت: Other
کے: Other
لیے: Other
فن: Other
##ڈ: Other
##ز: Other
ف: Other
##را: Other
##ہم: Other
کر: Other
##ے: Other
گا: Other
##اس: Other
من: Other
##ص: Other
##وب: Other
##ے: Other
کے: Other
لیے: Other
عالمی: Other
بین: Other
##ک: 