In [None]:
!pip install transformers seqeval[gpu] pandas numpy sklearn mlflow

# Import

In [86]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForTokenClassification, PreTrainedTokenizer, PreTrainedModel
import mlflow
from seqeval.metrics import precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report

In [87]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print(device)

cuda


# Load the dataset

In [88]:
data_gpt = pd.read_csv("./data/chatgpt_data_generated.csv")
data_gpt.head()

Unnamed: 0.1,Unnamed: 0,sentence,label
0,0.0,سلام دنبال یه دکتر فوق تخصص ریه داخل بندر انزل...,O O O O B-srt I-srt B-spy O B-cty I-cty O O B-...
1,1.0,یه متخصص کلیه در ارومیه میخوام که تو درمان نار...,O B-srt B-spy O B-cty O O O O B-cnd I-cnd B-tr...
2,2.0,لطفاً یه دکتر متخصص غدد توی بروجرد برام پیدا ک...,O O O B-srt B-spy O B-cty O O O O O O B-trt O ...
3,3.0,یه دکتر مغز و اعصاب توی ساری میخوام که در درما...,O O B-spy O B-spy O B-cty O O O O B-cnd O O O ...
4,4.0,توی زنجان دنبال یک متخصص ارتوپدی می گردم که تج...,O B-cty O O B-srt B-spy O O O B-trt I-trt O O ...


In [89]:
data_form = pd.read_csv("./data/form_data.csv", index_col=0)
data_form.head()

Unnamed: 0,sentence,label
0,سلام یک دکتر فوق تخصص زنان خانوم تو اصفهان سمت...,"O,O,O,B-srt,I-srt,B-spy,B-gnd,O,B-cty,O,B-nhd,..."
1,سلام . یک متخصص مو داخل قم سمت پارک ملی برام پ...,"O,O,O,B-srt,B-spy,O,B-cty,O,B-nhd,I-nhd,O,O,O,..."
2,سلام . یک تخصص پوست و مو مرد درون قم سمت پارک ...,"O,O,O,B-srt,B-spy,O,B-spy,B-gnd,O,B-cty,O,B-nh..."
3,سلام ، یک متخصص تغذیه شهر علویجه معرفی کن با ت...,"O,O,O,B-srt,B-spy,O,B-cty,O,O,O,B-inc,I-inc,O,..."
4,سلام ، یک متخصص تغذیه مرد شهر گراش معرفی کن بر...,"O,O,O,B-srt,B-spy,B-gnd,O,B-cty,O,O,O,O,B-cnd,..."


In [90]:
# Extract all unique BIO tags used in the dataset
unique_bio_tags = set(tag for bio_tag in data_gpt['label'] for tag in bio_tag.split(' '))

# Convert to a sorted list
unique_bio_tags_list = sorted(unique_bio_tags)

unique_bio_tags_list

['B-apt',
 'B-cnd',
 'B-cty',
 'B-nhd',
 'B-spy',
 'B-srt',
 'B-trt',
 'I-apt',
 'I-cnd',
 'I-cty',
 'I-nhd',
 'I-srt',
 'I-trt',
 'O']

In [91]:
# Extract all unique BIO tags used in the dataset
unique_bio_tags = set(tag for bio_tag in data_form['label'] for tag in bio_tag.split(','))

# Convert to a sorted list
unique_bio_tags_list = sorted(unique_bio_tags)

unique_bio_tags_list

['B-apt',
 'B-cnd',
 'B-cty',
 'B-gnd',
 'B-inc',
 'B-nhd',
 'B-spy',
 'B-srt',
 'B-trt',
 'B-vtp',
 'B-wtt',
 'I-apt',
 'I-cnd',
 'I-inc',
 'I-nhd',
 'I-spy',
 'I-srt',
 'I-trt',
 'I-vtp',
 'I-wtt',
 'O']

In [92]:
label2id = {
    'O': 0,
    'B-apt': 1,
    'I-apt': 2,
    'B-cty': 3,
    'I-cty': 4,
    'B-cnd': 5,
    'I-cnd': 6,
    'B-gnd': 7,
    'I-gnd': 8,
    'B-inc': 9,
    'I-inc': 10,
    'B-nhd': 11,
    'I-nhd': 12,
    'B-srt': 13,
    'I-srt': 14,
    'B-spy': 15,
    'I-spy': 16,
    'B-trt': 17,
    'I-trt': 18,
    'B-vtp': 19,
    'I-vtp': 20,
    'B-wtt': 21,
    'I-wtt': 22
}

id2label = {v: k for k, v in label2id.items()}

In [93]:
data_gpt['label'] = data_gpt['label'].apply(lambda x: x.replace(" ", ","))

In [94]:
data = pd.concat([data_form, data_gpt], ignore_index=True)

In [95]:
data = data[['sentence', 'label']]

In [96]:
data

Unnamed: 0,sentence,label
0,سلام یک دکتر فوق تخصص زنان خانوم تو اصفهان سمت...,"O,O,O,B-srt,I-srt,B-spy,B-gnd,O,B-cty,O,B-nhd,..."
1,سلام . یک متخصص مو داخل قم سمت پارک ملی برام پ...,"O,O,O,B-srt,B-spy,O,B-cty,O,B-nhd,I-nhd,O,O,O,..."
2,سلام . یک تخصص پوست و مو مرد درون قم سمت پارک ...,"O,O,O,B-srt,B-spy,O,B-spy,B-gnd,O,B-cty,O,B-nh..."
3,سلام ، یک متخصص تغذیه شهر علویجه معرفی کن با ت...,"O,O,O,B-srt,B-spy,O,B-cty,O,O,O,B-inc,I-inc,O,..."
4,سلام ، یک متخصص تغذیه مرد شهر گراش معرفی کن بر...,"O,O,O,B-srt,B-spy,B-gnd,O,B-cty,O,O,O,O,B-cnd,..."
...,...,...
107,توی سنندج دنبال یه فلوشیپ ریه میگردم که مهارت ...,"O,B-cty,O,O,B-srt,B-spy,O,O,B-trt,I-trt,O,O,B-..."
108,سلام ، یه متخصص اعصاب و روان تو شهرکرد می خوام...,"O,O,O,B-srt,B-spy,O,B-spy,O,B-cty,O,O,O,O,B-tr..."
109,یه دکتر فوق تخصص قلب و عروق تو بوشهر لازم دارم...,"O,O,B-srt,I-srt,B-spy,O,B-spy,O,B-cty,O,O,O,O,..."
110,یه دکتر فوق تخصص قلب و عروق تو بندر عباس لازم ...,"O,O,B-srt,I-srt,B-spy,O,B-spy,O,B-cty,I-cty,O,..."


In [141]:
data = pd.read_csv("./data/form_gpt_generate_with_dif_cities.csv", index_col=0)

In [142]:
data

Unnamed: 0,sentence,label
0,سلام . یک فوق تخصص پوست خانوم واسه هفته آینده ...,"O,O,O,B-srt,I-srt,B-spy,B-gnd,O,B-apt,I-apt,O,..."
1,سلام . یک فوق تخصص پوست مرد شیرازی برای بلفارو...,"O,O,O,B-srt,I-srt,B-spy,B-gnd,B-cty,O,B-cnd,O,..."
2,سلام یک متخصص جراح ترجیحا آقا توی شیراز بلوار ...,"O,O,B-srt,O,O,B-gnd,O,B-cty,B-nhd,I-nhd,I-nhd,..."
3,سلام یک متخصص گوش و حلق و بینی برای اختلالات گ...,"O,O,B-srt,B-spy,O,B-spy,O,B-spy,O,B-cnd,I-cnd,..."
4,سلام من یک دندون پزشک فوق تخصص برای کج بودن دن...,"O,O,O,B-spy,I-spy,B-srt,I-srt,O,B-cnd,I-cnd,I-..."
...,...,...
234,یک پزشک مغز و اعصاب باحوصله که تخصص درمان میگر...,"O,O,B-spy,O,B-spy,B-trt,O,B-srt,O,B-cnd,O,O,O,..."
235,سلام . یه روانپزشک خانوم توی تبریز ترجیحا مرکز...,"O,O,O,B-spy,B-gnd,O,B-cty,O,B-nhd,I-nhd,O,O,B-..."
236,سلام . یک روانپزشک مجرب و امن توی تهران می خوا...,"O,O,O,B-spy,B-trt,O,B-trt,O,B-cty,O,O,O,O,B-gn..."
237,سلام یک چشم پزشک خوب تو شیراز سمت ارم پیدا کن ...,"O,O,B-spy,I-spy,B-trt,O,B-cty,O,B-nhd,O,O,O,O,..."


In [143]:
cnt = {}
for i, d in data.iterrows():
    for u in d['label'].split(','):
        if u not in cnt:
            cnt[u] = 0
        cnt[u] += 1

In [144]:
cnt

{'O': 3787,
 'B-srt': 163,
 'I-srt': 64,
 'B-spy': 271,
 'B-gnd': 77,
 'B-apt': 146,
 'I-apt': 118,
 'B-cty': 225,
 'B-nhd': 65,
 'I-nhd': 49,
 'B-trt': 202,
 'I-trt': 116,
 'B-inc': 70,
 'B-cnd': 169,
 'I-cnd': 155,
 'B-wtt': 47,
 'I-wtt': 101,
 'I-inc': 46,
 'B-vtp': 57,
 'I-spy': 70,
 'I-vtp': 3,
 'I-cty': 42}

# Preprocess

In [145]:
MAX_LEN = 128
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 2
PRETRAINED_MODEL = 'HooshvareLab/bert-fa-base-uncased'
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL)



## Define functions

In [146]:
def tokenize_and_preserve_labels(sentence: str, text_labels: str, tokenizer: PreTrainedTokenizer) -> tuple[list[str], list[str]]:
    tokenized_sentence = []
    labels = []
    sentence = sentence.strip()

    for word, label in zip(sentence.split(), text_labels.split(",")):
        tokenized_word = tokenizer.tokenize(word)
        n_subwords = len(tokenized_word)
        tokenized_sentence.extend(tokenized_word)
        labels.extend([label] * n_subwords)

    return tokenized_sentence, labels

In [147]:
class dataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, tokenizer: PreTrainedTokenizer, max_len: int) -> None:
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __getitem__(self, index :int):
        # step 1: tokenize (and adapt corresponding labels)
        sentence = self.data.sentence[index]  
        word_labels = self.data.label[index]  
        tokenized_sentence, labels = tokenize_and_preserve_labels(sentence, word_labels, self.tokenizer)
        
        # step 2: add special tokens (and corresponding labels)
        tokenized_sentence = ["[CLS]"] + tokenized_sentence + ["[SEP]"] # add special tokens
        labels.insert(0, "O") # add outside label for [CLS] token
        labels.insert(-1, "O") # add outside label for [SEP] token

        # step 3: truncating/padding
        maxlen = self.max_len

        if (len(tokenized_sentence) > maxlen):
          # truncate
          tokenized_sentence = tokenized_sentence[:maxlen]
          labels = labels[:maxlen]
        else:
          # pad
          tokenized_sentence = tokenized_sentence + ['[PAD]'for _ in range(maxlen - len(tokenized_sentence))]
          labels = labels + ["O" for _ in range(maxlen - len(labels))]

        # step 4: obtain the attention mask
        attn_mask = [1 if tok != '[PAD]' else 0 for tok in tokenized_sentence]
        
        # step 5: convert tokens to input ids
        ids = self.tokenizer.convert_tokens_to_ids(tokenized_sentence)

        label_ids = [label2id[label] for label in labels]
        
        return {
              'ids': torch.tensor(ids, dtype=torch.long),
              'mask': torch.tensor(attn_mask, dtype=torch.long),
              'targets': torch.tensor(label_ids, dtype=torch.long)
        } 
    
    def __len__(self):
        return self.len

## Split into Train, Test

In [148]:
train_size = 0.85
train_dataset = data.sample(frac=train_size, random_state=200)
test_dataset = data.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(data.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = dataset(train_dataset, tokenizer, MAX_LEN)
testing_set = dataset(test_dataset, tokenizer, MAX_LEN)

FULL Dataset: (239, 2)
TRAIN Dataset: (203, 2)
TEST Dataset: (36, 2)


In [149]:
testing_set[0]

{'ids': tensor([    2,  4285,  1012,  2829,  4692,  9084,  4903,  2999,  9921,  2831,
         98401, 47588,  1379, 10850,  3080,  3510,  2860,  2800,  3757, 44118,
          5921, 12139,  1379,  4197,  3878,  1379, 12017, 12139,  1012,  1379,
          3671,  5032,  4202,  4663, 12139,  1379,  3400,  3973,  3551,  3130,
          2861,  6624,  6878,  6041,  2015,  1012,     4,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,  

In [151]:
# print the first 30 tokens and corresponding labels
for i in range(36):
    for token, label in zip(tokenizer.convert_ids_to_tokens(testing_set[i]["ids"][:60]), testing_set[i]["targets"][:60]):
      print('{0:10}  {1}'.format(token, id2label[label.item()]))

[CLS]       O
سلام        O
.           O
یک          O
فوق         B-srt
تخصص        I-srt
پوست        B-spy
مرد         B-gnd
شیرازی      B-cty
برای        O
بلفار       B-cnd
##وپلاستی   B-cnd
و           O
لیزر        B-cnd
صورت        I-cnd
پیدا        O
کن          O
که          O
سمت         O
عفیف        B-nhd
اباد        I-nhd
باشه        O
و           O
خوش         B-trt
برخورد      I-trt
و           O
مهربان      B-trt
باشه        O
.           O
و           O
تحت         O
پوشش        O
بیمه        O
تجارت       B-inc
باشه        O
و           O
حدود        O
نیم         B-wtt
ساعت        I-wtt
بیشتر       O
تو          O
مطب         O
منتظر       O
نمون        O
##م         O
.           O
[SEP]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[PAD]       O
[CLS]       O
یک          O
متخصص       B-srt
خانم        B-gnd
طب          B-spy
سوزنی     

In [152]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

# Define and track models with mlflow

In [153]:
EPOCHS = 15
LEARNING_RATE = 1e-05
MAX_GRAD_NORM = 10
FREAZING_LAYERS = 6

In [154]:
mlflow.set_tracking_uri("mlflow")
mlflow.set_experiment("NER")

Traceback (most recent call last):
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 317, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 410, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 1341, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 1334, in _read_helper
    result = read_yaml(root, file_na

<Experiment: artifact_location='/home/user/Desktop/hammasir-project/mlflow/968568366775758109', creation_time=1724758511411, experiment_id='968568366775758109', last_update_time=1724758511411, lifecycle_stage='active', name='NER', tags={}>

## Writing Functions

In [155]:
def initialize_model(id2label: dict, label2id: dict, training_set: dataset, freeze_layers: int = 0) -> tuple[PreTrainedModel, torch.Tensor]:
    model = BertForTokenClassification.from_pretrained(
        PRETRAINED_MODEL,
        num_labels=len(id2label),
        id2label=id2label,
        label2id=label2id
    )
    
    # Freeze the first `freeze_layers` transformer layers
    for param in model.bert.embeddings.parameters():
        param.requires_grad = False

    for i in range(freeze_layers):
        for param in model.bert.encoder.layer[i].parameters():
            param.requires_grad = False

    model.to(device)
    ids = training_set[0]["ids"].unsqueeze(0)
    mask = training_set[0]["mask"].unsqueeze(0)
    targets = training_set[0]["targets"].unsqueeze(0)
    ids = ids.to(device)
    mask = mask.to(device)
    targets = targets.to(device)
    outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
    initial_loss = outputs[0]
    return model, initial_loss

In [156]:
# Defining the training function on the 80% of the dataset for tuning the bert model
def train(optimizer: torch.optim.Adam, max_norm: int, training_loader: DataLoader, model: PreTrainedModel) -> tuple[PreTrainedModel, float, float]:
    tr_loss, tr_accuracy = 0, 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []
    # put model in training mode
    model.train()
    
    for idx, batch in enumerate(training_loader):
        
        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits
        tr_loss += loss.item()

        nb_tr_steps += 1
        nb_tr_examples += targets.size(0)
        
        if idx % 100==0:
            loss_step = tr_loss/nb_tr_steps
            print(f"Training loss per 100 training steps: {loss_step}")
           
        # compute training accuracy
        flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
        active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        # now, use mask to determine where we should compare predictions with targets (includes [CLS] and [SEP] token predictions)
        active_accuracy = mask.view(-1) == 1 # active accuracy is also of shape (batch_size * seq_len,)
        targets = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)
        
        tr_preds.extend(predictions)
        tr_labels.extend(targets)
        
        tmp_tr_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy
    
        # gradient clipping
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_norm
        )
        
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    return model, epoch_loss, tr_accuracy

In [157]:
def valid(model: PreTrainedModel, testing_loader: DataLoader, device: str, id2label: dict, label2id: dict) -> tuple[list[str], list[str], float, float]:
    # put model in evaluation mode
    model.eval()
    
    eval_loss, eval_accuracy = 0, 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []
    
    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):
            
            ids = batch['ids'].to(device, dtype = torch.long)
            mask = batch['mask'].to(device, dtype = torch.long)
            targets = batch['targets'].to(device, dtype = torch.long)
            
            outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
            loss, eval_logits = outputs.loss, outputs.logits
            
            eval_loss += loss.item()

            nb_eval_steps += 1
            nb_eval_examples += targets.size(0)
        
            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")
              
            # compute evaluation accuracy
            flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
            active_logits = eval_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
            # now, use mask to determine where we should compare predictions with targets (includes [CLS] and [SEP] token predictions)
            active_accuracy = mask.view(-1) == 1 # active accuracy is also of shape (batch_size * seq_len,)
            targets = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)
            
            eval_labels.extend(targets)
            eval_preds.extend(predictions)
            
            tmp_eval_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy
    

    labels = [id2label[id.item()] for id in eval_labels]
    predictions = [id2label[id.item()] for id in eval_preds]

    
    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps

    return labels, predictions, eval_loss, eval_accuracy

In [158]:
def predict(sentence: str, model: PreTrainedModel, tokenizer: BertTokenizer, id2label: dict, device: str) -> str:
    inputs = tokenizer(sentence, padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors="pt")

    # Move to GPU
    ids = inputs["input_ids"].to(device)
    mask = inputs["attention_mask"].to(device)

    # Forward pass
    outputs = model(ids, mask)
    logits = outputs[0]

    active_logits = logits.view(-1, model.num_labels)  # shape (batch_size * seq_len, num_labels)

    # Get top 5 predictions for each token
    top_predictions = torch.topk(active_logits, k=5, dim=1)

    top_indices = top_predictions.indices.cpu().numpy()  # shape (batch_size * seq_len, 5)
    top_scores = top_predictions.values.cpu().detach().numpy()  # shape (batch_size * seq_len, 5)

    tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())

    wp_preds = []
    for i, token in enumerate(tokens):
        if token not in ['[CLS]', '[SEP]', '[PAD]']:
            preds = [(id2label[idx], score) for idx, score in zip(top_indices[i], top_scores[i])]
            wp_preds.append((token, preds))

    # Print word-level predictions
    print("Word-Level Predictions with Top 5 Labels:")
    for token, preds in wp_preds:
        print(f"Word: {token}")
        for label, score in preds:
            print(f"  Label: {label}, Score: {score:.4f}")
        print()  # New line for readability

    # Construct the final sentence with word and top label
    final_sentence = []
    for token, preds in wp_preds:
        best_label = preds[0][0]  # get the label with the highest score
        final_sentence.append(f"{token} ({best_label})")

    # Join tokens (removing "##" in wordpieces)
    final_str = " ".join(final_sentence).replace(" ##", "")

    return final_str

# Base Model

In [159]:
import mlflow
import torch
from sklearn.metrics import classification_report

# Start your MLflow run
mlflow.transformers.autolog(disable=True)
with mlflow.start_run():
    mlflow.log_params({
        'EPOCHS': EPOCHS,
        'LEARNING_RATE': LEARNING_RATE,
        'MAX_GRAD_NORM': MAX_GRAD_NORM,
        'FREAZING_LAYERS': FREAZING_LAYERS
    })
    model, initial_loss = initialize_model(label2id=label2id, id2label=id2label, training_set=training_set, freeze_layers=FREAZING_LAYERS)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
    
    for i in range(EPOCHS):
        model, epoch_loss, tr_accuracy = train(optimizer, MAX_GRAD_NORM, training_loader, model)
        if i == EPOCHS - 1:
            mlflow.log_metric(f'loss_epoch{i+1}', epoch_loss)
            mlflow.log_metric(f'accuracy_epoch{i+1}', tr_accuracy)
        print("tr_accuracy = ", tr_accuracy)    
        
    labels, predictions, eval_loss, eval_accuracy = valid(model, testing_loader, device, id2label, label2id)
    mlflow.log_metric('eval_loss', eval_loss)
    mlflow.log_metric('eval_accuracy', eval_accuracy)
    mlflow.pytorch.log_model(model, 'model')
    print("eval_accuracy = ", eval_accuracy)
    
    # Generate classification report
    report = classification_report(labels, predictions, output_dict=True)
    
    # Calculate the weighted average F1-score excluding 'O'
    exlude = ['O', 'macro avg', 'accuracy', 'weighted avg']
    total_support = sum(report[label]['support'] for label in report if label not in ['O', 'accuracy'])
    weighted_f1_score = sum(report[label]['f1-score'] * report[label]['support'] for label in report if label not in ['O', 'accuracy']) / total_support
    mlflow.log_metric('Slot avg F1-Score', weighted_f1_score)
    mlflow.log_metric('macro avg F1-Score', report['macro avg']['f1-score'])
    for label in report:
        if label == 'accuracy':
            break
        mlflow.log_metric(f"f1-score label {label}", report[label]['f1-score'])
    print(classification_report(labels, predictions))
    print(f"Slot F1-Score: {weighted_f1_score:.4f}")

Some weights of BertForTokenClassification were not initialized from the model checkpoint at HooshvareLab/bert-fa-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training loss per 100 training steps: 3.1570820808410645
tr_accuracy =  0.5821145150896112
Training loss per 100 training steps: 0.5306795239448547
tr_accuracy =  0.6598833505022362
Training loss per 100 training steps: 0.31264516711235046
tr_accuracy =  0.7291399895489423
Training loss per 100 training steps: 0.19879400730133057
tr_accuracy =  0.8209702204370659
Training loss per 100 training steps: 0.10510006546974182
tr_accuracy =  0.8777426253469406
Training loss per 100 training steps: 0.18138110637664795
tr_accuracy =  0.9084168591911046
Training loss per 100 training steps: 0.08566900342702866
tr_accuracy =  0.9262537809159572
Training loss per 100 training steps: 0.08404365181922913
tr_accuracy =  0.9433312377551794
Training loss per 100 training steps: 0.017064601182937622
tr_accuracy =  0.9513149697735143
Training loss per 100 training steps: 0.03262428939342499
tr_accuracy =  0.9580690411202726
Training loss per 100 training steps: 0.12495730817317963
tr_accuracy =  0.965206



eval_accuracy =  0.9579354590224156
              precision    recall  f1-score   support

       B-apt       1.00      0.95      0.98        22
       B-cnd       0.90      0.82      0.86        33
       B-cty       0.94      0.97      0.96        34
       B-gnd       1.00      1.00      1.00        12
       B-inc       0.91      0.83      0.87        12
       B-nhd       1.00      0.67      0.80         9
       B-spy       0.95      0.95      0.95        40
       B-srt       1.00      0.91      0.95        22
       B-trt       0.93      0.86      0.89        29
       B-vtp       0.80      0.67      0.73        12
       B-wtt       0.67      0.33      0.44         6
       I-apt       1.00      0.89      0.94        18
       I-cnd       0.89      0.71      0.79        24
       I-cty       0.75      1.00      0.86         3
       I-inc       0.89      0.89      0.89         9
       I-nhd       1.00      0.75      0.86         4
       I-spy       0.69      0.92      0.79  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [123]:
sentence = "سلام یک دکتر فوق تخصص ارتوپدی مذکر برای پوکی استخوان در خیابان امام خمینی گلپایگان بهم معرفی کن با ویزیت مجازی و دکتر خوش خلق و حرفه ای باشه و زمان معطلی کمتر از نیم ساعت باشه و اولین نوبت غیر حضوری در فردا "
str_rep = predict(sentence, model, tokenizer, id2label, device)
print(str_rep, sep="\n")

Word-Level Predictions with Top 5 Labels:
Word: سلام
  Label: O, Score: 10.0005
  Label: B-gnd, Score: 0.6013
  Label: B-trt, Score: 0.5326
  Label: B-vtp, Score: 0.4931
  Label: I-wtt, Score: -0.0640

Word: یک
  Label: O, Score: 9.7221
  Label: B-gnd, Score: 1.0095
  Label: B-srt, Score: 0.2981
  Label: I-wtt, Score: 0.2085
  Label: B-cnd, Score: -0.1997

Word: دکتر
  Label: O, Score: 8.8163
  Label: B-gnd, Score: 1.6750
  Label: B-srt, Score: 1.4052
  Label: I-wtt, Score: 0.3646
  Label: B-spy, Score: 0.1559

Word: فوق
  Label: B-srt, Score: 8.2282
  Label: B-gnd, Score: 0.9190
  Label: B-spy, Score: 0.6459
  Label: B-trt, Score: 0.6145
  Label: I-srt, Score: 0.4690

Word: تخصص
  Label: I-srt, Score: 8.0348
  Label: B-srt, Score: 1.9901
  Label: I-trt, Score: 1.6560
  Label: I-spy, Score: 1.4292
  Label: B-spy, Score: 0.8365

Word: ارتوپدی
  Label: B-spy, Score: 8.2431
  Label: B-cnd, Score: 2.9081
  Label: I-cnd, Score: 1.8165
  Label: I-spy, Score: 1.3391
  Label: B-srt, Score: 0.9

In [31]:
sentence = "من دنبال یک دکتر مرد هستم که توی پاسداران قم باشه و بتونه فشار خون بالا رو درمان کنه. میشه برای جمعه صبح برام وقت غیرحضوری بگیرین؟ تقریبا می تونم واسه یک ساعت منتظر بمونم. بیمه من سلامت ایرانیان هست"
str_rep = predict(sentence, model, tokenizer, id2label, device)
print(str_rep, sep="\n")

Word-Level Predictions with Top 5 Labels:
Word: من
  Label: O, Score: 9.0407
  Label: I-cnd, Score: 0.4223
  Label: B-vtp, Score: 0.3003
  Label: B-cty, Score: 0.1948
  Label: B-cnd, Score: 0.1611

Word: دنبال
  Label: O, Score: 9.3755
  Label: B-vtp, Score: 0.7642
  Label: B-nhd, Score: -0.0213
  Label: I-inc, Score: -0.0971
  Label: B-cnd, Score: -0.1955

Word: یک
  Label: O, Score: 9.5064
  Label: B-srt, Score: 0.3927
  Label: B-nhd, Score: 0.0370
  Label: B-wtt, Score: -0.0015
  Label: B-vtp, Score: -0.2009

Word: دکتر
  Label: O, Score: 8.4703
  Label: B-srt, Score: 1.9527
  Label: I-spy, Score: 0.5245
  Label: B-spy, Score: 0.4713
  Label: B-gnd, Score: 0.2545

Word: مرد
  Label: B-gnd, Score: 6.8434
  Label: B-spy, Score: 2.5402
  Label: B-srt, Score: 1.3349
  Label: B-trt, Score: 1.0155
  Label: I-nhd, Score: 0.9872

Word: هستم
  Label: O, Score: 9.1499
  Label: I-cnd, Score: 0.0171
  Label: B-vtp, Score: -0.0143
  Label: B-gnd, Score: -0.0316
  Label: B-cty, Score: -0.0706

Wo