In [None]:
!pip install mlflow

# Import

In [30]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForTokenClassification, PreTrainedTokenizer, PreTrainedModel
import mlflow
from seqeval.metrics import precision_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
import torch.nn as nn
from transformers.modeling_outputs import TokenClassifierOutput

In [31]:
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print(device)

cuda


In [32]:
PATH = "./data/form_gpt_generate_with_dif_cities.csv"
ANALYSIS = False
PRETRAINED_MODEL = 'HooshvareLab/bert-fa-base-uncased'

# Load the dataset

In [33]:
data = pd.read_csv(PATH, index_col=0)

In [34]:
data

Unnamed: 0,sentence,label
0.0,سلام . یک فوق تخصص پوست خانوم واسه هفته آینده ...,"O,O,O,B-srt,I-srt,B-spy,B-gnd,O,B-apt,I-apt,O,..."
1.0,سلام . یک فوق تخصص پوست مرد شیرازی برای بلفارو...,"O,O,O,B-srt,I-srt,B-spy,B-gnd,B-cty,O,B-cnd,O,..."
2.0,سلام یک متخصص جراح ترجیحا آقا توی شیراز بلوار ...,"O,O,B-srt,B-spy,O,B-gnd,O,B-cty,B-nhd,I-nhd,I-..."
3.0,سلام یک متخصص گوش و حلق و بینی برای اختلالات گ...,"O,O,B-srt,B-spy,O,B-spy,O,B-spy,O,B-cnd,I-cnd,..."
4.0,سلام من یک دندون پزشک فوق تخصص برای کج بودن دن...,"O,O,O,B-spy,I-spy,B-srt,I-srt,O,B-cnd,I-cnd,I-..."
...,...,...
,آیا می‌توانید یک متخصص عفونی برای همسرم در منط...,"O,O,O,B-srt,B-spy,O,O,O,O,B-cty,O,O,O,O,O,O,O,..."
,نیاز به یک پزشک مشهدی ارتوپد برای شوهرم دارم ک...,"O,O,O,O,B-cty,B-spy,O,O,O,O,O,O,O,B-vtp,I-vtp,..."
,آیا در یزد یک پزشک متخصص اطفال میشناسید که بتو...,"O,O,B-cty,O,O,B-srt,B-spy,O,O,O,O,O,B-cnd,I-cn..."
,برای درمان پوکی استخوان پدرم به دنبال یک متخصص...,"O,O,B-cnd,I-cnd,O,O,O,O,B-srt,B-spy,O,O,B-cty,..."


In [35]:
# Extract all unique BIO tags used in the dataset
unique_bio_tags = set(tag for bio_tag in data['label'] for tag in bio_tag.split(','))

# Convert to a sorted list
unique_bio_tags_list = sorted(unique_bio_tags)

unique_bio_tags_list

['B-apt',
 'B-cnd',
 'B-cty',
 'B-gnd',
 'B-inc',
 'B-nhd',
 'B-spy',
 'B-srt',
 'B-trt',
 'B-vtp',
 'B-wtt',
 'I-apt',
 'I-cnd',
 'I-cty',
 'I-inc',
 'I-nhd',
 'I-spy',
 'I-srt',
 'I-trt',
 'I-vtp',
 'I-wtt',
 'O']

In [36]:
# Define the substrings to be replaced and their replacement
to_replace = ['I-nhd', 'B-nhd', 'I-wtt', 'B-wtt']
replacement = 'O'

# Use the replace method with regex to replace the values in the column
data['label'] = data['label'].str.replace(r'\b(I-wtt|B-wtt)\b', 'O', regex=True)
data['label'] = data['label'].str.replace(r'\b(I-nhd)\b', 'I-loc', regex=True)
data['label'] = data['label'].str.replace(r'\b(B-nhd)\b', 'B-loc', regex=True)
data['label'] = data['label'].str.replace(r'\b(I-cty)\b', 'I-loc', regex=True)
data['label'] = data['label'].str.replace(r'\b(B-cty)\b', 'B-loc', regex=True)

In [37]:
# Extract all unique BIO tags used in the dataset
unique_bio_tags = set(tag for bio_tag in data['label'] for tag in bio_tag.split(','))

# Convert to a sorted list
unique_bio_tags_list = sorted(unique_bio_tags)

unique_bio_tags_list

['B-apt',
 'B-cnd',
 'B-gnd',
 'B-inc',
 'B-loc',
 'B-spy',
 'B-srt',
 'B-trt',
 'B-vtp',
 'I-apt',
 'I-cnd',
 'I-inc',
 'I-loc',
 'I-spy',
 'I-srt',
 'I-trt',
 'I-vtp',
 'O']

In [38]:
label2id = {
    'O': 0,
    'B-apt': 1,
    'I-apt': 2,
    'B-loc': 3,
    'I-loc': 4,
    'B-cnd': 5,
    'I-cnd': 6,
    'B-gnd': 7,
    'I-gnd': 8,
    'B-inc': 9,
    'I-inc': 10,
    'B-srt': 11,
    'I-srt': 12,
    'B-spy': 13,
    'I-spy': 14,
    'B-trt': 15,
    'I-trt': 16,
    'B-vtp': 17,
    'I-vtp': 18
}

id2label = {v: k for k, v in label2id.items()}

In [39]:
if ANALYSIS:
    datas = []
    for i, d in data.iterrows():
      sen = d['sentence'].split()
      lab = d['label'].split(',')
      print(len(sen), len(lab))
      for j in range(max(len(sen), len(lab))):
        print(sen[j], lab[j])
      print('---------------------')

# Preprocess

In [40]:
MAX_LEN = 256
TRAIN_BATCH_SIZE = 4
VALID_BATCH_SIZE = 2

In [41]:
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL)



## Define functions

In [13]:
def tokenize_and_preserve_labels(sentence: str, text_labels: str, tokenizer: PreTrainedTokenizer) -> tuple[list[str], list[str]]:
    tokenized_sentence = []
    labels = []
    sentence = sentence.strip()

    for word, label in zip(sentence.split(), text_labels.split(",")):
        tokenized_word = tokenizer.tokenize(word)
        n_subwords = len(tokenized_word)
        tokenized_sentence.extend(tokenized_word)
        labels.extend([label] * n_subwords)

    return tokenized_sentence, labels

In [14]:
class dataset(Dataset):
    def __init__(self, dataframe: pd.DataFrame, tokenizer: PreTrainedTokenizer, max_len: int) -> None:
        self.len = len(dataframe)
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self, index :int):
        # step 1: tokenize (and adapt corresponding labels)
        sentence = self.data.sentence[index]
        word_labels = self.data.label[index]
        tokenized_sentence, labels = tokenize_and_preserve_labels(sentence, word_labels, self.tokenizer)

        # step 2: add special tokens (and corresponding labels)
        tokenized_sentence = ["[CLS]"] + tokenized_sentence + ["[SEP]"] # add special tokens
        labels.insert(0, "O") # add outside label for [CLS] token
        labels.insert(-1, "O") # add outside label for [SEP] token

        # step 3: truncating/padding
        maxlen = self.max_len

        if (len(tokenized_sentence) > maxlen):
          # truncate
          tokenized_sentence = tokenized_sentence[:maxlen]
          labels = labels[:maxlen]
        else:
          # pad
          tokenized_sentence = tokenized_sentence + ['[PAD]'for _ in range(maxlen - len(tokenized_sentence))]
          labels = labels + ["O" for _ in range(maxlen - len(labels))]

        # step 4: obtain the attention mask
        attn_mask = [1 if tok != '[PAD]' else 0 for tok in tokenized_sentence]

        # step 5: convert tokens to input ids
        ids = self.tokenizer.convert_tokens_to_ids(tokenized_sentence)

        label_ids = [label2id[label] for label in labels]

        return {
              'ids': torch.tensor(ids, dtype=torch.long),
              'mask': torch.tensor(attn_mask, dtype=torch.long),
              'targets': torch.tensor(label_ids, dtype=torch.long)
        }

    def __len__(self):
        return self.len

## Split into Train, Test

In [17]:
TRAIN_SIZE = 0.8
train_dataset = data.sample(frac=TRAIN_SIZE, random_state=15)
test_dataset = data.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(data.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(test_dataset.shape))

training_set = dataset(train_dataset, tokenizer, MAX_LEN)
testing_set = dataset(test_dataset, tokenizer, MAX_LEN)

FULL Dataset: (265, 2)
TRAIN Dataset: (199, 2)
TEST Dataset: (61, 2)


In [18]:
testing_set[0]

{'ids': tensor([    2,  4285,  1012,  2829,  4692,  9084,  4903,  2999,  9921,  2831,
         98401, 47588,  1379, 10850,  3080,  3510,  2860,  2800,  3757, 44118,
          5921, 12139,  1379,  4197,  3878,  1379, 12017, 12139,  1012,  1379,
          3671,  5032,  4202,  4663, 12139,  1379,  3400,  3973,  3551,  3130,
          2861,  6624,  6878,  6041,  2015,  1012,     4,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,  

In [19]:
if ANALYSIS:
    for i in range(test_dataset.shape[0]):
        for token, label in zip(tokenizer.convert_ids_to_tokens(testing_set[i]["ids"][:60]), testing_set[i]["targets"][:60]):
          print('{0:10}  {1}'.format(token, id2label[label.item()]))

In [20]:
train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

# Define and track models with mlflow

In [51]:
EPOCHS = 15
LEARNING_RATE = 1e-05
MAX_GRAD_NORM = 10
FREAZING_LAYERS = 8
NUM_ADDITIONAL_LAYERS_TOP = 2
ACTIVATION = "GELU"

In [52]:
mlflow.set_tracking_uri("mlflow")
mlflow.set_experiment("NER")

Traceback (most recent call last):
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 317, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 410, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 1341, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user/anaconda3/envs/nlu/lib/python3.12/site-packages/mlflow/store/tracking/file_store.py", line 1334, in _read_helper
    result = read_yaml(root, file_na

<Experiment: artifact_location='/home/user/Desktop/hammasir-project/mlflow/968568366775758109', creation_time=1724758511411, experiment_id='968568366775758109', last_update_time=1724758511411, lifecycle_stage='active', name='NER', tags={}>

## Writing Functions

In [53]:
class CustomBertForTokenClassification(nn.Module):
    def __init__(self, pretrained_model, num_labels, id2label, label2id, num_additional_layers=1, hidden_dim=768):
        super(CustomBertForTokenClassification, self).__init__()
        self.bert = BertForTokenClassification.from_pretrained(
            pretrained_model,
            num_labels=num_labels,
            id2label=id2label,
            label2id=label2id
        )

        self.num_labels=num_labels

        # Additional layers
        self.additional_layers = nn.ModuleList([
            nn.Linear(hidden_dim, hidden_dim) for _ in range(num_additional_layers)
        ])

        # Activation function
        self.activation = nn.GELU()

        # Final classification layer
        self.classifier = nn.Linear(hidden_dim, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        outputs = self.bert.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids
        )

        sequence_output = outputs[0]  # [batch_size, seq_length, hidden_size]

        # Pass through additional layers
        for layer in self.additional_layers:
            sequence_output = self.activation(layer(sequence_output))

        # Pass through the final classification layer
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            active_loss = attention_mask.view(-1) == 1
            active_logits = logits.view(-1, logits.size(-1))
            active_labels = torch.where(
                active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
            )
            loss = loss_fct(active_logits, active_labels)

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

In [54]:
def initialize_model(id2label: dict, label2id: dict, training_set, freeze_layers: int = 0, num_additional_layers: int = 1) -> tuple[nn.Module, torch.Tensor]:
    model = CustomBertForTokenClassification(
        pretrained_model=PRETRAINED_MODEL,
        num_labels=len(id2label),
        id2label=id2label,
        label2id=label2id,
        num_additional_layers=num_additional_layers
    )

    # Freeze the first `freeze_layers` transformer layers
    for param in model.bert.bert.embeddings.parameters():
        param.requires_grad = False

    for i in range(freeze_layers):
        for param in model.bert.bert.encoder.layer[i].parameters():
            param.requires_grad = False

    model.to(device)
    ids = training_set[0]["ids"].unsqueeze(0)
    mask = training_set[0]["mask"].unsqueeze(0)
    targets = training_set[0]["targets"].unsqueeze(0)
    ids = ids.to(device)
    mask = mask.to(device)
    targets = targets.to(device)
    outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
    initial_loss = outputs[0]
    return model, initial_loss

In [55]:
# Defining the training function on the 80% of the dataset for tuning the bert model
def train(optimizer: torch.optim.Adam, max_norm: int, training_loader: DataLoader, model: PreTrainedModel) -> tuple[PreTrainedModel, float, float]:
    tr_loss, tr_accuracy = 0, 0
    nb_tr_examples, nb_tr_steps = 0, 0
    tr_preds, tr_labels = [], []
    # put model in training mode
    model.train()

    for idx, batch in enumerate(training_loader):

        ids = batch['ids'].to(device, dtype = torch.long)
        mask = batch['mask'].to(device, dtype = torch.long)
        targets = batch['targets'].to(device, dtype = torch.long)

        outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
        loss, tr_logits = outputs.loss, outputs.logits
        tr_loss += loss.item()

        nb_tr_steps += 1
        nb_tr_examples += targets.size(0)

        if idx % 100==0:
            loss_step = tr_loss/nb_tr_steps
            print(f"Training loss per 100 training steps: {loss_step}")

        # compute training accuracy
        flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
        active_logits = tr_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
        flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
        # now, use mask to determine where we should compare predictions with targets (includes [CLS] and [SEP] token predictions)
        active_accuracy = mask.view(-1) == 1 # active accuracy is also of shape (batch_size * seq_len,)
        targets = torch.masked_select(flattened_targets, active_accuracy)
        predictions = torch.masked_select(flattened_predictions, active_accuracy)

        tr_preds.extend(predictions)
        tr_labels.extend(targets)

        tmp_tr_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
        tr_accuracy += tmp_tr_accuracy

        # gradient clipping
        torch.nn.utils.clip_grad_norm_(
            parameters=model.parameters(), max_norm=max_norm
        )

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    epoch_loss = tr_loss / nb_tr_steps
    tr_accuracy = tr_accuracy / nb_tr_steps
    return model, epoch_loss, tr_accuracy

In [56]:
def valid(model: PreTrainedModel, testing_loader: DataLoader, device: str, id2label: dict, label2id: dict) -> tuple[list[str], list[str], float, float]:
    # put model in evaluation mode
    model.eval()

    eval_loss, eval_accuracy = 0, 0
    nb_eval_examples, nb_eval_steps = 0, 0
    eval_preds, eval_labels = [], []

    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):

            ids = batch['ids'].to(device, dtype = torch.long)
            mask = batch['mask'].to(device, dtype = torch.long)
            targets = batch['targets'].to(device, dtype = torch.long)

            outputs = model(input_ids=ids, attention_mask=mask, labels=targets)
            loss, eval_logits = outputs.loss, outputs.logits

            eval_loss += loss.item()

            nb_eval_steps += 1
            nb_eval_examples += targets.size(0)

            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")

            # compute evaluation accuracy
            flattened_targets = targets.view(-1) # shape (batch_size * seq_len,)
            active_logits = eval_logits.view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, axis=1) # shape (batch_size * seq_len,)
            # now, use mask to determine where we should compare predictions with targets (includes [CLS] and [SEP] token predictions)
            active_accuracy = mask.view(-1) == 1 # active accuracy is also of shape (batch_size * seq_len,)
            targets = torch.masked_select(flattened_targets, active_accuracy)
            predictions = torch.masked_select(flattened_predictions, active_accuracy)

            eval_labels.extend(targets)
            eval_preds.extend(predictions)

            tmp_eval_accuracy = accuracy_score(targets.cpu().numpy(), predictions.cpu().numpy())
            eval_accuracy += tmp_eval_accuracy


    labels = [id2label[id.item()] for id in eval_labels]
    predictions = [id2label[id.item()] for id in eval_preds]


    eval_loss = eval_loss / nb_eval_steps
    eval_accuracy = eval_accuracy / nb_eval_steps

    return labels, predictions, eval_loss, eval_accuracy

In [57]:
def predict(sentence: str, model: PreTrainedModel, tokenizer: BertTokenizer, id2label: dict, device: str, details : bool = False) -> str:
    inputs = tokenizer(sentence, padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors="pt")

    # Move to GPU
    ids = inputs["input_ids"].to(device)
    mask = inputs["attention_mask"].to(device)

    # Forward pass
    outputs = model(ids, mask)
    logits = outputs[0]

    active_logits = logits.view(-1, model.num_labels)  # shape (batch_size * seq_len, num_labels)

    # Get top 5 predictions for each token
    top_predictions = torch.topk(active_logits, k=5, dim=1)

    top_indices = top_predictions.indices.cpu().numpy()  # shape (batch_size * seq_len, 5)
    top_scores = top_predictions.values.cpu().detach().numpy()  # shape (batch_size * seq_len, 5)

    tokens = tokenizer.convert_ids_to_tokens(ids.squeeze().tolist())

    wp_preds = []
    for i, token in enumerate(tokens):
        if token not in ['[CLS]', '[SEP]', '[PAD]']:
            preds = [(id2label[idx], score) for idx, score in zip(top_indices[i], top_scores[i])]
            wp_preds.append((token, preds))

    if details:
    # Print word-level predictions
      print("Word-Level Predictions with Top 5 Labels:")
      for token, preds in wp_preds:
          print(f"Word: {token}")
          for label, score in preds:
              print(f"  Label: {label}, Score: {score:.4f}")
          print()  # New line for readability

    # Construct the final sentence with word and top label
    final_sentence = []
    for token, preds in wp_preds:
        best_label = preds[0][0]  # get the label with the highest score
        final_sentence.append(f"{token} ({best_label})")

    # Join tokens (removing "##" in wordpieces)
    final_str = " ".join(final_sentence).replace(" ##", "")

    return final_str

# Base Model

In [58]:
import mlflow
import torch
from sklearn.metrics import classification_report

# Start your MLflow run
mlflow.transformers.autolog(disable=True)
with mlflow.start_run():
    mlflow.log_params({
        'EPOCHS': EPOCHS,
        'LEARNING_RATE': LEARNING_RATE,
        'MAX_GRAD_NORM': MAX_GRAD_NORM,
        'FREAZING_LAYERS': FREAZING_LAYERS,
        'NUM_ADDITIONAL_LAYERS_TOP': NUM_ADDITIONAL_LAYERS_TOP,
        'ACTIVATION': ACTIVATION
    })
    model, initial_loss = initialize_model(label2id=label2id, id2label=id2label, training_set=training_set,
                                           freeze_layers=FREAZING_LAYERS, num_additional_layers=NUM_ADDITIONAL_LAYERS_TOP)
    
    optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

    for i in range(EPOCHS):
        model, epoch_loss, tr_accuracy = train(optimizer, MAX_GRAD_NORM, training_loader, model)
        if i == EPOCHS - 1:
            mlflow.log_metric(f'loss_epoch{i+1}', epoch_loss)
            mlflow.log_metric(f'accuracy_epoch{i+1}', tr_accuracy)
        print("tr_accuracy = ", tr_accuracy)
        labels, predictions, eval_loss, eval_accuracy = valid(model, testing_loader, device, id2label, label2id)
        print("eval_accuracy = ", eval_accuracy)

    labels, predictions, eval_loss, eval_accuracy = valid(model, testing_loader, device, id2label, label2id)
    mlflow.log_metric('eval_loss', eval_loss)
    mlflow.log_metric('eval_accuracy', eval_accuracy)
    mlflow.pytorch.log_model(model, 'model')
    print("eval_accuracy = ", eval_accuracy)

    # Generate classification report
    report = classification_report(labels, predictions, output_dict=True)

    # Calculate the weighted average F1-score excluding 'O'
    exlude = ['O', 'macro avg', 'accuracy', 'weighted avg']
    total_support = sum(report[label]['support'] for label in report if label not in ['O', 'accuracy'])
    weighted_f1_score = sum(report[label]['f1-score'] * report[label]['support'] for label in report if label not in ['O', 'accuracy']) / total_support
    mlflow.log_metric('Slot avg F1-Score', weighted_f1_score)
    mlflow.log_metric('macro avg F1-Score', report['macro avg']['f1-score'])
    for label in report:
        if label == 'accuracy':
            break
        mlflow.log_metric(f"f1-score label {label}", report[label]['f1-score'])
    print(classification_report(labels, predictions))
    print(f"Slot F1-Score: {weighted_f1_score:.4f}")

Some weights of BertForTokenClassification were not initialized from the model checkpoint at HooshvareLab/bert-fa-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Training loss per 100 training steps: 2.9542906284332275
tr_accuracy =  0.6131057078598996
Validation loss per 100 evaluation steps: 1.6958163976669312
eval_accuracy =  0.6705833037115977
Training loss per 100 training steps: 1.7279322147369385
tr_accuracy =  0.6775732326932331
Validation loss per 100 evaluation steps: 1.0639467239379883
eval_accuracy =  0.670008685181426
Training loss per 100 training steps: 1.3504359722137451
tr_accuracy =  0.7045153594843753
Validation loss per 100 evaluation steps: 0.807121217250824
eval_accuracy =  0.7447594937754936
Training loss per 100 training steps: 0.8285091519355774
tr_accuracy =  0.763402140261997
Validation loss per 100 evaluation steps: 0.861348569393158
eval_accuracy =  0.796707273203281
Training loss per 100 training steps: 0.7084516882896423
tr_accuracy =  0.8165329308056415
Validation loss per 100 evaluation steps: 0.398968905210495
eval_accuracy =  0.8532711634082776
Training loss per 100 training steps: 0.6019333600997925
tr_accura



eval_accuracy =  0.9710348957328973
              precision    recall  f1-score   support

       B-apt       0.94      0.94      0.94        36
       B-cnd       0.85      0.85      0.85        41
       B-gnd       1.00      1.00      1.00        19
       B-inc       1.00      1.00      1.00        18
       B-loc       0.92      0.94      0.93        78
       B-spy       0.97      0.99      0.98        73
       B-srt       1.00      0.98      0.99        42
       B-trt       0.90      0.96      0.93        54
       B-vtp       1.00      1.00      1.00        12
       I-apt       1.00      0.89      0.94        27
       I-cnd       0.88      0.64      0.74        33
       I-inc       0.83      1.00      0.91        10
       I-loc       0.91      0.83      0.87        24
       I-spy       0.90      0.95      0.93        20
       I-srt       1.00      1.00      1.00        16
       I-trt       0.94      0.97      0.95        31
       I-vtp       1.00      1.00      1.00  

In [29]:
sentence = "میتونی برام یک دکتر آقای مامایی برای زایمان طبیعی زنم در خیام مشهد معرفی کنی که قیمت ویزیت مناسب داشته باشه و رازدار باشه و کمتر از ۳۵ دقیقه معطل بشم ؟ برای بعد ۲۰ شهریور و بیمه درمان تکمیلی هستم ."
str_rep = predict(sentence, model, tokenizer, id2label, device, True)
print(str_rep, sep="\n")

Word-Level Predictions with Top 5 Labels:
Word: میتونی
  Label: O, Score: 12.4839
  Label: B-cnd, Score: 3.0922
  Label: B-loc, Score: 2.4632
  Label: B-trt, Score: 1.0438
  Label: I-apt, Score: 0.8055

Word: برام
  Label: O, Score: 12.7292
  Label: B-cnd, Score: 3.1873
  Label: B-loc, Score: 2.6843
  Label: B-trt, Score: 1.0688
  Label: I-apt, Score: 0.8031

Word: یک
  Label: O, Score: 12.6431
  Label: B-cnd, Score: 3.0606
  Label: B-loc, Score: 2.5046
  Label: B-trt, Score: 1.0578
  Label: I-apt, Score: 0.8356

Word: دکتر
  Label: O, Score: 12.4559
  Label: B-cnd, Score: 3.2479
  Label: B-loc, Score: 2.5353
  Label: B-trt, Score: 1.1074
  Label: B-srt, Score: 0.5578

Word: اقای
  Label: B-gnd, Score: 6.3486
  Label: I-trt, Score: 3.5339
  Label: B-vtp, Score: 3.4481
  Label: B-trt, Score: 2.8411
  Label: I-srt, Score: 2.6453

Word: مامایی
  Label: B-spy, Score: 8.0290
  Label: B-srt, Score: 3.8382
  Label: B-cnd, Score: 3.2573
  Label: I-cnd, Score: 2.9115
  Label: I-spy, Score: 2.38

In [None]:
sentence = "برای گوش درد پسرم به یک متخصص شنوایی سنجی اصفهانی هستم که ویزیت غیر حضوری داشته باشه و اولین نوبت آن در ۲۵ مرداد باشد و بیمه دی هستم ."
str_rep = predict(sentence, model, tokenizer, id2label, device, True)
print(str_rep, sep="\n")

In [None]:
if ANALYSIS:
    for i, d in test_dataset.iterrows():
        str_rep = predict(d['sentence'], model, tokenizer, id2label, device)
        print(str_rep, sep="\n")
        print(d['label'])