In [3]:
import json
import re

# 将数据转为  BIO 标注形式
def dimension_label(path, save_path, labels_path=None):
    label_dict = ['O']
    with open(save_path, "w", encoding="utf-8") as w:
        #写入模式
        with open(path, "r", encoding="utf-8") as r:
            for line in r:
                # print(line)
                line = json.loads(line)
                text = line['text']
                label = line['label']
                text_label = ['O'] * len(text)
                for label_key in label:  # 遍历实体标签
                    B_label = "B-" + label_key
                    I_label = "I-" + label_key
                    if B_label not in label_dict:
                        label_dict.append(B_label)
                    if I_label not in label_dict:
                        label_dict.append(I_label)
                    label_item = label[label_key]
                    for entity in label_item:  # 遍历实体
                        position = label_item[entity]
                        '''
                        start = position[0][0]
                        end = position[0][1]
                        print(f"实体: {entity}, 起始位置: {start}, 结束位置: {end}, 文本: {text}, 文本标签长度: {len(text_label)}")
                        # 检查 start 和 end 是否在 text_label 的范围内
                        if start < 0 or end >= len(text_label):
                            print(f"错误：实体 {entity} 的索引超出范围。起始位置: {start}, 结束位置: {end}, 文本长度: {len(text_label)}")
                            continue
                        text_label[start] = B_label
                        for i in range(start + 1, end + 1):
                            text_label[i] = I_label
                        '''
                        start = position[0]
                        end = position[1]
                        text_label[start] = B_label
                        # print(start)
                        # print(end)
                        # print(len(text_label))
                        # print('-'*100)
                        for i in range(start + 1, end + 1):
                            text_label[i] = I_label
                line = {
                    "text": text,
                    "label": text_label
                }
                line = json.dumps(line, ensure_ascii=False)
                w.write(line + "\n")
                w.flush()

    if labels_path:  # 保存 label ，后续训练和预测时使用
        label_map = {}
        for i,label in enumerate(label_dict):
            label_map[label] = i
        with open(labels_path, "w", encoding="utf-8") as w:
            labels = json.dumps(label_map, ensure_ascii=False)
            w.write(labels + "\n")
            w.flush()
            


if __name__ == '__main__':
    '''
    path = "./data/NER/dev.json"
    save_path = "./data/NER/new/dev.json"
    dimension_label(path, save_path)

    path = "./data/NER/train.json"
    save_path = "./data/NER/new/train.json"
    dimension_label(path, save_path)'''
    

    data_path = "./data/NER/NERforRE.json"
    save_path = "./data/NER/new/NER_BIO.json"
    labels_path = "./data/NER/new/labels.json"
    
    
    with open(data_path,'r') as w:
        content = w.read()
    with open(data_path,'w') as w:
        content = re.sub("'",'"',content)
        w.write(content)
    
    dimension_label(data_path,save_path,labels_path)

In [4]:
from torch.utils.data import Dataset, DataLoader
import torch
import json


class NERDataset(Dataset):
    def __init__(self, tokenizer, file_path, labels_map, max_length=300):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.labels_map = labels_map

        self.text_data = []
        self.label_data = []
        with open(file_path, "r", encoding="utf-8") as r:
            for line in r:
                line = json.loads(line)
                text = line['text']
                label = line['label']
                self.text_data.append(text)
                self.label_data.append(label)

    def __len__(self):
        return len(self.text_data)

    def __getitem__(self, idx):
        text = self.text_data[idx]
        labels = self.label_data[idx]

        # 使用分词器对句子进行处理
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )

        input_ids = inputs['input_ids'].squeeze()
        attention_mask = inputs['attention_mask'].squeeze()

        # 将标签转换为数字编码
        label_ids = [self.labels_map[l] for l in labels]

        if len(label_ids) > self.max_length:
            label_ids = label_ids[0:self.max_length]

        if len(label_ids) < self.max_length:
            # 标签填充到最大长度
            label_ids.extend([0] * (self.max_length - len(label_ids)))

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.LongTensor(label_ids)
        }

In [5]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForTokenClassification
#from ner_datasets import NERDataset
from tqdm import tqdm
import json
import time, sys
import numpy as np
from sklearn.metrics import f1_score
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split


writer0 = SummaryWriter(log_dir = './output/NER/output')

def extract_non_zero_sublists(lst):
    non_zero_sublists = []
    sublist = []

    for i in range(len(lst)):
        if lst[i] != 0:
            sublist.append(i)
        else:
            if sublist: 
                non_zero_sublists.append(sublist)
            sublist = []  
    if sublist:
        non_zero_sublists.append(sublist)

    return non_zero_sublists


def train(epoch, model, device, loader, optimizer, gradient_accumulation_steps):
    model.train()
    time1 = time.time()
    loss = 0
    for index, data in enumerate(tqdm(loader, file=sys.stdout, desc="Train Epoch: " + str(epoch))):
        input_ids = data['input_ids'].to(device)
        attention_mask = data['attention_mask'].to(device)
        labels = data['labels'].to(device)

        outputs = model(
            input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        loss = outputs.loss
        # 反向传播，计算当前梯度
        loss.backward()
        loss += loss.item()
        # 梯度累积步数
        if (index % gradient_accumulation_steps == 0 and index != 0) or index == len(loader) - 1:
            # 更新网络参数
            optimizer.step()
            # 清空过往梯度
            optimizer.zero_grad()

        # 100轮打印一次 loss
        
        if index % 100 == 0 or index == len(loader) - 1:
            time2 = time.time()
            tqdm.write(
                f"{index}, epoch: {epoch} -loss: {str(loss)} ; each step's time spent: {(str(float(time2 - time1) / float(index + 0.0001)))}")
    writer0.add_scalar('train_loss', loss.item()/len(loader), epoch)


def validate(model, device, loader,epoch):
    model.eval()
    acc = 0
    f1 = 0
    valid_loss = 0

    with torch.no_grad():
        for step, data in enumerate(tqdm(loader, file=sys.stdout, desc="Validation Data")):
            input_ids = data['input_ids'].to(device)
            attention_mask = data['attention_mask'].to(device)
            labels = data['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            valid_loss += loss.item()

            _, predicted_labels = torch.max(outputs.logits, dim=2)

            predicted_labels = predicted_labels.detach().cpu().numpy().tolist()
            true_labels = labels.detach().cpu().numpy().tolist()

            predicted_labels_flat = [label for sublist in predicted_labels for label in sublist]
            true_labels_flat = [label for sublist in true_labels for label in sublist]
            

            # accuracy = (np.array(predicted_labels_flat) == np.array(true_labels_flat)).mean()
            accuracy = 0
            true_index = extract_non_zero_sublists(true_labels_flat)
            
            if len(true_index)==0:
                acc+=1
                print('----passed one error----')
                continue

            for items in true_index:
                flag = 1
                for i in items:
                    if predicted_labels_flat[i] != true_labels_flat[i]:
                        flag = 0
                        break
                if flag == 1:
                    accuracy += 1
            accuracy /= len(true_index)

            # print("----predicted----")
            # print(predicted_labels_flat)
            # print("----true----")
            # print(true_labels_flat)
            # print("----acc----")
            # print(accuracy)
            acc += accuracy
            f1score = f1_score(true_labels_flat, predicted_labels_flat, average='macro')
            f1 += f1score

    valid_loss /= len(loader)
    acc /= len(loader)
    f1 /= len(loader)

    # 将指标写入TensorBoard
    writer0.add_scalar('valid_loss', valid_loss, epoch)
    writer0.add_scalar('acc', acc, epoch)
    writer0.add_scalar('f1', f1, epoch)

    return acc, f1


def main():
    labels_path = "./data/NER/new/labels.json"
    model_name = './model/roberta/'
    '''
    train_json_path = "./data/NER/new/train.json"
    val_json_path = "./data/NER/new/dev.json"
    '''
    data_json_path = './data/NER/new/NER_BIO.json'
    max_length = 300
    epochs = 100
    batch_size = 1
    lr = 2e-5
    gradient_accumulation_steps = 16
    model_output_dir = "./output/NER"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 加载label
    with open(labels_path, "r", encoding="utf-8") as r:
        labels_map = json.loads(r.read())

    # 加载分词器和模型
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=len(labels_map))
    model.to(device)

    # 加载数据
    '''
    print("Start Load Train Data...")
    train_dataset = NERDataset(tokenizer, train_json_path, labels_map, max_length)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    print("Start Load Validation Data...")
    val_dataset = NERDataset(tokenizer, val_json_path, labels_map, max_length)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)'''

    data = NERDataset(tokenizer, data_json_path, labels_map, max_length)
    train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(data,batch_size = batch_size, shuffle=True)

    # 定义优化器和损失函数
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    print("Start Training...")
    best_acc = 0.0
    for epoch in range(epochs):
        train(epoch, model, device, test_loader, optimizer, gradient_accumulation_steps)
        print("Start Validation...")
        acc, f1 = validate(model, device, test_loader,epoch)
        print(f"Validation : acc: {acc} , f1: {f1}")

        if best_acc < acc: # 保存准确率最高的模型
            print("Save Model To ", model_output_dir)
            model.save_pretrained(model_output_dir)
            tokenizer.save_pretrained(model_output_dir)
            best_acc = acc
        

if __name__ == '__main__':
    main()
    writer0.close()



2024-04-10 00:21:08.060034: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-10 00:21:08.878254: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at ./model/roberta/ and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Start Training...
0, epoch: 0 -loss: tensor(7.7149, device='cuda:0', grad_fn=<AddBackward0>) ; each step's time spent: 13796.501159667969
100, epoch: 0 -loss: tensor(1.4496, device='cuda:0', grad_fn=<AddBackward0>) ; each step's time spent: 0.06796803830179812
200, epoch: 0 -loss: tensor(0.8947, device='cuda:0', grad_fn=<AddBackward0>) ; each step's time spent: 0.060187452740135934
242, epoch: 0 -loss: tensor(0.5422, device='cuda:0', grad_fn=<AddBackward0>) ; each step's time spent: 0.05911853116789221
Train Epoch: 0: 100%|██████████| 243/243 [00:14<00:00, 16.97it/s]
Start Validation...
Validation Data: 100%|██████████| 243/243 [00:03<00:00, 65.95it/s]
Validation : acc: 0.0 , f1: 0.15697252316965712
0, epoch: 1 -loss: tensor(0.7076, device='cuda:0', grad_fn=<AddBackward0>) ; each step's time spent: 514.9459838867188
100, epoch: 1 -loss: tensor(0.7236, device='cuda:0', grad_fn=<AddBackward0>) ; each step's time spent: 0.05288660959660573
200, epoch: 1 -loss: tensor(1.2118, device='cuda:

KeyboardInterrupt: 

In [7]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
import torch
import json


# 解析实体
def post_processing(outputs, text, labels_map):
    _, predicted_labels = torch.max(outputs.logits, dim=2)

    predicted_labels = predicted_labels.detach().cpu().numpy()

    predicted_tags = [labels_map[label_id] for label_id in predicted_labels[0]]

    result = {}
    entity = ""
    type = ""
    for index, word_token in enumerate(text):
        tag = predicted_tags[index]
        if tag.startswith("B-"):
            type = tag.split("-")[1]
            if entity:
                if type not in result:
                    result[type] = []
                result[type].append(entity)
            entity = word_token
        elif tag.startswith("I-"):
            type = tag.split("-")[1]
            if entity:
                entity += word_token
        else:
            if entity:
                if type not in result:
                    result[type] = []
                result[type].append(entity)
            entity = ""
    return result

def main():
    labels_path = "./data/NER/new/labels.json"
    model_name = './output/NER/'
    max_length = 300
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 加载label
    labels_map = {}
    with open(labels_path, "r", encoding="utf-8") as r:
        labels = json.loads(r.read())
        for label in labels:
            label_id = labels[label]
            labels_map[label_id] = label

    # 加载分词器和模型
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=len(labels_map))
    model.to(device)

    
    while True:
        text = input("请输入：")
        if not text or text == '':
            continue
        if text == 'q':
            break

        encoded_input = tokenizer(text, padding="max_length", truncation=True, max_length=max_length)
        input_ids = torch.tensor([encoded_input['input_ids']]).to(device)
        attention_mask = torch.tensor([encoded_input['attention_mask']]).to(device)

        outputs = model(input_ids, attention_mask=attention_mask)
        result = post_processing(outputs, text, labels_map)
        #print(text)
        print(result)
    



#     # 加载数据
#     print("Start Load Test Data...")
#     with open("./data/NER/NER_data.json", "r", encoding="utf-8") as r:
#         test_data = r.read().split('\n')
        
#     with open("./NER_output.txt",'w') as w:
#         for i in test_data:

#             if len(i) == 0:
#                 continue

#             data_dict = json.loads(i)

#             encoded_input = tokenizer(data_dict['text'], padding="max_length", truncation=True, max_length=max_length)
#             input_ids = torch.tensor([encoded_input['input_ids']]).to(device)
#             attention_mask = torch.tensor([encoded_input['attention_mask']]).to(device)

#             outputs = model(input_ids, attention_mask=attention_mask)
#             result = post_processing(outputs, data_dict['text'], labels_map)

# #             print(data_dict['text'])
# #             print(f'正确答案：',end=' ')
# #             for key in data_dict['label'].keys():
# #                 # print(f'{key}',end = ' ')
# #                 # print(f"{list(data_dict['label'][key].keys())[0]}", end = ' ')
# #                 print({key : list(data_dict['label'][key].keys())},end = ' ')
# #             print('')

# #             print(f'输出：{result}')
# #             print('-'*10)
#             w.write(data_dict['text'])
#             w.write('\n')
#             w.write(f'正确答案：')
#             for key in data_dict['label'].keys():
#                 # print(f'{key}',end = ' ')
#                 # print(f"{list(data_dict['label'][key].keys())[0]}", end = ' ')
#                 ans = {key : list(data_dict['label'][key].keys())}
#                 w.write(str(ans))
#                 w.write(' ')
#             w.write('\n')

#             w.write(f'输出：{result}')
#             w.write('\n')
#             w.write('-'*10)
#             w.write('\n')
#     print('----DONE----')

if __name__ == '__main__':
    main()



请输入： 李白城主在天上飞


{'思想': ['李白']}


请输入： 李白说‘飞流直下三千尺’


{'人名': ['李白'], '思想': ['飞流直下三千尺']}


KeyboardInterrupt: Interrupted by user

In [3]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForTokenClassification
#from ner_datasets import NERDataset
from tqdm import tqdm
import json
import time, sys
import numpy as np
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split


class NERDataset(Dataset):
    def __init__(self, tokenizer, file_path, labels_map, max_length=300):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.labels_map = labels_map

        self.text_data = []
        self.label_data = []
        with open(file_path, "r", encoding="utf-8") as r:
            for line in r:
                line = json.loads(line)
                text = line['text']
                label = line['label']
                self.text_data.append(text)
                self.label_data.append(label)

    def __len__(self):
        return len(self.text_data)

    def __getitem__(self, idx):
        text = self.text_data[idx]
        labels = self.label_data[idx]

        # 使用分词器对句子进行处理
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )

        input_ids = inputs['input_ids'].squeeze()
        attention_mask = inputs['attention_mask'].squeeze()

        # 将标签转换为数字编码
        label_ids = [self.labels_map[l] for l in labels]

        if len(label_ids) > self.max_length:
            label_ids = label_ids[0:self.max_length]

        if len(label_ids) < self.max_length:
            # 标签填充到最大长度
            label_ids.extend([0] * (self.max_length - len(label_ids)))

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.LongTensor(label_ids)
        }

def post_processing(outputs, text, labels_map):
    _, predicted_labels = torch.max(outputs.logits, dim=2)

    predicted_labels = predicted_labels.detach().cpu().numpy()

    predicted_tags = [labels_map[label_id] for label_id in predicted_labels[0]]

    result = {}
    entity = ""
    type = ""
    for index, word_token in enumerate(text):
        tag = predicted_tags[index]
        if tag.startswith("B-"):
            type = tag.split("-")[1]
            if entity:
                if type not in result:
                    result[type] = []
                result[type].append(entity)
            entity = word_token
        elif tag.startswith("I-"):
            type = tag.split("-")[1]
            if entity:
                entity += word_token
        else:
            if entity:
                if type not in result:
                    result[type] = []
                result[type].append(entity)
            entity = ""
    return result

def extract_non_zero_sublists(lst):
    non_zero_sublists = []
    sublist = []

    for i in range(len(lst)):
        if lst[i] != 0:
            sublist.append(i)
        else:
            if sublist:  # Check if the sublist is not empty
                non_zero_sublists.append(sublist)
            sublist = []  # Reset the sublist

    # Check for a non-zero sublist at the end of the list
    if sublist:
        non_zero_sublists.append(sublist)

    return non_zero_sublists


def validate(model, device, loader,epoch):
    model.eval()
    acc = 0
    f1 = 0
    recall = 0
    precision = 0
    valid_loss = 0

    with torch.no_grad():
        for step, data in enumerate(tqdm(loader, file=sys.stdout, desc="Validation Data")):
            input_ids = data['input_ids'].to(device)
            attention_mask = data['attention_mask'].to(device)
            labels = data['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            valid_loss += loss.item()

            _, predicted_labels = torch.max(outputs.logits, dim=2)

            predicted_labels = predicted_labels.detach().cpu().numpy().tolist()
            true_labels = labels.detach().cpu().numpy().tolist()

            predicted_labels_flat = [label for sublist in predicted_labels for label in sublist]
            true_labels_flat = [label for sublist in true_labels for label in sublist]
            

            # accuracy = (np.array(predicted_labels_flat) == np.array(true_labels_flat)).mean()
            accuracy = 0
            true_index = extract_non_zero_sublists(true_labels_flat)
            
            # if(True):
            #     # print(true_labels_flat)
            #     # print(predicted_labels_flat)
            #     # result = post_processing(outputs, data, labels_map)
            #     print(data['input_ids'])
            #     sys.exit(0)
            
            if len(true_index)==0:
                acc+=1
                print('----passed one error----')
                continue

            for items in true_index:
                flag = 1
                for i in items:
                    if predicted_labels_flat[i] != true_labels_flat[i]:
                        flag = 0
                        break
                if flag == 1:
                    accuracy += 1
            
            accuracy /= len(true_index)
            acc += accuracy
            f1score = f1_score(true_labels_flat, predicted_labels_flat, average='macro')
            r_score = recall_score(true_labels_flat, predicted_labels_flat, average='macro')
            p_score = precision_score(true_labels_flat, predicted_labels_flat, average='macro')
            f1 += f1score
            recall += r_score
            precision += p_score

    valid_loss /= len(loader)
    acc /= len(loader)
    f1 /= len(loader)
    recall /= len(loader)
    precision /= len(loader)

    return acc, f1, recall, precision


def main():
    labels_path = "./data/NER/new/labels.json"
    # model_name = './model/roberta/'
    model_name = './output/NER'
    '''
    train_json_path = "./data/NER/new/train.json"
    val_json_path = "./data/NER/new/dev.json"
    '''
    data_json_path = './data/NER/new/NER_BIO.json'
    max_length = 300
    epochs = 1
    batch_size = 1
    lr = 1e-5
    gradient_accumulation_steps = 16
    model_output_dir = "./output/NER"
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # 加载label
    with open(labels_path, "r", encoding="utf-8") as r:
        labels_map = json.loads(r.read())

    # 加载分词器和模型
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=len(labels_map))
    model.to(device)

    data = NERDataset(tokenizer, data_json_path, labels_map, max_length)
    train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(data, batch_size=batch_size, shuffle=True)

    # 定义优化器和损失函数
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    print("Start Training...")
    best_acc = 0.0
    for epoch in range(epochs):
        # train(epoch, model, device, train_loader, optimizer, gradient_accumulation_steps)
        print("Start Validation...")
        acc, f1, recall, precision = validate(model, device, test_loader,epoch)
        print(f"Validation : acc: {acc} , f1: {f1} , recall: {recall} , precision: {precision}")
        

if __name__ == '__main__':
    main()



Start Training...
Start Validation...
Validation Data:  10%|▉         | 24/250 [00:00<00:03, 59.25it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Data:  14%|█▍        | 36/250 [00:00<00:03, 58.22it/s]----passed one error----
Validation Data:  17%|█▋        | 43/250 [00:00<00:03, 59.11it/s]

  _warn_prf(average, modifier, msg_start, len(result))


Validation Data:  22%|██▏       | 55/250 [00:00<00:03, 59.42it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Data:  30%|██▉       | 74/250 [00:01<00:02, 59.72it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Data:  40%|███▉      | 99/250 [00:01<00:02, 59.68it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Data:  65%|██████▍   | 162/250 [00:02<00:01, 59.77it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Data:  81%|████████  | 203/250 [00:03<00:00, 59.74it/s]

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Validation Data: 100%|██████████| 250/250 [00:04<00:00, 59.51it/s]
Validation : acc: 0.9643333333333333 , f1: 0.9429869934741784 , recall: 0.9762574384365971 , precision: 0.9265939293813006


  _warn_prf(average, modifier, msg_start, len(result))
