In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)

from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from peft import LoraConfig
from langchain import PromptTemplate
from IPython.display import Markdown, display

[2024-01-09 22:57:13,566] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:

def read_file(file_path, is_train=False):
    """Read data file of given path.

    :param file_path: path of data file.
    :param is_train: flag to indicate if it's a training file.
    :return: list of sentence, list of slot and list of intent.
    """
    texts, slots, intents, token_intents = [], [], [], []
    text, slot, token_intent = [], [], []

    with open(file_path, 'r', encoding="utf8") as fr:
        for line in fr.readlines():
            items = line.strip().split()

            if len(items) == 1:
                texts.append(' '.join(text))
                slots.append(slot)
                if is_train:
                    token_intents.append(token_intent)
                if "/" not in items[0]:
                    intents.append(items[0])
                else:
                    new = items[0].split("/")
                    intents.append(new[1])

                # clear buffer lists.
                text, slot, token_intent = [], [], []

            elif len(items) >= 2:
                text.append(items[0].strip())
                slot.append(items[1].strip())
                if is_train:
                    token_intent.append(items[2].strip())

    if is_train:
        return texts, slots, intents, token_intents
    else:
        return texts, slots, intents
    
def format_data(texts, slots, intents, token_intents=None):
    formatted_data = []

    for text, slot, intent, token_intent in zip(texts, slots, intents, token_intents if token_intents else [None] * len(texts)):
        words = text.split()
        slot_dict = {}
        sub_utterance_dict = {}
        current_slot = None
        current_value = []
        current_sub = []
        current_intent = None

        # 处理slots，转换为字典格式
        for word, slot_type in zip(words, slot):
            if slot_type.startswith("B-"):
                if current_slot and current_value:
                    slot_dict[current_slot] = ' '.join(current_value)
                current_slot = slot_type[2:]
                current_value = [word]
            elif slot_type.startswith("I-") and current_slot == slot_type[2:]:
                current_value.append(word)
            else:
                if current_slot and current_value:
                    slot_dict[current_slot] = ' '.join(current_value)
                current_slot = None
                current_value = []

        if current_slot and current_value:
            slot_dict[current_slot] = ' '.join(current_value)

        # 处理token_intents，生成子句字典
        if token_intent:
            for word, ti in zip(words, token_intent):
                if ti != "SEP":
                    current_sub.append(word)
                    current_intent = ti
                else:
                    if current_sub and current_intent:
                        sub_utterance_dict[current_intent] = ' '.join(current_sub)
                    current_sub = []
                    current_intent = None

            if current_sub and current_intent:
                sub_utterance_dict[current_intent] = ' '.join(current_sub)
        else:
            sub_utterance_dict = None

        formatted_example = {
            'utterance': text,
            'sub_utterance': sub_utterance_dict,
            'intent(s)': intent,
            'slots': ' '.join(slot),
            'entity_slots': slot_dict
        }
        formatted_data.append(formatted_example)

    return formatted_data


In [3]:
from datasets import load_dataset

# 加载训练、验证和测试数据
data_files = {
    "train": "/home/shangjian/code/Research/Multimodal & LLM/SLM/data/MixATIS_clean/train.json",
    "validation": "/home/shangjian/code/Research/Multimodal & LLM/SLM/data/MixATIS_clean/dev.json",
    "test": "/home/shangjian/code/Research/Multimodal & LLM/SLM/data/MixATIS_clean/test.json"
}

# 加载数据集
train_dataset = load_dataset('json', data_files=data_files['train'])
dev_dataset =  load_dataset('json', data_files=data_files['validation'])
test_dataset =  load_dataset('json', data_files=data_files['test'])
# 查看数据集结构
print(train_dataset,dev_dataset,test_dataset)
print(len(train_dataset['train']))

DatasetDict({
    train: Dataset({
        features: ['sub_utterance', 'intent(s)', 'slots', 'utterance', 'entity_slots'],
        num_rows: 13162
    })
}) DatasetDict({
    train: Dataset({
        features: ['sub_utterance', 'intent(s)', 'slots', 'utterance', 'entity_slots'],
        num_rows: 759
    })
}) DatasetDict({
    train: Dataset({
        features: ['sub_utterance', 'intent(s)', 'slots', 'utterance', 'entity_slots'],
        num_rows: 828
    })
})
13162


In [4]:
train_template = """
[Instruction]
you are an expert of spoken language understanding, I need you to perform intent detection and slot filling for given utterance. \n

[Input]
utterance: {utterance} 

[Response]
intent: {intent}
entity_slot: {entity_slots}
"""

test_template =  """
[Instruction]
you are an expert of spoken language understanding, I need you to perform intent detection and slot filling for given utterance. \n

[Input]
utterance: {utterance} 

[Response]
intent: {intent}
entity_slot: {entity_slots}
"""

train_prompt = PromptTemplate(template=train_template, input_variables=['utterance'  'intent' 'entity_slots'])
test_prompt = PromptTemplate(template=test_template, input_variables=['utterance' 'intent' 'entity_slots'])

def format_text(example, is_train=True):
    if is_train:
        return train_prompt.format(utterance=example['utterance'], 
                                #    sub_sentence={k: v for k, v in example['sub_utterance'].items() if v is not None},
                                   intent=example['intent(s)'],
                                   entity_slots={k: v for k, v in example['entity_slots'].items() if v is not None})
    else:
        return test_prompt.format(utterance=example['utterance'], 
                                  intent=example['intent(s)'],
                                  entity_slots={k: v for k, v in example['entity_slots'].items() if v is not None})

# 应用format_text到数据集
train_dataset = train_dataset.map(lambda x: {"formatted_text": format_text(x, is_train=True)})
dev_dataset = dev_dataset.map(lambda x: {"formatted_text": format_text(x, is_train=False)})
test_dataset = test_dataset.map(lambda x: {"formatted_text": format_text(x, is_train=False)})

# 查看处理后的数据集
print(train_dataset['train']['formatted_text'][0])
    


[Instruction]
you are an expert of spoken language understanding, I need you to perform intent detection and slot filling for given utterance. 


[Input]
utterance: define airline ua , names of airports and also show me city served both by nationair and canadian airlines international 

[Response]
intent: atis_abbreviation#atis_airport#atis_city
entity_slot: {'airline_code': 'ua', 'airline_name': 'canadian airlines international'}



In [5]:
model_id = "/home/shangjian/code/Research/Multimodal & LLM/dataroot/models/Mistral/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

In [6]:
qlora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [7]:
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map='auto'
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
bs= 1
training_args = TrainingArguments(
    output_dir="./save/SFT/{}".format(model_id.split('/')[-1]) , 
    per_device_train_batch_size=bs,
    learning_rate=2e-4,
    logging_steps=50,
    save_steps=200,
    # num_train_epochs= 1,
    logging_strategy="steps",
    # max_steps=int(len(train_dataset['train'])/ bs),
    max_steps=200,
    optim="paged_adamw_8bit",
    fp16=True,
    run_name="baseline-{}".format(model_id.split('/')[-1]),
    remove_unused_columns=False
)

In [9]:
supervised_finetuning_trainer = SFTTrainer(
    base_model,
    train_dataset=train_dataset["train"],
    args=training_args,
    tokenizer=tokenizer,
    peft_config=qlora_config,
    dataset_text_field="formatted_text",
    max_seq_length=4096,
)



In [10]:
supervised_finetuning_trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m520[0m. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111259324890044, max=1.0)…

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
50,0.9782
100,0.5929
150,0.4935
200,0.4463


Checkpoint destination directory ./save/SFT/Mistral-7B-Instruct-v0.1/checkpoint-200 already exists and is non-empty.Saving will proceed but saved results may be invalid.


TrainOutput(global_step=200, training_loss=0.6277337551116944, metrics={'train_runtime': 87.2992, 'train_samples_per_second': 2.291, 'train_steps_per_second': 2.291, 'total_flos': 1319195677040640.0, 'train_loss': 0.6277337551116944, 'epoch': 0.02})

In [11]:
save_dir = "/home/shangjian/code/Research/Multimodal & LLM/SLM/save/model/" + model_id.split('/')[-1] 
supervised_finetuning_trainer.save_model(save_dir)

In [1]:
import re

def parse_generated_text(generated_text):
    # 使用正则表达式匹配意图和实体槽位
    intent_pattern = r"intent: ([\w#]+)"
    entity_slots_pattern = r"entity_slot: \{([^}]+)\}"
    utterance_pattern = r"utterance: (.+)"

    # 提取意图
    intent_match = re.search(intent_pattern, generated_text)
    intents = intent_match.group(1).split('#') if intent_match else []

    # 提取实体槽位
    entity_slots_match = re.search(entity_slots_pattern, generated_text)
    entity_slots = {}
    if entity_slots_match:
        slots_str = entity_slots_match.group(1)
        for slot_str in slots_str.split(', '):
            if ':' in slot_str:
                key, value = slot_str.split(': ')
                entity_slots[key.strip("'")] = value.strip("'")

    # 提取utterance
    utterance_match = re.search(utterance_pattern, generated_text)
    utterance = utterance_match.group(1).strip() if utterance_match else ""

    return intents, entity_slots, utterance

def convert_dict_to_slots(entity_slots, sentence):
    words = sentence.split()
    slot_sequence = ['O'] * len(words)  # 初始化槽位序列为全'O'

    for slot_type, slot_value in entity_slots.items():
        if slot_value:
            slot_words = slot_value.split()
            start_index = find_sublist_index(slot_words, words)

            if start_index != -1:
                # 标记B类型槽位
                slot_sequence[start_index] = f"B-{slot_type}"
                # 标记随后的I类型槽位
                for i in range(start_index + 1, start_index + len(slot_words)):
                    slot_sequence[i] = f"I-{slot_type}"

    return slot_sequence

def find_sublist_index(sublist, lst):
    for i in range(len(lst) - len(sublist) + 1):
        if sublist == lst[i:i + len(sublist)]:
            return i
    return -1

def get_multi_acc(pred_output, golds):
    acc = 0
    total = 0
    for p, c in zip(pred_output, golds):
        # print(p ,'<=>', c , c == p)
        if set(p) == set(c):
            acc += 1
        total += 1
    return acc / total


# compute f1 score is modified from conlleval.pl
def __startOfChunk(prevTag, tag, prevTagType, tagType, chunkStart=False):
    if prevTag == 'B' and tag == 'B':
        chunkStart = True
    if prevTag == 'I' and tag == 'B':
        chunkStart = True
    if prevTag == 'O' and tag == 'B':
        chunkStart = True
    if prevTag == 'O' and tag == 'I':
        chunkStart = True

    if prevTag == 'E' and tag == 'E':
        chunkStart = True
    if prevTag == 'E' and tag == 'I':
        chunkStart = True
    if prevTag == 'O' and tag == 'E':
        chunkStart = True
    if prevTag == 'O' and tag == 'I':
        chunkStart = True

    if tag != 'O' and tag != '.' and prevTagType != tagType:
        chunkStart = True
    return chunkStart


def __endOfChunk(prevTag, tag, prevTagType, tagType, chunkEnd=False):
    if prevTag == 'B' and tag == 'B':
        chunkEnd = True
    if prevTag == 'B' and tag == 'O':
        chunkEnd = True
    if prevTag == 'I' and tag == 'B':
        chunkEnd = True
    if prevTag == 'I' and tag == 'O':
        chunkEnd = True

    if prevTag == 'E' and tag == 'E':
        chunkEnd = True
    if prevTag == 'E' and tag == 'I':
        chunkEnd = True
    if prevTag == 'E' and tag == 'O':
        chunkEnd = True
    if prevTag == 'I' and tag == 'O':
        chunkEnd = True

    if prevTag != 'O' and prevTag != '.' and prevTagType != tagType:
        chunkEnd = True
    return chunkEnd


def __splitTagType(tag):
    s = tag.split('-')
    if len(s) > 2 or len(s) == 0:
        raise ValueError('tag format wrong. it must be B-xxx.xxx')
    if len(s) == 1:
        tag = s[0]
        tagType = ""
    else:
        tag = s[0]
        tagType = s[1]
    return tag, tagType


def computeF1Score(correct_slots, pred_slots):
    correctChunk = {}
    correctChunkCnt = 0.0
    foundCorrect = {}
    foundCorrectCnt = 0.0
    foundPred = {}
    foundPredCnt = 0.0
    correctTags = 0.0
    tokenCount = 0.0
    for correct_slot, pred_slot in zip(correct_slots, pred_slots):
        inCorrect = False
        lastCorrectTag = 'O'
        lastCorrectType = ''
        lastPredTag = 'O'
        lastPredType = ''
        for c, p in zip(correct_slot, pred_slot):
            correctTag, correctType = __splitTagType(c)
            predTag, predType = __splitTagType(p)

            if inCorrect == True:
                if __endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
                 __endOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
                 (lastCorrectType == lastPredType):
                    inCorrect = False
                    correctChunkCnt += 1.0
                    if lastCorrectType in correctChunk:
                        correctChunk[lastCorrectType] += 1.0
                    else:
                        correctChunk[lastCorrectType] = 1.0
                elif __endOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) != \
                 __endOfChunk(lastPredTag, predTag, lastPredType, predType) or \
                 (correctType != predType):
                    inCorrect = False

            if __startOfChunk(lastCorrectTag, correctTag, lastCorrectType, correctType) == True and \
             __startOfChunk(lastPredTag, predTag, lastPredType, predType) == True and \
             (correctType == predType):
                inCorrect = True

            if __startOfChunk(lastCorrectTag, correctTag, lastCorrectType,
                              correctType) == True:
                foundCorrectCnt += 1
                if correctType in foundCorrect:
                    foundCorrect[correctType] += 1.0
                else:
                    foundCorrect[correctType] = 1.0

            if __startOfChunk(lastPredTag, predTag, lastPredType,
                              predType) == True:
                foundPredCnt += 1.0
                if predType in foundPred:
                    foundPred[predType] += 1.0
                else:
                    foundPred[predType] = 1.0

            if correctTag == predTag and correctType == predType:
                correctTags += 1.0

            tokenCount += 1.0

            lastCorrectTag = correctTag
            lastCorrectType = correctType
            lastPredTag = predTag
            lastPredType = predType

        if inCorrect == True:
            correctChunkCnt += 1.0
            if lastCorrectType in correctChunk:
                correctChunk[lastCorrectType] += 1.0
            else:
                correctChunk[lastCorrectType] = 1.0

    if foundPredCnt > 0:
        precision = 1.0 * correctChunkCnt / foundPredCnt
    else:
        precision = 0

    if foundCorrectCnt > 0:
        recall = 1.0 * correctChunkCnt / foundCorrectCnt
    else:
        recall = 0

    if (precision + recall) > 0:
        f1 = (2.0 * precision * recall) / (precision + recall)
    else:
        f1 = 0

    return f1, precision, recall


def semantic_acc(pred_slot, real_slot, pred_intent, real_intent):
    """
	Compute the accuracy based on the whole predictions of
	given sentence, including slot and intent.
	"""
    total_count, correct_count = 0.0, 0.0
    for p_slot, r_slot, p_intent, r_intent in zip(pred_slot, real_slot,
                                                  pred_intent, real_intent):

        if p_slot == r_slot and set(p_intent) == set(r_intent):
            correct_count += 1.0
        total_count += 1.0

    return 1.0 * correct_count / total_count


In [2]:
from datasets import load_dataset
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)

from trl import SFTTrainer, DataCollatorForCompletionOnlyLM
from peft import LoraConfig
from langchain import PromptTemplate
from IPython.display import Markdown, display

# 加载训练、验证和测试数据
data_files = {
    "train": "/home/shangjian/code/Research/Multimodal & LLM/SLM/data/MixATIS_clean/train.json",
    "validation": "/home/shangjian/code/Research/Multimodal & LLM/SLM/data/MixATIS_clean/dev.json",
    "test": "/home/shangjian/code/Research/Multimodal & LLM/SLM/data/MixATIS_clean/test.json"
}

test_dataset =  load_dataset('json', data_files=data_files['test'])


test_template =  """
[Instruction]
you are an expert of spoken language understanding, I need you to perform intent detection and slot filling for given utterance. \n

[Input]
utterance: {utterance} 

[Response]
intent: {intent}
entity_slot: {entity_slots}
"""

test_prompt = PromptTemplate(template=test_template, input_variables=['utterance' 'intent' 'entity_slots'])

def format_text(example, is_train=True):
    if is_train:
        return train_prompt.format(utterance=example['utterance'], 
                                #    sub_sentence={k: v for k, v in example['sub_utterance'].items() if v is not None},
                                   intent=example['intent(s)'],
                                   entity_slots={k: v for k, v in example['entity_slots'].items() if v is not None})
    else:
        return test_prompt.format(utterance=example['utterance'], 
                                  intent=example['intent(s)'],
                                  entity_slots={k: v for k, v in example['entity_slots'].items() if v is not None})

# 应用format_text到数据集
test_dataset = test_dataset.map(lambda x: {"formatted_text": format_text(x, is_train=False)})


[2024-01-10 10:36:12,479] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [3]:
#  Restart kneral 
import torch

from tqdm import tqdm
from peft import AutoPeftModelForCausalLM 
from peft import PeftModel, LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
    GenerationConfig
)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)


model_id = '/home/shangjian/code/Research/Multimodal & LLM/dataroot/models/Mistral/Mistral-7B-Instruct-v0.1'
peft_path = "/home/shangjian/code/Research/Multimodal & LLM/SLM/save/model/" + model_id.split('/')[-1] 
# peft_path = '/data540/shangjian/Uni-MIS/llm/save/SFT/Mistral-7B-Instruct-v0.1/checkpoint-500'

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    use_cache=False,
    device_map='auto'
)

generation_config = GenerationConfig(
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=1, # beam search
    do_sample=True
)

# loading peft weight
model = PeftModel.from_pretrained(
    model,
    peft_path,
    torch_dtype=torch.float16,
)

model = model.bfloat16()
model.eval()

tokenizer.pad_token = "[PAD]"
tokenizer.padding_side = "left"

# 生成回复并计算评估指标
all_pred_intents = []
all_true_intents = []
all_pred_slots = []
all_true_slots = []

test_template =  """
[Instruction]
you are an expert of spoken language understanding, I need you to perform intent detection and slot filling for given utterance. \n

[Input]
utterance: {utterance} 

[Response]

"""
infer_batch_size = 2
texts = []
for i in range(0,len(test_dataset["train"])):
    texts.append(test_template.format(utterance=test_dataset["train"][i]["utterance"]))
    
with torch.no_grad():
    for i in tqdm(range(0, len(test_dataset["train"]), infer_batch_size), desc="Processing"):
        
        prompts  = texts[i:i + infer_batch_size]
        
        model_inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(model.device)

        generation_outputs = model.generate(
            **model_inputs,
            max_length=256,  # 或其他适当的最大长度
            return_dict_in_generate=True,
            output_scores=True,
            pad_token_id=tokenizer.eos_token_id
        )
        generated_sequences = generation_outputs.sequences.cpu()
        
        for idx, output in enumerate(tokenizer.batch_decode(generated_sequences, skip_special_tokens=True)):
            generated_text = output
            # 解析生成的文本以获取预测的意图和槽位
            # print(generated_text)
            pred_intents, pred_slots, _ = parse_generated_text(generated_text)
            
            # print("--------------------",pred_intents,pred_slots)
            
            true_intent, true_slots, utterance = parse_generated_text(test_dataset["train"][i+idx]['formatted_text'])
            pred_bio_slots = convert_dict_to_slots(pred_slots, utterance)

            # 将真实的entity_slots转换为BIO格式
            true_bio_slots = convert_dict_to_slots(true_slots,utterance)

            # 添加预测和真实的意图和槽位到列表
            all_pred_intents.append(pred_intents)
            all_true_intents.append(true_intent)
            all_pred_slots.append(pred_bio_slots)
            all_true_slots.append(true_bio_slots)
        
        
# 计算多意图准确率、槽位F1分数和语义准确率
# 使用之前定义的 get_multi_acc, computeF1Score 和 semantic_acc 函数
intent_acc = get_multi_acc(all_pred_intents, all_true_intents)
slot_score = computeF1Score(all_true_slots, all_pred_slots)
semantic_accuracy = semantic_acc(all_pred_slots, all_true_slots, all_pred_intents, all_true_intents)

# 打印评估指标
print(f"Intent Accuracy: {intent_acc}")
print(f"Slot_Score(f1, precision, recall): {slot_score}")
print(f"Semantic Accuracy: {semantic_accuracy}")

    

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Processing:   0%|          | 0/414 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Processing:   1%|▏         | 6/414 [04:44<5:12:42, 45.99s/it]

In [None]:
def parse_generated_text(generated_text):
    # 使用正则表达式匹配意图和实体槽位
    intent_pattern = r"intent\(s\): ([\w#]+)"
    entity_slots_pattern = r"entity_slots: \{([^}]+)\}"
    utterance_pattern = r"utterance: (.+)"

    # 提取意图
    intent_match = re.search(intent_pattern, generated_text)
    intents = intent_match.group(1).split('#') if intent_match else []

    # 提取实体槽位
    entity_slots_match = re.search(entity_slots_pattern, generated_text)
    entity_slots = {}
    if entity_slots_match:
        slots_str = entity_slots_match.group(1)
        for slot_str in slots_str.split(', '):
            if ':' in slot_str:
                key, value = slot_str.split(': ')
                entity_slots[key.strip("'")] = value.strip("'")

    # 提取utterance
    utterance_match = re.search(utterance_pattern, generated_text)
    utterance = utterance_match.group(1).strip() if utterance_match else ""

    return intents, entity_slots, utterance

true_intent, true_slots, utterance = parse_generated_text(test_dataset["train"][1]['formatted_text'])

# print(test_dataset["train"][1]['formatted_text'])
print(true_intent)

# 将真实的entity_slots转换为BIO格式

# intent_acc = get_multi_acc(all_pred_intents, all_true_intents)
# slot_score = computeF1Score(all_true_slots, all_pred_slots)
# semantic_accuracy = semantic_acc(all_pred_slots, all_true_slots, all_pred_intents, all_true_intents)

# # 打印评估指标
# print(f"Intent Accuracy: {intent_acc}")
# print(f"Slot_Score(f1, precision, recall): {slot_score}")
# print(f"Semantic Accuracy: {semantic_accuracy}")