In [4]:
from llm import qwen, kimi, gpt
import util.mark as m
import util.data_processing as dp
from typing import List
from datetime import datetime
import random

In [5]:
# TODO: add the model name to the model class
# TODO: modify this to use the file in the prompt dir

# One instance of each model wrapper from the llm package, keyed by a short name
# so later cells can select a model with model_list["<name>"].
model_list = dict(
    qwen=qwen.Qwen(),
    kimi=kimi.Kimi(),
    gpt=gpt.GPT(),
)

In [6]:
# Task prompt that ask_it() prepends to every text sent to a model: it states the
# tagging task, lists the error-type definitions, shows one worked example and ends
# with "Passage:" so the text to annotate can be appended directly after it.
prompt_fixed = """
Given a question and corresponding answer, identify any entity, relation, contradictory, subjective, unverifiable, or invented errors in the answer.
Mark each erroneous segment by enclosing it within the corresponding <error></error> tags.
If there are no errors, return the answer with no tags.
Any identified errors should be highlighted using the tag <error> enclosed in the original answer text.
Below are the error definitions of the error types.

Definitions:

- Entity Error: A small part of a sentence, often an entity (e.g., location name), is incorrect (usually 1-3 words). Entity errors often involve noun phrases or nouns.
- Relational Error: A sentence is partially incorrect due to a small part (usually 1-3 words). Relational errors often involve verbs and are often the opposite of what they should be.
- Contradictory Sentence Error: A sentence where the entire content is contradicted by the given reference, meaning the sentence can be proven false due to a contradiction with information in the passage.
- Invented Info Error: Errors referring to entities that are not known or do not exist. This does not include fictional characters in books or movies. - Invented errors include phrases or sentences with unknown entities or misleading information.
- Subjective Sentence: An entire sentence or phrase that is subjective and cannot be verified, so it should not be included.
- Unverifiable Sentence: A sentence where the whole sentence or phrase is unlikely to be factually grounded. Although it can be true, the sentence cannot be confirmed nor denied using the reference given or internet search. It is often something personal or private and hence cannot be confirmed.

##
This is an example:
Input:
"question": "What did Petra van Staveren win a gold medal for?",
"answer": "Petra van Stoveren won a silver medal in the 2008 Summer Olympics in Beijing, China." 
Your response: "<error>Petra van Stoveren</error> won a silver medal in the 2008 Summer Olympics in Beijing, China." 
"response": ""
##

Instructions: Now detect errors and include tag in the following passage as demonstrated in the example above. Use <error></error> tags around each identified error segment. If there are no errors, return the passage unchanged. Wait for my input after Passage:

Passage:
"""
# System prompt passed as the second argument of model.ask() in ask_it().
SYS_PROMPT = "You are a intelligent and clever expert on finding the hallucations errors in the text."

# Follow-up prompt used by ask_it() when the first answer altered the original text.
# ask_it() appends it AFTER the passage and the mismatch report, so the last line
# points back at the passage above. The previous version ended with a stray code
# fragment (`" + prompt_user`) left inside the string literal, which was sent to the
# model verbatim.
user_prompt = """
MY SWEET HEART, PLEASE DO NOT CHANGE THE ORIGINAL TEXT, JUST ADD TAGS, PLEASE. CAN YOU DO THAT AGAIN!
Your output should be like: original text <error>original text</error> original text
No extra text is needed. Just give me the answer.
It should be exactly the same, including spaces
The original text is the passage given above.
"""

text = ""

# Template for the "tag this text again" request. The {text} slot is filled by
# build_user_prompt(); the template contains no other braces.
USER_PROMPT_TEMPLATE = """
Please give me the answer with thanks. Your output should be like: original text <error>original text</error> original text. No extra text is needed. Just give me the answer. It should be exactly the same, including spaces. This is the original text:
```
{text}
```
"""


def build_user_prompt(text: str) -> str:
    """Return the re-tagging request with ``text`` placed inside the fenced block.

    Args:
        text: The passage the model must reproduce with tags added.

    Returns:
        The filled-in prompt string.
    """
    return USER_PROMPT_TEMPLATE.format(text=text)


# Kept for backward compatibility. This was an f-string evaluated once at definition
# time with text == "", so it always held an empty fenced block; the value is unchanged.
# Use build_user_prompt(passage) to get a prompt that actually carries a passage.
USER_PROMPT = build_user_prompt(text)

In [7]:
import pandas as pd

def convert_into_softlabels(in_file: str, out_file: str):
    """Add a ``soft_labels`` column derived from ``hard_labels`` and rewrite the file.

    Reads ``in_file`` as JSON Lines, turns every ``(start, end)`` pair found in a row's
    ``hard_labels`` into ``{"start": start, "prob": 1.0, "end": end}``, stores the
    resulting list under ``soft_labels`` and writes all rows to ``out_file`` as
    JSON Lines (one record per line).

    Args:
        in_file: Path of the JSON Lines file to read; it must have a ``hard_labels`` column.
        out_file: Path of the JSON Lines file to write.
    """
    frame = pd.read_json(in_file, lines=True)
    frame["soft_labels"] = [
        [{"start": begin, "prob": 1.0, "end": finish} for begin, finish in spans]
        for spans in frame["hard_labels"]
    ]
    frame.to_json(out_file, orient="records", lines=True)


In [7]:
def transform_text_list_with_mark_into_output_file(input_cor: List[dict], text_lst: List[str], mark:m.Mark, name_feature:str):
    """Turn tagged texts into label records and save them as one JSONL file.

    Each tagged text in ``text_lst`` is paired by position with the input record at the
    same index of ``input_cor``. The spans returned by ``m.starts_and_ends`` become the
    hard labels, and every span is also emitted as a soft label with probability 1.0.
    The labels are merged into the input record (``record | labels``) and the merged
    records are written through ``dp.save_file_output`` to
    ``./output/<name_feature><YYYYmmdd_HHMMSS><random int 0..1000>.jsonl``.

    Args:
        input_cor: Input records, one per text. Each must support ``record | labels``
            (presumably the dicts loaded from the input JSONL; TODO confirm).
        text_lst: Model answers that contain the tags described by ``mark``.
        mark: Tag descriptor handed to ``m.starts_and_ends``.
        name_feature: Prefix of the output file name.

    Note:
        ``zip`` stops at the shorter of the two lists, so surplus items in the longer
        one are dropped silently; callers should pass lists of equal length.
    """
    output_lst = []
    for record, tagged_text in zip(input_cor, text_lst):
        # Hard labels are [start, end] pairs, e.g. [[12, 34], [34, 55], ...]
        hard_labels = m.starts_and_ends(tagged_text, mark)
        # TODO: real soft-label probabilities are not available yet, so every hard
        # span is mirrored as a soft label with probability 1.0.
        soft_labels = [
            dp.SoftLabel({"start": span[0], "prob": 1.0, "end": span[1]})
            for span in hard_labels
        ]
        labels = dp.Labels(soft_labels=soft_labels, hard_labels=hard_labels)
        # Keep the input fields next to the labels in the saved record.
        output_lst.append(dp.Output(record | labels))
        # The debug print of the whole accumulated list that used to sit here was
        # removed: it re-printed every earlier record on each iteration.

    # Timestamp plus a random suffix keeps file names from repeated runs apart.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_dir = "./output/"
    output_filename = name_feature + timestamp + str(random.randint(0, 1000))
    suffix = ".jsonl"
    full_output_filename = output_dir + output_filename + suffix
    dp.save_file_output(output_lst, full_output_filename)


In [6]:
#TODO modify this to read a list of file in a dir


In [8]:
# Tag descriptor built from the name "error". It is handed to the util.mark helpers
# (m.plain_text, m.starts_and_ends) wherever tagged model output is processed.
mask = m.Mark("error")


In [9]:
def load(file_name):
    """Load ``./input_data/<file_name>.jsonl`` and return what ``dp.load_file_jsonl`` gives back.

    Args:
        file_name: File name without directory and without the ``.jsonl`` extension,
            e.g. ``'en'``.

    Returns:
        The result of ``dp.load_file_jsonl`` for the assembled path.
    """
    full_file_name = "".join(("./input_data/", file_name, ".jsonl"))
    return dp.load_file_jsonl(full_file_name)

In [10]:
# Base names accepted by load() (assumed to be language codes, one input file each; TODO confirm).
# Only 'en' is loaded below; file_list itself is not used by any other cell shown here.
file_list = ["ar", "de", "en", "es", "fi", "fr", "hi", "it", "sv", "zh"]
# Records read from ./input_data/en.jsonl; later cells index them by
# "model_input" and "model_output_text".
input_lst = load('en')

In [11]:
def ask_it(prompt_user_lst, mask, model):
    """Ask ``model`` to tag every text in ``prompt_user_lst`` and return its answers.

    For each text the model is queried with ``prompt_fixed + text`` under ``SYS_PROMPT``.
    If removing the tags from the answer (``m.plain_text``) does not give back the text
    exactly, the mismatch is printed and the model is asked once more, this time with
    the mismatch report and ``user_prompt`` appended. The second answer is kept as it
    is; it is not checked again.

    Args:
        prompt_user_lst: Texts to annotate, one model call (or two on mismatch) each.
        mask: Tag descriptor passed to ``m.plain_text``.
        model: Object exposing ``ask(prompt, system_prompt)`` and returning the answer text.

    Returns:
        A list with one answer per input text, in input order.
    """
    def ask(user_text):
        return model.ask(prompt_fixed + user_text, SYS_PROMPT)

    meow_lst = []
    for prompt_user in prompt_user_lst:
        meow = ask(prompt_user)
        stripped = m.plain_text(meow, mask)
        if stripped != prompt_user:
            #TODO modify this log info
            log_info = f"log info: original input:{prompt_user}, gpt output:{stripped}\n"
            print(log_info)
            # The retry used to concatenate `to_add`, whose only definition was
            # commented out, so this branch raised NameError on the first mismatch.
            # The text it described (a character diff) is not produced any more,
            # so it is left out of the retry prompt.
            meow = ask(prompt_user + log_info + user_prompt)
        meow_lst.append(meow)
    print(meow_lst)
    return meow_lst

In [3]:
# Texts to annotate: the raw model outputs, exactly as ask_it() must get them back.
prompt_user_lst = [record["model_output_text"] for record in input_lst]
meow_lst = ask_it(prompt_user_lst, mask, model_list["qwen"])

# Rebuild prompt_user_lst as "Question/Response" prompts. This runs after ask_it(),
# so these prompts are NOT what the model was asked above.
# The loop variable used to be called `input`, which rebound the builtin input()
# for the rest of the kernel session.
prompt_user_lst = []
for record in input_lst:
    question = record["model_input"]
    response = record["model_output_text"]
    prompt_input = "Question:" + question + '\n' + "Response:" + response
    prompt_user_lst.append(prompt_input)

print(prompt_user_lst)

NameError: name 'input_lst' is not defined

In [49]:
# Convert the tagged answers into label records and save them under ./output/ with the
# file-name prefix "gpt_en_".
# NOTE(review): the prefix says "gpt" while the cell above queries model_list["qwen"];
# confirm which model produced meow_lst.
transform_text_list_with_mark_into_output_file(input_lst, meow_lst, mask, "gpt_en_")

file have been saved to ./output/gpt_en_20241029_182407877.jsonl


In [24]:
# Texts to annotate: the raw model outputs, one per input record.
prompt_user_lst = [record["model_output_text"] for record in input_lst]
# Pass the model instance from the registry. The bare name `qwen` is the imported
# module here (its rebinding to an instance is commented out), so it cannot be used
# as the model object.
meow_lst = ask_it(prompt_user_lst, mask, model_list["qwen"])

ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok
ok


In [26]:
from pymongo import MongoClient

# Connect to the local MongoDB server.
client = MongoClient('mongodb://localhost:27017/')
db = client['nlp_project']  # replace with your database name
collection = db['en']  # replace with your collection name

# Pair every model answer with the prompt text at the same position.
lines = [[meow, prompt] for meow, prompt in zip(meow_lst, prompt_user_lst)]

# Insert each pair as its own document. The key 'gpt_output:' really ends with a
# colon; the read-back cell uses the same spelling.
for answer, original in lines:
    collection.insert_one({'gpt_output:': answer, 'original': original})

print("字符串已成功写入 MongoDB。")

字符串已成功写入 MongoDB。


In [66]:
# Read all documents from MongoDB

# Print each line

    #print(gpt)
    #print(ori)

# Close the connection


In [70]:
# Read every document of the collection back and keep only the stored model answer
# (key 'gpt_output:', trailing colon included).
# NOTE(review): find() is called without a filter, so documents inserted by earlier
# runs are returned too; confirm the collection holds exactly one run.
documents = collection.find()
new_meow_lst = [document['gpt_output:'] for document in documents]

['Petra van Stoveren won a silver medal in the 2008 Summer Olympics in <error>Beijing, China</error>.',
 'The Elysiphale order contains 5 genera.',
 '<error>Yes, all arachnids have antennas.</error> However, not all of them are visible to the naked eye.',
 'Chance the rapper debuted in 2011.',
 "The UN's Sustainable City initiative defines a city as one that is:\n- Equipped with infrastructure and services to ensure sustainable and equitable access to a range of basic services, such as water, sanitation, and electricity;\n-.",
 'Zhejing cuisine is known for its unique flavors and cooking techniques. The four main styles are: 1) <error>Jiangnan</error> style, which is characterized by the use of rice and seafood; 2) <error>Hangzhou</error> style which uses a lot of vegetables and meat; the 3) <error>Zhaozhou</error> style that is famous for dishes made with pork; and 4) <error>Xiamen</error> style dishes that are influenced by Cantonese cuisine.',
 "There are 365 days in a year. So, if 

In [71]:
# Same conversion as before, but using the answers read back from MongoDB; the result
# is saved under ./output/ with the file-name prefix "gpt_en_".
# NOTE(review): records and answers are paired by position, so new_meow_lst must be in
# the same order and of the same length as input_lst.
transform_text_list_with_mark_into_output_file(input_lst, new_meow_lst, mask, "gpt_en_")

file have been saved to ./output/gpt_en_20241030_153139129.jsonl


In [8]:
import subprocess
import os

# Relative paths used by the scoring step.
score_script = "./util/score.py"  # read as a module-level name inside run_evaluation()
ref_file = "./input_data/en.jsonl"
# NOTE(review): pinned to one timestamped output file; update it after producing a new
# prediction file.
pred_file = "./output/gpt_en_20241030_153139129.jsonl"
output_file = "scores.txt"

def run_evaluation(ref_file: str, pred_file: str, output_file: str):
    """Run the scoring script on a reference/prediction pair.

    Starts ``score_script`` (a module-level path) as a child process with ``ref_file``,
    ``pred_file`` and ``output_file`` as positional arguments, using the interpreter
    that is running this notebook. The command line is printed before it is run.
    The child's exit status is not checked.

    Args:
        ref_file: Path of the reference JSONL file.
        pred_file: Path of the prediction JSONL file.
        output_file: Path handed to the scoring script as its third argument.
    """
    # Imported here because the cell's own import lines sit outside this function.
    import sys

    # Use the running interpreter instead of a hard-coded, user-specific absolute path,
    # so the cell works on any machine and in the kernel's own environment.
    python_executable = sys.executable

    # Build the argument list.
    command = [python_executable, score_script, ref_file, pred_file, output_file]

    print("Running command:", ' '.join(command))

    # Run the command.
    subprocess.run(command)

# Run the evaluation
run_evaluation(ref_file, pred_file, output_file)


Running command: C:/Users/90586/AppData/Local/Programs/Python/Python311/python.exe ./util/score.py ./input_data/en.jsonl ./output/gpt_en_20241030_153139129.jsonl scores.txt
