# 1. Load Data

In [None]:
from utils.data_process import get_data
path="./dataset/Final_Quality.json"
data=get_data(path)
print(data[0].keys())
print(f"資料數:{len(data)}")
data[0]

# 2. Choose Model Api

In [None]:
# 選擇使用模型 Breeze Taide ChatGPT Llama3 Mistral
model_name="Breeze" #Breeze Taide ChatGPT Llama3 Mistral
from utils.api_devlop import get_api
get_llm_reply=get_api(model_name)
print(get_llm_reply("請用中文自我介紹，你是誰訓練的模型"))

# 3. Run

In [None]:
from tqdm.auto import tqdm
from sentence_transformers import SentenceTransformer, util
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SentenceTransformer("multi-qa-mpnet-base-dot-v1").to(device)
device = model.device

def pick_up(result):
    if "1" in result:
        return 1
    elif "2" in result:
        return 2
    elif "3" in result:
        return 3
    elif "4" in result:
        return 4
    else:
        return 5

def split_with_overlap_english(text, chunk_size=3000, overlap=1):
    chunks=[]
    sentences=text.split(".")
    temp_chunk=[]

    pre_overlap_num=0
    pre_overlap=[]
    af_overlap=[]
    word_count=""
    for sentence_idx in range(len(sentences)):
        word_count+=sentences[sentence_idx]+" "
        temp_chunk.append(sentences[sentence_idx])

        if len(word_count.split(" "))>chunk_size:
            if chunks!=[]:
                pre_overlap_num=-overlap
                pre_overlap=chunks[-1].split(".")[pre_overlap_num*2:pre_overlap_num]
            if sentence_idx<len(sentences)-1:
                af_overlap_num=min(len(sentences)-1,sentence_idx+1+overlap)
                af_overlap=sentences[sentence_idx+1:af_overlap_num]
            chunks.append(". ".join(pre_overlap)+". "+". ".join(temp_chunk)+". "+". ".join(af_overlap))
            temp_chunk.clear()
            word_count=""
    if chunks!=[]:
        pre_overlap_num=-overlap
        pre_overlap=chunks[-1].split(".")[pre_overlap_num*2:pre_overlap_num]
    chunks.append(". ".join(pre_overlap)+". "+". ".join(temp_chunk))

    return chunks

def find_smaller_than_neighbors(arr):
    result = []
    for i in range(1, len(arr) - 1):
        if arr[i] < arr[i - 1] and arr[i] < arr[i + 1]:
            result.append(i)
    return result

def Semantic_Sentence_Split(text, chunk_size=3000, overlap=10):
    # 將句子以句號分割
    sentences = text.split(".")
    # 將句子進行encode
    embedding_sentences = model.encode(sentences)
    # 計算句子倆倆相似度
    dot_result = []
    for i in range(len(embedding_sentences) - 1):
        similarity_of_adjacent = util.dot_score(embedding_sentences[i], embedding_sentences[i + 1])
        dot_result.append(similarity_of_adjacent.item())
    smaller_than_neighbors = find_smaller_than_neighbors(dot_result)
    start_num = 0
    passages = []
    for num in smaller_than_neighbors:
        end_num = num + 1  # 确定当前段落的结束位置
        if len(". ".join(sentences[start_num:end_num]).split(" ")) > chunk_size:
            temp_passage = ". ".join(sentences[start_num:end_num])  # 创建当前段落
            passages.append(temp_passage)
            start_num = max(start_num + 1, num + 1 -overlap)  # 更新下一段的起始位置，添加重叠
    temp_passage = ". ".join(sentences[start_num:])  # 添加最后一段
    passages.append(temp_passage)
    return passages


def os_ap_sss_answer(summary_prompt,input_content,input_question,truncate_size=3000,overlap=10):
    """
    url_num 多個api要用哪張顯卡 multithread 用的
    input_content 輸入文章
    input_question 輸入問題
    truncate_size 最常吃的input
    overlap 切完文章後要 overlap 多少句
    """
    os_prompt=summary_prompt
    
    # 切分摘要完要輸入給llm的內容
    new_content=input_content
    while len(new_content.split(" "))>truncate_size:
        # 將內容先切成好幾個chunk
        content_chuncks=Semantic_Sentence_Split(new_content,truncate_size,overlap)
        # 這一輪的新內容
        new_content=""
        for chunk in content_chuncks:

            # 請llm幫我們把重要資訊留下
            input_for_reader=f"""Article excerpt:
            {chunk}
            
            The above is the article excerpt related to my question.
            Below is the question I want to ask.
            Please select the text content that can answer this question.
            {os_prompt}
            
            Question:
            {input_question}"""
            # 將llm認為有關係的地方留下來
            chunk_summary=get_llm_reply(input_for_reader)
            # if chunk_summary==None:
            #     continue
            new_content+=chunk_summary+" "
        # 防錯(如果LLM api無回傳 直接比照truncate)
        if new_content=="":
            new_content=" ".join(input_content.split(" ")[:3000])
            break
    # 找完有用的內容後，進行問答
    input_to_llm=f"""There will be an article question and four options. 
    Please choose the option that answers the question based on the article.

    article:
    {new_content}

    question:
    {input_question}
    
    Your answer must be the number of one of the options,meaning it should be either option1, option2, option3, or option4. 
    The format for the answer should be as follows: Answer__optionX."""
    answer_from_llm=get_llm_reply(input_to_llm)
    return answer_from_llm


# 練蠱

In [26]:
import os
from api.Breeze_API_t import get_breeze_t

def get_score(chunck_size,overlap,prompt="",test_data=[]):
    # 總分數
    scores=0 
    # 資料計數器
    num=0 
    for each_data in tqdm(test_data):
        try:
            # 輸入的內容與答案
            input_content=each_data['content']
            input_question=each_data['question']
            truth_answer_number=each_data['answer']

            result=os_ap_sss_answer(prompt,input_content,input_question,chunck_size,overlap)
            # 對答案計分數
            if pick_up(result)==truth_answer_number:
                scores+=1
            num+=1
        except Exception as e:
            print(e)
    return scores

def prompt_not_in_list(prompt_list, new_prompt):
    for item in prompt_list:
        if item['prompt'] == new_prompt:
            return False
    return True

# 你要拿來練蠱的Training data
training_data=data[-100:]
# 練蠱終止條件(我是設超過baseline做100題後的分數，這邊看你資料量來設)
stop_score=100
# 或是設一個回合數來終止(本來我會讓他跑到天荒地老所以沒有用for loop)
stop_run_num=20
# 你要給LLM看幾個example
example_num=5

# 給幾個初始化prompt
prompt_list=["","Let's think step by step.","Identify the specific information that responds to the question in the article.","""The text content should be concise and follow a universal structure.
    To ensure a correct answer, the process of creating a summary should focus on identifying the main points and key details of the text.
    It should avoid including specific content or names from the original article and instead provide a general overview of the information.
    The summary should follow a universal structure, presenting the main idea and supporting details in a clear and concise manner.
    By following these guidelines, the summary can accurately reflect the content of the text and lead to a correct answer."""]
prompt_scores=[]
# 初始化prompt算分，這邊先用最後10筆示範
for prompt in prompt_list:
    score=get_score(3000,10,prompt,training_data)
    prompt_scores.append({'prompt': prompt, 'score': score})

sorted_prompt_scores = sorted(prompt_scores, key=lambda x: x['score'], reverse=True)
print(f"initial prompt:\n{sorted_prompt_scores}")
# 開練
while sorted_prompt_scores[0]['score']<stop_score:
    stop_run_num-=1
    if stop_run_num<0:
        break
    # 單純用來終止迴圈 你也可以interrupt
    if os.path.exists(os.path.join(os.getcwd(),'stop_true.txt')):
        break

    # 排序你的prompt 拿比較高分的給LLM當example
    sorted_prompt_scores = sorted(prompt_scores, key=lambda x: x['score'], reverse=True)

    # 整理example格式
    example=""
    for p in sorted_prompt_scores[:example_num]:
        example+=f"""[Old prompt]:"{p['prompt']}"
        [Scores]:{p['score']}"""

    # 整理輸入(可以print出來檢查)
    input_to_temperature_llm=f"""You are an expert at crafting prompts.
    Based on the example task given below for an LLM, fill in the most suitable prompt in the place marked [new_prompt].
    The following describes the task you will undertake:

    "
    Article excerpt:
    [article_chunk]

    The above is the article excerpt related to my question.
    Below is the question I want to ask.
    Please select the text content that can answer this question.
    [new_prompt]

    Question:
    [input_question]
    "

    Here are some example prompts and their scores, ranging from 0 to 100, with higher scores indicating better performance.
    Please help me think of a unique new_prompt where higher scores are better.

    {example}

    ### You only need to return the new_prompt ###
    DON'T return the [Scores] or explanation.
    Your new_prompt:__"""

    new_prompt=get_breeze_t(input_to_temperature_llm,1)
    print("="*50)
    print(new_prompt)
    if prompt_not_in_list(prompt_scores,new_prompt):
        score=get_score(3000,10,prompt,training_data)
        print("*"*50)
        print(f"{new_prompt}\n{score}")
        prompt_scores.append({'prompt': new_prompt, 'score': score})
    else:
        print("prompt exict")
summary_prompt=sorted_prompt_scores[0]['prompt']
print(f"""新的Prompt: 
      {summary_prompt}""")

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

In [25]:
sorted_prompt_scores

[{'prompt': 'The text content should be concise and follow a universal structure.\n    To ensure a correct answer, the process of creating a summary should focus on identifying the main points and key details of the text.\n    It should avoid including specific content or names from the original article and instead provide a general overview of the information.\n    The summary should follow a universal structure, presenting the main idea and supporting details in a clear and concise manner.\n    By following these guidelines, the summary can accurately reflect the content of the text and lead to a correct answer.',
  'score': 7},
 {'prompt': ' "The text content should be concise and follow a universal structure. To ensure a correct answer, the process of creating a summary should focus on identifying the main points and key details of the text. It should avoid including specific content or names from the original article and instead provide a general overview of the information. The s

# 用新prompt實際跑實驗

In [None]:
def run(input_data,summary_prompt,chunck_size,overlap):
    # 輸出結果
    my_result=[]
    # 錯誤資料
    error_list=[]
    # 總分數
    scores=0 
    # 資料計數器
    num=0 

    for each_data in tqdm(input_data):
        try:
            # 輸入的內容與答案
            input_content=each_data['content']
            input_question=each_data['question']
            truth_answer_number=each_data['answer']

            result=os_ap_sss_answer(summary_prompt,input_content,input_question,chunck_size,overlap)
            # 對答案計分數
            if pick_up(result)==truth_answer_number:
                scores+=1
            num+=1

            # print出目前得分正確率
            print(f"score:{scores}/{num} :({scores/num*100}%)")
            my_result.append(result)
            
        except Exception as e:
            error_list.append(each_data)
            print(e)
            print(f"目前錯誤數量:{len(error_list)}")
    return f"score:{scores}/{num} :({scores/num*100}%)",my_result,error_list


In [None]:
score,results,error_results=run(data[:100],summary_prompt,3000,10)

In [None]:
results