# **反思微调（Reflection-Tuning）**

In [1]:
# 使用sys.path添加上级目录
import sys
import os
package_path = os.path.dirname(os.path.dirname(os.getcwd()))
file_path = os.path.join(package_path, "ch07", "03_model-evaluation")
print(file_path)
sys.path.append(file_path)

import torch
if torch.cuda.is_available():
   device = torch.device("cuda")
elif torch.backends.mps.is_available():
   device = torch.device("mps")
else:
   device = torch.device("cpu")

/Users/young/project/llmProject/LLMs-from-scratch-CN/ch07/03_model-evaluation


## 测试 OpenAI API (使用deepseek)

In [2]:
def run_chatgpt(prompt, client, model="gpt-4-turbo", system_prompt=None):
    messages = []

    # 增加system prompt
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    messages.append({"role": "user", "content": prompt})

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.0,
        seed=123,
    )

    return response.choices[0].message.content

In [4]:
from openai import OpenAI
from config_manager import config_manager

config = config_manager.config
api_key = config.get('api', {}).get('deepseek_api_key')
if not api_key:
    raise ValueError("需要提供API密钥")
model = config.get('api', {}).get('model', 'deepseek-chat')

# 初始化OpenAI客户端来访问Deepseek API
client = OpenAI(
    api_key=api_key,
    base_url=config.get('api', {}).get('deepseek_api_url')
)

In [5]:
# 准备输入
prompt = f"Respond with 'hello world' if you got this message."
run_chatgpt(prompt, client, model)

'hello world'

In [7]:
## 加载json数据
import json
from pathlib import Path

data_dir = "../../ch07/01_main-chapter-code/"
json_file =  "instruction-data.json"

with open(data_dir + json_file, "r") as file:
    json_data = json.load(file)

print("Number of entries:", len(json_data))

Number of entries: 1100


In [8]:
from pprint import pp as pprint

pprint(json_data[0])

{'instruction': 'Evaluate the following phrase by transforming it into the '
                'spelling given.',
 'input': 'freind --> friend',
 'output': 'The spelling of the given phrase "freind" is incorrect, the '
           'correct spelling is "friend".'}


## 优化指令数据

In [9]:
# 优化指令数据的工具函数
def instr_prompt_no_input(ins, outp):

    sys_prompt = "You are a helpful, precise but picky assistant for checking the quality of a given instruction."
    prompt_template = "[Instruction]\n{ins}\n\n[The Start of Answer]\n{outp}\n\n[The End of Answer]\n\n[System]\n{criteria}\n\n"
    criteria = "We would like you to answer several questions related to the quality of a given instruction. \n" + \
                "1. Why this instruction is not good? First analyse the instruction based on Complexity of the Topic, Level of Detail Required, Knowledge Required, Ambiguity of the Instruction and Logical Reasoning or Problem-Solving Involved. \n" + \
                "Then analyse why this answer is not good for the given instruction? Analyse based on the Helpfulness, Relevance, Accuracy and Level of Details. \n" + \
                "Finally analyse why this bad instruction lead to a bad answer. " +\
                "2. Based on the reason you provided, generate a new and complete instruction which is complex and difficult to answer directly. " + \
                "Make sure the new instruction is relevent but independent to the original instruction, which can be answered without knowing the original instruction, put the new instruction in the format of [New Instruction] your instruction [End]" +\
                "3. Answer the newly generated instruction as detailed as possible, in the format of [New Answer] your answer [End] \n"
    prompt = prompt_template.format(
        ins=ins, outp=outp, criteria=criteria
    )
    return sys_prompt, prompt

In [10]:
# 查看原始的指令数据
pprint(json_data[2])

{'instruction': 'Convert 45 kilometers to meters.',
 'input': '',
 'output': '45 kilometers is 45000 meters.'}


In [12]:
# 使用工具函数优化指令数据
entry = json_data[2]
sys_prompt, prompt = instr_prompt_no_input(entry["instruction"], entry["output"])
output = run_chatgpt(prompt, client, model, system_prompt=sys_prompt)
print(output)

### Analysis of the Instruction and Answer

**1. Why the Instruction is Not Good:**
- **Complexity of the Topic:** The instruction is overly simplistic. Converting kilometers to meters is a basic unit conversion that requires minimal effort or thought.
- **Level of Detail Required:** The instruction does not ask for any explanation or intermediate steps, just a direct conversion.
- **Knowledge Required:** The knowledge required is very basic (understanding the metric system's prefixes).
- **Ambiguity of the Instruction:** There is no ambiguity—the instruction is clear but trivial.
- **Logical Reasoning or Problem-Solving Involved:** Almost none. The conversion is straightforward and does not require problem-solving.

**2. Why the Answer is Not Good for the Given Instruction:**
- **Helpfulness:** The answer is correct but provides no additional value (e.g., no explanation or context).
- **Relevance:** The answer is relevant but too brief.
- **Accuracy:** The answer is accurate but lacks

In [13]:
# 提取优化后的指令和回答
import re

# 提取优化后指令的工具函数
def extract_ins(text, no_input=True):
    if '[New Instruction]' in text:
        pattern = r'(\[New Instruction\])(.*?)(\[End\]|\[New Answer\]|New Answer:)'
    else:
        pattern = r'(New Instruction:)(.*?)(\[End\]|\[New Answer\]|New Answer:)'
    segments = re.findall(pattern, text, re.DOTALL)
    if len(segments) == 0:
        seg_ins = ''
    else:
        seg_ins = segments[0][1].strip()
    if seg_ins.endswith("\n\n3."):
        seg_ins = seg_ins[:-4]
    return seg_ins

# 提取优化后回答的工具函数
def extract_oup(text, no_input=True):
    if '[New Answer]' in text:
        pattern = r'(\[New Answer\])(.*?)(\[End\]|$)'
    else:
        pattern = r'(New Answer:)(.*?)(\[End\]|$)'
        # pattern = r'(\[New Answer\]|New Answer:)(.*?)(\[End\]|$)'
    segments = re.findall(pattern, text, re.DOTALL)
    if len(segments) == 0:
        seg_oup = ''
    else:
        seg_oup = segments[0][1].strip()
    return seg_oup

# 提取优化后指令和回答的工具函数
def extract_instruction(text):
    if text == '':
        return []
    seg_ins = extract_ins(text, no_input=True)
    seg_oup = extract_oup(text, no_input=True)
    return [seg_ins, seg_oup]

In [15]:
# 准备输入
prompt = f"Respond with 'hello world' if you got this message."
run_chatgpt(prompt, client, model)

'hello world'

In [16]:
# 从之前的output中提取优化后的指令和回答
new_instr, new_outp = extract_instruction(output)

In [17]:
print(new_instr)

Explain the process of converting kilometers to meters, including the mathematical reasoning behind the conversion factor. Additionally, discuss why the metric system uses a base-10 system for such conversions and how this compares to other unit systems (e.g., imperial). Provide examples of real-world scenarios where this conversion might be critical.


In [18]:
print(new_outp)

**Process of Converting Kilometers to Meters:**  
1. **Understanding the Units:**  
   - A kilometer (km) is a unit of length in the metric system equal to 1,000 meters (m).  
   - The prefix "kilo-" denotes a factor of 1,000 in the metric system.  

2. **Mathematical Reasoning:**  
   - To convert kilometers to meters, multiply the number of kilometers by 1,000.  
   - Example: \( 45 \text{ km} \times 1,000 = 45,000 \text{ m} \).  

3. **Why Base-10?**  
   - The metric system is decimal-based, meaning it uses powers of 10 for conversions. This makes calculations straightforward because shifting the decimal point is all that’s required.  
   - For example, converting 45 km to meters involves moving the decimal point 3 places to the right (since \( 10^3 = 1,000 \)).  

4. **Comparison to Imperial System:**  
   - The imperial system (e.g., miles to feet) does not use a base-10 system. For example, 1 mile = 5,280 feet, which is not intuitive or easy to calculate.  
   - The metric syste

## 优化响应

In [19]:
# 响应优化的工具函数
def res_gen_prompt_no_input(ins, outp):

    sys_prompt = "You are a helpful, precise but picky assistant for checking the quality of the answer to a given instruction."
    prompt_template = "[Instruction]\n{ins}\n\n[The Start of Answer]\n{outp}\n\n[The End of Answer]\n\n[System]\n{criteria}\n\n"
    criteria = "We would like you to answer several questions related to the quality of the answer to the given instruction. \n" + \
                "1. Why this answer is not good for the given instruction? Analyse based on the Helpfulness, Relevance, Accuracy and Level of Details. \n" + \
                "2. Based on the reason you provided, generate a better answer, new and complete, as detailed as possible, in the format of [Better Answer] your answer [End] \n" 
    prompt = prompt_template.format(
        ins=ins, outp=outp, criteria=criteria
    )
    return sys_prompt, prompt


def res_gen_prompt_input(ins, inp, outp):

    sys_prompt = "You are a helpful and precise assistant for checking the quality of the answer to a given instruction and its input."
    prompt_template = "[Instruction]\n{ins}\n\n[The Start of Input]\n{inp}\n\n[The End of Input]\n\n[The Start of Answer]\n{outp}\n\n[The End of Answer]\n\n[System]\n{criteria}\n\n"
    criteria = "We would like you to answer several questions related to the quality of the answer to the given instruction and corresponding input. \n" + \
                "1. Why this answer is not good for the given instruction and corresponding input? Analyse based on the Helpfulness, Relevance, Accuracy and Level of Details. \n" + \
                "2. Based on the reason you provided, generate a better answer, new and complete, as detailed as possible, in the format of [Better Answer] your answer [End] \n" 
    prompt = prompt_template.format(
        ins=ins, inp=inp, outp=outp, criteria=criteria
    )
    return sys_prompt, prompt

In [21]:
# 查看响应优化的效果
entry = json_data[2]

system_prompt, prompt = res_gen_prompt_no_input(ins=entry["instruction"], outp=entry["output"])
output = run_chatgpt(prompt=prompt, client=client, model=model, system_prompt=system_prompt)

print(output)

1. Analysis of the Answer:
   - **Helpfulness**: The answer is helpful as it provides the correct conversion from kilometers to meters.
   - **Relevance**: The answer is relevant to the instruction, as it directly addresses the request to convert 45 kilometers to meters.
   - **Accuracy**: The answer is accurate, as 45 kilometers is indeed equal to 45,000 meters.
   - **Level of Details**: The answer lacks detail. It does not explain the conversion process or the relationship between kilometers and meters, which could be useful for someone learning about unit conversions.

2. Better Answer:
[Better Answer]
To convert 45 kilometers to meters, you need to understand the relationship between kilometers and meters. 
- 1 kilometer (km) is equal to 1,000 meters (m). 
- Therefore, to convert kilometers to meters, you multiply the number of kilometers by 1,000. 

Calculation:
45 km × 1,000 m/km = 45,000 m

So, 45 kilometers is equal to 45,000 meters.
[End]


In [22]:
# 提取优化后的响应
def extract_response(text):
    if text.count('[Better Answer]') >= 2:
        pattern = r'\[(Better Answer)\](.*?)(\[End\]|\[Better Answer\]|$)'
        segments = re.findall(pattern, text, re.DOTALL)
    else:
        # pattern = r'\[(Better Answer)\](.*?)\[End\]'
        pattern = r'\[(Better Answer)\](.*?)(\[End\]|End|$)'
        segments = re.findall(pattern, text, re.DOTALL)
    return [segment[1].strip() for segment in segments]

In [23]:
response = extract_response(output)[0]
print(response)

To convert 45 kilometers to meters, you need to understand the relationship between kilometers and meters. 
- 1 kilometer (km) is equal to 1,000 meters (m). 
- Therefore, to convert kilometers to meters, you multiply the number of kilometers by 1,000. 

Calculation:
45 km × 1,000 m/km = 45,000 m

So, 45 kilometers is equal to 45,000 meters.


## 优化数据集

In [24]:
data_to_process = json_data[:3]

In [25]:
from tqdm import tqdm


def reflect_instructions(json_data, client):
    new_json_data = []

    for entry in tqdm(json_data):

        if not entry["input"]:
            system_prompt, prompt = instr_prompt_no_input(ins=entry["instruction"], outp=entry["output"])
            output = run_chatgpt(prompt=prompt, client=client, model=model, system_prompt=system_prompt)
            new_instr, new_outp = extract_instruction(output)
            new_entry = {"instruction": new_instr, "input": "", "output": new_outp}
            new_json_data.append(new_entry)
        else:
            new_json_data.append(entry)
    
    return new_json_data

In [26]:
data_to_process = json_data[:3]

new_json_data = reflect_instructions(data_to_process, client)

100%|██████████| 3/3 [00:33<00:00, 11.15s/it]


In [27]:
for i in new_json_data:
    pprint(i)
    print("\n\n")

{'instruction': 'Evaluate the following phrase by transforming it into the '
                'spelling given.',
 'input': 'freind --> friend',
 'output': 'The spelling of the given phrase "freind" is incorrect, the '
           'correct spelling is "friend".'}



{'instruction': 'Edit the following sentence for grammar.',
 'input': 'He go to the park every day.',
 'output': 'He goes to the park every day.'}



{'instruction': 'Explain the process of converting kilometers to meters, '
                'including the mathematical reasoning behind the conversion '
                'factor. Then, apply this process to convert 45 kilometers to '
                'meters, showing all steps. Finally, discuss a real-world '
                'scenario where such a conversion might be necessary.',
 'input': '',
 'output': '**Process of Converting Kilometers to Meters:**  \n'
           '1. **Understanding the Units:**  \n'
           '   - The metric system is based on powers of 10. \n'
           '

In [29]:
with open("instruction-reflected.json", "w") as file:
    json.dump(new_json_data, file, indent=4)

## 响应反思微调

In [30]:
data_to_process = json_data[:3]

In [33]:
def reflect_responses(json_data, client):
    new_json_data = []

    for entry in tqdm(json_data):
        if not entry["input"]:
            system_prompt, prompt = res_gen_prompt_no_input(ins=entry["instruction"], outp=entry["output"])
            output = run_chatgpt(prompt=prompt, client=client, model=model, system_prompt=system_prompt)
            new_response = extract_response(output)

            if not len(new_response):
                new_response = entry["output"]
            
            new_entry = {
                "instruction": entry["instruction"],
                "input": "",
                "output": new_response[0]
            }
            new_json_data.append(new_entry)
        else:
            system_prompt, prompt = res_gen_prompt_input(ins=entry["instruction"], inp=entry["input"], outp=entry["output"])
            output = run_chatgpt(prompt=prompt, client=client, model=model, system_prompt=system_prompt)
            new_response = extract_response(output)

            if not len(new_response):
                new_response = entry["output"]
            
            new_entry = {
                "instruction": entry["instruction"],
                "input": "",
                "output": new_response[0]
            }
            new_json_data.append(new_entry)
    
    return new_json_data


In [34]:
new_json_data = reflect_responses(data_to_process, client)

100%|██████████| 3/3 [00:49<00:00, 16.36s/it]


In [35]:
for i in new_json_data[:3]:
    pprint(i)
    print("\n\n")

{'instruction': 'Evaluate the following phrase by transforming it into the '
                'spelling given.',
 'input': '',
 'output': '**  \n'
           '   The word "freind" is a common misspelling of the correct term '
           '"friend." The correct spelling follows the rule "i before e except '
           'after c," which applies here ("friend" has the "ie" sequence). To '
           'remember this, you can use the mnemonic: "A FRIEND is always there '
           'till the END" (note the "end" in "friend"). The incorrect spelling '
           '"freind" swaps the "i" and "e," violating the standard spelling '
           'rule. Always double-check such words to ensure accuracy in '
           'writing.'}



{'instruction': 'Edit the following sentence for grammar.',
 'input': '',
 'output': 'The original sentence, "He go to the park every day," contains a '
           'grammatical error. The verb "go" should be conjugated as "goes" to '
           'agree with the singular third

In [36]:
with open("response-reflected.json", "w") as file:
    json.dump(new_json_data, file, indent=4)