In [52]:
import urllib.request
import json

def query_model(prompt, model="llama3.2:1b", url="http://localhost:11434/api/chat"):
    data = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "options": {
            "seed": 123,
            "temperature": 1
        }
    }
    # print(data)

    payload = json.dumps(data).encode("utf-8")

    request = urllib.request.Request(url, data=payload, method="POST")
    request.add_header("Content-Type", "application/json")

    response_data = ""
    with urllib.request.urlopen(request) as response:
        while True:
            line = response.readline().decode("utf-8")
            if not line:
                break
            response_json = json.loads(line)
            response_data += response_json["message"]["content"]

    return response_data


In [22]:
result = query_model("what is your name")

{'model': 'llama3.2:1b', 'messages': [{'role': 'user', 'content': 'what is your name'}], 'options': {'seed': 123, 'temperature': 1}}


In [23]:
print(result)

I'm an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."


In [27]:
from pathlib import Path 

json_file = Path("/Users/777bhavyagoyal/Developer/llmfromscratch/instruction-data.json")

with open(json_file,"r") as f:
    data = json.load(f)

In [28]:
data[0]

{'instruction': 'Evaluate the following phrase by transforming it into the spelling given.',
 'input': 'freind --> friend',
 'output': 'The spelling of the given phrase "freind" is incorrect, the correct spelling is "friend".'}

In [29]:
len(data)

1100

In [30]:
def format_input(entry):
    instruction_text = (
        f"Below is an instruction that describes a task. Write a response that "
        f"appropriately completes the request."
        f"\n\n### Instruction:\n{entry['instruction']}"
    )

    input_text = f"\n\n### Input:\n{entry['input']}" if entry["input"] else ""
    instruction_text + input_text

    return instruction_text + input_text

In [33]:
print(format_input(data[0]))

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Evaluate the following phrase by transforming it into the spelling given.

### Input:
freind --> friend


In [34]:
import random 

for entry in data[:5]:
    politeness = random.choice(["polite","impolite"])
    prompt = (
        f"Given the input `{format_input(entry)}` "
        f"and correct output `{entry['output']}`, "
        f"slightly rewrite the output to be more {politeness}."
        "Keep the modification minimal."
        "Only return return the generated response and nothing else."
    )
    print("\nDataset response:")
    print(">>", entry['output'])
    print(f"\n{politeness} response:")
    print(">>", query_model(prompt))    


Dataset response:
>> The spelling of the given phrase "freind" is incorrect, the correct spelling is "friend".

polite response:
{'model': 'llama3.2:1b', 'messages': [{'role': 'user', 'content': 'Given the input `Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nEvaluate the following phrase by transforming it into the spelling given.\n\n### Input:\nfreind --> friend` and correct output `The spelling of the given phrase "freind" is incorrect, the correct spelling is "friend".`, slightly rewrite the output to be more polite.Keep the modification minimal.Only return return the generated response and nothing else.'}], 'options': {'seed': 123, 'temperature': 1}}
>> "I'll take a closer look at the phrase 'freind' and help you find its correct spelling."

Dataset response:
>> He goes to the park every day.

polite response:
{'model': 'llama3.2:1b', 'messages': [{'role': 'user', 'content': 'Given the input `Below is

In [37]:
GROQ_API_KEY = "YOUR_API_KEY"

In [42]:
!pip install groq

Collecting groq
  Downloading groq-0.18.0-py3-none-any.whl.metadata (14 kB)
Downloading groq-0.18.0-py3-none-any.whl (121 kB)
Installing collected packages: groq
Successfully installed groq-0.18.0


In [44]:
from groq import  Groq
client = Groq(
    api_key=GROQ_API_KEY
)
def run_groq(prompt, client, model = 'mixtral-8x7b-32768', system_prompt = None):


    messages = []
    
    if system_prompt:
        messages.append(
            {
                "role":"system",
                "content":system_prompt
            }
        )

    messages.append(
        {
            "role":"user",
            "content":prompt
        }
    )  

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.0,
        seed=123,
    )

    return response.choices[0].message.content

In [45]:
prompt = "Respond with 'hello world' if you got this message."
run_groq(prompt, client)

"Hello world! I'm glad to see that my code is working as expected. If you have any questions or need assistance with a specific topic, feel free to ask. I'm here to help!"

In [35]:
import random 
from tqdm import tqdm

def generate_model_responses(json_data):

    for i, entry in enumerate(tqdm(json_data,desc="Writing entries")):
        politeness = random.choice(["polite","rude"])
        prompt = (
            f"Given the input `{format_input(entry)}` "
            f"and correct output `{entry['output']}`, "
            f"slightly rewrite the output to be more {politeness}."
            "Keep the modification minimal."
            "Only return return the generated response and nothing else."
        )
        response = query_model(prompt)
        
        if politeness == 'polite':
            json_data[i]["chosen"] = response
            json_data[i]["rejected"] = entry["output"]

        else:
            json_data[i]["chosen"] = entry["output"]
            json_data[i]["rejected"]  = response    

In [53]:
generate_model_responses(data)

Writing entries: 100%|██████████| 1100/1100 [08:43<00:00,  2.10it/s]


In [60]:
data[990]

{'instruction': 'Explain the primary function of the human heart.',
 'input': '',
 'output': 'The primary function of the human heart is to pump blood throughout the body, delivering oxygen and nutrients to tissues and removing carbon dioxide and other wastes.',
 'chosen': 'The primary function of the human heart is to pump blood throughout the body, delivering oxygen and nutrients to tissues and removing carbon dioxide and other wastes.',
 'rejected': '```python\ndef explain_primary_function():\n    primary_function = "The human heart is a muscular organ that pumps blood throughout the body, delivering oxygen and nutrients to tissues and removing carbon dioxide and other wastes."\n    return corrected_output = "Ugh, really? Fine. The human heart pumps blood. Duh."\n\n# Test the function\nprint(explain_primary_function())\n```'}

In [61]:
with open("instruction-data-with-preference.json", "w") as file:
    json.dump(data, file, indent=4)

In [46]:
def instr_prompt_no_input(ins, outp):

    sys_prompt = "You are a helpful, precise but picky assistant for checking the quality of a given instruction."
    prompt_template = "[Instruction]\n{ins}\n\n[The Start of Answer]\n{outp}\n\n[The End of Answer]\n\n[System]\n{criteria}\n\n"
    criteria = "We would like you to answer several questions related to the quality of a given instruction. \n" + \
                "1. Why this instruction is not good? First analyse the instruction based on Complexity of the Topic, Level of Detail Required, Knowledge Required, Ambiguity of the Instruction and Logical Reasoning or Problem-Solving Involved. \n" + \
                "Then analyse why this answer is not good for the given instruction? Analyse based on the Helpfulness, Relevance, Accuracy and Level of Details. \n" + \
                "Finally analyse why this bad instruction lead to a bad answer. " +\
                "2. Based on the reason you provided, generate a new and complete instruction which is complex and difficult to answer directly. " + \
                "Make sure the new instruction is relevent but independent to the original instruction, which can be answered without knowing the original instruction, put the new instruction in the format of [New Instruction] your instruction [End]" +\
                "3. Answer the newly generated instruction as detailed as possible, in the format of [New Answer] your answer [End] \n"
    prompt = prompt_template.format(
        ins=ins, outp=outp, criteria=criteria
    )
    return sys_prompt, prompt

In [48]:

entry = data[2]

system_prompt, prompt = instr_prompt_no_input(ins=entry["instruction"], outp=entry["output"])

In [50]:
print(prompt)

[Instruction]
Convert 45 kilometers to meters.

[The Start of Answer]
45 kilometers is 45000 meters.

[The End of Answer]

[System]
We would like you to answer several questions related to the quality of a given instruction. 
1. Why this instruction is not good? First analyse the instruction based on Complexity of the Topic, Level of Detail Required, Knowledge Required, Ambiguity of the Instruction and Logical Reasoning or Problem-Solving Involved. 
Then analyse why this answer is not good for the given instruction? Analyse based on the Helpfulness, Relevance, Accuracy and Level of Details. 
Finally analyse why this bad instruction lead to a bad answer. 2. Based on the reason you provided, generate a new and complete instruction which is complex and difficult to answer directly. Make sure the new instruction is relevent but independent to the original instruction, which can be answered without knowing the original instruction, put the new instruction in the format of [New Instruction] 

In [51]:
output = run_groq(prompt=prompt, client=client, system_prompt=system_prompt)

print(output)

1. The instruction "Convert 45 kilometers to meters" is not complex, does not require a high level of detail, and does not involve problem-solving or logical reasoning. It is a straightforward conversion that requires basic knowledge of units of measurement.

The answer "45 kilometers is 45000 meters" is good because it is accurate, relevant, and provides the required level of detail. However, it could be improved by explaining the conversion process or providing additional context.

The bad instruction did not lead to a bad answer, but it did not encourage a complete or informative response.

2. [New Instruction] Explain the relationship between the metric system and the imperial system of measurement, including the historical context and the advantages and disadvantages of each system. [End]

3. [New Answer] The metric system and the imperial system are two different systems of measurement that originated in France and England, respectively. The metric system is based on powers of te