In [10]:
from dotenv import load_dotenv
import os

# Load variables from the project's .env file into the process environment.
# NOTE(review): this is an absolute, machine-specific path — consider making it
# relative or configurable before sharing the notebook.
load_dotenv("/home/dj/Code/fine-tuning/.env")

# Fail early with a clear message: assigning None into os.environ would raise
# an opaque "TypeError: str expected, not NoneType".
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise RuntimeError(
        "HF_TOKEN is not set; define it in the .env file or in the environment."
    )
os.environ["HF_TOKEN"] = hf_token

In [11]:
from fine_tuning.clients.openai_api_client import OpenAIApiClient

In [12]:
def generate_user_input():
    """Ask the chat model for one synthetic recipe question, written as a user.

    A new ``OpenAIApiClient`` is created on every call and a single user
    message is sent to ``gpt-4o`` with ``temperature=0.9``.

    Returns:
        Whatever ``OpenAIApiClient.chat_completion`` returns, unmodified.
        NOTE(review): presumably the generated text as a ``str``, because this
        notebook forwards the value as message content — confirm against the
        client implementation.
    """
    # Adjacent string literals are concatenated by the parser, so this builds
    # one prompt string. The "Examples" heading was previously misspelled.
    instructions = (
        "You are an experienced home cook who has been cooking for many years. "
        "Your task is to provide an example of a question for a full recipe that a reasonably "
        "experienced home cook would ask for. Please provide exactly one example as if you were the user."
        "\n\n"
        "Examples include:\n"
        "- What are the steps and ingredients for making a classic homemade lasagna from scratch?\n"
        "- Can you recommend me a recipe that includes chicken and some kind of cheese?\n"
        "- Is there something I can make out of the garlic, pepperonis, and vegetables I have on hand?\n"
        "\n\n"
        "Be varied in your answer. Try to match the style of the example questions but don't get stuck on specifically those ingredients."
    )

    openai_client = OpenAIApiClient()
    completion = openai_client.chat_completion(
        messages=[{"role": "user", "content": instructions}],
        temperature=0.9,
        model="gpt-4o",
    )

    return completion

In [13]:
# System prompt used for answer generation. It asks for recipes with bulleted
# ingredient and instruction lists, preceded by chain-of-thought reasoning
# wrapped in <think>...</think> tags, and ends with an example of that layout.
# Formatting note: a trailing backslash joins a line with the following one,
# and the `\n\n` escapes followed by the literal line break produce three
# consecutive newlines between the two paragraphs.
SYSTEM_PROMPT = """You are an experienced home cook who has been cooking for many years. \
Your task is to provide recipes to the user. For ingredients, your answer should \
be a bulleted list. For the instructions, your answer should also be a bulleted list. \
For other parts of your answer, feel free to format as you see fit. \
\n\n
For each of your answers, respond with a detailed chain-of-thought for how you arrived \
at your answer. The chain-of-thought (COT) reasoning should be contained within two \
HTML like `think` tags. Following those tags, you should include your actual, final answer. \
An example of what your output should look like is given below.

==== BEGIN EXAMPLE ====

<think>
COT REASONING HERE
</think>
ACTUAL ANSWER HERE

==== END EXAMPLE ====
"""

def generate_answer(user_input: str):
    """Request a recipe answer for ``user_input`` from the chat model.

    The conversation consists of ``SYSTEM_PROMPT`` as the system message
    followed by ``user_input`` as the user message, sent to ``gpt-4o`` through
    a newly created ``OpenAIApiClient``.

    Args:
        user_input: Text placed in the user message.

    Returns:
        Whatever ``OpenAIApiClient.chat_completion`` returns, unmodified.
    """
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_input},
    ]
    return OpenAIApiClient().chat_completion(messages=conversation, model="gpt-4o")


In [18]:
from fine_tuning.clients.simple_argilla_dataset_client import SimpleArgillaDatasetClient, create_dataset

# Client handle for the dataset that the generated records are upserted into.
dataset = SimpleArgillaDatasetClient(
    name="recipe-data-with-reasoning",
    workspace="vector124",
)


In [31]:
from multiprocessing import Process

def run_in_parallel(fns: list):
    """Run each callable in ``fns`` in its own process and wait for all of them.

    The function stays best-effort: a worker that cannot be started, or that
    exits with a non-zero code, does not abort the others. Such failures are
    reported as ``RuntimeWarning`` instead of being silently discarded.

    Args:
        fns: Callables taking no arguments; one ``Process`` is created per item.

    Returns:
        None.
    """
    import warnings  # local import so this cell does not depend on another one

    started = []
    for fn in fns:
        try:
            worker = Process(target=fn)
            worker.start()
        except Exception as exc:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still stop the notebook cell.
            warnings.warn(
                f"Could not start a process for {fn!r}: {exc!r}",
                RuntimeWarning,
                stacklevel=2,
            )
            continue
        started.append(worker)

    for worker in started:
        worker.join()
        # An exception raised inside the child only surfaces here as a
        # non-zero exit code.
        if worker.exitcode != 0:
            warnings.warn(
                f"A worker process exited with code {worker.exitcode}",
                RuntimeWarning,
                stacklevel=2,
            )

In [32]:
import argilla as rg

def get_ex():
    """Generate one question/answer example and upsert it into ``dataset``.

    A question is obtained from ``generate_user_input`` and answered by
    ``generate_answer``. The system prompt, question and answer are stored as
    the ``system_prompt``, ``prompt`` and ``completion`` fields of one
    ``rg.Record``.
    """
    question = generate_user_input()
    answer = generate_answer(question)

    fields = {
        "system_prompt": SYSTEM_PROMPT,
        "prompt": question,
        "completion": answer,
    }
    dataset.upsert_records([rg.Record(fields=fields)])


In [33]:
# Launch 27 workers; each one generates and uploads a single example.
run_in_parallel([get_ex] * 27)

Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.62batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.83batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.67batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.86batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.75batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.87batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.91batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.84batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.55batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.79batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.76batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.45batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.31batch/s]
Sending records...: 100%|██████████| 1/1 [00:00<00:00,  4.90batch/s]
Sending records...: 100%|█████████