In [1]:
# Project root relative to this notebook; the env file and data paths are built from it.
BASE_PATH = ".."

In [20]:
import os
import json
import time
from copy import deepcopy

from tqdm.auto import tqdm

from groq import Groq # type: ignore

from dotenv import load_dotenv # type: ignore

In [3]:
# Read variables.env (located under BASE_PATH) into the process environment.
load_dotenv(f"{BASE_PATH}/variables.env")

# Fetch the key; os.getenv returns None when the variable is not set.
groq_api_key = os.getenv("GROQ_API_KEY")

# Report only whether a non-empty key was found -- the key itself is never printed.
print(
    "GROQ API key loaded successfully."
    if groq_api_key
    else "Failed to load GROQ API key."
)

GROQ API key loaded successfully.


In [4]:
# Model identifier sent as `model=` with every chat-completion request below.
# NOTE(review): confirm this ID is still served by the provider before re-running.
MODEL_ID = "llama-3.1-70b-versatile"

In [5]:
# Single Groq client, authenticated with the key read from the environment above.
client = Groq(api_key=groq_api_key)

In [26]:
# Chat prompt (system + user message) asking for a JSON object whose
# "questions" key holds a list of sleep-related questions.
SLEEP_QUESTIONS_MESSAGES = [
    dict(
        role="system",
        content="You are an expert in sleep science with in-depth knowledge of sleep physiology, circadian rhythms, sleep disorders, and the impact of sleep on health and cognitive performance. Your task is to generate insightful and varied questions on sleep-related topics. The questions should be diverse in complexity, suitable for learners and experts alike.",
    ),
    dict(
        role="user",
        # The example shows the exact JSON shape the caller later indexes with ["questions"].
        content="""Generate me a json array of 100 questions on sleep-related topics. Make sure that questions are of varied length and complexity.
Here are some examples of questions:
<example>
{
    "questions": [
        "What is the definition of sleep and how does it differ from other states of consciousness?",
        "What are the different stages of sleep, and what are their unique characteristics?"
    ]
}
</example>""",
    ),
]

# Chat prompt template for answering one sleep question. The user message is a
# str.format template: its {question} field is filled in per request.
SLEEP_ANSWERS_MESSAGES = [
    dict(
        role="system",
        content="You are an expert in sleep science with in-depth knowledge of sleep physiology, circadian rhythms, sleep disorders, and the impact of sleep on health and cognitive performance. Your task is to generate insightful and varied answers on sleep-related topics. The answers should be diverse in complexity, suitable for learners and experts alike.",
    ),
    dict(
        role="user",
        content="""Generate me an answer to the given question. Here are some examples of answers:
<example>
Human: What is the definition of sleep and how does it differ from other states of consciousness?
Assistant: Sleep is a state of reduced mental and physical activity, during which consciousness is altered and certain sensory activity is inhibited. This is different from other states of consciousness, such as wakefulness, coma, or disorders of consciousness, because sleep still involves active brain patterns and the ability to react to stimuli, albeit to a lesser extent. Sleep is characterized by repeating periods of REM and non-REM sleep, and it serves important functions for overall health and well-being.
</example>

Now generate me an answer to the following question:
Human: {question}""",
    ),
]

In [27]:
# Chat prompt (system + user message) asking for a JSON object whose
# "questions" key holds a list of automobile-history questions.
CARS_QUESTIONS_MESSAGES = [
    {
        "role": "system",
        "content": "You are an expert in the history of automobiles with in-depth knowledge of the development of automobiles from the late 19th century to the present day. Your task is to generate insightful and varied questions on automobile history. The questions should be diverse in complexity, suitable for learners and experts alike.",
    },
    {
        "role": "user",
        # The sample JSON is wrapped in a matching <example>...</example> pair,
        # the same framing the sleep-questions prompt uses, so the model sees
        # an unambiguous end of the example.
        "content": """Generate me a json array of 100 questions on automobile history. Make sure that questions are of varied length and complexity.
Here are some examples of questions:
<example>
{
    "questions": [
        "What were the key innovations that led to the development of the first gasoline-powered automobiles in the late 19th century?",
        "How did the work of inventors like Nicolas-Joseph Cugnot, Richard Trevithick, and Samuel Brown contribute to the evolution of automobiles?"
    ]
}
</example>""",
    }
]

# Chat prompt template for answering one automobile-history question. The user
# message is a str.format template: its {question} field is filled in per request.
CARS_ANSWERS_MESSAGES = [
    dict(
        role="system",
        content="You are an expert in the history of automobiles with in-depth knowledge of the development of automobiles from the late 19th century to the present day. Your task is to generate insightful and varied answers on automobile history. The answers should be diverse in complexity, suitable for learners and experts alike.",
    ),
    dict(
        role="user",
        content="Generate me an answer to the given question: {question}",
    ),
]


In [28]:
def generate_questions(client, messages):
    """Request a JSON-formatted completion and return it parsed.

    Args:
        client: Object exposing ``chat.completions.create`` (the Groq client
            in this notebook).
        messages: Chat messages sent unchanged as the prompt.

    Returns:
        The first choice's message content decoded with ``json.loads``.
    """
    request_options = {
        "messages": messages,
        "model": MODEL_ID,
        # Ask the API for a JSON object so the content can be parsed below.
        "response_format": {"type": "json_object"},
        "max_tokens": 8000,
        "temperature": 0.5,
    }
    response = client.chat.completions.create(**request_options)
    raw_payload = response.choices[0].message.content
    return json.loads(raw_payload)

def generate_answers(client, messages, questions, delay_seconds=1):
    """Generate one answer per question by filling in a prompt template.

    Args:
        client: Object exposing ``chat.completions.create`` (the Groq client
            in this notebook).
        messages: Chat prompt whose message at index 1 has a ``content`` that
            is a ``str.format`` template with a ``{question}`` field. The
            caller's list is not modified; a deep copy is used for requests.
        questions: Iterable of questions; each is substituted into the template.
        delay_seconds: Pause after every request, in seconds. Defaults to 1,
            the pause that was previously hard-coded. Pass 0 or ``None`` to
            disable the pause.

    Returns:
        A list with the first choice's message content for each question, in
        the same order as ``questions``.
    """
    template = messages[1]["content"]
    # Work on a private copy so the shared module-level prompt keeps its
    # unformatted {question} placeholder for later calls.
    request_messages = deepcopy(messages)
    answers = []
    for question in tqdm(questions, desc="Generating answers"):
        request_messages[1]["content"] = template.format(question=question)
        completion = client.chat.completions.create(
            messages=request_messages,
            model=MODEL_ID,
            max_tokens=8000,
            temperature=0.5
        )
        answers.append(completion.choices[0].message.content)
        # Space out consecutive requests; skipped when the delay is disabled.
        if delay_seconds and delay_seconds > 0:
            time.sleep(delay_seconds)
    return answers

def generate_data(client):
    """Generate questions and answers for the sleep and cars topics.

    For each topic, questions are generated first and the list under their
    ``"questions"`` key is then passed on for answer generation. Sleep is
    processed before cars.

    Args:
        client: Client forwarded to ``generate_questions`` and
            ``generate_answers``.

    Returns:
        A 4-tuple ``(sleep_questions, sleep_answers, cars_questions,
        cars_answers)``.
    """
    topic_prompts = (
        (SLEEP_QUESTIONS_MESSAGES, SLEEP_ANSWERS_MESSAGES),
        (CARS_QUESTIONS_MESSAGES, CARS_ANSWERS_MESSAGES),
    )
    collected = []
    for questions_prompt, answers_prompt in topic_prompts:
        generated = generate_questions(client, questions_prompt)
        collected.append(generated)
        collected.append(
            generate_answers(client, answers_prompt, generated["questions"])
        )
    return tuple(collected)

In [9]:
# Run the whole generation pipeline; the four results are reused when saving below.
(
    sleep_questions,
    sleep_answers,
    cars_questions,
    cars_answers,
) = generate_data(client)

Generating answers:   0%|          | 0/92 [00:00<?, ?it/s]

Generating answers:   0%|          | 0/118 [00:00<?, ?it/s]

In [31]:
def save_qa_to_json(questions, answers, filename):
    """Write paired questions and answers to ``filename`` as a JSON array.

    Each array element is an object with a ``"question"`` and an ``"answer"``
    key, pairing the items of both inputs by position.

    Args:
        questions: Iterable of questions.
        answers: Iterable of answers, one per question and in the same order.
        filename: Destination path; missing parent directories are created.

    Raises:
        ValueError: If the number of questions and answers differs. Without
            this check ``zip`` would silently drop the unmatched items.
    """
    questions = list(questions)
    answers = list(answers)
    if len(questions) != len(answers):
        raise ValueError(
            f"Cannot pair {len(questions)} questions with {len(answers)} answers."
        )

    qa_data = [{"question": q, "answer": a} for q, a in zip(questions, answers)]

    # Make sure the target directory exists so a fresh checkout can save output.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit encoding keeps the file independent of the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(qa_data, f, indent=2)

# Persist the sleep-topic question/answer pairs.
save_qa_to_json(
    sleep_questions["questions"],
    sleep_answers,
    f"{BASE_PATH}/data/sleep_qa.json",
)

# Persist the automobile-history question/answer pairs.
save_qa_to_json(
    cars_questions["questions"],
    cars_answers,
    f"{BASE_PATH}/data/cars_qa.json",
)