In [None]:
from rich import print
import os
import json

In [None]:
# Importing

!pip install openai
from openai import OpenAI

In [None]:
# Setting up environment

!pip install python-dotenv
from dotenv import load_dotenv

In [None]:
# Load datasets

!pip install datasets
from datasets import Dataset, DatasetDict, load_dataset

In [None]:
# Define and generate subtopics

# Number of Git subtopics to request from the model.
subtopics = 5

# Prompt asking for `{subtopics}` subtopic names as a bare, comma-separated
# list (no numbering, no descriptions, no surrounding text). The reply is
# later split on "," to obtain the individual subtopics.
PROMPT_TEMPLATE = """\
Create a synthetic dataset of natural language and Git commands. Give me {subtopics} subtopics
to cover what needs to be covered while working with Git.
The list must be without numbers, and without any description of the subtopics. 
The subtopics must be separated by a comma. There must be no other text than the list.
"""

def generate_subtopics(client, subtopics):
    """Ask the chat-completions endpoint for a list of Git subtopics.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        subtopics: Value substituted for ``{subtopics}`` in ``PROMPT_TEMPLATE``.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unmodified; the
        caller is responsible for extracting the message text.
    """
    user_message = {
        "role": "user",
        "content": PROMPT_TEMPLATE.format(subtopics=subtopics),
    }
    # Single-turn request using the module-level MODEL and fixed sampling settings.
    return client.chat.completions.create(
        model=MODEL,
        messages=[user_message],
        temperature=0.2,
        top_p=0.7,
    )

# Request the subtopic list and show the raw text of the first choice.
# NOTE(review): `client` and `MODEL` are not defined in the visible cells —
# confirm they are created before this cell runs on a fresh kernel.
# `responses` is read again later to build the list of subtopics.
responses = generate_subtopics(client, subtopics = subtopics)
print(responses.choices[0].message.content)


In [None]:
# Define and generate instructions

# Number of instructions requested from the model for each subtopic.
instructions = 100

# Prompt asking for `{instructions}` newline-separated user instructions about
# `{sub_topic}`, with no numbering and no extra text, so the reply can be split
# on newlines.
# NOTE(review): the prompt text contains the typo "assitant"; it is left as-is
# here because editing it changes the text sent to the model.
INSTRUCTION_PROMPT_TEMPLATE = """\
The objective is to create a dataset of user instructions in natural language that should be returned by Git commands.
Given a topic in Git, generate {instructions} possible concise instructions that could be given to an AI assitant about that topic.
Write some of these instructions as if given by someone with limited knowledge of Git terminologies and knowledge,
like a beginner programmer. Your response should be in a list format.

The topic is: {sub_topic}
The list must be without numbers. The questions/instructions should be separated by a newline character. There must be no other text than the list.
"""

# Split the model's comma-separated reply into clean subtopic names.
# Surrounding whitespace/newlines are trimmed and empty entries (e.g. from a
# trailing comma) are dropped so they do not leak into later prompts.
subtopic_list = [
    name.strip()
    for name in responses.choices[0].message.content.split(",")
    if name.strip()
]
def generate_instructions(client, sub_topic, instructions):
    """Request user instructions about one Git subtopic.

    Prints a progress line, then sends ``INSTRUCTION_PROMPT_TEMPLATE`` (filled
    with the given values) as a single user message.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        sub_topic: Value substituted for ``{sub_topic}`` in the prompt.
        instructions: Value substituted for ``{instructions}`` in the prompt.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    print(f"Generating Instructions for {sub_topic}.")
    user_message = {
        "role": "user",
        "content": INSTRUCTION_PROMPT_TEMPLATE.format(
            sub_topic=sub_topic,
            instructions=instructions,
        ),
    }
    completion = client.chat.completions.create(
        model=MODEL,
        messages=[user_message],
        temperature=0.2,
        top_p=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


In [None]:
def instructions_generator(client, subtopic_list, instructions):
    """Collect the generated instruction text for every subtopic.

    Args:
        client: Passed through to ``generate_instructions``.
        subtopic_list: Iterable of subtopics, processed in order.
        instructions: Passed through to ``generate_instructions``.

    Returns:
        A list with one ``generate_instructions`` result per subtopic, in the
        same order as ``subtopic_list``.
    """
    instruction_list = []
    for subtopic in subtopic_list:
        generated = generate_instructions(client, subtopic, instructions)
        instruction_list.append(generated)
    return instruction_list

# One newline-separated block of instructions per subtopic.
instruction_list = instructions_generator(client, subtopic_list, instructions)

# Flatten the blocks into a single list of instructions. Each line is stripped
# *before* the emptiness check, so whitespace-only lines are dropped instead of
# turning into "" entries (which would each trigger an API call later).
instruction_list_formatted = [
    line.strip()
    for instruction_set in instruction_list
    for line in instruction_set.split("\n")
    if line.strip()
]
print(instruction_list_formatted)

In [None]:
# Define response template

# Prompt used to answer a single instruction; `{instruction}` is replaced with
# the user's question/instruction text.
# NOTE(review): "Given an question" is a grammar slip in the prompt text; it is
# left as-is here because editing it changes the text sent to the model.
RESPONSE_PROMPT_TEMPLATE = """\
Given an question/instruction related to Git, generate a response that could be given.
Keep the response on-topic, informative, concise.

The user prompt is: {instruction}
"""
def generate_responses(client, instruction):
    """Generate a response for a single Git instruction.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        instruction: Value substituted for ``{instruction}`` in
            ``RESPONSE_PROMPT_TEMPLATE``.

    Returns:
        The ``message.content`` of the first choice, or the fixed string
        ``"Error: API returned a string."`` when the client call returned a
        plain ``str`` instead of a completion object.
    """
    user_message = {
        "role": "user",
        "content": RESPONSE_PROMPT_TEMPLATE.format(instruction=instruction),
    }
    response = client.chat.completions.create(
        model=MODEL,
        messages=[user_message],
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
    )

    # Guard: a bare string has no `.choices`; report it and return a sentinel.
    if isinstance(response, str):
        print(f"API returned a string: {response}")
        return "Error: API returned a string."

    return response.choices[0].message.content

In [None]:
# Generate response

def response_generator(client, instruction_list):
    """Generate one response per instruction.

    Args:
        client: Passed through to ``generate_responses``.
        instruction_list: Iterable of instructions, processed in order.

    Returns:
        A list of ``generate_responses`` results, in the same order as
        ``instruction_list``.
    """
    response_list = []
    for instruction in instruction_list:
        response_list.append(generate_responses(client, instruction))
    return response_list

# Generate one response per formatted instruction (same order as the input).
# No placeholder `[]` is assigned first: if generation fails, later cells should
# hit a missing/stale name rather than silently pairing against an empty list.
instruction_response_list = response_generator(client, instruction_list_formatted)
print(instruction_response_list)

# Pair each instruction with the response generated for it.
instruction_response_pair_list = [
    {
        "instruction": instruction,
        "responses": response,
    }
    for instruction, response in zip(instruction_list_formatted, instruction_response_list)
]

print(instruction_response_pair_list)