In [None]:
!pip install openai

In [None]:
import os
import random
import re
from getpass import getpass
from textwrap import dedent

import openai
import pandas as pd

In [None]:
# Never hardcode secrets in the notebook: reuse a key that is already exported
# in the environment and only prompt (without echo) when it is missing/empty.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OPENAI_API_KEY: ')
# Keep the variable defined, as before, but do not clobber an existing value.
os.environ.setdefault('SERPAPI_API_KEY', '')
openai.api_key = os.environ['OPENAI_API_KEY']

def remove_whitespace(sentence):
    '''Collapse every run of whitespace in `sentence` into a single space.

    Leading and trailing runs are collapsed as well (not stripped), so text
    that starts or ends with whitespace keeps exactly one space there.
    '''
    return re.sub(r'\s+', " ", sentence)

# 1) Generate Data with Prompt

In [None]:
def generate_example(prompt, prev_examples, temperature=.5, model="gpt-3.5-turbo",
                     max_tokens=1028, max_context_examples=10):
    '''Ask the chat model for one new prompt/response training sample.

    Args:
        prompt: High-level description of the model to train; interpolated
            into the system message.
        prev_examples: Previously generated samples. They are replayed to the
            model as assistant turns; the caller's list is not modified.
        temperature: Sampling temperature forwarded to the API.
        model: Chat model name forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.
        max_context_examples: Upper bound on how many previous samples are
            replayed; when more exist, a random subset of this size is used.

    Returns:
        The `content` of the first choice's message.
    '''
    messages=[
        {
            "role": "system",

            # NOTE: remove_whitespace flattens the whole message, so the `\n`
            # escapes in the format line below reach the model as single spaces.
            "content": remove_whitespace(
                f'''
                You are generating data which will be used to train a machine learning model. \
                You will be given a high-level description of the model we want to train, \
                and from that, you will generate data samples, each with a prompt/response pair. \
                You will do so in this format:
                ```\nprompt\n-----------\nresponse_goes_here\n-----------\n```
                Only one prompt/response pair should be generated per turn. \
                For each turn, make the example slightly more complex than the last, while ensuring diversity. \
                Make sure your samples are unique and diverse, \
                yet high-quality and complex enough to train a well-performing model. \
                \n\nHere is the type of model we want to train:\n`{prompt}` \
                ''')
        }
    ]

    # Cap the replayed history. random.sample returns a new list, so rebinding
    # the local name leaves the caller's list untouched.
    if len(prev_examples) > max_context_examples:
        prev_examples = random.sample(prev_examples, max_context_examples)
    messages.extend(
        {"role": "assistant", "content": example} for example in prev_examples
    )

    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message['content']

In [None]:
### Generate examples ###
# Plain-English description of the model we want training data for.
prompt = remove_whitespace('''\
    A model that takes in love life related questions in English, \
    and responds with a short, sexy and funny response that always come with dirty jokes\
    ''')

# Generation settings.
temperature = 0.4
number_of_examples = 5

# Every new sample is stored here and passed back in on the next request.
prev_examples = []
for i in range(number_of_examples):
    print(f'Generating example {i}')
    example = generate_example(prompt, prev_examples, temperature)
    prev_examples.append(example)
    print(example)

Generating example 0
Prompt: How do I find love?

-----------
Response: Love is like a fart, if you have to force it, it's probably crap. But hey, don't worry, I'm here to help you navigate through the maze of love and find that special someone who will make your heart skip a beat and your pants feel a little tighter. Let's dive into the wild world of dating and find you a love that's as hot as a jalapeño pepper!
Generating example 1
Prompt: What should I wear on a first date?

-----------
Response: Well, my dear friend, the key to a successful first date outfit is to strike the perfect balance between looking sexy and leaving a little something to the imagination. You want to make them think, "Damn, they look good!" while also making them wonder what's underneath those clothes. So, how about a little black dress that hugs your curves in all the right places, paired with some killer heels that will make their jaw drop? Trust me, you'll have them eating out of the palm of your hand in n

# 2) Generate System Message

In [None]:
def generate_system_message(prompt, temperature=.5, model="gpt-3.5-turbo", max_tokens=500):
    '''Ask the chat model for a concise inference-time system prompt.

    Args:
        prompt: High-level description of the model being trained; sent
            (stripped of surrounding whitespace) as the user message.
        temperature: Sampling temperature forwarded to the API. Taken as an
            explicit argument so the function does not depend on a
            notebook-level global having been defined by another cell.
        model: Chat model name forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The `content` of the first choice's message.
    '''
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
          {
            "role": "system",
            "content": remove_whitespace(
                f'''\
                You will be given a high-level description of the model we are training, \
                and from that, you will generate a simple system prompt for that model to use. \
                Remember, you are not generating the system message for data generation -- \
                you are generating the system message to use for inference. A good format to follow is \
                `Given WHAT_THE_MODEL_SHOULD_DO.`. \
                \n\nMake it as concise as possible. Include nothing but the system prompt in your response.\
                \n\nFor example, never write: `\"SYSTEM_PROMPT_HERE`. \
                '''
                )
          },
          {
              "role": "user",
              "content": prompt.strip(),
          }
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return response.choices[0].message['content']

# Pass the notebook's configured temperature explicitly.
system_message = generate_system_message(prompt, temperature=temperature)
print(f'The system message is: `{system_message}`')

The system message is: `Given a love life related question, respond with a short, sexy and funny response that includes a dirty joke.`


# 3) Saving Data

In [None]:
# Display the raw generated samples (unparsed strings, separator included).
prev_examples

["Prompt: How do I find love?\n\n-----------\nResponse: Love is like a fart, if you have to force it, it's probably crap. But hey, don't worry, I'm here to help you navigate through the maze of love and find that special someone who will make your heart skip a beat and your pants feel a little tighter. Let's dive into the wild world of dating and find you a love that's as hot as a jalapeño pepper!",
 'Prompt: What should I wear on a first date?\n\n-----------\nResponse: Well, my dear friend, the key to a successful first date outfit is to strike the perfect balance between looking sexy and leaving a little something to the imagination. You want to make them think, "Damn, they look good!" while also making them wonder what\'s underneath those clothes. So, how about a little black dress that hugs your curves in all the right places, paired with some killer heels that will make their jaw drop? Trust me, you\'ll have them eating out of the palm of your hand in no time!',
 "Prompt: How can 

In [None]:
# Separator the generated samples use between the prompt and the response.
EXAMPLE_SEPARATOR = '-----------'

prompts = []
responses = []
skipped_examples = []

# Parse out prompts and responses from examples.
# Each sample is validated BEFORE anything is appended, so `prompts` and
# `responses` always stay the same length. Appending the prompt first and then
# failing on a missing response would leave an orphan prompt behind and make
# the DataFrame construction below raise on mismatched column lengths.
for example in prev_examples:
    parts = example.split(EXAMPLE_SEPARATOR) if isinstance(example, str) else []
    if len(parts) < 2:
        # Best effort: keep going, but remember what was dropped.
        skipped_examples.append(example)
        continue
    prompts.append(parts[0].strip())
    responses.append(parts[1].strip())

if skipped_examples:
    print(f'Skipped {len(skipped_examples)} malformed example(s) without a prompt/response separator')

# Create a DataFrame
df = pd.DataFrame({
    'prompt': prompts,
    'response': responses
})

# Remove duplicates
df = df.drop_duplicates()
df.head()

# Split the data into train and test sets, with 90% in the train set
# train_df = df.sample(frac=0.9, random_state=42)
# test_df = df.drop(train_df.index)

# Save the dataframes to .jsonl files
# train_df.to_json('train.jsonl', orient='records', lines=True)
# test_df.to_json('test.jsonl', orient='records', lines=True)

Unnamed: 0,prompt,response
0,Prompt: How do I find love?,"Response: Love is like a fart, if you have to ..."
1,Prompt: What should I wear on a first date?,"Response: Well, my dear friend, the key to a s..."
2,Prompt: How can I make my partner feel special?,"Response: Ah, making your partner feel special..."
3,Prompt: How do I keep the spark alive in a lon...,"Response: Ah, the eternal quest to keep the fl..."
4,Prompt: How do I handle jealousy in a relation...,"Response: Ah, jealousy, the green-eyed monster..."
