In [None]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 76.5/76.5 kB 3.3 MB/s eta 0:00:00
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.52.2
    Uninstalling openai-1.52.2:
      Successfully uninstalled openai-1.52.2
Successfully installed openai-0.28.0


In [None]:
import os
import openai
import random
import pandas as pd
# from tenacity import retry, stop_after_attempt, wait_exponential

In [None]:
# High-level description of the model to train; it is interpolated into the
# data-generation system message and sent as the user turn when asking for the
# inference-time system prompt.
prompt = 'A model that takes a paragraph summarizing a movie as input and, based on the summary, outputs the movie genre as one of the following: "action," "family," "romance," "crime," or "fantasy."'
# Sampling temperature: passed explicitly to generate_example and read as a
# module-level global by generate_system_message.
temperature = 0.4
# Number of generate_example calls made by the generation loop.
number_of_examples = 70

# Read the key from the environment so a real secret never has to be pasted
# into (and saved with) the notebook. The original "api key" placeholder is
# kept as the fallback so behaviour is unchanged when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "api key")
# Legacy alias: despite its name, this value is the key handed to the OpenAI
# client below. Kept so any code that still reads the old name keeps working.
ANTHROPIC_API_KEY = OPENAI_API_KEY
openai.api_key = OPENAI_API_KEY

In [None]:
def generate_example(prompt, prev_examples, temperature=.5):
    """Request one new prompt/response training sample from the chat model.

    Args:
        prompt: High-level description of the model being trained; it is
            interpolated into the system message.
        prev_examples: Previously generated samples. They are replayed to the
            model as assistant turns; when there are more than 10, a random
            subset of 10 is used instead.
        temperature: Sampling temperature forwarded to the completion call.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    system_turn = {
        "role": "system",
        "content": f"You are generating data which will be used to train a machine learning model.\n\nYou will be given a high-level description of the model we want to train, and from that, you will generate data samples, each with a prompt/response pair.\n\nYou will do so in this format:\n```\nprompt\n-----------\n$prompt_goes_here\n-----------\n\nresponse\n-----------\n$response_goes_here\n-----------\n```\n\nOnly one prompt/response pair should be generated per turn.\n\nFor each turn, make the example slightly more complex than the last, while ensuring diversity.\n\nMake sure your samples are unique and diverse, yet high-quality and complex enough to train a well-performing model.\n\nHere is the type of model we want to train:\n`{prompt}`"
    }

    # Replay at most 10 earlier samples; beyond that, pick 10 at random.
    history = prev_examples
    if len(history) > 10:
        history = random.sample(history, 10)

    messages = [system_turn] + [
        {"role": "assistant", "content": earlier} for earlier in history
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        temperature=temperature,
        max_tokens=1354,
    )

    first_choice = completion.choices[0]
    return first_choice.message['content']

In [None]:
def generate_system_message(prompt):
    """Ask the chat model for a concise inference-time system prompt.

    Args:
        prompt: High-level description of the model being trained; it is
            stripped of surrounding whitespace and sent as the user turn.

    Returns:
        The ``content`` of the first choice's message in the API response.

    Note:
        The sampling temperature is not a parameter: it is read from the
        module-level ``temperature`` variable at call time.
    """
    instructions = "You will be given a high-level description of the model we are training, and from that, you will generate a simple system prompt for that model to use. Remember, you are not generating the system message for data generation -- you are generating the system message to use for inference. A good format to follow is `Given $INPUT_DATA, you will $WHAT_THE_MODEL_SHOULD_DO.`.\n\nMake it as concise as possible. Include nothing but the system prompt in your response.\n\nFor example, never write: `\"$SYSTEM_PROMPT_HERE\"`.\n\nIt should be like: `$SYSTEM_PROMPT_HERE`."

    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt.strip()},
    ]

    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=conversation,
        temperature=temperature,
        max_tokens=500,
    )

    first_choice = completion.choices[0]
    return first_choice.message['content']

In [None]:
# Start from an empty history. Without this the cell raises NameError on a
# fresh kernel, and re-running it would keep appending to leftover state.
prev_examples = []

for i in range(number_of_examples):
    print(f'Generating example {i}')
    # Everything generated so far is handed back to generate_example, which
    # replays a subset of it to the model as prior assistant turns.
    example = generate_example(prompt, prev_examples, temperature)
    prev_examples.append(example)

print(prev_examples)

Generating example 59
Generating example 60
Generating example 61
Generating example 62
Generating example 63
Generating example 64
Generating example 65
Generating example 66
Generating example 67
Generating example 68
Generating example 69
["prompt\n-----------\nIn a dystopian future, a skilled warrior named Max is forced to help a group of women escape from a tyrant named Immortan Joe. Max and the women must traverse a dangerous wasteland while being pursued by Joe's army. Along the way, they encounter various obstacles and enemies, but also form unexpected alliances. The film is filled with high-speed chases, explosive battles, and intense hand-to-hand combat.\n-----------\n\nresponse\n-----------\naction\n-----------", 'prompt\n-----------\nThe movie tells the story of a young girl named Sophie who encounters a strange and magical creature known as the BFG. The BFG, or Big Friendly Giant, takes Sophie to his home in Giant Country, where they embark on an adventure to capture dream

In [None]:
# Bare last-line expression: displays the list of raw generated example strings.
prev_examples

["prompt\n-----------\nIn a dystopian future, a skilled warrior named Max is forced to help a group of women escape from a tyrant named Immortan Joe. Max and the women must traverse a dangerous wasteland while being pursued by Joe's army. Along the way, they encounter various obstacles and enemies, but also form unexpected alliances. The film is filled with high-speed chases, explosive battles, and intense hand-to-hand combat.\n-----------\n\nresponse\n-----------\naction\n-----------",
 'prompt\n-----------\nThe movie tells the story of a young girl named Sophie who encounters a strange and magical creature known as the BFG. The BFG, or Big Friendly Giant, takes Sophie to his home in Giant Country, where they embark on an adventure to capture dreams and stop the other giants from eating human children. The film is filled with whimsical characters, magical elements, and a heartwarming friendship between Sophie and the BFG.\n-----------\n\nresponse\n-----------\nfamily\n-----------',
 "

In [None]:
# Derive the inference-time system prompt from the same high-level task description.
system_message = generate_system_message(prompt)
# Echo the result for manual inspection before it is used downstream.
print(f'The system message is: `{system_message}`. Feel free to re-run this cell if you want a better result.')

The system message is: `Given a summary of a movie, determine its genre from the following options: "action," "family," "romance," "crime," or "fantasy."`. Feel free to re-run this cell if you want a better result.


In [None]:
# Initialize lists to store prompts and responses
prompts = []
responses = []
# Count of examples that did not match the expected delimiter layout.
skipped_examples = 0

# Parse out prompts and responses from examples.
# Expected layout after splitting on the dashed delimiter:
#   [0] "prompt" header, [1] prompt text, [2] "response" header, [3] response text, ...
for example in prev_examples:
    try:
        split_example = example.split('-----------')
        # Extract BOTH fields before appending either one, so a sample with a
        # prompt but no response cannot leave the two lists with different
        # lengths (which would make the DataFrame constructor fail).
        parsed_prompt = split_example[1].strip()
        parsed_response = split_example[3].strip()
    except (AttributeError, IndexError):
        # Non-string or malformed sample: skip it, but keep a tally instead of
        # silently swallowing every possible error.
        skipped_examples += 1
        continue
    prompts.append(parsed_prompt)
    responses.append(parsed_response)

if skipped_examples:
    print('Skipped ' + str(skipped_examples) + ' malformed example(s).')

# Create a DataFrame
df = pd.DataFrame({
    'prompt': prompts,
    'response': responses
})

# Remove duplicates
df = df.drop_duplicates()

print('There are ' + str(len(df)) + ' successfully-generated examples. Here are the first few:')

df.head()

There are 68 successfully-generated examples. Here are the first few:


Unnamed: 0,prompt,response
0,"In a dystopian future, a skilled warrior named...",action
1,The movie tells the story of a young girl name...,family
2,The film is set in the 19th century and revolv...,romance
3,The story revolves around a group of thieves l...,crime
4,The movie follows the journey of a young boy n...,fantasy


In [None]:
# Persist the de-duplicated prompt/response pairs; index=False leaves the DataFrame index out of the file.
df.to_csv('movie_genre_API.csv', index=False)