In [None]:
import os
from getpass import getpass

# Reuse a key that is already exported in the environment; otherwise prompt for
# it at run time so the secret is never typed into (or saved with) the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
!pip install openai -q

In [None]:
from openai import OpenAI

# No key is passed explicitly; presumably the client reads OPENAI_API_KEY
# from the environment (confirm against the openai client docs).
client = OpenAI()

# Issue one trivial request against each endpoint: legacy completions and chat.
completion = client.completions.create(
    prompt="Hello world",
    model="davinci-002",
)
chat_completion = client.chat.completions.create(
    messages=[dict(role="user", content="Hello world")],
    model="gpt-3.5-turbo",
)

In [None]:
# List the available models and record whether any model id mentions "gpt-4".
models = client.models.list()
has_gpt4 = next((True for entry in models if "gpt-4" in entry.id), False)

In [None]:
def prompt_model(prompt_list, model="gpt-4o"):
  """Send a list of chat messages to a chat model and return the raw response.

  Args:
    prompt_list: list of ``{"role": ..., "content": ...}`` message dicts.
    model: name of the chat model to query.

  Returns:
    The response object of the chat completions endpoint. Callers in this
    notebook read ``.choices[0].message.content`` and ``.usage.total_tokens``
    from it.
  """
  # `messages` is an argument of the chat endpoint; the legacy
  # `client.completions` endpoint expects `prompt` and does not return the
  # `.message` objects the callers rely on.
  return client.chat.completions.create(model=model, messages=prompt_list)

def create_prompt(role, prompt):
  """Wrap a role and its text into a single chat-message dict."""
  return dict(role=role, content=prompt)

### Generating Synthetic Data

We'll create 10 product/description pairs and then generate a marketing email for each.

In [None]:
# Messages for the data-generation request: a system persona followed by the
# user's ask for ten products with very short descriptions.
datagen_prompts = [
    dict(role=speaker, content=text)
    for speaker, text in (
        ("system", "You are a product innovator. You create new products that people crave."),
        ("user", "Please generate a list of 10 new products and extremely short descriptions."),
    )
]

In [None]:
# Send the data-generation messages to the model and print the text of the
# first returned choice.
first_data_gen = prompt_model(datagen_prompts)
print(first_data_gen.choices[0].message.content)

Sure, here's a list of 10 new products with extremely short descriptions:

1. **SmartPlant**: Self-watering, app-connected plant pot.
2. **MoodMask**: Color-changing facemask indicating mood.
3. **EchoPen**: Voice-recording and transcribing pen.
4. **FitSnack**: Health bars customized to your DNA.
5. **AuraShower**: Mood-lighting and aromatherapy shower head.
6. **SolarWindow**: Energy-harvesting transparent window film.
7. **WrapNapper**: Weighted blanket that doubles as a stylish wrap.
8. **FreshPatch**: Odor-neutralizing fridge mat that signals spoilage.
9. **ChillSip**: Temperature-controlled smart cup for drinks.
10. **ThermSoles**: Self-heating and cooling insoles with app control.


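First we check how many tokens the request used, then we parse the generated products into a Python list of product/description dictionaries.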

In [None]:
def retrieve_token_usage(open_ai_response):
    """Return the ``total_tokens`` value from the response's ``usage`` attribute."""
    usage_stats = open_ai_response.usage
    return usage_stats.total_tokens

# Report the total token count of the data-generation request.
print(f"We used {retrieve_token_usage(first_data_gen)} tokens")

We used 216 tokens


In [None]:
text_response = first_data_gen.choices[0].message.content


def _parse_product_line(line):
  """Parse one ``N. product: description`` line into a product/description dict.

  Returns None for anything that is not a numbered list item with a non-empty
  product followed by ``:`` (intro/outro prose, blank lines, stray sentences),
  so such lines never become empty dataset entries.
  """
  number, dot, remainder = line.strip().partition(".")
  if not dot or not number.isdigit():
    return None
  product, colon, description = remainder.partition(":")
  product = product.strip()
  if not colon or not product:
    return None
  # Only the FIRST period (the one after the item number) is used as a
  # separator, so descriptions that contain periods are kept whole; a single
  # trailing full stop is dropped.
  description = description.strip()
  if description.endswith("."):
    description = description[:-1]
  return {"product": product, "description": description}


products_and_descriptions = [
    parsed
    for parsed in map(_parse_product_line, text_response.splitlines())
    if parsed is not None
]

In [None]:
# Display the first parsed entry to check the product/description split.
products_and_descriptions[0]

{'product': '**SmartPlant**',
 'description': 'Self-watering, app-connected plant pot'}

In [None]:
# System-role message that is placed first in every email-generation prompt.
# The text is sent to the model verbatim, so edits to it change the generated emails.
system_prompt = create_prompt(
    "system",
    "You are a marketing executive. You are proficient at writing short, and snappy marketing emails. The emails should be easy to read, using actionable language. Prioritize clarity, and only then think about 'catchiness'. Never sacrifice clarity for the entertainment value. Leverage FOMO.  Avoid spam trigger words. Establish relevancy. Recipients want emails that are applicable to their lives. If they consider your emails helpful and relevant, they're more likely to engage and less likely to send it to junk or unsubscribe. Priortize benefits over features. Dont write long drafts. Find a way to summarize what the reader will get in a compelling way and let them click through to a page on your website for more information."
)

In [None]:
def generate_user_prompt(product, description):
  """Build the user-role message requesting a marketing email for one product.

  Args:
    product: product name inserted into the request text.
    description: product description inserted into the request text.

  Returns:
    Whatever ``create_prompt`` returns for the "user" role and the request text.
  """
  request_text = f"Please create a short marketing email using this product: {product} and this description: {description}"
  return create_prompt("user", request_text)

Generating our synthetic data by iterating through each item and collating the results into a list of dictionaries.

In [None]:
import time

from openai import RateLimitError

MAX_RATE_LIMIT_RETRIES = 5   # attempts per item before giving up on it
INITIAL_BACKOFF_SECONDS = 2  # wait before the first retry; doubled after each one

total_token_usage = 0

for idx, item in enumerate(products_and_descriptions):
  # Items that already carry an email are skipped, so re-running this cell
  # only fills in the ones that are still missing.
  if "marketing_email" in item:
    continue
  print(f"Working on {idx}")
  user_prompt = generate_user_prompt(item["product"], item["description"])
  full_prompt = [system_prompt, user_prompt]
  backoff = INITIAL_BACKOFF_SECONDS
  for attempt in range(1, MAX_RATE_LIMIT_RETRIES + 1):
    try:
      prompt_response = prompt_model(full_prompt)
    except RateLimitError as e:
      # Rate limits are transient: wait and retry instead of silently dropping
      # the item, and say so explicitly when the item is finally abandoned.
      if attempt == MAX_RATE_LIMIT_RETRIES:
        print(f"Giving up on {idx} after {attempt} rate-limited attempts: {e}")
        break
      print(f"Rate limited on {idx}; retrying in {backoff}s")
      time.sleep(backoff)
      backoff *= 2
    else:
      item["marketing_email"] = prompt_response.choices[0].message.content
      total_token_usage += retrieve_token_usage(prompt_response)
      break

Working on 0
Working on 1
Working on 2
Working on 3
Working on 4
Working on 5
Working on 6
Working on 7
Working on 8
Working on 9


In [None]:
# Keep only the entries that actually received a generated email.
products_desc_and_marktng_emails_dataset = list(
    filter(lambda entry: "marketing_email" in entry, products_and_descriptions)
)

In [None]:
# Display the first complete record (product, description and generated email).
products_desc_and_marktng_emails_dataset[0]

{'product': '**SmartPlant**',
 'description': 'Self-watering, app-connected plant pot',
 'marketing_email': "Subject: Never Worry About Watering Your Plants Again!\n\nHi [Name],\n\nSay goodbye to wilting plants and hello to SmartPlant! 🌱\n\nOur self-watering, app-connected plant pot makes plant care effortless. Just set it up, and let SmartPlant take care of the rest.\n\n**Why you'll love it:**\n- **Never Over/Under Water Again**: Your plant gets the perfect amount of water.\n- **Stay Informed**: Get real-time updates and tips via the app.\n- **Perfect for Busy Lifestyles**: Save time and keep your plants thriving.\n\nReady to transform your plant care routine?\n\n[Discover SmartPlant Now!]\n\nBest regards,\n[Your Company Name]"}

In [None]:
import pandas as pd

In [None]:
# Build a DataFrame from the list of record dicts.
df = pd.DataFrame(data=products_desc_and_marktng_emails_dataset)

In [None]:
# Preview the first rows of the dataset.
df.head()

Unnamed: 0,product,description,marketing_email
0,**SmartPlant**,"Self-watering, app-connected plant pot",Subject: Never Worry About Watering Your Plant...
1,**MoodMask**,Color-changing facemask indicating mood,Subject: Reveal Your Mood with MoodMask! 😍😎😲\n...
2,**EchoPen**,Voice-recording and transcribing pen,**Subject: Transform Your Notes with EchoPen! ...
3,**FitSnack**,Health bars customized to your DNA,**Subject: Unlock Your Health Potential with F...
4,**AuraShower**,Mood-lighting and aromatherapy shower head,Subject: Transform Your Daily Routine with Aur...


In [None]:
# Write the dataset to CSV, leaving out the DataFrame index.
df.to_csv("marketData.csv", index=False)

In [None]:
import json


# Serialize the dataset with 4-space indentation, then write the text to disk.
json_data = json.dumps(
    products_desc_and_marktng_emails_dataset,
    indent=4,
)

with open('marketData.json', mode='w') as json_file:
    json_file.write(json_data)