# Using ChatGPT to generate customer data via the API

In [14]:
import openai
import json
import yaml
import boto3

import sys
import os

# Make the parent of the current working directory importable so that
# `src.*` modules resolve when the notebook is run from its own folder.
# Guarded so that re-running this cell does not pile up duplicate entries.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [15]:
from src.gpt_wrappers import ChatGPTWithMemory

# Load config
# Credentials live in ../config.yml (relative to the working directory) so
# they are never hardcoded in the notebook itself.
with open('../config.yml') as f:
    # safe_load builds only plain Python types (dict/list/str/...), which is
    # all a key/value config needs; `or {}` covers an empty file, for which
    # the loader returns None.
    config = yaml.safe_load(f) or {}

# Fail early with one clear message instead of a bare KeyError further down.
missing_keys = [
    key
    for key in ('OPENAI_KEY', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
    if key not in config
]
if missing_keys:
    raise KeyError(f"../config.yml is missing required keys: {missing_keys}")

openai.api_key = config['OPENAI_KEY']

# S3 resource handle authenticated with the keys from the config file.
s3 = boto3.resource(
    's3',
    aws_access_key_id=config['AWS_ACCESS_KEY_ID'],
    aws_secret_access_key=config['AWS_SECRET_ACCESS_KEY']
)

In [16]:
# Build the chat wrapper with the instruction prompt describing the JSON
# schema the model must return.
# NOTE(review): presumably the positional argument is used as the system
# prompt -- confirm in src.gpt_wrappers.
# The schema example must itself be well-formed JSON (every field has a
# value and a trailing comma) so the model is not shown a broken template.
chatgpt = ChatGPTWithMemory(
    """
        You are a synthetic data generation machine that returns only JSONs on request.
        The conversation domain is AI software development consulting services.
        You are asked to generate a JSON with the following structure:
        [{
            "client_name": "John Doe",
            "client_email": "john.doe@example.com",
            "messages_history": "2021-01-01: Hello, how are you? | 2021-01-02: I'm fine, thanks. How are you?",
            "client_sales_pipeline_stage": "Prospect"
        }]
        You return JSON as array of dictionaries. Only JSON, nothing else as your answer.
    """
)

# One worked question/answer pair handed to the wrapper before the real
# request, showing the exact output shape expected for a 2-record query.
few_shot_question = """
                Generate me 2 pieces of Hubspot-like data with different lead stages
            """
few_shot_answer = """
                [
                    {
                        "client_name": "Emily Smith",
                        "client_email": "emily.smith@example.com",
                        "messages_history": "2021-01-01: Hi, I'm interested in your AI software development services! | 2021-01-05: Thanks for reaching out! What are your specific needs?",
                        "client_sales_pipeline_stage": "Lead"
                    },
                    {
                        "client_name": "David Johnson",
                        "client_email": "david.johnson@example.com",
                        "messages_history": "2021-01-01: Could you provide me with more details on your pricing? | 2021-01-08: Sure thing, here's a breakdown of our pricing options.",
                        "client_sales_pipeline_stage": "Opportunity"
                    }
                ]
            """

chatgpt.initialize_with_question_answer(few_shot_question, few_shot_answer)

# The actual request: same wording as the priming question, but for 5 records.
generation_prompt = """
        Generate me 5 pieces of Hubspot-like data with different lead stages
    """
answer = chatgpt.generate(generation_prompt)

In [17]:
# Show the raw model reply for a visual check before it is parsed as JSON.
print(answer)

        [
            {
                "client_name": "Emily Smith",
                "client_email": "emily.smith@example.com",
                "messages_history": "2021-01-01: Hi, I'm interested in your AI software development services! | 2021-01-05: Thanks for reaching out! What are your specific needs?",
                "client_sales_pipeline_stage": "Lead"
            },
            {
                "client_name": "David Johnson",
                "client_email": "david.johnson@example.com",
                "messages_history": "2021-01-01: Could you provide me with more details on your pricing? | 2021-01-08: Sure thing, here's a breakdown of our pricing options.",
                "client_sales_pipeline_stage": "Opportunity"
            },
            {
                "client_name": "Sarah Lee",
                "client_email": "sarah.lee@example.com",
                "messages_history": "2021-01-02: Hello, I'd like to learn more about your AI software's integration capabilities. |

In [18]:
# Parse the model reply as JSON and save it to a file.
# Parsing happens before the file is opened, so a malformed reply raises
# json.JSONDecodeError without truncating an existing data file.
output_path = '../data/data.json'
json_object = json.loads(answer)

# A fresh checkout may not have the data directory yet; create it on demand.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Pin the encoding so the file is written identically on every platform.
with open(output_path, 'w', encoding='utf-8') as outfile:
    json.dump(json_object, outfile)