# Notebook to synthesize data needed for CosmosDB and Frontend

In [None]:
from openai import AzureOpenAI
import random
import time
import os
import json
import uuid
from azure.cosmos import CosmosClient, PartitionKey, exceptions
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv
# load the environment variables
load_dotenv()

In [None]:
# The container layout is selected only when RUNNING_IN_DOCKER is exactly 'true'.
running_in_docker = os.getenv('RUNNING_IN_DOCKER') == 'true'

# base_dir anchors every relative output path used further down:
# inside Docker it sits under the current working directory, otherwise it
# points at the local checkout.
base_dir = (
    os.path.join(os.getcwd(), "assets", "scripts")  # Docker environment
    if running_in_docker
    else r"c:\repo\AOAI_ContactCenterDemo\frontend\assets\scripts"  # Local machine
)

## 0. (Optional) clean up the existing JSON files 

In [None]:
# Remove every *.json file found anywhere below the assets folder
def delete_json_files():
    """Recursively delete all ``.json`` files under ``<base_dir>/../../assets``.

    Each removed path is printed to stdout for confirmation.
    """
    assets_dir = os.path.join(base_dir, "..", "..", "assets")
    for folder, _subfolders, names in os.walk(assets_dir):
        json_paths = [os.path.join(folder, name) for name in names if name.endswith(".json")]
        for json_path in json_paths:
            os.remove(json_path)
            print(f"Deleted: {json_path}")

delete_json_files()

## 1. Configurations

### 1.1. Configure Azure OpenAI connection

In [None]:
# Azure OpenAI configurations, all read from environment variables
# (load_dotenv() was called in the first cell).
api_base = os.getenv("AOAI_API_BASE") # your endpoint should look like the following https://YOUR_RESOURCE_NAME.openai.azure.com/
api_key = os.getenv("AOAI_API_KEY")
gpt4omini = os.getenv("AOAI_GPT4O_MINI_MODEL") # passed as `model=` to the chat completion calls below; presumably the deployment name - confirm
api_version = os.getenv("AOAI_API_VERSION") # e.g. 2023-12-01-preview or 2024-02-15-preview; this might change in the future

# Client shared by the chat completion calls in this notebook
client = AzureOpenAI(
    api_key=api_key,  
    api_version=api_version,
    azure_endpoint = api_base,
)

### 1.2. Configure Azure CosmosDB connection and delete existing data of all collections

In [None]:
# Azure Cosmos DB connection details
# Authentication uses DefaultAzureCredential; no account key is read here.
credential = DefaultAzureCredential()
cosmos_endpoint = os.getenv("COSMOS_ENDPOINT")
cosmos_client = CosmosClient(cosmos_endpoint, credential)
database_name = os.getenv("COSMOS_DATABASE")
# Create the database named by COSMOS_DATABASE unless it already exists
database = cosmos_client.create_database_if_not_exists(id=database_name)

def container_exists(database, container_name):
    """Report whether ``container_name`` exists in ``database``.

    Returns a ``(exists, container)`` tuple: ``(True, <container client>)``
    when the container properties can be read, ``(False, None)`` when the read
    raises ``CosmosResourceNotFoundError``.
    """
    try:
        candidate = database.get_container_client(container_name)
        # Reading the properties is what confirms the container really exists
        candidate.read()
    except exceptions.CosmosResourceNotFoundError:
        return False, None
    else:
        return True, candidate
# Function to get the partition key path from the container
def get_partition_key_path(container):
    """Return the first partition key path from the container's properties (read via ``container.read()``)."""
    return container.read()['partitionKey']['paths'][0]

def delete_all_items(container):
    """Delete every document stored in ``container``.

    All items are fetched with a cross-partition ``SELECT * FROM c`` query and
    deleted one by one, using each item's own partition key value.

    The partition key field name is derived by stripping the slashes from the
    container's partition key path, so this only works for top-level paths
    such as ``/customer_id`` (the only kind this notebook creates).
    """
    # The partition key path is a property of the container, not of the item:
    # resolve it once instead of re-reading the container properties (a
    # service round trip) for every single document.
    partition_key_field = get_partition_key_path(container).strip('/')

    query = "SELECT * FROM c"
    items = container.query_items(query, enable_cross_partition_query=True)

    for item in items:
        container.delete_item(item, partition_key=item.get(partition_key_field))
    print(f"All items in container '{container.id}' have been deleted.")

def refresh_container(database, container_name, partition_key_path):
    """Return an empty container named ``container_name``.

    If the container already exists, all of its items are deleted and the
    existing container client is returned. Otherwise a new container is
    created with ``partition_key_path`` as its partition key and returned.
    """
    exists, container = container_exists(database, container_name)

    if not exists:
        print(f"Container '{container_name}' does not exist. Creating new container...")
        # offer_throughput is intentionally not passed here
        created = database.create_container(
            id=container_name,
            partition_key=PartitionKey(path=partition_key_path),
        )
        print(f"Container '{container_name}' has been created.")
        return created

    print(f"Container '{container_name}' already exists. Deleting all items...")
    delete_all_items(container)
    return container
# Create (or empty) one container per entity type and keep a client for each.
# NOTE(review): refresh_container() already returns the container client, so the
# follow-up get_container_client() calls look redundant - confirm before simplifying.
# create a container for Customer (partitioned by /customer_id)
customer_container_name = "Customer"
refresh_container(database, customer_container_name, "/customer_id")
customer_container = database.get_container_client(customer_container_name)
# create a container for Product (partitioned by /product_id)
product_container_name = "Product"
refresh_container(database, product_container_name, "/product_id")
product_container = database.get_container_client(product_container_name)
# create a container for Purchases (partitioned by /customer_id)
purchases_container_name = "Purchases"
refresh_container(database, purchases_container_name, "/customer_id")
purchases_container = database.get_container_client(purchases_container_name)
# create a container for the human conversations (partitioned by /customer_id)
human_conversations_container_name = "Human_Conversations"
refresh_container(database, human_conversations_container_name, "/customer_id")
human_conversations_container = database.get_container_client(human_conversations_container_name)
# create a container for the AI conversations (partitioned by /customer_id)
ai_conversations_container_name = "AI_Conversations"
refresh_container(database, ai_conversations_container_name, "/customer_id")
ai_conversations_container = database.get_container_client(ai_conversations_container_name)

### 1.3. Customizable configurations for synthesization

In [None]:
# Default synthesis settings.
# NOTE(review): the next cell reassigns all four names, so these values only
# take effect if that cell is skipped or removed.
company_name = "Unilever"
number_of_product = 1
number_of_customers = 1
number_of_human_conversations = 2

In [None]:
# Parameters
# These assignments override the defaults set in the previous cell.
# NOTE(review): looks like an injected parameters cell (e.g. papermill) - confirm.
company_name = "Google"
number_of_customers = 2
number_of_product = 4
number_of_human_conversations = 10


In [None]:
# Two purchases per customer.
# NOTE(review): this name is not referenced anywhere else in the visible notebook;
# synthesize_purchases() hardcodes range(2) - confirm whether it is still needed.
number_of_purchases = number_of_customers * 2

### 1.4. List parameters to ground the synthesis process (preferably leave unchanged)

In [None]:
# declare the 5 lists with allowed values
# sentiments_list, topics_list and agent_list feed the conversation prompt;
# first_name_list feeds both the customer profiles and the conversations;
# last_name_list feeds the customer profiles.
# The agent and name lists hold one entry per starting letter in alphabetical order:
# the conversation prompt derives numeric ids from the first letter of the name.
sentiments_list = ['positive', 'negative', 'neutral', 'mixed', 'content', 'upset', 'angry', 'frustrated', 'happy', 'disappointed', 'confused']
topics_list = ['churn', 'assistance', 'support', 'information', 'billing', 'payment', 'account', 'service', 'Quality', 'Sustainability']
agent_list = ['adam','betrace','curie','davinci','emil', 'fred']
first_name_list = ['Alex','Brian','Chloe','David','Emma','Fiona','George','Hannah','Ian','Julia','Kevin','Lucy','Michael',
    'Nicole','Oliver','Paula','Quinn','Rachel','Samuel','Tara','Ursula','Victor','Wendy','Xander','Yvonne','Zachary']
last_name_list = ["Anderson",  "Brown",  "Clark",  "Davis",  "Evans",  "Foster",  "Garcia",  "Harris",  "Ingram",  "Johnson",  "King",  "Lewis",  "Martin",  
                  "Nelson",  "Owens",  "Parker",  "Quinn",  "Robinson",  "Smith",  "Taylor",  "Underwood",  "Vargas",  "Wilson",  "Xavier",  "Young",  "Zimmerman"]

## 2. Functions

### 2.1. Function to generate list of products and their official websites urls

In [None]:
# Ask the model for a company's most popular products and their official website urls
def create_product_and_url_list(company_name, number_of_product, temperature=0.7, max_tokens=200):
    """Generate, persist and return the product/url list for ``company_name``.

    The model is asked for a JSON object with the keys 'products' and 'urls'.
    The parsed object is written (UTF-8, indented) to
    ``<base_dir>/../Products_and_Urls_List/<company_name>_products_and_urls.json``
    and also returned to the caller.
    """
    user_prompt = f"""generate a json list of {number_of_product} most popular product at brand level of the company {company_name}, and the official website url of those products. 
            Example for microsoft: Xbox, Surface, Windows, Office, Azure. Example for apple: iPhone, iPad, Mac, Apple Watch, AirPods. Example for Unilever: Dove, Lipton, Hellmann's, Knorr, Ben & Jerry's.
            The list contains two keys: 'products' and 'urls'. The 'products' key contains the list of products and the 'urls' key contains the list of urls."""
    response = client.chat.completions.create(
        model=gpt4omini,
        messages=[
            {"role": "system", "content": "you are a helpful assistant who helps people"},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        response_format={"type": "json_object"},
    )
    products_and_urls = json.loads(response.choices[0].message.content)

    # Keep non-ASCII characters readable in the saved file (UTF-8, no \u escapes)
    output_path = os.path.join(base_dir, "../Products_and_Urls_List", f"{company_name}_products_and_urls.json")
    with open(output_path, "w", encoding="utf-8") as out_file:
        json.dump(products_and_urls, out_file, ensure_ascii=False, indent=4)
    return products_and_urls

### 2.2. Function to randomly pick a value

In [None]:
# Pick one random sentiment, topic, product, agent and customer name for a conversation prompt
def randomized_prompt_elements(sentiments_list, topics_list, products_list, agend_list, first_name_list):
    """Draw one random element from each of the five supplied lists.

    Returns a tuple ``(sentiment, topic, product, agent, customer)``; the draws
    happen in that same order.
    """
    pools = (sentiments_list, topics_list, products_list, agend_list, first_name_list)
    return tuple(random.choice(pool) for pool in pools)

### 2.3. Function to call Azure OpenAI API to synthesize json data 

In [None]:
# Send a document creation prompt to Azure OpenAI and return the generated JSON text
def create_document(document_creation_prompt, temperature=0.9, max_tokens=2000):
    """Return the model's raw JSON-object response for ``document_creation_prompt``.

    The result is the unparsed message content (a string); callers write it to
    disk or parse it themselves.
    """
    conversation = [
        {"role": "system", "content": "you are a helpful assistant who helps people"},
        {"role": "user", "content": document_creation_prompt},
    ]
    response = client.chat.completions.create(
        model=gpt4omini,
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
        response_format={"type": "json_object"},
    )
    return response.choices[0].message.content

### 2.4. Function to create dynamic json file name

In [None]:
# Build a JSON file name from an index and three descriptive values
def create_document_name(i, random_selection1, random_selection2, random_selection3):
    """Return ``"<i>_<sel1>_<sel2>_<sel3>.json"`` with spaces in each selection replaced by underscores."""
    parts = [part.replace(' ', '_') for part in (random_selection1, random_selection2, random_selection3)]
    return f"{i}_" + "_".join(parts) + ".json"

### 2.5. Function to upload the synthesized local json data to Azure CosmosDB container

In [None]:
# Function to get the partition key path from the container
def get_partition_key_path(container):
    """Read the container properties and return the first configured partition key path."""
    properties = container.read()
    key_paths = properties['partitionKey']['paths']
    return key_paths[0]

# Function to save the JSON files to Azure Cosmos DB
def save_json_files_to_cosmos_db(filesfolder, container):
    """Upload every ``.json`` file in ``filesfolder`` to ``container``.

    For each document:
      * no (or empty) ``id``: the document is created as-is;
      * ``id`` present but partition key value missing/empty: the document is
        skipped and a message is printed;
      * otherwise the existing item is replaced, or created when Cosmos DB
        reports that it does not exist yet.

    Directory entries that are not regular ``.json`` files (sub-folders,
    placeholder files, ...) are ignored instead of aborting the whole upload
    with a parse or I/O error.

    The partition key field name is the container's partition key path without
    its slashes, so only top-level paths such as ``/customer_id`` are supported.
    """
    partition_key_field = get_partition_key_path(container).strip('/')  # Remove leading slash

    for file in os.listdir(filesfolder):
        file_path = os.path.join(filesfolder, file)
        if not file.endswith('.json') or not os.path.isfile(file_path):
            continue

        # Load the document first so the file is closed before any network call
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        item_id = data.get('id')
        if not item_id:
            # Create a new item if `id` is not provided
            container.create_item(body=data)
            print(f"Document {file} has been successfully created in Azure Cosmos DB!")
            continue

        partition_key_value = data.get(partition_key_field)
        if not partition_key_value:
            print(f"Document {file} is missing the partition key value.")
            continue

        try:
            # read_item raises CosmosResourceNotFoundError when the item is absent;
            # its return value is not needed
            container.read_item(item=item_id, partition_key=partition_key_value)
            container.replace_item(item=item_id, body=data)
            print(f"Document {file} has been successfully updated in Azure Cosmos DB!")
        except exceptions.CosmosResourceNotFoundError:
            # Item not found, so create a new one
            container.create_item(body=data)
            print(f"Document {file} has been successfully created in Azure Cosmos DB!")


### 2.6. Function to synthesize a list of customers

In [None]:
# function to generate synthetic customer profiles and save them as JSON files in the local folder Cosmos_Customer
def synthesize_customer_profiles(number_of_customers):
    """Generate ``number_of_customers`` customer profiles and enrich them with ids.

    Phase 1 asks the model for one profile per iteration, using a random first
    and last name, and writes the raw response to
    ``<base_dir>/../Cosmos_Customer/<i>_<first>_<last>_.json``.

    Phase 2 re-reads every file in that folder and adds ``customer_id`` and
    ``id`` before writing the file back. It is not limited to the files
    created by this call.
    """
    for i in range(number_of_customers):# one generated profile per iteration
        # parameterized prompt generation
        random_firstname = random.choice(first_name_list)
        random_lastname = random.choice(last_name_list)
        document_creation_prompt = f"""CREATE a JSON document of a customer profile whose first name is {random_firstname} and last name is {random_lastname}. 
        The required schema for the document is to follow the example below:
        {{
            "first_name": "Alex",
            "last_name": "Richardson",
            "email": "alex.richardson@example.com",
            "address": {{
                "street": "Fourth St 19",
                "city": "Chicago",
                "postal_code": "60601",
                "country": "USA"
            }},
            "phone_number": "+17845403125"
        }}
        Be creative about the values and do not use markdown to format the json object.
    """

        generated_document = create_document(document_creation_prompt)
        document_name = create_document_name(i, random_firstname, random_lastname, "")

        # Save the JSON document to the local folder Cosmos_Customer
        file_path = os.path.join(base_dir, "../Cosmos_Customer", document_name)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(generated_document)
        print(f"Document {document_name} has been successfully created!")
        # optional pause to avoid rate limiting: time.sleep(1)
    # loop through the files in the local folder Cosmos_Customer and update them:
    # 1. read the file and load the content
    # 2. derive customer_id as a name-based UUID (uuid3) of "<first_name>_<last_name>"
    # 3. add an id field made of the file name's index prefix plus the customer_id
    # 4. save the updated content back to the file
    # NOTE(review): the names come from the generated JSON, not from the random picks above,
    # so two profiles with the same name receive the same customer_id - confirm this is intended.
    directory = os.path.join(base_dir, "../Cosmos_Customer")
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        with open(file_path, 'r', encoding='utf-8') as f:
            customer_profile = json.load(f)
            customer_id = uuid.uuid3(uuid.NAMESPACE_DNS, f"{customer_profile['first_name']}_{customer_profile['last_name']}").hex
            customer_profile['customer_id'] = customer_id
            customer_profile['id'] = f"{filename.split('_')[0]}_{customer_id}"
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(customer_profile, f, ensure_ascii=False, indent=4)
        print(f"Document {filename} has been successfully updated!")
        # optional pause: time.sleep(1)

### 2.7. Function to synthesize a list of product details

In [None]:
# function to generate synthetic product profiles and save them as JSON files in the local folder Cosmos_Product
def synthesize_product_profiles(company_name):
    """Generate one product profile per product of ``company_name`` and add ids.

    The product names are read from the "products" key of
    ``<base_dir>/../Products_and_Urls_List/<company_name>_products_and_urls.json``.

    Phase 1 asks the model for a profile of each product and writes the raw
    response to ``<base_dir>/../Cosmos_Product/<idx>_<product>__.json``.

    Phase 2 re-reads every file in that folder and adds ``product_id`` and
    ``id`` before writing the file back. It is not limited to the files
    created by this call.
    """
    producturls_file_path = os.path.join(base_dir, "../Products_and_Urls_List", f"{company_name}_products_and_urls.json")
    with open(producturls_file_path, "r", encoding="utf-8") as f:
        products_list = json.load(f)["products"]
    for idx, product in enumerate(products_list):
        # parameterized prompt generation
        document_creation_prompt = f"""CREATE a JSON document of a product profile. The product is {product} made by {company_name}. 
        The required schema for the document is to follow the example below:
        {{
            "name": "string", 
            "category": "string", 
            "type": "string", 
            "brand": "string", 
            "unit_price": "number",
            "weight": {{
                "value": "number",
                "unit": "string"
            }},
            "color": "string", 
            "material": "string",
        }}
        Be creative about the values and do not use markdown to format the json object. if any field is not applicable, leave it empty.
    """

        generated_document = create_document(document_creation_prompt)
        document_name = create_document_name(idx, product, "", "")
        file_path = os.path.join(base_dir, "../Cosmos_Product", document_name)
        # save the JSON document to the local folder Cosmos_Product
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(generated_document)
        print(f"Document {document_name} has been successfully created!")
        # optional pause to avoid rate limiting: time.sleep(1)
    # loop through the files in the local folder Cosmos_Product and update them:
    # 1. add a product_id field (name-based UUID, uuid3, of the current file name) to the content
    # 2. add an id field (index prefix of the current file name plus the product_id) to the content
    # 3. save the updated content back to the file
    directory = os.path.join(base_dir, "../Cosmos_Product")
    for filename in os.listdir(directory):
        path = os.path.join(directory, filename)
        with open(path, 'r', encoding='utf-8') as f:
            product_profile = json.load(f)
            product_id = uuid.uuid3(uuid.NAMESPACE_DNS, f"{filename}").hex
            product_profile['product_id'] = product_id
            product_profile['id'] = f"{filename.split('_')[0]}_{product_id}"
        with open(path, 'w', encoding='utf-8') as f:
            json.dump(product_profile, f, ensure_ascii=False, indent=4)
        print(f"Document {filename} has been successfully updated!")
        # optional pause: time.sleep(1)

### 2.8. Function to synthesize a randomized list of purchase records

In [None]:
from datetime import datetime
# Today's date as text, e.g. "January 05, 2025" (full month name, zero-padded day, 4-digit year)
def get_today_date():
    """Return the current local date formatted with ``%B %d, %Y``."""
    return f"{datetime.today():%B %d, %Y}"
# define a function to retrieve a product profile from the Product container of the Cosmos DB based on the product_id
def get_product_profile(product_id):
    """Return the descriptive fields of the product with the given ``product_id``.

    The result is a dict holding name, category, type, brand, unit_price,
    weight, color and material, as stored in the Product container.

    Raises:
        IndexError: if no product with that ``product_id`` exists.
    """
    # product_id is bound as a query parameter instead of being interpolated
    # into the SQL text: callers pass values that originate from generated
    # JSON, so quotes or other unexpected characters must not alter the query.
    query = """
    SELECT
        c.name,
        c.category,
        c.type,
        c.brand,
        c.unit_price,
        c.weight,
        c.color,
        c.material
    FROM c WHERE c.product_id = @product_id
    """
    items = list(product_container.query_items(
        query=query,
        parameters=[{"name": "@product_id", "value": product_id}],
        enable_cross_partition_query=True,
    ))
    if not items:
        # Same exception type as the former bare items[0], with a usable message
        raise IndexError(f"No product with product_id {product_id!r} found in the Product container.")
    return items[0]
# function to generate synthetic purchase records and save them as JSON files in the local folder Cosmos_Purchases
def synthesize_purchases():
    """Generate two purchase records per customer and enrich them.

    Customer and product ids are collected from the local Cosmos_Customer and
    Cosmos_Product folders. For each customer, two purchases with a randomly
    chosen product are generated by the model and written to
    ``<base_dir>/../Cosmos_Purchases``.

    Afterwards every file in that folder is re-read and extended with
    ``order_number``, ``product_details`` (looked up in the Product container
    through ``get_product_profile``), ``total_price`` and ``id``.
    """
    # loop through the files in the local folder Cosmos_Customer and Cosmos_Product and create a list of customer_ids and product_ids respectively
    customer_ids = []
    product_ids = []
    customer_directory = os.path.join(base_dir, "../Cosmos_Customer")
    for filename in os.listdir(customer_directory):
        customer_file_path = os.path.join(customer_directory, filename)
        with open(customer_file_path, 'r', encoding='utf-8') as f:
            customer_profile = json.load(f)
            customer_ids.append(customer_profile['customer_id'])
    product_directory = os.path.join(base_dir, "../Cosmos_Product")
    for filename in os.listdir(product_directory):
        product_file_path = os.path.join(product_directory, filename)
        with open(product_file_path, 'r', encoding='utf-8') as f:
            product_profile = json.load(f)
            product_ids.append(product_profile['product_id'])
    # for each customer, generate 2 random purchase records with random product_id
    for idx, customer_id in enumerate(customer_ids):
        for i in range(2):
            random_product_id = random.choice(product_ids)
            document_creation_prompt = f"""CREATE a JSON document of a purchase record. The product_id is {random_product_id} which is bought by the customer_id {customer_id}. 
            The required schema for the document is to follow the example below:
            {{
                "customer_id": "string",
                "product_id": "string",
                "quantity": "number",
                "purchasing_date": "datetime",
                "delivered_date": "datetime"
            }}
            Do not use markdown to format the json object. if any field is not applicable, leave it empty.
            qantity should be a random number between 1 and 10.
            Today is {get_today_date()}, the purchasing_date and delivered_date should be within the last 6 months of today's date.
        """

            generated_document = create_document(document_creation_prompt)
            # idx*2+i+1 numbers the purchase files consecutively, starting at 1
            document_name = create_document_name(idx*2+i+1, random_product_id, customer_id, "")

            # save the JSON document to the local folder Cosmos_Purchases
            file_path = os.path.join(base_dir, "../Cosmos_Purchases", document_name)
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(generated_document)
            print(f"Document {document_name} has been successfully created!")
            # optional pause: time.sleep(1)
    # loop through the files in the local folder Cosmos_Purchases and update them:
    # 1. add an order_number field (name-based UUID, uuid3, of the current file name) to the content
    # 2. add product_details (from the Product container) and total_price (unit_price * quantity)
    # 3. add an id field (index prefix of the current file name plus the order_number) to the content
    # 4. save the updated content back to the file
    # NOTE(review): product_id, unit_price and quantity are taken from model-generated JSON;
    # this assumes the model echoed the product_id verbatim and produced numeric values - confirm.
    directory = os.path.join(base_dir, "../Cosmos_Purchases")
    for filename in os.listdir(directory):
        file_path = os.path.join(directory, filename)
        with open(file_path, 'r', encoding='utf-8') as f:
            purchase = json.load(f)
            order_number = uuid.uuid3(uuid.NAMESPACE_DNS, f"{filename}").hex
            purchase['order_number'] = order_number
            purchase['product_details'] = get_product_profile(purchase['product_id'])
            purchase['total_price'] = purchase['product_details']['unit_price'] * purchase['quantity']
            purchase['id'] = f"{filename.split('_')[0]}_{order_number}"
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(purchase, f, ensure_ascii=False, indent=4)
        print(f"Document {filename} has been successfully updated!")
        # optional pause: time.sleep(1)

### 2.9. Function to synthesize human conversations

In [None]:
# function to generate synthetic conversations between customer and agent for the Call Center Demo and save them as JSON files in the local folder Cosmos_HumanConversations
def synthesize_human_conversations(number_of_files, company_name):
    """Generate ``number_of_files`` customer/agent conversations and enrich them.

    Phase 1 draws a random sentiment, topic, product, agent and customer name
    per conversation, asks the model for the conversation JSON and writes the
    raw response to
    ``<base_dir>/../Cosmos_HumanConversations/<i>_<sentiment>_<topic>_<product>.json``.

    Phase 2 re-reads every file in that folder, adds ``sentiment``, ``topic``
    and ``product`` (recovered from the file name), ``session_id`` and ``id``,
    and writes the file back.

    Raises:
        ValueError: if a file name in the folder does not follow the
            ``<index>_<sentiment>_<topic>_<product>`` pattern.
    """
    # the product list comes from the "products" key of the company's JSON file in the local folder Products_and_Urls_List
    producturls_file_path = os.path.join(base_dir, "../Products_and_Urls_List", f"{company_name}_products_and_urls.json")
    with open(producturls_file_path, "r", encoding="utf-8") as f:
        products_list = json.load(f)["products"]

    for i in range(number_of_files):# one generated conversation per iteration
        # parameterized prompt generation
        random_sentiment, random_topic, random_product, random_agent, random_customer = randomized_prompt_elements(sentiments_list, topics_list, products_list, agent_list, first_name_list)
        document_creation_prompt = f"""CREATE a JSON document with the key: "customer_id", "messages", "agent_id".
        The "messages" is JSON array containing multi-turn chat conversation representing an exchange between a customer service 
        agent for the company {company_name} and their customer. The sentiment of the customer must be {random_sentiment} and 
        the topic of the conversation betweem the agent and customer should center around {random_topic}. The customer must be asking about the product {random_product}.
        The agent handling this conversation is named {random_agent}. The name of the customer is {random_customer}. 
        At the beginning of the converstion, the agent thanks the customer for calling, tells the customer his/her name and asks what the name of the customer is.
        The document should have at least 5 back and forth exchanges between the customer and the agent and the length MUST NOT EXCEED 800 words.
        The "customer_id" should be a number between 1 and 26, based on the first letter of the customer name in the alphabetical sequence, e.g. customer_id for Julia is 10, for Emma is 5. 
        The "agent_id" should be a number between 1 and 6, based on the first letter of the agent name in the alphabetical sequence, e.g. agent_id for adam is 1, for davinci is 4. 
        Do not use markdown to format the json object.
        If you encounter double quotes in any text, use backslach escaping method to ensure the validaty of the JSON output. 
        Example below:
        {{
        "customer_id": 10,
        "messages": [
            {{
                "role": "assistant",
                "content": "Hi Julia, this is Adam from the customer service team. I see that your Essenza Mini coffee machine is giving you trouble. I can assist you further with this. Would you prefer to go through some troubleshooting steps together, or should I provide you with the contact details for our customer support to arrange a repair or replacement?"
            }},
            {{
                "role": "user",
                "content": "Look, I don't have time for troubleshooting steps. Just tell me how I can get this thing fixed or replaced."
            }},
            {{
                "role": "assistant",
                "content": "I understand, Jennifer. Since your Essenza Mini is still under the 2-year warranty, we can arrange a repair or replacement for you. I'll need to gather a few details to proceed. \n\nCould you please confirm the following:\n1. The serial number of your coffee machine.\n2. A brief description of the issue you're experiencing.\n\nOnce I have this information, I'll expedite the process for you."
            }},
            {{
                "role": "user",
                "content": "Fine, the serial number is 12345ABC. The machine won’t turn on at all, no matter what I do. Now, can we get this sorted quickly?"
            }},
            {{
                "role": "assistant",
                "content": "Thank you for providing the details, Jennifer. I'll get this sorted for you right away. \n\nI'll initiate the repair/replacement process and our customer support team will contact you shortly to arrange the next steps. You should receive an email with further instructions within the next 24 hours.\n\nIn the meantime, if there's anything else you need or any other questions you have, feel free to let me know. We're here to help!"
            }},
            {{
                "role": "user",
                "content": "Alright, I'll keep an eye out for that email. Just make sure it gets done quickly. Thanks."
            }},
            {{
                "role": "assistant",
                "content": "Absolutely, Jennifer. "
            }},
            {{
                "role": "user",
                "content": "Yeah, yeah. We'll see."
            }}
        ],
        "agent_id": 1}}
    """

        generated_document = create_document(document_creation_prompt)
        document_name = create_document_name(i, random_sentiment, random_topic, random_product)

        # save the JSON document to the local folder Cosmos_HumanConversations
        file_path = os.path.join(base_dir, "../Cosmos_HumanConversations", document_name)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(generated_document)
        print(f"Document {document_name} has been successfully created!")
        # optional pause to avoid rate limiting: time.sleep(1)
    # loop through the files in the local folder Cosmos_HumanConversations and update them:
    # 1. read the file and load the content
    # 2. recover sentiment, topic and product from the file name
    # 3. derive session_id as a name-based UUID (uuid3) of customer_id, agent_id, sentiment, topic and product
    # 4. add an id field of the form chat_<index>_<session_id>
    # 5. save the updated content back to the file
    directory = os.path.join(base_dir, "../Cosmos_HumanConversations")
    for file in os.listdir(directory):
        file_path = os.path.join(directory, file)
        with open(file_path, 'r', encoding='utf-8') as f:
            document = json.load(f)
        # Drop only the final extension, so a dot inside a product name survives
        filename = os.path.splitext(file)[0]
        # Split at most three times: everything after the third underscore is the
        # product, whose spaces were turned into underscores when the file was named.
        # Splitting on every underscore would keep only the product's first word.
        index, sentiment, topic, product = filename.split('_', 3)
        document["sentiment"] = sentiment
        document["topic"] = topic
        document["product"] = product.replace('_', ' ')
        session_id = uuid.uuid3(uuid.NAMESPACE_DNS, f"{document['customer_id']}_{document['agent_id']}_{document['sentiment']}_{document['topic']}_{document['product']}").hex
        document['session_id'] = session_id
        document['id'] = f"chat_{index}_{session_id}"
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(document, f, ensure_ascii=False, indent=4)
        print(f"Document {file} has been successfully updated!")
        # optional pause: time.sleep(1)


## 3. Execute the functions to generate the synthetic data

### 3.1. Execute the create_product_and_url_list function and review/modify the results
#### (Recommended but optional) Check whether the synthesized products make sense and the generated URLs are correct. If needed, make manual changes directly in the JSON file, which serves as the basis for the CosmosDB database and Bing Search.

In [None]:
# Generate the product/url list for the configured company and save it under Products_and_Urls_List
create_product_and_url_list(company_name, number_of_product)

### 3.2. Execute the synthesize_customer_profiles function and then upload the synthesized data to CosmosDB

In [None]:
# Generate the customer profile files locally
synthesize_customer_profiles(number_of_customers)
# upload JSON files from Cosmos_Customer folder to Azure Cosmos DB 
customer_folder = '../Cosmos_Customer'
directory = os.path.join(base_dir, customer_folder)
save_json_files_to_cosmos_db(directory, customer_container)

### 3.3. Execute the synthesize_product_profiles function and then upload the synthesized data to CosmosDB

In [None]:
# Generate the product profile files locally
synthesize_product_profiles(company_name)
# upload JSON files from Cosmos_Product folder to Azure Cosmos DB 
product_folder = '../Cosmos_Product'
directory = os.path.join(base_dir, product_folder)
save_json_files_to_cosmos_db(directory, product_container)

### 3.3. Execute the synthesize_purchases function and then upload the synthesized data to CosmosDB

In [None]:
# Generate the purchase files locally. synthesize_purchases() queries the Product container,
# so the product upload cell must have been run before this one.
synthesize_purchases()
# upload JSON files from Cosmos_Purchases folder to Azure Cosmos DB 
purchases_folder = '../Cosmos_Purchases'
directory = os.path.join(base_dir, purchases_folder)
save_json_files_to_cosmos_db(directory, purchases_container)

### 3.4. Execute the synthesize_human_conversations function and then upload the synthesized data to CosmosDB

In [None]:
# execute the synthesize_human_conversations function
synthesize_human_conversations(number_of_human_conversations, company_name)
# upload JSON files from Cosmos_HumanConversations folder to Azure Cosmos DB
human_conversations_folder = '../Cosmos_HumanConversations'
directory = os.path.join(base_dir, human_conversations_folder)
save_json_files_to_cosmos_db(directory, human_conversations_container)