In [30]:
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

In [31]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by ChatOpenAI
# comes from — confirm against the project's .env.
load_dotenv()

True

Here are some auxiliary functions to save your synthetic data.

In [32]:
from auxiliar import add_messages

In [33]:
# Schema for one generated user message. It is nested inside
# ListSyntheticUserMessages, which is the model handed to PydanticOutputParser,
# so the field title/description below end up in the format instructions sent
# to the LLM.
# NOTE(review): documented with `#` comments on purpose — a class docstring
# would likely be emitted as the schema "description" and change the prompt.
class SyntheticUserMessage(BaseModel):

    # Text of a single synthetic message; `...` marks the field as required.
    message: str = Field(
        ...,
        title="Message",
        description="The user message to generate for the target task intention.",
    )


# Top-level schema the LLM output is parsed into: a wrapper holding the list
# of generated messages. This is the model passed to PydanticOutputParser.
# NOTE(review): documented with `#` comments on purpose — a class docstring
# would likely be emitted as the schema "description" and change the prompt.
class ListSyntheticUserMessages(BaseModel):

    # All generated messages for one request; `...` marks the field as required.
    messages: list[SyntheticUserMessage] = Field(
        ...,
        title="Messages",
        description="The list of synthetic user messages to generate for the target task intention.",
    )

# Parser shared by every chain in this notebook: it supplies the format
# instructions embedded in the prompts and is the last step of each chain,
# yielding a ListSyntheticUserMessages whose `.messages` the cells iterate.
output_parser = PydanticOutputParser(pydantic_object=ListSyntheticUserMessages)

In [34]:
# Prompt template for intention-specific generation.
# Placeholders: {k}, {user_intentions}, {target_task_intention} and
# {target_task_intention_description} are supplied on each invoke call;
# {format_instructions} is pre-bound from the output parser when the
# PromptTemplate is built.
# NOTE(review): the opening sentence says "there employees" (likely "their");
# left untouched here because the text is sent verbatim to the model.
system_prompt = """
You are tasked with generating synthetic user messages for an insurance company platform called SecureShield, 
seeking to help there employees to optimize its claims processing workflow and accelerate the approval or rejection of claims.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages for the following target task intention:
{target_task_intention}

Specific information about the target task intention:
{target_task_intention_description}

Follow these guidelines:
1. Focus exclusively on the target task intention, ensuring the message is relevant.
2. Each message should be between 5 and 20 words.
3. Avoid including any details or references to other user intentions.
4. Ensure the messages sound natural and typical of user queries for the given intention.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

# Build the intention-specific prompt.
# `input_variables` lists only the values supplied on each invoke call.
# `format_instructions` is bound once through `partial_variables`, so it must
# not also be declared as an input variable.
# Fix: the original list lacked a comma between "target_task_intention" and
# "target_task_intention_description", so Python's implicit string
# concatenation produced one non-existent variable name.
prompt = PromptTemplate(
    template=system_prompt,
    input_variables=[
        "k",
        "user_intentions",
        "target_task_intention",
        "target_task_intention_description",
    ],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

In [35]:
# Settings shared by every generation cell below.
user_intentions = ["update_claim_status", "get_claim_info",  "get_policy_info"]
k = 40  # number of synthetic user messages requested per target task intention

# NOTE(review): "intetions" spelling is kept as-is — it is the on-disk file name.
file_name = "synthetic_intetions.json"

# Chat model used for generation (temperature 0.0).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# prompt -> model -> structured parser; the result exposes `.messages`.
synthetic_data_chain = prompt | llm | output_parser

# Intention 1 - Update Claim Status

In [36]:
intention = "update_claim_status"

description = "The employee wants to update the status of a claim,\
    to do so they provide an id number to identify the claim that they want to update\
    and ask for update it to approved or denied."

# Ask the chain for `k` messages targeting this intention.
response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
    }
)

# Label every generated message with the intention it was generated for.
update_claim_status_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [37]:
# Hand the labelled "update_claim_status" records to the `auxiliar` helper,
# together with the target file name.
# NOTE(review): presumably appends to the JSON file rather than overwriting —
# confirm in auxiliar.add_messages, since re-running this cell may duplicate rows.
add_messages(update_claim_status_messages, file_name)

# Intention 2 -  Get Claim Info 

In [38]:
intention = "get_claim_info"

description = """The employee wants to access relevant information to assist in making decisions about a claim.
This includes details like the claim's status, the associated policy's information, and other related claim data.
Employees can query about specific claims, claim status, claim amounts, or any policy-related information that might help in the approval or denial process."""

# Ask the chain for `k` messages targeting this intention.
response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
    }
)

# Label every generated message with the intention it was generated for.
get_claim_info_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [39]:
# Hand the labelled "get_claim_info" records to the `auxiliar` helper,
# together with the target file name.
# NOTE(review): presumably appends to the JSON file rather than overwriting —
# confirm in auxiliar.add_messages, since re-running this cell may duplicate rows.
add_messages(get_claim_info_messages, file_name)

# Intention 3 -  Get Policy Info 

In [40]:
intention = "get_policy_info"

description = """The employee wants to access relevant information regarding the insurance policy associated with a claim.
This includes details such as the policy type, coverage, policyholder information, policy dates, and any other relevant data that could help in the claim decision process.
Employees can query about the policy status, level, coverage type, or any associated details that may influence the claim's approval or denial."""

# Ask the chain for `k` messages targeting this intention.
response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
    }
)

# Label every generated message with the intention it was generated for.
get_policy_info_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [41]:
# Hand the labelled "get_policy_info" records to the `auxiliar` helper,
# together with the target file name.
# NOTE(review): presumably appends to the JSON file rather than overwriting —
# confirm in auxiliar.add_messages, since re-running this cell may duplicate rows.
add_messages(get_policy_info_messages, file_name)

# No Intention: None

In [42]:
# Prompt template for off-topic ("None" intention) messages.
# Placeholders: {k} and {user_intentions} are supplied on invoke;
# {format_instructions} is pre-bound from the output parser.
# This rebinds `system_prompt`, replacing the intention-specific template
# defined earlier in the notebook, so earlier cells must not be re-run after
# this one without re-running their own prompt cell first.
# NOTE(review): the text is sent verbatim to the model and is left untouched,
# but it repeats the sentence "The user might ask general questions, share
# opinions, or express emotions.", contains "totaly none related", and
# guideline 4 still refers to "the given intention" although none is targeted.
system_prompt = """
You are tasked with generating synthetic user messages.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages completely unrelated to the available user intentions.
These messages should be generic and not related to any specific task or intention.
The user is engaging in casual conversation.
The user might ask general questions, share opinions, or express emotions. 
The user might also ask for totaly none related questions to the platform. 
The user might ask general questions, share opinions, or express emotions.

Follow these guidelines:
1. Focus exclusively on not being related to any of the user intentions.
2. Each message should be between 5 and 20 words.
3. Avoid including any details or references to other user intentions.
4. Ensure the messages sound natural and typical of user queries for the given intention.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

In [43]:
# Off-topic prompt: only `k` and `user_intentions` are supplied per call;
# the parser's format instructions are bound up front.
prompt = PromptTemplate(
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
    input_variables=["k", "user_intentions"],
    template=system_prompt,
)

# Rebuild the chain so it uses the off-topic prompt from here on.
synthetic_data_chain = prompt | llm | output_parser

In [44]:
# Request a third (integer division) of the per-intention count for the
# off-topic class.
response = synthetic_data_chain.invoke(
    {"k": (k // 3), "user_intentions": user_intentions}
)

# Off-topic messages carry the literal label "None".
none_related_messages = [
    {"Intention": "None", "Message": generated.message}
    for generated in response.messages
]

In [45]:
# Hand the "None"-labelled records to the `auxiliar` helper, together with the
# target file name.
# NOTE(review): presumably appends to the JSON file rather than overwriting —
# confirm in auxiliar.add_messages, since re-running this cell may duplicate rows.
add_messages(none_related_messages, file_name)