In [23]:
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

In [24]:
# Load variables from a local .env file into the process environment
# (presumably including the OpenAI API key that ChatOpenAI needs — confirm).
load_dotenv()

True

Auxiliary functions to save synthetic data

In [25]:
from auxiliar import add_messages

In [26]:
# Schema for a single generated user message; it is the item type of
# ListSyntheticUserMessages.messages.
# NOTE(review): documented with `#` comments rather than a docstring on
# purpose — a class docstring would likely be copied into the model's JSON
# schema and therefore into the parser's format instructions; verify before
# adding one.
class SyntheticUserMessage(BaseModel):

    # Required field (`...` means "no default"): the text of the message.
    message: str = Field(
        ...,
        title="Message",
        description="The user message to generate for the target task intention.",
    )


# Top-level schema handed to PydanticOutputParser: a wrapper around the list
# of SyntheticUserMessage items produced by one generation request.
# NOTE(review): kept docstring-free on purpose — a class docstring would
# likely leak into the JSON schema used for the format instructions; verify.
class ListSyntheticUserMessages(BaseModel):

    # Required field (`...` means "no default"): all generated messages.
    messages: list[SyntheticUserMessage] = Field(
        ...,
        title="Messages",
        description="The list of synthetic user messages to generate for the target task intention.",
    )

# Parser bound to ListSyntheticUserMessages. It is used twice below: its
# get_format_instructions() text is injected into the prompts, and it is the
# last stage of the generation chains.
output_parser = PydanticOutputParser(pydantic_object=ListSyntheticUserMessages)

In [27]:
# Prompt template for per-intention generation. Placeholders:
#   {user_intentions}                   - all intentions known to the platform
#   {k}                                 - number of messages to generate
#   {target_task_intention}             - name of the intention being generated
#   {target_task_intention_description} - description of that intention
#   {format_instructions}               - output-format text for the parser
# The platform description matches the intentions defined in this notebook
# (Squeak to Speak, a mental health chatbot); the earlier e-commerce wording
# was a leftover from another project and contradicted those intentions.
system_prompt = """
You are tasked with generating synthetic user messages for a mental health support chatbot platform called Squeak to Speak.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages for the following target task intention:
{target_task_intention}

Specific information about the target task intention:
{target_task_intention_description}

Follow these guidelines:
1. Focus exclusively on the target task intention, ensuring the message is relevant.
2. Each message should be between 5 and 20 words.
3. Avoid including any details or references to other user intentions.
4. Ensure the messages sound natural and typical of user queries for the given intention.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

# Prompt for per-intention generation.
# `format_instructions` is pre-filled through partial_variables, so it is not
# listed as an input variable. Each remaining placeholder is its own list item:
# a missing comma previously fused "target_task_intention" and
# "target_task_intention_description" into a single bogus name via implicit
# string concatenation.
prompt = PromptTemplate(
    template=system_prompt,
    input_variables=[
        "k",
        "user_intentions",
        "target_task_intention",
        "target_task_intention_description",
    ],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

In [28]:
# Chat model used for every generation request (temperature 0.0).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# Names of all intentions the platform recognises.
user_intentions = [
    "find_therapist",
    "find_support_group",
    "find_hotline",
    "habit_alternatives",
    "insert_journal",
    "delete_journal",
    "update_journal",
    "view_journal",
    "insert_mood",
    "delete_mood",
    "update_mood",
    "view_mood",
    "insert_gratitude",
    "ask_missionvalues",
    "ask_features",
    "review_user_memory",
    "chat_about_journal",
]

# Number of synthetic user messages to generate for each target task intention
k = 50

# File name handed to add_messages when saving the generated messages.
# NOTE(review): "intetions" looks like a typo of "intentions"; left unchanged
# because other code may already read this path — confirm before renaming.
file_name = "synthetic_intetions.json"

# Pipeline: fill the prompt -> call the chat model -> parse structured output.
synthetic_data_chain = prompt | llm | output_parser

## Intention Description

In [29]:
# Maps each intention name to a natural-language description of it.
# Every description opens with a typical phrase a user may start their message
# with, followed by what the user wants to achieve. The description is passed
# to the generation prompt as `target_task_intention_description`.
intentions = {
    "find_therapist": "The user may start the message with 'Help me find a therapist that...'. The user wants to receive a personalized recommendation for a healthcare professional. To do so, they provide details about their mental health needs, preferences, monetary limitations, and location. They may ask for therapists, counselors, or other mental health specialists that align with their specific requirements.",
    "find_support_group": "The user may start the message with 'Help me find a support group that...'. The user wants to find support groups in their vicinity. They provide information about their location and topics of interest (e.g., anxiety, grief, or addiction). They may ask about meeting times, formats (online or in-person), and how to join these groups.",
    "find_hotline": "The user may start the message with 'Help me find a hotline that...'. The user wants to access the contact information for emergency or non-emergency hotlines. They may specify whether they need immediate crisis support, general counseling services, or specialized hotlines for issues like abuse or mental health crises.",
    "habit_alternatives": "The user may start the message with 'Suggest an alternative for'. The user wants to find a healthier or more sustainable alternative to a habit they currently have. They describe the habit they wish to change, and the chatbot suggests actionable, practical alternatives based on the user's preferences and situation.",
    "insert_journal": "The user may start the message with 'I want to add to my journal today: …'. The user wants to record their thoughts, feelings, or reflections by making an entry in their journal. They may provide a text entry describing their emotions or experiences and optionally indicate the type of mood or topic for the entry.",
    "delete_journal": "The user may start the message with 'Delete my journal entry on…'. The user wants to delete an existing entry in their journal. They provide details about which entry to delete, and the chatbot confirms the deletion.",
    "update_journal": "The user may start the message with 'I want to change a journal entry'. The user wants to modify an existing entry in their journal. They provide details about which entry to update and describe the changes they want to make, such as editing the text or changing the mood associated with the entry.",
    "view_journal": "The user may start the message with 'How was my day on…'. The user wants to view past entries in their journal. They may specify a date range or keywords to filter the entries they want to review.",
    "insert_mood": "The user may start the message with 'Today, I feel...'. The user wants to record their current mood or feelings by making an entry in their mood board. They may provide a text entry describing their emotions or experiences and optionally indicate the type of mood or topic for the entry.",
    "delete_mood": "The user may start the message with 'Delete my mood on...'. The user wants to delete an existing entry in their mood board. They provide details about which entry to delete, and the chatbot confirms the deletion.",
    "update_mood": "The user may start the message with 'I want to change my mood...'. The user wants to modify an existing entry in their mood board. They provide details about which entry to update and describe the changes they want to make, such as editing the text or changing the mood associated with the entry.",
    "view_mood": "The user may start the message with 'How did I feel on...'. The user wants to view past entries in their mood board. They may specify a date range or keywords to filter the entries they want to review.",
    "insert_gratitude": "The user may start the message with 'I am grateful for..'. The user wants to contribute a message of gratitude or positivity to the community gratitude banner. They provide an anonymous text entry expressing something they are grateful for or a moment of joy they want to share with others.",
    "ask_missionvalues": "The user may start the message with 'Tell me about Squeak to Speak'. The user wants to learn about the mission, vision, and values of Squeak to Speak. They may ask about the organization's goals, the story behind its creation, or its commitment to mental health and user privacy.",
    "ask_features": "The user may start the message with 'Tell me about your chatbot'. The user wants an overview of the features and functionalities of Squeak to Speak. They may ask how the chatbot can assist with their mental health, what tools are available, or how they can benefit from using the platform.",
    "review_user_memory": "The user may start the message with 'What data do you have on me?'. The user wants to review the data that Squeak to Speak has collected about them. They may ask to see their stored preferences, previous interactions, or journal entries, and they expect transparency about the information being stored.",
    "chat_about_journal": "The user may start the message with 'I want to talk about how I felt on…'. The user wants to engage in a conversation with the chatbot, leveraging the knowledge of their past journal entries. They expect the chatbot to incorporate insights from their journal to provide more personalized and context-aware responses."
}

## Intention Creation

In [30]:
def process_intentions(intentions, synthetic_data_chain, k=50):
    """Generate synthetic messages for every intention and flatten the results.

    Args:
        intentions: Mapping of intention name -> description. Each pair is sent
            as the target intention; the whole mapping is also sent as
            ``user_intentions``.
        synthetic_data_chain: Object exposing ``invoke(dict)`` that returns a
            result with a ``messages`` iterable whose items have a ``message``
            attribute.
        k: Number of messages to request per intention. Defaults to 50.

    Returns:
        A list of dicts with keys ``"Intention"``, ``"Message"`` and ``"Id"``.
        Ids start at 1 and keep increasing across intentions.
    """
    flat_results = []

    for intention, description in intentions.items():
        response = synthetic_data_chain.invoke({
            # The prompt's {k} is the message count. It was previously filled
            # with the intention name, so the model was asked to create
            # "<intention name> distinct messages".
            "k": k,
            "user_intentions": intentions,
            "target_task_intention": intention,
            "target_task_intention_description": description,
        })

        for message in response.messages:
            flat_results.append({
                "Intention": intention,
                "Message": message.message,
                # Running 1-based id, unique across all intentions.
                "Id": len(flat_results) + 1,
            })

    return flat_results


In [None]:
# Generate messages for every intention (one chain call per intention), then
# hand the flattened records to add_messages together with the output file name.
results = process_intentions(intentions, synthetic_data_chain)
add_messages(results, file_name)

# No Intention: None

In [19]:
# Prompt template for the "None" class: messages that match no known intention.
# Placeholders: {user_intentions}, {k}, {format_instructions}.
# Compared with the earlier wording: a duplicated sentence was removed, the
# garbled "totaly none related" phrase was rewritten, and guidelines 3-4 no
# longer mention "other"/"the given" intention, which contradicted the task.
system_prompt = """
You are tasked with generating synthetic user messages.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages completely unrelated to the available user intentions.
These messages should be generic and not related to any specific task or intention.
The user is engaging in casual conversation.
The user might ask general questions, share opinions, or express emotions.
The user might also ask questions that are totally unrelated to the platform.

Follow these guidelines:
1. Focus exclusively on not being related to any of the user intentions.
2. Each message should be between 5 and 20 words.
3. Avoid including any details or references to any of the user intentions.
4. Ensure the messages sound natural and typical of casual, off-topic user messages.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

In [20]:
# Build the prompt for the "no intention" template. Only `k` and
# `user_intentions` are supplied at invoke time; `format_instructions` is
# pre-filled from the output parser.
# NOTE: this rebinds `prompt` and `synthetic_data_chain`, replacing the
# per-intention versions defined earlier in the notebook.
prompt = PromptTemplate(
    input_variables=["k", "user_intentions"],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
    template=system_prompt,
)

# Pipeline: fill the prompt -> call the chat model -> parse structured output.
synthetic_data_chain = prompt | llm | output_parser

In [21]:
# Request off-topic messages from the "no intention" chain.
# NOTE(review): only 1 message is requested here, whereas `k` is 50 for the
# intention classes — confirm this is intended and not a debugging leftover.
response = synthetic_data_chain.invoke({"k": 1, "user_intentions": user_intentions})

# Label every generated message with the "None" intention.
none_related_messages = [
    {"Intention": "None", "Message": generated.message}
    for generated in response.messages
]

In [22]:
# Hand the "None"-labelled records to add_messages with the same output file
# name used for the intention-labelled messages.
add_messages(none_related_messages, file_name)