# Generate synthetic data

We generate synthetic data using the OpenAI API with the `gpt-3.5-turbo` model.

In [1]:
import environ
from openai import OpenAI
import openai
import json
import csv

# Import OpenAI key
# `read_env()` is expected to load variables from a local `.env` file
# (django-environ style API -- confirm against the installed `environ` package).
env = environ.Env()
environ.Env.read_env()
API_KEY = env("OPENAI_API_KEY")
# Kept so that any code using the module-level `openai` configuration still works.
openai.api_key = API_KEY

# Set constants
LLM_MODEL = "gpt-3.5-turbo"
TEMPERATURE = 0

# Client
# Pass the key explicitly instead of relying on the client's implicit
# `OPENAI_API_KEY` environment lookup, so the key that was just loaded is the
# one that is actually used.
client_openai = OpenAI(api_key=API_KEY)



## First, test the prompt

We reuse the same prompt that was used for the `ollama` model.

In [2]:
# Single-category test prompt. The output-format example is valid JSON (no
# trailing comma after the last object) so that a reply imitating it can be
# parsed with `json.loads`.
prompt = """
Generate examples of dysfunctional and toxic language that might be encountered between couples or 
ex-couples who have to continuously interact.

Each entry should include:

1 - A sentence reflecting dysfunctional communication, showcasing various forms of toxicity such as
    insults, harassment, threats, manipulation, and derogatory remarks.
    
2 - A transformed version of the same sentence that represents functional, healthy communication.

Ensure the sentences are realistic and diverse in terms of content and context.
The sentences should refer to this issue category:
'Financial disagreements (e.g., spending habits, debt, child support payments)'

Provide 5 pairs of sentences.

Write the output using this format:
    [
        {
            "dysfunctional": "write here the dysfunctional text",
            "functional": "write here the functional text"
        },
        {
            "dysfunctional": "write here the dysfunctional text",
            "functional": "write here the functional text"
        }
    ]
"""

# Placeholder for an optional system message.
system_content = """
add system content here
"""

In [3]:
# Send the test prompt as a single user message; no system message is sent.
completion = client_openai.chat.completions.create(
    temperature=TEMPERATURE,
    model=LLM_MODEL,
    messages=[dict(role="user", content=prompt)],
)

In [4]:
# Show the token usage reported for the test request.
print(completion.usage)

CompletionUsage(completion_tokens=293, prompt_tokens=198, total_tokens=491)


In [5]:
# Print the message text of the first returned choice.
print(completion.choices[0].message.content)

[
    {
        "dysfunctional": "You always waste money on useless things! You're so irresponsible!",
        "functional": "I feel concerned about our financial situation. Can we discuss our spending habits and find a solution together?"
    },
    {
        "dysfunctional": "If you don't pay child support on time, I'll make sure you regret it!",
        "functional": "It's important for our child's well-being that we both fulfill our financial responsibilities. Can we work out a plan to ensure timely payments?"
    },
    {
        "dysfunctional": "You're a gold digger who only cares about money!",
        "functional": "I value our relationship and want to address any concerns we have about financial matters. Can we have an open and respectful conversation about this?"
    },
    {
        "dysfunctional": "I'll drain our joint account if you don't give me what I want!",
        "functional": "I understand we have disagreements about finances. Let's find a fair and reasonable way 

#### OK, it works :-)
#### It seems we don't need `system_content`.

## Multiple issues

In [6]:
# Conflict categories, written one per line and split into a list of strings.
issues = [
    line.strip()
    for line in """
Financial disagreements (e.g., spending habits, debt, child support payments).
Division of household responsibilities (e.g., chores, maintenance).
Communication breakdowns (e.g., lack of communication, misunderstandings).
Trust issues (e.g., infidelity, suspicion, jealousy).
Differences in parenting styles or decisions.
Time management and scheduling conflicts (e.g., visitation schedules, personal time).
Personal boundaries and respect for privacy.
Extended family involvement (e.g., in-laws, family obligations).
Social life and friendships (e.g., differing social circles, time spent with friends).
Emotional support and empathy (e.g., lack of emotional support, neglect).
Career or job-related stress and decisions.
Relocation or living arrangements (e.g., moving cities, living apart).
Health and wellness issues (e.g., disagreements about medical decisions, lifestyle choices).
Substance abuse or addiction problems.
Legal issues (e.g., divorce proceedings, custody battles).
Shared custody of children.
Differing future goals and aspirations.
Handling of past conflicts and unresolved issues.
Intimacy and sexual compatibility.
Vacations and leisure activities (e.g., differing interests, planning conflicts).
Child discipline and education decisions.
""".strip().splitlines()
]

responses = []
N_PAIRS = 5

# Output-format instructions shared by every request.
# Kept as a plain (non-f) string so the literal JSON braces need no `{{ }}`
# escaping, and defined once because it does not depend on the issue.
# The example is valid JSON (no trailing comma after the last object) because
# the replies are parsed with `json.loads` afterwards.
prompt_2 = """
    Write the output using this format:
    [
        {
            "dysfunctional": "write here the dysfunctional text",
            "functional": "write here the functional text"
        },
        {
            "dysfunctional": "write here the dysfunctional text",
            "functional": "write here the functional text"
        }
    ]
    """

# One request per issue category; the raw reply text is collected in `responses`.
for issue in issues:
    # Issue-specific part of the prompt.
    prompt_1 = f"""
    Generate examples of dysfunctional and toxic language that might be encountered between couples or 
    ex-couples who have to continuously interact.

    Each entry should include:

    1 - A sentence reflecting dysfunctional communication, showcasing various forms of toxicity such as
        insults, harassment, threats, manipulation, and derogatory remarks.
    
    2 - A transformed version of the same sentence that represents functional, healthy communication.

    Ensure the sentences are realistic and diverse in terms of content and context.
    The sentences should refer to this issue category:
    '{issue}'

    Provide {N_PAIRS} pairs of sentences.
    """

    prompt = prompt_1 + prompt_2

    completion = client_openai.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            # {"role": "system", "content": system_content},
            {"role": "user", "content": prompt}
        ],
        temperature=TEMPERATURE,
    )
    responses.append(completion.choices[0].message.content)

In [7]:
# Inspect the raw reply strings collected for each issue.
responses

['[\n    {\n        "dysfunctional": "You always waste money on useless things! You\'re so irresponsible with our finances.",\n        "functional": "I\'ve noticed we have different spending habits. Can we discuss how we can better manage our finances together?"\n    },\n    {\n        "dysfunctional": "If you don\'t pay child support on time, I\'ll make sure you regret it.",\n        "functional": "It\'s important for both of us to fulfill our financial responsibilities. Can we find a way to ensure child support payments are made on time?"\n    },\n    {\n        "dysfunctional": "You\'re a gold digger who only cares about money. I regret ever being with you.",\n        "functional": "Let\'s have a calm discussion about our financial disagreements and how we can move forward positively."\n    },\n    {\n        "dysfunctional": "I\'ll drain our joint account if you don\'t agree to my terms. You\'ll be left with nothing.",\n        "functional": "Let\'s work together to find a fair sol

In [8]:
# Parse every raw reply and flatten the resulting lists into `responses_json`.
responses_json = []

for index, response in enumerate(responses):
    try:
        pairs = json.loads(response)
    except json.JSONDecodeError as error:
        # Same exception type, but the message now identifies which reply is malformed.
        raise json.JSONDecodeError(
            f"response #{index} is not valid JSON: {error.msg}",
            error.doc,
            error.pos,
        ) from error
    if not isinstance(pairs, list):
        # Extending with a dict or a string would silently add its keys or
        # characters instead of sentence pairs.
        raise TypeError(
            f"response #{index} should decode to a list, got {type(pairs).__name__}"
        )
    responses_json.extend(pairs)

In [9]:
# Sanity check: compare the number of parsed pairs with the expected total.
print(
    f"Number of pairs generated: {len(responses_json)}",
    f"This number should be equal to: {len(issues) * N_PAIRS} = {len(issues)} issues x {N_PAIRS} pairs",
    sep="\n",
)

Number of pairs generated: 105
This number should be equal to: 105 = 21 issues x 5 pairs


In [10]:
# Inspect the flattened list of parsed sentence pairs.
responses_json

[{'dysfunctional': "You always waste money on useless things! You're so irresponsible with our finances.",
  'functional': "I've noticed we have different spending habits. Can we discuss how we can better manage our finances together?"},
 {'dysfunctional': "If you don't pay child support on time, I'll make sure you regret it.",
  'functional': "It's important for both of us to fulfill our financial responsibilities. Can we find a way to ensure child support payments are made on time?"},
 {'dysfunctional': "You're a gold digger who only cares about money. I regret ever being with you.",
  'functional': "Let's have a calm discussion about our financial disagreements and how we can move forward positively."},
 {'dysfunctional': "I'll drain our joint account if you don't agree to my terms. You'll be left with nothing.",
  'functional': "Let's work together to find a fair solution that respects both of our financial needs and concerns."},
 {'dysfunctional': "You're always asking for more mo

### Save JSON file

In [11]:
# Serialise all pairs as 4-space-indented JSON and write the text in one call.
with open("synthetic_data_gpt.json", "w") as write_file:
    write_file.write(json.dumps(responses_json, indent=4))

### Save CSV file

In [12]:
# Column names come from the first record. Fall back to the two keys requested
# in the prompt so an empty result still produces a header-only file instead
# of raising IndexError.
keys = list(responses_json[0].keys()) if responses_json else ["dysfunctional", "functional"]
# UTF-8 is set explicitly because the platform default encoding may not be
# able to represent every character in the generated text.
with open("synthetic_data_gpt.csv", "w", newline='', encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=keys)
    writer.writeheader()
    writer.writerows(responses_json)