# Generate synthetic data

We convert dysfunctional text into a more functional version using the OpenAI API with the `gpt-3.5-turbo` model.

In [1]:
import environ
from openai import OpenAI
import openai
import json
import csv
from pathlib import Path

# Import OpenAI key
# NOTE(review): `environ` looks like django-environ, where read_env() loads a
# local .env file into the process environment before the key is read — confirm.
env = environ.Env()
environ.Env.read_env()
API_KEY = env("OPENAI_API_KEY")
openai.api_key = API_KEY

# Set constants
LLM_MODEL = "gpt-3.5-turbo"
TEMPERATURE = 0

# Client
# NOTE(review): OpenAI() is constructed without an explicit api_key, so it
# presumably picks OPENAI_API_KEY up from the environment — confirm.
client_openai = OpenAI()



## Import JSON files

In [58]:
# Folder with synthetic data
directory = "data_nb/"
gpt_file = "synthetic_data_gpt.json"
ollama_file = "synthetic_data_ollama.json"


path_gpt = Path(directory, gpt_file)
path_ollama = Path(directory, ollama_file)

# Decode explicitly as UTF-8: without `encoding`, open() falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text.
with open(path_gpt, 'r', encoding="utf-8") as file:
    syn_data_gpt = json.load(file)

with open(path_ollama, 'r', encoding="utf-8") as file:
    syn_data_ollama = json.load(file)

## Convert to functional language

In [59]:
# Display the first record loaded from the GPT synthetic-data file.
syn_data_gpt[0]

{'dysfunctional': "You always waste money on useless things! You're so irresponsible with our finances.",
 'functional': "I've noticed we have different spending habits. Can we discuss how we can better manage our finances together?"}

In [60]:
# Display the first record loaded from the Ollama synthetic-data file.
syn_data_ollama[0]

{'dysfunctional': "You're so irresponsible with money, always overspending and ruining our finances!",
 'functional': 'I am concerned about your spending habits. It would be helpful to discuss this together.'}

In [76]:
def create_prompt(text: "dict | str") -> str:
    """Build the prompt asking the model to rewrite one dysfunctional message.

    Args:
        text: Either a record with a ``"dysfunctional"`` key holding the
            message to rewrite, or the message itself as a plain string.

    Returns:
        The instruction prompt with the dysfunctional message inserted at the
        end.

    Raises:
        KeyError: If ``text`` is a mapping without a ``"dysfunctional"`` key.
    """
    # The parameter used to be annotated ``str`` while the body indexed it as
    # a dict, so a real string raised TypeError. Both forms are accepted now;
    # the prompt produced for a dict record is unchanged.
    message = text if isinstance(text, str) else text["dysfunctional"]
    prompt = f"""
    Below is an instruction that describes a task.
    Write a response that appropriately completes the request.
    
    ### Objective:
    Transform the following text, which originates from the context of dysfunctional communication between couples, into functional language.
    Make the text actionable or practical, while maintaining a natural, conversational tone.
    
    ### Instructions:
    1. Review the provided text carefully.
    2. Convert the text into functional, everyday language, focusing on making the content actionable and practical.
    3. Aim for a conversational tone, as if explaining to a friend, to ensure the paragraph is engaging and accessible.
    4. Ensure the transformed text promotes understanding, empathy, and positive communication, suitable for couples or ex-couples who need to interact constructively.
    5. Always respond only with the transformed text and nothing else.
    
    ### Input 
    Please transform the following text into functional language:
    
    {message}
    """
    return prompt


def call_client(prompt: str, client_openai, llm_model: str, temperature: float) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the only message (role ``user``).
        client_openai: Client object exposing ``chat.completions.create``.
        llm_model: Model name forwarded as ``model``.
        temperature: Sampling temperature forwarded unchanged.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    user_message = {"role": "user", "content": prompt}
    response = client_openai.chat.completions.create(
        model=llm_model,
        messages=[user_message],
        temperature=temperature,
    )
    first_choice = response.choices[0]
    return first_choice.message.content


def pair_text(data_input: list[dict], responses: list) -> list:
    """Pair each input record's dysfunctional text with its rewritten version.

    Args:
        data_input: Records that each contain a ``"dysfunctional"`` key.
        responses: Rewritten texts, in the same order as ``data_input``.

    Returns:
        A list of ``{'dysfunctional': ..., 'functional': ...}`` dicts, one per
        position; pairing stops at the shorter of the two inputs.
    """
    return [
        {'dysfunctional': record["dysfunctional"], 'functional': rewritten}
        for record, rewritten in zip(data_input, responses)
    ]


def convert_functional_language(data: list[dict], client_openai, llm_model: str, temperature: float) -> list:
    """Rewrite every record's dysfunctional text through the chat model.

    For each record, a prompt is built with ``create_prompt`` and sent with
    ``call_client``, one request at a time and in input order. The input and
    output counts are printed before the results are paired up.

    Args:
        data: Records to convert.
        client_openai: Client passed through to ``call_client``.
        llm_model: Model name passed through to ``call_client``.
        temperature: Sampling temperature passed through to ``call_client``.

    Returns:
        The result of ``pair_text(data, responses)``.
    """
    responses = [
        call_client(create_prompt(record), client_openai, llm_model, temperature)
        for record in data
    ]

    print(f"Length input: {len(data)}")
    print(f"Length output: {len(responses)}")

    return pair_text(data, responses)

In [90]:
# Convert only the first two GPT records.
functional_gpt = convert_functional_language(
    data=syn_data_gpt[:2],
    client_openai=client_openai,
    llm_model=LLM_MODEL,
    temperature=TEMPERATURE,
)

Length input: 2
Length output: 2


In [91]:
# Convert only the first two Ollama records.
functional_ollama = convert_functional_language(
    data=syn_data_ollama[:2],
    client_openai=client_openai,
    llm_model=LLM_MODEL,
    temperature=TEMPERATURE,
)

Length input: 2
Length output: 2


In [95]:
# Display both converted lists concatenated (GPT records first).
functional_gpt + functional_ollama

[{'dysfunctional': "You always waste money on useless things! You're so irresponsible with our finances.",
  'functional': "I've noticed that sometimes we spend money on things that may not be the best use of our resources. It would be helpful for us to discuss and make more thoughtful decisions together when it comes to our finances."},
 {'dysfunctional': "If you don't pay child support on time, I'll make sure you regret it.",
  'functional': "If we could work together to ensure child support is paid on time, it would really help our situation. Let's find a way to make sure this important responsibility is taken care of promptly."},
 {'dysfunctional': "You're so irresponsible with money, always overspending and ruining our finances!",
  'functional': "I've noticed that sometimes we struggle with managing our finances. It would be helpful for us to work together on creating a budget and sticking to it to avoid any financial stress. Let's have a conversation about how we can improve our

In [96]:
# Print the concatenated results as JSON indented by four spaces.
print(json.dumps(functional_gpt + functional_ollama, indent=4))

[
    {
        "dysfunctional": "You always waste money on useless things! You're so irresponsible with our finances.",
        "functional": "I've noticed that sometimes we spend money on things that may not be the best use of our resources. It would be helpful for us to discuss and make more thoughtful decisions together when it comes to our finances."
    },
    {
        "dysfunctional": "If you don't pay child support on time, I'll make sure you regret it.",
        "functional": "If we could work together to ensure child support is paid on time, it would really help our situation. Let's find a way to make sure this important responsibility is taken care of promptly."
    },
    {
        "dysfunctional": "You're so irresponsible with money, always overspending and ruining our finances!",
        "functional": "I've noticed that sometimes we struggle with managing our finances. It would be helpful for us to work together on creating a budget and sticking to it to avoid any financ