In [4]:
import environ
from openai import OpenAI
import openai
# from pathlib import Path

# Import OpenAI key
# (read_env presumably loads a local .env file before the lookup below — verify)
env = environ.Env()
environ.Env.read_env()
API_KEY = env("OPENAI_API_KEY")
openai.api_key = API_KEY

# Set constants
# LLM_MODEL and TEMPERATURE are forwarded as `model` / `temperature` to every
# chat-completion call in this notebook.
LLM_MODEL = "gpt-3.5-turbo"
TEMPERATURE = 0

In [5]:
# System prompt for the single-sentence toxicity classifier.
# Plain string (no f-prefix): nothing is interpolated, and a stray brace added
# later would otherwise be parsed as a placeholder.
# NOTE(review): the prompt text contains typos ("comunication", "is you don't
# know"); it is kept byte-identical so the request sent to the model is unchanged.
system_content = """
You are an expert non-violent comunication.
Your task is to identify toxic language.
Always answer using three labels:
 - 'neutral': use this category for neutral non-toxic language.
 - 'toxic': use this category for toxic or violent language.
 - 'unclear': use this category is you don't know what category select.
You must provide me with a label that classify the sentence provided below.
"""

In [6]:
# Pass the key explicitly: an OpenAI() instance does not read the module-level
# `openai.api_key`; without this argument it falls back to the OPENAI_API_KEY
# environment variable, which only works if read_env() exported it.
client_openai = OpenAI(api_key=API_KEY)

In [7]:
# One-off request to check the system prompt end to end before wrapping the
# call in a helper.
demo_messages = [
    {"role": "system", "content": system_content},
    {"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."},
]

completion = client_openai.chat.completions.create(
    model=LLM_MODEL,
    messages=demo_messages,
    temperature=TEMPERATURE,
)

# Show the assistant message and the token accounting for this request.
first_choice = completion.choices[0]
print(first_choice.message)
print(completion.usage)

ChatCompletionMessage(content='neutral', role='assistant', function_call=None, tool_calls=None)
CompletionUsage(completion_tokens=1, prompt_tokens=107, total_tokens=108)


In [8]:
def clf_sentence(client_openai, system_content, user_content, LLM_MODEL, TEMPERATURE):
    """
    Send one chat-completion request and return the reply text and token usage.

    Args:
        client_openai: Client object exposing `chat.completions.create(...)`.
        system_content (str): System prompt. When empty (or None) no system
            message is sent, so callers that put all instructions in the user
            message do not transmit a blank system turn.
        user_content (str): Text sent as the user message.
        LLM_MODEL (str): Model name, forwarded as `model`.
        TEMPERATURE: Sampling temperature, forwarded as `temperature`.

    Returns:
        tuple: `(selected_label, used_tokens)`.
            - selected_label: `content` of the first choice's message, returned
              as-is (may be None if the API returns no text content — verify
              against the SDK typing).
            - used_tokens (dict): keys 'completion_tokens', 'prompt_tokens' and
              'total_tokens'; each value is None when the response carries no
              usage information.

    Raises:
        IndexError: if the response contains no choices.
    """
    # Build the conversation; skip the system turn when there is nothing to say.
    messages = []
    if system_content:
        messages.append({"role": "system", "content": system_content})
    messages.append({"role": "user", "content": user_content})

    completion = client_openai.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=TEMPERATURE,
    )

    selected_label = completion.choices[0].message.content

    # `usage` can be absent on some responses; report None instead of raising.
    usage = getattr(completion, "usage", None)
    used_tokens = {
        key: getattr(usage, key, None)
        for key in ("completion_tokens", "prompt_tokens", "total_tokens")
    }

    return selected_label, used_tokens

In [9]:
# Try the helper on a single, clearly hostile sentence.
sample_sentence = "what the fuck are you talking about?"
label, token_dict = clf_sentence(
    client_openai,
    system_content,
    sample_sentence,
    LLM_MODEL,
    TEMPERATURE,
)

print(label)
print(token_dict)

toxic
{'completion_tokens': 2, 'prompt_tokens': 103, 'total_tokens': 105}


In [10]:
# Evaluation sentences for the classifier. Despite the variable name, the
# second group holds neutral sentences.
toxic_sentences = [
    # Toxic sentences
    "You don't deserve me.",
    "You're pathetic.",
    "I hate you.",
    "You're a bad parent.",
    "You're being crazy.",
    "You're so needy",
] + [
    # Neutral sentences
    "Oh! That reminds me I have to put my doctor's appointment in my calendar.",
    "Stop asking if I'm okay. Everything is fine",
    "These are not the droids you are looking for",
]

In [11]:
# Classify every evaluation sentence and pair it with the label returned by the
# model; the token-usage dict (second element of the result) is not needed here.
# A comprehension is used so the cell does not rebind notebook globals: the
# original loop overwrote `label`, leaked its loop variable, and assigned to `_`,
# which IPython uses for the last cell output.
label_list = [
    (
        sentence,
        clf_sentence(client_openai, system_content, sentence, LLM_MODEL, TEMPERATURE)[0],
    )
    for sentence in toxic_sentences
]

In [12]:
# Display the collected (sentence, label) pairs.
label_list

[("You don't deserve me.", 'toxic'),
 ("You're pathetic.", 'toxic'),
 ('I hate you.', 'toxic'),
 ("You're a bad parent.", 'toxic'),
 ("You're being crazy.", 'toxic'),
 ("You're so needy", 'toxic'),
 ("Oh! That reminds me I have to put my doctor's appointment in my calendar.",
  'neutral'),
 ("Stop asking if I'm okay. Everything is fine", 'neutral'),
 ('These are not the droids you are looking for', 'neutral')]

# Split text and identify toxic sentences

In [15]:
def create_prompt_split_clf(user_text):
    """
    Build the user prompt for the split-and-classify task.

    The prompt tells the model to cut the text into chunks and label each chunk
    with one of: 'criticism', 'contempt', 'defensiveness', 'stonewalling',
    'neutral' or 'unclear'. 'unclear' is the only fallback label; the prompt
    uses it consistently in the objective, the category list and the
    instructions.

    Args:
        user_text (str): Text to split and classify. It is appended verbatim at
            the end of the prompt.

    Returns:
        str: The complete prompt, ending with `user_text`.
    """

    prompt = f"""
    Objective: Split the provided text into chunks based on the following categories:
    'criticism', 'contempt', 'defensiveness', 'stonewalling', 'neutral', 'unclear'.
    If a text chunk does not clearly belong to any of the first five categories, classify it as 'unclear'.

    Categories Defined:

        1. 'criticism': This style involves ad hominem attacks on a partner's character rather than addressing specific issues, distinguishing it from a complaint, which targets a specific behavior.
        2. 'contempt': An extreme form of criticism, characterized by treating a partner with disrespect, sarcasm, and mockery, making them feel despised and worthless.
        3. 'defensiveness': A response to criticism where one attempts to excuse their behavior and avoid taking responsibility, often resulting in blame-shifting.
        4. 'stonewalling': It occurs when one partner withdraws from the interaction, shutting down communication in response to contempt.
        5. 'neutral': This category is used for text that does not exhibit negative communication patterns nor explicitly fits into the categories of criticism, contempt, defensiveness, or stonewalling.
            It includes statements or behaviors that are constructive, positive, or at least not harmful or negative in the context of a relationship.
            Use this category for communication that is understanding, supportive, factual without emotional charge, or otherwise not indicative of conflict.
        6. 'unclear': Use this category if the text does not clearly fit into any of the above categories or if it is ambiguous.

    Instructions:

        1. Read the text thoroughly.
        2. Identify and extract chunks of text that belong to the specified categories.
        3. Label each chunk with the corresponding category name.
        4. If a text chunk does not fit any of the specified categories, label it as 'unclear'.
        5. Present the categorized text chunks in the following format:

    Format:

        Category: [Category Name]
        Text: [Extracted Text Chunk]

        Category: [Category Name]
        Text: [Extracted Text Chunk]

    (Repeat this format for each identified chunk.)

    Example:

    Given Text: "Why would I do that to you? Are you seriously blaming me for everything? I understand your feelings, and I'm sorry for causing you pain. Maybe you should take a look at your own actions before pointing fingers at me! Let me explain the reasons behind my actions so we can better understand each other."

    Expected Output:

    Category: criticism
    Text: Why would I do that to you? Are you seriously blaming me for everything?

    Category: neutral
    Text: I understand your feelings, and I'm sorry for causing you pain.

    Category: criticism
    Text: Maybe you should take a look at your own actions before pointing fingers at me!

    Category: neutral
    Text: Let me explain the reasons behind my actions so we can better understand each other.

    Please proceed with categorizing the following text: {user_text}
    """

    return prompt

In [16]:
# Sample message to split into chunks and classify. The literal is broken up
# sentence by sentence; adjacent string literals concatenate to the same text.
text = (
    "What's wrong with me? "
    "What's wrong with you for even asking that? "
    "Can't you see I'm fine? "
    "Why are you always trying to start something."
)
usr_text = text  # both names refer to the same string

prompt = create_prompt_split_clf(user_text=usr_text)

In [18]:
# The instructions are all inside `prompt`, so the system prompt is left empty.
label, token_dict = clf_sentence(
    client_openai=client_openai,
    system_content="",
    user_content=prompt,
    LLM_MODEL=LLM_MODEL,
    TEMPERATURE=TEMPERATURE,
)


In [20]:
# Here `label` holds the model's full multi-chunk "Category/Text" reply rather
# than a single label, so print it to keep the line breaks readable.
print(label)

Category: criticism
Text: What's wrong with me?

Category: contempt
Text: What's wrong with you for even asking that?

Category: neutral
Text: Can't you see I'm fine?

Category: criticism
Text: Why are you always trying to start something.
