<h3>Import libraries and API keys:</h3>

In [None]:
import os
import openai
import pandas as pd
import numpy as np
from tqdm import tqdm
import time
import threading

# Prefer the OPENAI_API_KEY environment variable so the real secret never has
# to be written into the notebook; the original placeholder is kept as the
# fallback so the cell behaves as before when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "--- Confidential ---")

<h3>Get data:</h3>

In [None]:
# Counselling question/answer data. The columns read later in this notebook
# are "questionTitle", "topics", "questionText" and "answerText".
df_cc = pd.read_csv('counselchat-data.csv')
# Presumably message/response pairs (file name says "dyadic") — only the
# "text" column is read later, for the final evaluation example.
df_red = pd.read_csv('offmychest_dyadic.csv')

In [None]:
# Number of leading rows of df_cc that each annotation loop sends to the model.
learning_power = 1400

In [None]:
# Preview the first rows loaded from counselchat-data.csv.
df_cc.head()

In [None]:
# Preview the first rows loaded from offmychest_dyadic.csv.
df_red.head()

<h3>Emotion detection:</h3>

In [None]:
# Closed set of causes the model is allowed to assign. The text is embedded
# verbatim in the emotion-detection prompts and is later parsed back into a
# list by dropping the first character ("-") of every non-empty line.
causes_class = """
- Love Relationship Issues
- Mental Issues
- Physical Health
- Personal Struggles
- Family Conflict
"""

# Closed set of emotions the model is allowed to report; used the same way as
# causes_class (embedded in the prompts, then parsed line by line).
# NOTE(review): both "worry" and "worried" are listed — confirm that two
# separate labels are intended.
emotions_class = """
- frustration
- anxiety
- insecurity
- helplessness
- anger
- stress
- fear
- betrayal
- confusion
- concern
- sadness
- love
- desperation
- depression
- hurt
- disappointment
- jealousy
- regret
- uncertainty
- disgust
- loneliness
- conflict
- worry
- longing
- hopelessness
- hope
- worried
- distrust
"""

In [None]:
class OpenAIRequestThread(threading.Thread):
    """Run a single ChatCompletion request on a background thread.

    The caller starts the thread, waits with ``join(timeout=...)`` and then
    reads the outcome from the instance.

    Attributes:
        content: Prompt text sent as the only "user" message.
        response: Object returned by the API call, or ``None`` while the
            request is still running or after it failed.
        error: Exception raised by the API call, or ``None``.
    """

    def __init__(self, content):
        # Daemon thread: a request that outlives the caller's join timeout
        # must not keep the interpreter alive, because Python offers no
        # supported way to stop a running thread.
        super().__init__(daemon=True)
        self.content = content
        self.response = None
        self.error = None

    def run(self):
        """Send the request; store the response, or the error on failure."""
        try:
            self.response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": self.content}],
                temperature=0
            )
        except Exception as exc:
            # Best effort by design: the caller treats a missing response as
            # an empty result, but the cause is kept for inspection instead
            # of being discarded.
            self.error = exc

<h4> Prompting texts: </h4>

In [None]:
# Instruction block placed BEFORE the text to analyse: sets the role and
# restricts the answer to the emotions/causes listed in emotions_class and
# causes_class.
prompt_eng_pre = """
Assume the role of a specialist in text sentiment analysis. 
Your task involves scrutinizing a text provided to you, written by an individual. 
Your objective is to pinpoint each emotion expressed by the writer.
The only emotions possible are:""" + emotions_class +"""Do not use other emotions.
Subsequently, link every emotion to a specific cause. 
The only possible causes are:""" + causes_class + """
These are the only causes that should be utilized in your analysis, write them as they are, do not add details.
"""

# Instruction block placed AFTER the text: fixes the output format
# "{Emotion: '...' / Cause: '...'}" (the shape rearrange_lists parses with a
# regular expression) and repeats the allowed emotions and causes.
prompt_eng_post = """
When delivering your analysis, ensure it's structured as follows: 
{Emotion: 'specified emotion' / Cause: 'identified cause'}
{Emotion: 'specified emotion' / Cause: 'identified cause'}
...
Your findings should solely pertain to the emotions articulated by the author.
Only output the structure described, do not add anything else.
Again the only emotions that you can use are:""" + emotions_class +"""
And the only causes that you can use are: """ + causes_class +"""
"""

# Show the assembled prompt with a placeholder where each text is inserted.
print(prompt_eng_pre + " \n -----[DATA to insert]----- \n" + prompt_eng_post)

<h4>Send to chatGPT:</h4>

In [None]:
# Ask the model for the (emotion, cause) pairs of the first `learning_power`
# questions in df_cc. Exactly one entry is appended per row so the list stays
# index-aligned with the rows; "Empty" marks a row whose request failed,
# timed out, or could not be built (e.g. row index past the end of df_cc).
results_ed_history = []
for i in tqdm(range(0, learning_power)):
    try:
        row = df_cc.iloc[i]
        title_prompt = "The title of the text number " + str(i) + " is: " + str(row["questionTitle"])
        topic_prompt = "The topic of the text " + str(i) + " is: " + str(row["topics"])
        text_prompt = "The text is: \"\"\" " + str(row["questionText"]) + "\"\"\""
        prompt_request = (
            prompt_eng_pre
            + title_prompt + "\n" + topic_prompt + "\n" + text_prompt + "\n\n"
            + prompt_eng_post
        )

        request_thread = OpenAIRequestThread(prompt_request)
        request_thread.start()
        request_thread.join(timeout=10)

        if request_thread.is_alive():
            # Timed out. A Python thread cannot be stopped from outside (the
            # private Thread._stop() does not do that), so the request is
            # simply abandoned and the row recorded as empty.
            results_ed_history.append("Empty")
        elif request_thread.response is None:
            # The request finished without producing a response (API error).
            results_ed_history.append("Empty")
        else:
            results_ed_history.append(request_thread.response.choices[0].message.content)

    except Exception:
        # Keep the run going on a bad row or malformed response, but let
        # KeyboardInterrupt/SystemExit through so a long run can be aborted.
        results_ed_history.append("Empty")

print(results_ed_history)

<h3>MITI Categories detection:</h3>

<h4> Prompting texts: </h4>

In [None]:
# Instruction block placed BEFORE the counselor response to classify.
# NOTE: this rebinds prompt_eng_pre / prompt_eng_post, which the
# emotion-detection cells above also use.
prompt_eng_pre = """
You have been provided a professional counselor's response to a help-seeking message: 
"""

# Instruction block placed AFTER the response: lists the allowed MITI labels,
# fixes the output format "{label, label, ...}" and gives a definition and an
# example for each label.
# The label list must use the same spellings as the `miti_categories` list
# used for the Sankey diagram, otherwise the model's answers are filtered out
# there; "Self-disclosure" was previously misspelled "Selft-disclosure".
prompt_eng_post = """
Each sentence should be classified under one or more of the following categories:
- Closed question
- Open question
- Simple reflection
- Complex reflection
- Give information
- Advise with permission
- Affirm
- Emphasize Autonomy
- Support
- Advise without permission
- Confront
- Direct
- Warn
- Self-disclosure
- Other

Your categorizations need to look like this:
{Closed question, Open question} if the first sentence is Closed question and the second sentence is Open question.
Output strictly this, nothing else.


These categories are part of a framework known as the Motivational Interviewing Treatment Integrity (MITI) system. 
To enhance your understanding of these categories, here is some additional information:

1. Closed Question: 
“Questions that can be answered with an yes/no response or a very restricted range of answers. “
Example: “Do you think this is an advantage? “
2. Open Question: 
“Questions that allow a wide range of possible answers. “
Example: “What is your take on that? “
3. Simple Reflection: 
“Repetition, rephrasing, or paraphrasing of speaker’s previous statement. “
Example: It sounds like you’re feeling worried. 
4. Complex Reflection: 
“Repeating or rephrasing the previous statement of the speaker but adding substantial meaning/emphasis to it. “
Example: “Speaker: Mostly, I would change for future generations. Listener: It sounds like you have a strong feeling of responsibility. “
5. Give Information: 
“Educating, providing feedback, or giving an opinion without advising. 
Example: “Logging your cravings is important as cravings often lead to relapses. “
MI Adherent Behaviour Codes: 
6. Advise with Permission: “Advising when the speaker asks directly for advice. Indirect forms of permission can also occur, such as when the listener says to disregard the advice as appropriate.”
Example: “If you agree with it, we could try to brainstorm some ideas that might help. “
7. Affirm: “Encouraging the speaker by saying something positive or complimentary. “
Example: You should be proud of yourself for your past’s efforts. 
8. Emphasize Autonomy: “Emphasizing the speaker’s control, freedom of choice, autonomy, and ability to decide. “
Example: “It is really up to you to decide. “
9. Support: “Statements of compassion or sympathy. “
Example: “I know it’s really hard to stop drinking. “
MI Non-Adherent Behaviour Codes: 
10. Advise without Permission: “Making suggestions, offering solutions or possible actions without first obtaining permission from the speaker. “
Example: “You should simply scribble a note that reminds you to take a break. “
11. Confront: “Directly and unambiguously disagreeing, arguing, blaming, criticizing, or questioning the speaker’s honesty. “
Example: Yes, you are an alcoholic. You might not think so, but you are. 
12. Direct: “Giving orders, commands, or imperatives. “
Example: “Don’t do that! “
13. Warn: “A statement or event that warns of something or that serves as a cautionary example. “
Example: “Be careful, DO NOT stop taking meds without discussing with your doctor. “
Other: 
14. Self-Disclose: “The listener discloses his/her personal information or experiences. “
Example: “I used to be similar where I get obsessed about how people look. “
15. Other: “Statements that are not classified under the above codes.”
Example: “Good morning, Hi there.”

Again, output only what was asked ({MITI categories}), do not respond anything else."""

# Show the assembled prompt with a placeholder where each response is inserted.
print(prompt_eng_pre + " \n -----[DATA to insert]----- \n " + prompt_eng_post)

<h4> Send to chatGPT </h4>

In [None]:
# Ask the model for the MITI categories of the first `learning_power`
# counselor answers in df_cc. Exactly one entry is appended per row so the
# list stays index-aligned with the rows; "Empty" marks a row whose request
# failed, timed out, or could not be built (e.g. a non-string "answerText").
results_miti_history = []
for i in tqdm(range(0, learning_power)):
    try:
        text_prompt = "The response is: \"\"\" " + df_cc.iloc[i]["answerText"] + "\"\"\""
        prompt_request = prompt_eng_pre + "\n" + text_prompt + "\n\n" + prompt_eng_post

        request_thread = OpenAIRequestThread(prompt_request)
        request_thread.start()
        request_thread.join(timeout=10)

        if request_thread.is_alive():
            # Timed out. A Python thread cannot be stopped from outside (the
            # private Thread._stop() does not do that), so the request is
            # simply abandoned and the row recorded as empty.
            results_miti_history.append("Empty")
        elif request_thread.response is None:
            # The request finished without producing a response (API error).
            results_miti_history.append("Empty")
        else:
            results_miti_history.append(request_thread.response.choices[0].message.content)

    except Exception:
        # Keep the run going on a bad row or malformed response, but let
        # KeyboardInterrupt/SystemExit through so a long run can be aborted.
        results_miti_history.append("Empty")

print(results_miti_history)

<h3>Join and learn:</h3>

In [None]:
import re

def rearrange_lists(miti_categories, emotions):
    """Group detected emotions by MITI category and cause, rendered as text.

    Args:
        miti_categories: One string per text, formatted "{cat A, cat B, ...}".
        emotions: One string per text, containing zero or more fragments of
            the form "{Emotion: '<emotion>' / Cause: '<cause>'}".

    Entries are paired by position. If the lists differ in length, the
    surplus entries of the longer one are ignored. Entries that do not match
    the expected format contribute no emotions; the category text itself is
    still recorded.

    Returns:
        A string with one section per category, in first-seen order::

            "<category>" = {
                "<cause>": {"<emotion>", "<emotion>"},
            }

        Emotions are lower-cased and repeated once per occurrence.
    """
    pair_pattern = re.compile(r"{Emotion: '([^']*)' / Cause: '([^']*)'}")

    # category -> cause -> list of emotions (duplicates kept: they are counts)
    final_structure = {}

    # zip() pairs by position and stops at the shorter list, so a length
    # mismatch cannot raise IndexError.
    for categories_raw, emotions_raw in zip(miti_categories, emotions):
        categories_set = [item.strip() for item in categories_raw.strip("{}").split(",")]
        emotions_set = pair_pattern.findall(emotions_raw)

        for category in categories_set:
            causes_for_category = final_structure.setdefault(category, {})
            for emotion, cause in emotions_set:
                causes_for_category.setdefault(cause, []).append(emotion.lower())

    # Render the structure; pieces are joined once at the end.
    pieces = []
    for category, causes in final_structure.items():
        pieces.append(f'"{category}" = {{\n')
        for cause, cause_emotions in causes.items():
            emotions_string = ", ".join(f'"{emotion}"' for emotion in cause_emotions)
            pieces.append(f'    "{cause}": {{{emotions_string}}},\n')
        pieces.append('}\n')

    return "".join(pieces)

# Test with the given inputs
print(rearrange_lists(results_miti_history, results_ed_history))

<h3>Sankey plot</h3>

In [None]:
import re
import plotly.graph_objects as go

# Text produced by rearrange_lists: emotions grouped by MITI category and cause.
data = rearrange_lists(results_miti_history, results_ed_history)

# Turn the "- item" bullet text back into plain label lists: for every
# non-blank line, drop its first character (the dash) and the padding.
causes = []
for raw_line in causes_class.split('\n'):
    if raw_line.strip():
        causes.append(raw_line.strip()[1:].strip())

emotions = []
for raw_line in emotions_class.split('\n'):
    if raw_line.strip():
        emotions.append(raw_line.strip()[1:].strip())

# MITI labels accepted as nodes of the diagram.
miti_categories = [
    'Closed question',
    'Open question',
    'Simple reflection',
    'Complex reflection',
    'Give information',
    'Advise with permission',
    'Affirm',
    'Emphasize Autonomy',
    'Support',
    'Advise without permission',
    'Confront',
    'Direct',
    'Warn',
    'Self-disclosure',
    'Other',
]

def extract_mappings(data):
    """Count emotion->cause and cause->MITI links in rearrange_lists output.

    Args:
        data: Text made of sections ``"<category>" = {`` followed by lines
            ``"<cause>": {"<emotion>", ...},``.

    Returns:
        dict mapping ``(emotion, cause)`` and ``(cause, miti_category)``
        tuples (all lower-cased) to counts. Both counters are incremented
        once per listed emotion. Only labels present in the module-level
        ``emotions``, ``causes`` and ``miti_categories`` lists are counted;
        anything else is ignored.
    """
    # Build the lower-cased lookups once instead of once per emotion.
    known_emotions = {e.lower() for e in emotions}
    known_causes = {c.lower() for c in causes}
    known_miti = {m.lower() for m in miti_categories}

    mappings = {}
    current_miti = None
    for line in data.split("\n"):
        line = line.strip()

        # Section header. Matching the exact trailer (rather than any "=")
        # keeps a stray "=" inside a cause/emotion from resetting the section.
        if line.endswith("= {"):
            current_miti = line[:-len("= {")].strip(" \"").lower()
            continue

        if not current_miti or ":" not in line:
            continue

        # Split at the FIRST colon only: valid causes contain none, and extra
        # colons inside the emotion list must not raise on unpacking.
        cause, _, emotions_part = line.partition(":")
        cause = cause.strip(" \"").lower()
        emotions_extract = re.findall(r'\{(.*?)\}', emotions_part)
        if not emotions_extract:
            continue
        if cause not in known_causes or current_miti not in known_miti:
            continue

        for emotion in emotions_extract[0].split(', '):
            emotion = emotion.strip(" \"\'").lower()
            if emotion and emotion in known_emotions:
                mappings[(emotion, cause)] = mappings.get((emotion, cause), 0) + 1
                mappings[(cause, current_miti)] = mappings.get((cause, current_miti), 0) + 1
    return mappings

# Function to create Sankey diagram
def create_sankey(mappings):
    """Draw the emotion -> cause -> MITI category Sankey diagram.

    Args:
        mappings: dict mapping ``(source_label, target_label)`` tuples of
            lower-cased labels to link counts, as returned by
            ``extract_mappings``.

    Nodes are the lower-cased entries of the module-level ``emotions``,
    ``causes`` and ``miti_categories`` lists, in that order. Links whose
    source or target is not one of those labels are skipped. The figure is
    displayed with ``fig.show()``; nothing is returned.
    """
    label_list = [label.lower() for label in emotions + causes + miti_categories]  # Case-insensitive comparison

    # Label -> node position. setdefault keeps the FIRST position of a
    # repeated label, which is what list.index() would return.
    label_position = {}
    for position, label in enumerate(label_list):
        label_position.setdefault(label, position)

    source_indices = []
    target_indices = []
    values = []
    for (source, target), count in mappings.items():
        if source in label_position and target in label_position:
            source_indices.append(label_position[source])
            target_indices.append(label_position[target])
            values.append(count)  # Use count from mappings as value

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=30,
            line=dict(color="red", width=0.5),
            label=label_list
        ),
        link=dict(
            source=source_indices,
            target=target_indices,
            value=values,
        ))])

    fig.update_layout(title_text="Emotion to Cause to MITI Category Sankey Diagram", font_size=10)
    fig.show()

# Count the links found in `data`, then draw them. `mappings` is reused by
# the statistics and application cells below.
mappings = extract_mappings(data)
create_sankey(mappings)


<h3>Statistics on sankey diagram: </h3>

In [None]:
# Imported here so the cell runs on its own: defaultdict was previously used
# without ever being imported.
from collections import Counter, defaultdict


def _format_percentage(value):
    """Render a percentage with two decimals, or "n/a" when it is undefined."""
    return "n/a" if value is None else f"{value:.2f}%"


# Convert all categories to lowercase for consistent comparison
emotions_lower = [emotion.lower() for emotion in emotions]
causes_lower = [cause.lower() for cause in causes]
miti_categories_lower = [miti_category.lower() for miti_category in miti_categories]

# Count of connections for each link, split by diagram stage
emotion_to_cause_counts = defaultdict(int)
cause_to_miti_counts = defaultdict(int)

for link, count in mappings.items():
    if link[0] in emotions_lower and link[1] in causes_lower:
        emotion_to_cause_counts[link] += count
    elif link[0] in causes_lower and link[1] in miti_categories_lower:
        cause_to_miti_counts[link] += count

# Most common and least common links by stage (None when a stage has no links)
most_common_emotion_to_cause = max(emotion_to_cause_counts, key=emotion_to_cause_counts.get, default=None)
least_common_emotion_to_cause = min(emotion_to_cause_counts, key=emotion_to_cause_counts.get, default=None)

most_common_cause_to_miti = max(cause_to_miti_counts, key=cause_to_miti_counts.get, default=None)
least_common_cause_to_miti = min(cause_to_miti_counts, key=cause_to_miti_counts.get, default=None)

# Print out the links and their counts
if most_common_emotion_to_cause is not None:
    print(f"Most common emotion to cause link: {most_common_emotion_to_cause}, count: {emotion_to_cause_counts[most_common_emotion_to_cause]}")
if least_common_emotion_to_cause is not None:
    print(f"Least common emotion to cause link: {least_common_emotion_to_cause}, count: {emotion_to_cause_counts[least_common_emotion_to_cause]}")
if most_common_cause_to_miti is not None:
    print(f"Most common cause to MITI link: {most_common_cause_to_miti}, count: {cause_to_miti_counts[most_common_cause_to_miti]}")
if least_common_cause_to_miti is not None:
    print(f"Least common cause to MITI link: {least_common_cause_to_miti}, count: {cause_to_miti_counts[least_common_cause_to_miti]}")

# Count the number of unique links each node has
emotion_link_counts = Counter(link[0] for link in emotion_to_cause_counts)
cause_link_counts = Counter(link[0] for link in cause_to_miti_counts)
miti_link_counts = Counter(link[1] for link in cause_to_miti_counts)

# Find the node with the most unique links in each category
most_linked_emotion = max(emotion_link_counts, key=emotion_link_counts.get, default=None)
most_linked_cause = max(cause_link_counts, key=cause_link_counts.get, default=None)
most_linked_miti = max(miti_link_counts, key=miti_link_counts.get, default=None)

print(f"Emotion with most unique links: {most_linked_emotion}, number of links: {emotion_link_counts[most_linked_emotion] if most_linked_emotion else None}")
print(f"Cause with most unique links: {most_linked_cause}, number of links: {cause_link_counts[most_linked_cause] if most_linked_cause else None}")
print(f"MITI category with most unique links: {most_linked_miti}, number of links: {miti_link_counts[most_linked_miti] if most_linked_miti else None}")

# Count the total number of links each node has (not unique)
emotion_total_links = Counter()
cause_total_links = Counter()
miti_total_links = Counter()

for link, count in emotion_to_cause_counts.items():
    emotion_total_links[link[0]] += count

for link, count in cause_to_miti_counts.items():
    cause_total_links[link[0]] += count
    miti_total_links[link[1]] += count

# Calculate total links in each category
total_emotion_links = sum(emotion_total_links.values())
total_cause_links = sum(cause_total_links.values())
total_miti_links = sum(miti_total_links.values())

# Node with the highest total in each category. These names were read below
# without ever being assigned, which raised NameError.
most_linked_emotion_total = max(emotion_total_links, key=emotion_total_links.get, default=None)
most_linked_cause_total = max(cause_total_links, key=cause_total_links.get, default=None)
most_linked_miti_total = max(miti_total_links, key=miti_total_links.get, default=None)

# Calculate percentage of total links for most linked nodes in each category
# (None when the category has no links at all)
if most_linked_emotion_total is not None:
    emotion_percentage = emotion_total_links[most_linked_emotion_total] / total_emotion_links * 100
else:
    emotion_percentage = None

if most_linked_cause_total is not None:
    cause_percentage = cause_total_links[most_linked_cause_total] / total_cause_links * 100
else:
    cause_percentage = None

if most_linked_miti_total is not None:
    miti_percentage = miti_total_links[most_linked_miti_total] / total_miti_links * 100
else:
    miti_percentage = None

# _format_percentage guards the None case: formatting None with ":.2f" raises.
print(f"Emotion with most total links: {most_linked_emotion_total}, number of links: {emotion_total_links[most_linked_emotion_total] if most_linked_emotion_total else None}, percentage of total: {_format_percentage(emotion_percentage)}")
print(f"Cause with most total links: {most_linked_cause_total}, number of links: {cause_total_links[most_linked_cause_total] if most_linked_cause_total else None}, percentage of total: {_format_percentage(cause_percentage)}")
print(f"MITI category with most total links: {most_linked_miti_total}, number of links: {miti_total_links[most_linked_miti_total] if most_linked_miti_total else None}, percentage of total: {_format_percentage(miti_percentage)}")


<h3>Application:</h3>

In [None]:
# Split `mappings` by diagram stage: links that start at an emotion go to
# emotion_to_cause, every other link goes to cause_to_miti. Each value is a
# list of (target, count) pairs keyed by the link's source label.
emotion_to_cause = {}
cause_to_miti = {}

for pair, count in mappings.items():
    first, second = pair
    if first in [e.lower() for e in emotions]:
        stage = emotion_to_cause
    else:
        stage = cause_to_miti
    stage.setdefault(first, []).append((second, count))

# Describe every emotion -> cause flow, followed by the MITI flows leaving
# that cause (if any), one sentence per line.
report_lines = []
for emotion, cause_counts in emotion_to_cause.items():
    for cause, count in cause_counts:
        report_lines.append(f"{count} instances flow from '{emotion}' to '{cause}'.\n")
        for miti, miti_count in cause_to_miti.get(cause, []):
            report_lines.append(f"    Of these, {miti_count} instances flow to '{miti}'.\n")
textual_representation = "".join(report_lines)

print(textual_representation)

In [None]:
import re

data = textual_representation

# Replace "X instances flow from 'A' to 'B'." with
# "The emotion A was caused X times by B:".
# The pattern also consumes the sentence's final period; leaving it behind
# produced headings ending in ":.".
reformatted_data = re.sub(r"(\d+) instances flow from '(.+)' to '(.+)'\.?",
                          r"\n\nThe emotion \2 was caused \1 times by \3:",
                          data)

# Replace "Of these, Y instances flow to 'C'." with
# "This was addressed Y times by using C."
# Same reason for consuming the period: the replacement supplies its own, and
# the leftover one produced "..".
reformatted_data = re.sub(r"\s+Of these, (\d+) instances flow to '(.+)'\.?",
                          r"\nThis was addressed \1 times by using \2.",
                          reformatted_data)

# Split the string into a table (list of strings) at "\n\n" (double newlines):
# one entry per emotion/cause heading together with its "addressed by" lines.
reformatted_data_table = [item.strip() for item in reformatted_data.split('\n\n') if item.strip()]

# print reformatted data
for item in reformatted_data_table:
    print(item)


In [None]:
# Opening instruction for the evaluation conversation. It is sent as the
# first message, before the per-emotion statistics.
# NOTE: this rebinds prompt_eng_pre / prompt_eng_post, which earlier cells
# used for the annotation prompts.
prompt_eng_pre = """I need you to evaluate therapeutic responses to messages seeking for help.
The Motivational Interviewing Treatment Integrity (MITI) are types of sentences used for therapeutic responses.
I will teach you how to do it by showing you statistics on the MITI categories of sentences that were used by professional counselor to adresse particular emotion that was caused by a specific cause:"""

# Closing instruction: asks for a grade justified by the statistics.
prompt_eng_post = """Grade how professional and therapeutic the response is and justify it using the statistics you learned. \n"""

# Show the prompt frame with a placeholder where the statistics are inserted.
print(prompt_eng_pre + " -----[DATA to insert]----- \n \n" + prompt_eng_post)

In [None]:
# Maximum number of characters of each statistic forwarded to the model.
# NOTE(review): 50 characters cuts most entries before their
# "This was addressed ..." lines — presumably chosen to keep the prompt
# small; confirm the intended limit.
MAX_STAT_CHARS = 50

# Conversation sent to the model, every entry with role "user":
#   1. the opening instruction,
#   2. one message per (truncated) statistic,
#   3. the help-seeking message plus the response to evaluate,
#   4. the help-seeking message again plus the grading instruction.
message_history = [{"role": "user", "content": prompt_eng_pre}]
message_history.extend(
    {"role": "user", "content": item[:MAX_STAT_CHARS]}
    for item in reformatted_data_table
)

# Rows 2 and 3 of df_red are used as the (message, response) example.
text_prompt_0 = "The message is: \"\"\" " + df_red.iloc[2]["text"] + "\"\"\""
text_prompt = "The response to this message I need you to evaluate is: \"\"\" " + df_red.iloc[3]["text"] + "\"\"\""

message_history.append({"role": "user", "content": text_prompt_0 + "\n" + text_prompt})
message_history.append({"role": "user", "content": text_prompt_0 + "\n" + prompt_eng_post})

In [None]:
print(message_history)

In [None]:

# Send the whole conversation in one request and print the model's reply.
response = openai.ChatCompletion.create(
    messages=message_history,
    model="gpt-3.5-turbo",
    temperature=0,
)

evaluation_text = response.choices[0].message.content
print(evaluation_text)