In [1]:
import openai
import pandas as pd
import time

In [2]:
client = openai.OpenAI(api_key="*** ***")

In [3]:
df_dialogues = pd.read_csv("../target_behaviour_identification.csv")
df_topics = pd.read_csv("topics_after_deduplication.csv")  

In [4]:
# Construct the topic list text for the prompt
topic_list_text = "\n".join([
    f"{i+1}. {row['Topics']}: {row['Definition']}"
    for i, row in df_topics.iterrows()
])

In [5]:
# Store results in assignments
assignments = []


# Iterate through each dialogue
for i, row in df_dialogues.iterrows():
    chat_id = row['chat_id']
    dialogue = row['combined_text']

    prompt = f"""You are a topic assignment assistant.

Your task is to assign **one or more** relevant topics from the list below to the given user-AI conversation. 
It is common for a single dialogue to reflect multiple themes. So please consider all possible relevant topics.

For **each assigned topic**, include:
- Topic: the topic name (from the list)
- Quote: one sentence from the dialogue that supports this topic

Only choose from the following topics and definitions:

{topic_list_text}

Now analyze the conversation below.

Dialogue:
---
{dialogue}
---

Respond in the following format. If multiple topics apply, list them all:

Topic: [Topic Name 1]  
Quote: "[Exact quote from the dialogue]"

Topic: [Topic Name 2]  
Quote: "[Exact quote from the dialogue]"

... (as many as needed)
"""

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
            max_tokens=500
        )
        output = response.choices[0].message.content
        assignments.append({"chat_id": chat_id, "assigned_topics": output})
        print(f"Assigned topics for chat {chat_id}")
        time.sleep(1.2)

    except Exception as e:
        print(f"Error for chat {chat_id}: {e}")
        continue

Assigned topics for chat 1.0
Assigned topics for chat 2.0
Assigned topics for chat 3.0
Assigned topics for chat 4.0
Assigned topics for chat 5.0
Assigned topics for chat 6.0
Assigned topics for chat 7.0
Assigned topics for chat 8.0
Assigned topics for chat 9.0
Assigned topics for chat 10.0
Assigned topics for chat 11.0
Assigned topics for chat 12.0
Assigned topics for chat 13.0
Assigned topics for chat 14.0
Assigned topics for chat 15.0
Assigned topics for chat 16.0
Assigned topics for chat 17.0
Assigned topics for chat 18.0
Assigned topics for chat 19.0
Assigned topics for chat 21.0
Assigned topics for chat 22.0
Assigned topics for chat 23.0
Assigned topics for chat 24.0
Assigned topics for chat 25.0
Assigned topics for chat 26.0
Assigned topics for chat 27.0
Assigned topics for chat 28.0
Assigned topics for chat 29.0
Assigned topics for chat 31.0
Assigned topics for chat 32.0
Assigned topics for chat 33.0
Assigned topics for chat 36.0
Assigned topics for chat 37.0
Assigned topics for

In [9]:
df_assignments = pd.DataFrame(assignments)

In [13]:
df_assignments.to_csv("raw_assignments_withQuote.csv", index=False)

In [10]:
df_assignments.loc[0, 'assigned_topics']

'Topic: Procrastination  \nQuote: "I want to procrastinate less."\n\nTopic: Self-Reflection  \nQuote: "Please think carefully about my questions and see them primarily as thought impulses that bring you mentally closer to your goal."\n\nTopic: Emotional Well-being  \nQuote: "It sounds like you want a higher quality of life and think that less procrastination could contribute to it."\n\nTopic: Family and Social Relationships  \nQuote: "So you would like to spend more time with your loved ones if you were to procrastinate less."'

In [11]:
df_assignments.loc[1, 'assigned_topics']

'Topic: Procrastination  \nQuote: "I see the importance, but I can\'t catch up."\n\nTopic: Goal Setting  \nQuote: "It can be helpful to set smaller, achievable goals."\n\nTopic: Patience  \nQuote: "Stay patient with yourself and celebrate small successes."'

In [12]:
import re

# Store structured results
topics_assigned = []

for i, row in df_assignments.iterrows():
    chat_id = row['chat_id']
    text = row['assigned_topics']

    # Use regular expression to extract Topic and corresponding Quote
    topic_blocks = re.findall(r"Topic:\s*(.*?)\s*Quote:", text, re.DOTALL)
    
    for topic in topic_blocks:
        topic_clean = topic.strip().replace('"', '')
        if topic_clean:
            topics_assigned.append({
                "chat_id": chat_id,
                "topic": topic_clean
            })

# Convert to DataFrame
topics_assigned = pd.DataFrame(topics_assigned)

# Save as CSV
topics_assigned.to_csv("topics_assigned.csv", index=False)

print(f"Done! Parsed {len(df_topics)} topic assignments.")

Done! Parsed 498 topic assignments.
