In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the chat-turn table from the working directory.
turn_csv = "explore_turn_common.csv"
explore_turn = pd.read_csv(turn_csv)

## Combine the *user_utterance_english* and *bot_utterance_english* columns for each *chat_id*

In [3]:
import pandas as pd

# Build one text per turn: the user's utterance, a space, then the bot's
# utterance. Missing utterances are treated as empty strings.
user_part = explore_turn['user_utterance_english'].fillna('')
bot_part = explore_turn['bot_utterance_english'].fillna('')
explore_turn['combined_text'] = user_part + ' ' + bot_part

# Collapse all turns of a chat into a single space-separated transcript.
grouped = (
    explore_turn
    .groupby('chat_id')['combined_text']
    .agg(' '.join)
    .reset_index()
)
grouped.columns = ['chat_id', 'combined_text']

# Preview the first rows of the per-chat transcripts
print(grouped.head())


   chat_id                                      combined_text
0      1.0  Start Hi! I'm MIcha, your motivational chatbot...
1      2.0  Start Hi! I'm MIcha, your motivational chatbot...
2      3.0  Start Hi! I'm MIcha, your motivational chatbot...
3      4.0  Start Hi! I'm MIcha, your motivational chatbot...
4      5.0  Start Hi! I'm MIcha, your motivational chatbot...


In [4]:
# Spot check: print the full transcript of the chat whose chat_id is 1.
first_chat = grouped.loc[grouped['chat_id'] == 1, 'combined_text']
print(first_chat.iloc[0])

Start Hi! I'm MIcha, your motivational chatbot. My goal is to help you reflect on your behavior and help you develop a plan for your behavioral change. Please think carefully about my questions and see them primarily as thought impulses that bring you mentally closer to your goal. I'm not here to give you tips or recommendations for action, but to get clear about your motives and values. What behavioral change do you want to talk about today? I want to procrastinate less So you want to procrastinate less. Can you tell me something about why this change is important to you? I could be a lot further in my life. How important is this change on a scale from 0 (not at all) to 10 (very) I don't know. On a scale from 0 (not at all) to 10 (very), how interested are you in doing this change right now? 8 An 8. So you're positive about this change overall. Can you tell me a little more about this assessment? Why didn't you give a higher or lower value? Because I have to change something now, but 

In [5]:
### ! pip install --upgrade openai

In [6]:
import pandas as pd
import openai
import time

# Do not embed the API key in the notebook. When no key is passed, the client
# reads it from the OPENAI_API_KEY environment variable and raises at
# construction time if that variable is not set.
client = openai.OpenAI()

def classify_text(text, model="gpt-3.5-turbo"):
    """Classify one conversation into a target-behaviour category.

    Sends the conversation to the chat-completions endpoint through the
    module-level ``client`` and maps the reply onto one of the three
    category names listed in the prompt.

    Parameters
    ----------
    text : str
        Conversation transcript to classify.
    model : str, optional
        Name of the chat model to query. Defaults to "gpt-3.5-turbo".

    Returns
    -------
    str
        "sustainable living", "healthier eating" or "less procrastination".
        Returns "Error" (after printing the reason) when the API call fails,
        the reply has no text, or the reply is not one of the three names.
    """
    valid_categories = ("sustainable living", "healthier eating", "less procrastination")
    prompt = f"""Please classify the following conversation into one of the three categories:
- sustainable living
- healthier eating
- less procrastination

Only return the exact category name. Do not explain.

Conversation:
{text}
"""
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
    except openai.OpenAIError as e:
        # Only API-level failures are best-effort; programming errors
        # (e.g. an undefined client) should surface instead of being
        # recorded as a category.
        print(f"Error: {e}")
        return "Error"

    content = response.choices[0].message.content
    if content is None:
        print("Error: the model returned no text")
        return "Error"

    # Tolerate cosmetic variation in the reply (capitalisation, surrounding
    # quotes, bullet markers, trailing period) so it maps onto the canonical name.
    label = content.strip(" \t\r\n\"'`.*-").lower()
    if label not in valid_categories:
        # Keep off-list answers out of the category column; show the raw reply.
        print(f"Error: unexpected category {content!r}")
        return "Error"
    return label

# Classify every chat transcript, one API request per chat.
results = []
for chat_id, transcript in zip(grouped['chat_id'], grouped['combined_text']):
    print(f"Processing chat_id {chat_id}...")
    results.append(classify_text(transcript))
    time.sleep(2)  # pause between requests to stay clear of rate limits

# Store the labels next to the transcripts they were computed from.
grouped['category'] = results

print(grouped.head())

Processing chat_id 1.0...
Processing chat_id 2.0...
Processing chat_id 3.0...
Processing chat_id 4.0...
Processing chat_id 5.0...
Processing chat_id 6.0...
Processing chat_id 7.0...
Processing chat_id 8.0...
Processing chat_id 9.0...
Processing chat_id 10.0...
Processing chat_id 11.0...
Processing chat_id 12.0...
Processing chat_id 13.0...
Processing chat_id 14.0...
Processing chat_id 15.0...
Processing chat_id 16.0...
Processing chat_id 17.0...
Processing chat_id 18.0...
Processing chat_id 19.0...
Processing chat_id 21.0...
Processing chat_id 22.0...
Processing chat_id 23.0...
Processing chat_id 24.0...
Processing chat_id 25.0...
Processing chat_id 26.0...
Processing chat_id 27.0...
Processing chat_id 28.0...
Processing chat_id 29.0...
Processing chat_id 31.0...
Processing chat_id 32.0...
Processing chat_id 33.0...
Processing chat_id 36.0...
Processing chat_id 37.0...
Processing chat_id 38.0...
Processing chat_id 39.0...
Processing chat_id 40.0...
Processing chat_id 41.0...
Processing

In [7]:
# Number of chats assigned to each category label.
grouped['category'].value_counts()

category
less procrastination    99
healthier eating        49
sustainable living      12
Name: count, dtype: int64

## Combine all user utterances for each chat_id

In [8]:
# Join every user utterance of a chat into one space-separated string.
# Missing utterances are replaced by '' first: str.join raises TypeError on
# NaN, and this matches how combined_text is built.
user_utterance_combine = (
    explore_turn['user_utterance_english']
    .fillna('')
    .groupby(explore_turn['chat_id'])
    .agg(' '.join)
    .reset_index()
)

## Combine all bot utterances for each chat_id

In [9]:
# Join every bot utterance of a chat into one space-separated string.
# Missing utterances are replaced by '' first: str.join raises TypeError on
# NaN, and this matches how combined_text is built.
bot_utterance_combine = (
    explore_turn['bot_utterance_english']
    .fillna('')
    .groupby(explore_turn['chat_id'])
    .agg(' '.join)
    .reset_index()
)

### Check **user_utterance_combine**, **bot_utterance_combine** and **grouped** before the final combination

In [10]:
# First rows of the per-chat user text.
user_utterance_combine.head()

Unnamed: 0,chat_id,user_utterance_english
0,1.0,Start I want to procrastinate less I could be ...
1,2.0,Start I want to procrastinate less There are i...
2,3.0,Start I want to procrastinate less I'm unprodu...
3,4.0,Start I want to eat healthier I want to change...
4,5.0,Start I want to eat healthier I am overweight ...


In [11]:
# First rows of the per-chat bot text.
bot_utterance_combine.head()

Unnamed: 0,chat_id,bot_utterance_english
0,1.0,"Hi! I'm MIcha, your motivational chatbot. My g..."
1,2.0,"Hi! I'm MIcha, your motivational chatbot. My g..."
2,3.0,"Hi! I'm MIcha, your motivational chatbot. My g..."
3,4.0,"Hi! I'm MIcha, your motivational chatbot. My g..."
4,5.0,"Hi! I'm MIcha, your motivational chatbot. My g..."


In [12]:
# First rows of the per-chat transcripts together with their category label.
grouped.head()

Unnamed: 0,chat_id,combined_text,category
0,1.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination
1,2.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination
2,3.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination
3,4.0,"Start Hi! I'm MIcha, your motivational chatbot...",healthier eating
4,5.0,"Start Hi! I'm MIcha, your motivational chatbot...",healthier eating


### Get the final combination

In [13]:
# Attach the user-only and bot-only text to the classified chats.
# Outer joins keep every chat_id that appears in any of the three frames.
target_behaviour = pd.merge(
    pd.merge(grouped, user_utterance_combine, how='outer', on='chat_id'),
    bot_utterance_combine,
    how='outer',
    on='chat_id',
)

In [14]:
# Display the merged frame: per chat_id, the combined text, category, user text and bot text.
target_behaviour

Unnamed: 0,chat_id,combined_text,category,user_utterance_english,bot_utterance_english
0,1.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination,Start I want to procrastinate less I could be ...,"Hi! I'm MIcha, your motivational chatbot. My g..."
1,2.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination,Start I want to procrastinate less There are i...,"Hi! I'm MIcha, your motivational chatbot. My g..."
2,3.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination,Start I want to procrastinate less I'm unprodu...,"Hi! I'm MIcha, your motivational chatbot. My g..."
3,4.0,"Start Hi! I'm MIcha, your motivational chatbot...",healthier eating,Start I want to eat healthier I want to change...,"Hi! I'm MIcha, your motivational chatbot. My g..."
4,5.0,"Start Hi! I'm MIcha, your motivational chatbot...",healthier eating,Start I want to eat healthier I am overweight ...,"Hi! I'm MIcha, your motivational chatbot. My g..."
...,...,...,...,...,...
155,188.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination,Start I want to procrastinate less Because I w...,"Hi! I'm MIcha, your motivational chatbot. My g..."
156,189.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination,Start I want to procrastinate less I push myse...,"Hi! I'm MIcha, your motivational chatbot. My g..."
157,190.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination,Start I want to procrastinate less I want to g...,"Hi! I'm MIcha, your motivational chatbot. My g..."
158,191.0,"Start Hi! I'm MIcha, your motivational chatbot...",less procrastination,Start I want to procrastinate less The achieve...,"Hi! I'm MIcha, your motivational chatbot. My g..."


In [15]:
# Load the user-level table; its chat_id and condition columns are used below.
user_csv = "explore_user_common.csv"
explore_user_common = pd.read_csv(user_csv)

In [16]:
# Add the `condition` column from the user-level table, matched on chat_id.
# A left join keeps every chat already in target_behaviour.
# Any `condition` column left over from an earlier run of this cell is dropped
# first, so re-running does not create condition_x / condition_y columns.
target_behaviour = target_behaviour.drop(columns='condition', errors='ignore').merge(
    explore_user_common[['chat_id', 'condition']],
    on='chat_id',
    how='left'
)

### Export

In [17]:
# Write the final table to CSV without the positional index column.
output_csv = 'target_behaviour_identification.csv'
target_behaviour.to_csv(output_csv, index=False)