In [1]:
import json
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Rating rows; the CSV column "Unnamed: 0" is used as the row index.
dat = pd.read_csv("pilot_test_ratings_new_zuweisung.csv", index_col="Unnamed: 0")

# Chat table; its id column is renamed to "original_chat_id", the key used
# when the chats are merged onto the ratings further down.
orig_chat_file = pd.read_csv("chats.csv").rename(columns={"Chat_id": "original_chat_id"})

In [8]:
# Summary statistics of how many rating rows each advice_id has.
ratings_per_advice = dat["advice_id"].value_counts()
ratings_per_advice.describe()

count    49.000000
mean      3.122449
std       0.665603
min       2.000000
25%       3.000000
50%       3.000000
75%       3.000000
max       5.000000
Name: count, dtype: float64

In [5]:
# Unify the spelling: every cell whose value is exactly "Mixtral" becomes "mixtral".
dat = dat.replace({"Mixtral": "mixtral"})

In [6]:
# Rating summary statistics, one row per model.
dat.groupby("model")["rating"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
claude,66.0,4.106061,1.216917,1.0,4.0,5.0,5.0,5.0
gpt-4,24.0,4.333333,0.916831,2.0,4.0,5.0,5.0,5.0
llama3,39.0,4.230769,1.012073,2.0,4.0,5.0,5.0,5.0
mixtral,24.0,4.5,0.932505,2.0,4.0,5.0,5.0,5.0


In [8]:
# Per-advice mean rating as a standalone Series (indexed by advice_id) ...
mean_rating = dat.groupby("advice_id")["rating"].mean()
# ... and broadcast back onto every rating row of that advice.
dat["mean_rating"] = dat.groupby("advice_id")["rating"].transform("mean")

# All free-text feedback for one advice, joined into a single string.
# Empty feedback fields are read from CSV as NaN; they are skipped so that
# str.join does not raise a TypeError on a float.
dat["concatenated_feedback"] = dat.groupby("advice_id")["text_feedback"].transform(
    lambda feedback: " \n".join(feedback.dropna().astype(str))
)

# Attach the speaker names of the original chat to every rating row.
# Columns from a previous run of this cell are dropped first, so re-running the
# cell does not produce suffixed duplicates (Victim_x / Victim_y).
chat_speakers = orig_chat_file[["Victim", "culprit", "original_chat_id"]]
dat = pd.merge(
    dat.drop(columns=["Victim", "culprit"], errors="ignore"),
    chat_speakers,
    on="original_chat_id",
)

In [9]:
# Preview the first five rows, including the newly added columns.
dat.head(5)

Unnamed: 0,id,prolific_id,rating,text_feedback,advice_id,rating_chat,advice_text,context,model,chat_id,original_advice_id,chat_text,interaction_context,original_chat_id,mean_rating,concatenated_feedback,Victim,culprit
0,13,666b0b16bb1fd84cc54b145e,3,I think that regardless of having issues we sh...,26,,The email conversation seems professional and ...,Advice for victim,claude,7,26,"From: Sarah\nTo: Michael, Tom\nSubject: Urgent...",workplace-no-hierarchy,39,4.0,I think that regardless of having issues we sh...,Michael,Sarah
1,14,66608d75bcf0d1932679c357,3,Michael did an great job by sending the email ...,26,,The email conversation seems professional and ...,Advice for victim,claude,7,26,"From: Sarah\nTo: Michael, Tom\nSubject: Urgent...",workplace-no-hierarchy,39,4.0,I think that regardless of having issues we sh...,Michael,Sarah
2,15,666b0b16bb1fd84cc54b145e,5,There are lots of ways to deal with a situatio...,24,,"You handled the situation quite well, expressi...",Advice for culprit,mixtral,6,24,Subject: Project Deadline Concerns\n\nSender: ...,Workplace-hierarchy,33,4.333333,There are lots of ways to deal with a situatio...,Colleague 1,Boss
3,16,6661473b27d4a66955363c2b,4,I think it addresses some of the key issues we...,26,,The email conversation seems professional and ...,Advice for victim,claude,7,26,"From: Sarah\nTo: Michael, Tom\nSubject: Urgent...",workplace-no-hierarchy,39,4.0,I think that regardless of having issues we sh...,Michael,Sarah
4,17,66608d75bcf0d1932679c357,2,He shouldn’t have been rude to him but state t...,1,,"After analyzing the conversation, I think I've...",Advice for victim,llama3,1,1,"John: Hey guys, I need some help moving this w...",friendship,44,3.666667,He shouldn’t have been rude to him but state t...,Mike,John


In [10]:
# Keep only the rating rows whose advice has a mean rating of at least 4.
is_high_rated = dat["mean_rating"].ge(4)
dat_filtered = dat.loc[is_high_rated]
dat_filtered.shape[0]

127

In [11]:
# One independent copy of the high-rated rows per interaction context.
# Note the labels are matched exactly, including their capitalisation.
interaction = dat_filtered["interaction_context"]
dat_relationship = dat_filtered.loc[interaction.eq("Relationship")].copy()
dat_friendship = dat_filtered.loc[interaction.eq("friendship")].copy()
dat_colleagues = dat_filtered.loc[interaction.eq("workplace-no-hierarchy")].copy()
dat_manager = dat_filtered.loc[interaction.eq("Workplace-hierarchy")].copy()

# Row counts of the four subsets, in the order they were created.
tuple(
    len(subset)
    for subset in (dat_relationship, dat_friendship, dat_colleagues, dat_manager)
)

(33, 38, 27, 29)

In [49]:
import json

def create_few_shot_files(dat, filename, context, output_dir="../../assistant/few_shot_prompts"):
    """Write a few-shot chat prompt as JSON for one relationship type and one advice context.

    The prompt is a list of ``{"role": ..., "content": ...}`` messages: one system
    message followed by a user/assistant pair for every row of ``dat`` whose
    ``context`` column equals ``context``. The user message embeds the row's
    ``chat_text`` and the speaker name; the assistant message is the row's
    ``advice_text``.

    Parameters
    ----------
    dat : pandas.DataFrame
        Must provide the columns ``context``, ``chat_text``, ``advice_text`` and the
        speaker column for ``context`` (``culprit`` for "Advice for culprit",
        ``Victim`` otherwise).
    filename : str
        Relationship key: "relationship", "friendship", "workplace-no-hierarchy" or
        "workplace-hierarchy". It selects the wording used in the prompt and is the
        first part of the output file name.
    context : str
        "Advice for culprit", "Advice for victim" or "Next reply". It selects the
        rows, the speaker column and the closing question, and is the second part
        of the output file name.
    output_dir : str or path-like, optional
        Directory that receives ``{filename}-{context}.json``. It is created if it
        does not exist yet.

    Raises
    ------
    ValueError
        If ``filename`` or ``context`` is not one of the supported keys. Without
        this check an unknown key would be rendered as the text "None" inside the
        prompt, or produce a file that only contains the system message.
    """
    relationship_dict = {
        "relationship": "romantic partner",
        "friendship": "friends",
        "workplace-no-hierarchy": "colleagues",
        "workplace-hierarchy": "manager"
    }

    conversational_contexts = {
        "Advice for culprit": "I feel I might have acted wrong in the conversation. Did I make any mistakes? What could I do better?",
        "Advice for victim": "The conversation leaves me uneasy, but I do not know why. What went wrong in this interaction?",
        "Next reply": "I don't like the way this interaction went. What should I say next? Why would that be the right reaction?"
    }

    # Column holding the name of the person who is asking for advice.
    column = {
        "Advice for culprit": "culprit",
        "Advice for victim": "Victim",
        "Next reply": "Victim"
    }

    # Validate before doing any work or touching the file system.
    if context not in column:
        raise ValueError(f"Unknown context {context!r}; expected one of {sorted(column)}")
    if filename not in relationship_dict:
        raise ValueError(f"Unknown filename {filename!r}; expected one of {sorted(relationship_dict)}")

    speaker_column = column[context]
    counterpart = relationship_dict[filename]
    question = conversational_contexts[context]

    # The frame is only read, so the boolean selection needs no defensive copy.
    advice = dat[dat["context"] == context]

    # NOTE: both multi-line literals below are emitted verbatim, i.e. the line
    # breaks and the leading indentation are part of the generated prompt text.
    few_shot_prompt = [{"role": "system", "content": """You are an experienced psychologist and relationship and communication expert. 
         You know everything about how people communicate and can pinpoint and recognize the most subtle forms of manipulation and toxic behaviour."""}]
    for _, row in advice.iterrows():
        user_string = f"""The following is a chat between me ({row[speaker_column]}) and my {counterpart}: 
                            
                            {row['chat_text']}
                            
                            {question}"""
        few_shot_prompt.append({"role": "user", "content": user_string})
        few_shot_prompt.append({"role": "assistant", "content": row['advice_text']})

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(target_dir / f"{filename}-{context}.json", "w", encoding="utf-8") as file:
        json.dump(few_shot_prompt, file)
        

In [13]:
# Keep a single row per advice_id (the first one), then count advices per context.
dat_relationship = dat_relationship.drop_duplicates(subset="advice_id")
dat_relationship["context"].value_counts()

context
Advice for culprit    4
Advice for victim     4
Next reply            3
Name: count, dtype: int64

In [14]:
# Preview the first five de-duplicated relationship rows.
dat_relationship.head(5)

Unnamed: 0,id,prolific_id,rating,text_feedback,advice_id,rating_chat,advice_text,context,model,chat_id,original_advice_id,chat_text,interaction_context,original_chat_id,mean_rating,concatenated_feedback,Victim,culprit
14,27,614150a290b5ce2fc465f3eb,5,This is good advice the romantic relationship ...,41,,"In this situation, it's essential to communica...",Next reply,mixtral,10,49,"A: Hey, are you free tonight? I thought we cou...",Relationship,29,4.666667,This is good advice the romantic relationship ...,B,A
36,56,6120d7e955973ace858310b6,5,I love the advice as it will help the interact...,49,wrong,While your intentions might have been playful ...,Advice for culprit,llama3,12,48,"A: Hey, how was your day?\n\nB: It was good, d...",Relationship,22,4.666667,I love the advice as it will help the interact...,B,A
40,60,668157a33586d7e8df8fbff3,4,I think the advice the person has given is ver...,46,mistreated,"At first glance, the conversation seems playfu...",Advice for victim,llama3,12,45,"A: Hey, how was your day?\n\nB: It was good, d...",Relationship,22,4.0,I think the advice the person has given is ver...,B,A
47,65,668048aaad62048386c41b2e,5,"Similar to first scenario, I think this is per...",42,neutral,The conversation itself is quite sweet and you...,Advice for victim,Mixtral,11,41,"A: Hey, how was your day today?\n\nB: Pretty g...",Relationship,21,4.666667,"Similar to first scenario, I think this is per...",B,A
55,73,60bbc368a316ee5421a1153b,5,The provided advice considers both of the part...,40,mistreated,I would advise against simply going along with...,Next reply,claude,10,40,"A: Hey, are you free tonight? I thought we cou...",Relationship,29,5.0,The provided advice considers both of the part...,B,A


In [51]:
# Write one few-shot file per advice context for the relationship chats.
for advice_context in ("Advice for culprit", "Advice for victim", "Next reply"):
    matching_rows = dat_relationship.loc[dat_relationship["context"] == advice_context]
    create_few_shot_files(matching_rows, "relationship", advice_context)

In [57]:
# Write the few-shot files for the remaining interaction contexts.
#
# The frames hold one row per rating, so the same advice can appear several times.
# They are de-duplicated here (without modifying the frames themselves) so that
# every advice is exported exactly once even when this cell runs before the
# cells that call drop_duplicates on these frames.
few_shot_jobs = [
    (dat_friendship, "friendship", ("Advice for culprit", "Advice for victim")),
    (dat_colleagues, "workplace-no-hierarchy", ("Advice for culprit", "Advice for victim")),
    (dat_manager, "workplace-hierarchy", ("Advice for culprit", "Advice for victim", "Next reply")),
]

for ratings, relationship_key, advice_contexts in few_shot_jobs:
    unique_advice = ratings.drop_duplicates(subset="advice_id")
    for advice_context in advice_contexts:
        create_few_shot_files(
            unique_advice[unique_advice["context"] == advice_context],
            relationship_key,
            advice_context,
        )

In [51]:
# Keep a single row per advice_id (the first one), then count advices per context.
dat_friendship = dat_friendship.drop_duplicates(subset="advice_id")
dat_friendship["context"].value_counts()

context
Advice for culprit    6
Advice for victim     5
Name: count, dtype: int64

In [52]:
# Keep a single row per advice_id (the first one), then count advices per context.
dat_colleagues = dat_colleagues.drop_duplicates(subset="advice_id")
dat_colleagues["context"].value_counts()

context
Advice for culprit    5
Advice for victim     3
Name: count, dtype: int64

In [53]:
# Keep a single row per advice_id (the first one), then count advices per context.
dat_manager = dat_manager.drop_duplicates(subset="advice_id")
dat_manager["context"].value_counts()

context
Advice for victim     5
Advice for culprit    3
Next reply            2
Name: count, dtype: int64