In [1]:
import json
from pathlib import Path

import pandas as pd

def create_few_shot_files(dat, filename, context):
    """
    Build a few-shot chat prompt from rated advice and save it as a JSON file.

    The prompt is a list of chat messages: one system message followed by a
    user/assistant pair for every row of ``dat`` whose ``context`` column equals
    ``context``. It is written to
    ``../../assistant/few_shot_prompts/{filename}-{context}.json`` (relative to
    the current working directory); the directory is created when missing.

    Parameters:
    dat (DataFrame): Advice rows. Must provide the columns ``context``,
        ``chat_text``, ``advice_text`` and the speaker column used for the
        chosen context (``culprit`` or ``Victim``).
    filename (str): Relationship key; one of 'relationship', 'friendship',
        'workplace-no-hierarchy', 'workplace-hierarchy'.
    context (str): Advice context; one of 'Advice for culprit',
        'Advice for victim', 'Next reply'.

    Raises:
    ValueError: If ``filename`` or ``context`` is not one of the supported keys.
    """

    # Mapping of filename to the counterpart named in the user message
    relationship_dict = {
        "relationship": "romantic partner",
        "friendship": "friends",
        "workplace-no-hierarchy": "colleagues",
        "workplace-hierarchy": "manager"
    }

    # Mapping of context to the question the user asks after the chat
    conversational_contexts = {
        "Advice for culprit": "I feel I might have acted wrong in the conversation. Did I make any mistakes? What could I do better?",
        "Advice for victim": "The conversation leaves me uneasy, but I do not know why. What went wrong in this interaction?",
        "Next reply": "I don't like the way this interaction went. What should I say next? Why would that be the right reaction?"
    }

    # Mapping of context to the column holding the name of the person asking
    column = {
        "Advice for culprit": "culprit",
        "Advice for victim": "Victim",
        "Next reply": "Victim"
    }

    # Fail fast on unsupported keys: a plain .get() would put the text "None"
    # into every prompt (filename) or fail later with KeyError(None) (context).
    if filename not in relationship_dict:
        raise ValueError(
            f"Unknown filename {filename!r}; expected one of {sorted(relationship_dict)}"
        )
    if context not in column:
        raise ValueError(
            f"Unknown context {context!r}; expected one of {sorted(column)}"
        )

    counterpart = relationship_dict[filename]
    question = conversational_contexts[context]
    speaker_column = column[context]

    # Keep only rows matching the given context (nothing is mutated, so no copy is needed)
    advice = dat[dat["context"] == context]

    # Initial few-shot prompt for the system role
    few_shot_prompt = [{"role": "system", "content": """You are an experienced psychologist and relationship and communication expert. 
         You know everything about how people communicate and can pinpoint and recognize the most subtle forms of manipulation and toxic behaviour."""}]
    for _, row in advice.iterrows():
        # The continuation-line whitespace below is part of the prompt text and is kept as-is.
        user_string = f"""The following is a chat between me ({row[speaker_column]}) and my {counterpart}: 
                            
                            {row['chat_text']}
                            
                            {question}"""
        # One user/assistant exchange per rated advice
        few_shot_prompt.append({"role": "user", "content": user_string})
        few_shot_prompt.append({"role": "assistant", "content": row['advice_text']})

    # Create the target directory on first use instead of failing after all the work is done
    output_dir = Path("../../assistant/few_shot_prompts")
    output_dir.mkdir(parents=True, exist_ok=True)
    with open(output_dir / f"{filename}-{context}.json", "w", encoding="utf-8") as file:
        json.dump(few_shot_prompt, file)
        

In [2]:
# Load the rated advice and the original chats without mutating either frame in place.
# Every cell equal to "Mixtral" is rewritten to "mixtral"; the chat id column is
# renamed to "original_chat_id".
dat = pd.read_csv("rated_advice.csv", index_col="Unnamed: 0").replace({"Mixtral": "mixtral"})
orig_chat_file = pd.read_csv("chats.csv").rename(columns={"Chat_id": "original_chat_id"})

In [6]:
# Summary statistics of the ratings, one row per model
dat.groupby("model")["rating"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
claude,66.0,4.106061,1.216917,1.0,4.0,5.0,5.0,5.0
gpt-4,24.0,4.333333,0.916831,2.0,4.0,5.0,5.0,5.0
llama3,39.0,4.230769,1.012073,2.0,4.0,5.0,5.0,5.0
mixtral,24.0,4.5,0.932505,2.0,4.0,5.0,5.0,5.0


In [8]:
# Filter advice by mean rating. Only advice with a mean rating of at least 4 (out of 5) is used for few-shot learning
mean_rating = dat.groupby("advice_id").rating.mean()
dat['mean_rating'] = dat.groupby("advice_id")['rating'].transform('mean')
# Skip missing feedback: str.join raises TypeError on NaN, which is what read_csv yields for empty cells.
dat['concatenated_feedback'] = dat.groupby('advice_id')['text_feedback'].transform(
    lambda x: ' \n'.join(x.dropna().astype(str))
)
# Drop speaker columns left over from a previous run of this cell before merging.
# Otherwise re-executing the cell yields Victim_x / Victim_y instead of Victim,
# and the prompt builder can no longer find the speaker name.
dat = pd.merge(
    dat.drop(columns=["Victim", "culprit"], errors="ignore"),
    orig_chat_file[["Victim", "culprit", "original_chat_id"]],
    on="original_chat_id",
)
dat_filtered = dat[dat["mean_rating"] >= 4]

In [11]:
# One independent frame per interaction context. The labels are matched exactly,
# including their mixed capitalisation.
dat_relationship = dat_filtered.loc[dat_filtered["interaction_context"].eq("Relationship")].copy()
dat_friendship = dat_filtered.loc[dat_filtered["interaction_context"].eq("friendship")].copy()
dat_colleagues = dat_filtered.loc[dat_filtered["interaction_context"].eq("workplace-no-hierarchy")].copy()
dat_manager = dat_filtered.loc[dat_filtered["interaction_context"].eq("Workplace-hierarchy")].copy()

# Row counts per context, in the order relationship, friendship, colleagues, manager
tuple(map(len, (dat_relationship, dat_friendship, dat_colleagues, dat_manager)))

(33, 38, 27, 29)

In [None]:
# Keep the first row for each advice_id in every per-context frame
dat_relationship = dat_relationship.drop_duplicates(subset="advice_id")
dat_friendship = dat_friendship.drop_duplicates(subset="advice_id")
dat_colleagues = dat_colleagues.drop_duplicates(subset="advice_id")
dat_manager = dat_manager.drop_duplicates(subset="advice_id")

In [51]:
# Write one few-shot file per advice context for the romantic-relationship chats
for advice_context in ("Advice for culprit", "Advice for victim", "Next reply"):
    create_few_shot_files(
        dat_relationship[dat_relationship["context"] == advice_context],
        "relationship",
        advice_context,
    )

In [57]:
# (frame, relationship key, advice contexts to export), processed in this order.
# Only the manager chats get a "Next reply" file here.
few_shot_jobs = (
    (dat_friendship, "friendship", ("Advice for culprit", "Advice for victim")),
    (dat_colleagues, "workplace-no-hierarchy", ("Advice for culprit", "Advice for victim")),
    (dat_manager, "workplace-hierarchy", ("Advice for culprit", "Advice for victim", "Next reply")),
)

for advice_frame, relationship_key, advice_contexts in few_shot_jobs:
    for advice_context in advice_contexts:
        create_few_shot_files(
            advice_frame[advice_frame["context"] == advice_context],
            relationship_key,
            advice_context,
        )