In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json

def save_json(data, filename):
    """Write ``data`` to ``filename`` as JSON indented by four spaces.

    Args:
        data: Any object that ``json.dump`` can serialize.
        filename: Path of the file to create or overwrite.
    """
    with open(filename, mode='w') as out_file:
        json.dump(data, out_file, indent=4)


In [None]:
def control_template(reflection):
    """Wrap ``reflection`` in the fixed control-prompt text.

    Args:
        reflection: The insight text to embed between the header and footer.

    Returns:
        The prompt string: a header line, the reflection on its own line(s),
        and a closing instruction (which ends with a single trailing space).
    """
    header = "Given the following insights about me:\n"
    footer = "\nPlease make the following responses strictly align with these insights. "
    return f"{header}{reflection}{footer}"


def get_opt_df(eval_dir, model_name, eval_type, skip_initial=True):
    dir_path = f"{eval_dir}/{model_name}_{eval_type}"
    all_jsonl_files = [f for f in os.listdir(dir_path) if f.endswith('.jsonl')]
    dfs = []
    for file in all_jsonl_files:
        file_path = os.path.join(dir_path, file)
        df = pd.read_json(file_path, lines=True)
        df['length'] = df['strs'].apply(lambda x: len(x))
        trait_name = file.split("_")[0]
        df['trait'] = trait_name
        dfs.append(df)
    opt_df = pd.concat(dfs).sort_values(by='avg_reward', ascending=False)
    if skip_initial:
        opt_df = opt_df[opt_df['iteration'] != 0]
    grouped_df = opt_df.groupby('trait').first().reset_index()
    return grouped_df

# optimization: pick the best-reward row per trait and export it as a CSV.
eval_dir = "codes/output/all_survey"
output_dir = "codes/output/final_reflections"
model_name = "Qwen2.5-7B-Instruct"  # alternatives listed by the author: GPT-4o, Mistral-7B-Instruct-v0.3
eval_type = "moral"  # alternatives listed by the author: value, personality
grouped_df = get_opt_df(eval_dir, model_name, eval_type)

# rename() returns a new frame, so the column assignment below does not write
# into a slice of grouped_df (which can raise SettingWithCopyWarning).
output_df = grouped_df[['trait', 'avg_reward', 'strs', 'text']].rename(
    columns={
        'trait': 'dimension',
        'avg_reward': 'opt_avg_reward',
        'strs': 'opt_instructions',
        'text': 'control',
    }
)
# Wrap each raw reflection text in the control prompt.
output_df['control'] = output_df['control'].apply(control_template)

# to_csv does not create missing directories.
os.makedirs(output_dir, exist_ok=True)
output_df.to_csv(f"{output_dir}/{model_name}_{eval_type}.csv", index=False)
output_df