In [15]:
import pandas as pd
import os
import fnmatch
from ast import literal_eval


def find_files(directory, prefix):
    matches = []
    for root, dirnames, filenames in os.walk(directory):
        for filename in fnmatch.filter(filenames, prefix + "*.json"):
            matches.append(os.path.join(root, filename))
    return matches


def get_output(file):
    with open(file) as f:
        lines = f.readlines()
    
    row_output = {
        'filename':file,
        'model': os.path.basename(file).split('_')[1],
        'personality': os.path.splitext(os.path.basename(file))[0].split('_p')[-1][1:].title(),
        'accuracy': float(lines[0].split('accuracy:')[-1].strip()[:-1])
    }

    return pd.DataFrame.from_dict(row_output, orient='index').T


output_dir = r"D:\71 PsychLLM\outs\reasoning"
prefix = "REPORT_"

output = pd.DataFrame()
json_files = find_files(output_dir, prefix)
for file in json_files:
    output = pd.concat([output,get_output(file)])

output.sort_values(by=['model','personality'])

Unnamed: 0,filename,model,personality,accuracy
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Agreeableness,0.527778
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Conscientiousness,0.55
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Extraversion,0.544444
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Machiavellianism,0.611111
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Narcissism,0.675
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Neuroticism,0.619444
0,D:\71 PsychLLM\outs\reasoning\REPORT_Llama-2-7...,Llama-2-7b-chat-hf,,0.611111
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Openness,0.605556
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Psychopathy,0.594444
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Task-Specific,0.602778


In [29]:
model_map = {
    'mistral-instruct': "Mistral 7B", 
    'Llama-2-7b-chat-hf': "Llama2 7B",
    'falcon-7b-instruct': "Falcon 7B", 
    'zephyr-7b-beta': "Zephyr 7B Beta",
    'gpt-3.5-turbo-1106': "GPT-3.5",
}
personality_order = ['None', 'Openness', 'Conscientiousness', 'Extraversion','Agreeableness','Neuroticism', 'Narcissism', 'Machiavellianism', 'Psychopathy']
output['model_name'] = output['model'].apply(lambda x: model_map[x])
output

Unnamed: 0,filename,model,personality,accuracy,model_name
0,D:\71 PsychLLM\outs\reasoning\REPORT_Llama-2-7...,Llama-2-7b-chat-hf,,0.611111,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Openness,0.605556,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Conscientiousness,0.55,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Extraversion,0.544444,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Agreeableness,0.527778,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Neuroticism,0.619444,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Narcissism,0.675,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Machiavellianism,0.611111,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Psychopathy,0.594444,Llama2 7B
0,D:\71 PsychLLM\outs\reasoning\p2_ours\REPORT_L...,Llama-2-7b-chat-hf,Task-Specific,0.602778,Llama2 7B


In [35]:
def round_to_1dp_as_string(x):
    # Multiply by 100 and round to one decimal place
    rounded_value = round(x * 100, 1)
    # Convert to string with one decimal place
    rounded_string = "{:.1f}".format(rounded_value)
    return rounded_string


grouped = output.groupby(['personality','model_name'])['accuracy'].mean().unstack('model_name').reset_index()
grouped['order'] = pd.Categorical(grouped['personality'], categories=personality_order, ordered=True)
grouped = grouped.sort_values(by='order').drop('order',axis=1).reset_index(drop=True)
columns_to_subtract = list(output['model_name'].unique())
grouped.loc[1:, columns_to_subtract] -= grouped.loc[0, columns_to_subtract]
grouped[columns_to_subtract] = grouped[columns_to_subtract].applymap(lambda x: round_to_1dp_as_string(x))
grouped

model_name,personality,Llama2 7B,Mistral 7B,Zephyr 7B Beta
0,,61.1,63.1,71.9
1,Openness,-0.6,-1.7,3.9
2,Conscientiousness,-6.1,-6.4,2.8
3,Extraversion,-6.7,-3.9,5.3
4,Agreeableness,-8.3,-9.4,3.6
5,Neuroticism,0.8,-0.8,3.6
6,Narcissism,6.4,1.7,4.2
7,Machiavellianism,0.0,-8.9,10.3
8,Psychopathy,-1.7,-0.3,4.2
9,Task-Specific,-0.8,1.7,1.1


In [37]:
output.to_csv(os.path.join(output_dir, 'consolidated.csv'), index=False, encoding='utf-8-sig')
grouped.to_csv(os.path.join(output_dir, 'grouped.csv'), index=False, encoding='utf-8-sig')