In [None]:
import json

import numpy as np
import pandas as pd

from visualize import *

# Lookup table from raw labels to the short display names used in tables.
# Later cells use it both as `my_dict.get(label, label)` (unknown labels pass
# through unchanged) and as `my_dict[x]` (unknown labels raise KeyError).
# Keys are case-sensitive: 'ru' (Uppercase Roman) and 'Ru' are different entries.
my_dict = {'t1': 'T1', 't2': 'T2', 't3': 'T3', 't4': 'T4', 't5': 'T5',
           'v1': 'V1', 'v2': 'V2', 'v3': 'V3', 'v4': 'V4', 'v5': 'V5',
           # Two-letter codes mapped to themselves — presumably language codes; TODO confirm.
           'Zh': 'Zh', 'Ko': 'Ko', 'Es': 'Es', 'Fr': 'Fr', 'De': 'De',
           'It': 'It', 'Ar': 'Ar', 'Ru': 'Ru', 'Ja': 'Ja', 'En': 'En',
           # Numbering / lettering styles.
           'n': 'Arabic Numeral', 'al': 'Lowercase Latin', 'au': 'Uppercase Latin',
           'rl': 'Lowercase Roman', 'ru': 'Uppercase Roman',
           # Ordering direction.
           'f': 'Ascending', 'r': 'Descending',
           # Full personality descriptions -> one-word names. The keys are the same
           # strings listed in `personality_order`.
           'a person of routine and familiarity': 'Routine',
           'a more spontaneous and less reliable person': 'Spontaneous',
           'a person with reserved and lower energy levels': 'Reserved',
           "a competitive person, sometimes skeptical of others' intentions": 'Competitive',
           'a person with emotional stability and consistent moods': 'Stability',
           'an adventurous and creative person': 'Adventurous',
           'an organized person, mindful of details': 'Organized',
           'a person full of energy and positive emotions': 'Energy',
           'a compassionate and cooperative person': 'Compassionate',
           'a person with emotional instability and diverse negative feelings': 'Instability',
          }

# Row order for the personality table: used as the ordered category list for
# `append_label` via pd.Categorical. Each string is also a key of `my_dict`,
# which supplies the short name shown in the table.
# Values of `append_label` that are not listed here become NaN in the
# categorical, so they must match the data exactly.
personality_order = [
    "a person of routine and familiarity",
    "a more spontaneous and less reliable person",
    "a person with reserved and lower energy levels",
    "a competitive person, sometimes skeptical of others' intentions",
    "a person with emotional stability and consistent moods",
    "an adventurous and creative person",
    "an organized person, mindful of details",
    "a person full of energy and positive emotions",
    "a compassionate and cooperative person",
    "a person with emotional instability and diverse negative feelings"
]

# Row order for the character table (ordered categories for `append_label`).
# NOTE(review): "Winson Churchill" and "Hannibal Lector" look like misspellings
# of "Winston Churchill" / "Hannibal Lecter". They are left as-is because these
# strings must equal the `append_label` values in the data; a label missing from
# this list becomes NaN in pd.Categorical. Confirm against the data before fixing.
character_order = ["Harry Potter", "Luke Skywalker", "Indiana Jones", "James Bond", "Martin Luther King", 
                   "Winson Churchill", "Mahatma Gandhi", "Nelson Mandela", "Hannibal Lector", "Lord Voldemort", 
                   "Adolf Hitler", "Osama bin Laden", "Sauron", "Ursula", "Maleficent", "Darth Vader"]

# Row order for the environment table (ordered categories for `append_label`).
environment_order = ["anger", "anxiety", "fear", "guilt", "jealousy", "embarrassment", 
                     "frustration", "depression", "calmness", "relaxation", "courage", 
                     "pride", "admiration", "confidence", "fun", "happiness"]

# Hex colour palette — presumably consumed by plotting code outside this section.
# NOTE(review): there are 15 colours here but 16 entries in `character_order`
# and `environment_order`; verify this is enough if colours are assigned per label.
my_colors = ['#e6194B', '#42d4f4', '#ffe119', '#3cb44b', '#f032e6', '#fabed4', '#469990', '#dcbeff',
             '#9A6324', '#fffac8', '#800000', '#aaffc3', '#000075', '#a9a9a9', '#000000']

# Light grey fallback colour — presumably for items without a palette entry; TODO confirm usage.
default_color = '#D9DDDC'

In [None]:
# Load every saved result file. `extract_data` (brought in by
# `from visualize import *`) returns a 2-tuple; only the first element is kept,
# and it is treated as a DataFrame (it is passed to pd.concat below).

# One result file per model.
chatgpt_data, _ = extract_data('save/gpt-3.5-turbo-1106.json')
gpt4_data, _ = extract_data('save/gpt-4-1106.json')
gemini_data, _ = extract_data('save/gemini-1.0-pro.json')
llama_data, _ = extract_data('save/llama-3.1-8b.json')

# Character experiment: a "_cot" file and a plain file
# (presumably with / without chain-of-thought — TODO confirm), then both pooled.
character_cot, _ = extract_data('save/character_cot.json')
character_no, _ = extract_data('save/character.json')
character = pd.concat([character_cot, character_no], ignore_index=True)

# Environment experiment: a single file.
environment, _ = extract_data('save/environment.json')

# Personality experiment, three variants (biography / portray / qa),
# each with a "_cot" file and a plain file.
biography_cot, _ = extract_data('save/personality_biography_cot.json')
biography_no, _ = extract_data('save/personality_biography.json')
biography = pd.concat([biography_cot, biography_no], ignore_index=True)

portray_cot, _ = extract_data('save/personality_portray_cot.json')
portray_no, _ = extract_data('save/personality_portray.json')
portray = pd.concat([portray_cot, portray_no], ignore_index=True)

qa_cot, _ = extract_data('save/personality_qa_cot.json')
qa_no, _ = extract_data('save/personality_qa.json')
qa = pd.concat([qa_cot, qa_no], ignore_index=True)

# All personality variants pooled together.
personality = pd.concat([biography, portray, qa], ignore_index=True)

# Pools of every "_cot" frame and every plain frame across experiments.
# ignore_index=True (as in every other concat above) gives a fresh 0..n-1 index
# instead of repeating each source frame's index labels.
cot = pd.concat([character_cot, biography_cot, portray_cot, qa_cot], ignore_index=True)
no_cot = pd.concat([character_no, biography_no, portray_no, qa_no], ignore_index=True)

In [None]:
# LaTeX table of mean ± std for each Big Five trait, one row per model.
dfs = [chatgpt_data, gpt4_data, gemini_data, llama_data]
names = ["GPT-3.5-Turbo-1106", "GPT-4-1106", "Gemini-1.0-Pro", "LLaMA-3.1-8b"]
trait_columns = ["Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Neuroticism"]

# Collect one record per model and build the frame once, rather than growing a
# DataFrame with pd.concat inside the loop.
rows = []
for model_df in dfs:
    traits = model_df[trait_columns]
    means = traits.mean()
    stds = traits.std()
    # Look statistics up by column label so a value can never be paired with
    # the wrong column. Each cell is LaTeX math: $mean_{\pm std}$.
    rows.append({col: f"${means[col]:.2f}_{{\\pm {stds[col]:.2f}}}$" for col in trait_columns})

results_df = pd.DataFrame(rows, index=names, columns=trait_columns)

# escape=False keeps the $..$ math markup intact in the LaTeX output.
latex_output = results_df.to_latex(escape=False)
print(latex_output)

In [None]:
# LaTeX table of mean ± std for each Big Five trait, one row per personality
# description (rows follow `personality_order`, labelled via `my_dict`).
trait_columns = ["Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Neuroticism"]

# .copy() makes the column subset an independent frame, so the assignment below
# does not raise SettingWithCopyWarning.
df = personality[["append_label", *trait_columns]].copy()
df['append_label'] = pd.Categorical(df['append_label'], categories=personality_order, ordered=True)

df_sorted = df.sort_values('append_label')

# observed=False is spelled out so every category in `personality_order` gets a
# row regardless of the pandas version's default (unobserved ones show as nan).
grouped = df_sorted.groupby('append_label', observed=False).agg(['mean', 'std'])

formatted_results_dict = {}

for label in grouped.index:
    stats = grouped.loc[label]  # Series indexed by (trait, statistic)
    formatted_results = {
        col: f"${stats[(col, 'mean')]:.2f}_{{\\pm {stats[(col, 'std')]:.2f}}}$"
        for col in trait_columns
    }
    # Unknown labels fall back to the raw label text.
    mapped_label = my_dict.get(label, label)
    formatted_results_dict[mapped_label] = formatted_results

# Outer keys become columns, so transpose to get one row per label.
formatted_results_df = pd.DataFrame(formatted_results_dict).T

# escape=False keeps the $..$ math markup intact in the LaTeX output.
latex_output = formatted_results_df.to_latex(escape=False)
print(latex_output)

In [None]:
# LaTeX table of mean ± std for each Big Five trait, one row per character
# (rows follow `character_order`).
trait_columns = ["Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Neuroticism"]

# .copy() makes the column subset an independent frame, so the assignment below
# does not raise SettingWithCopyWarning.
df = character[["append_label", *trait_columns]].copy()
df['append_label'] = pd.Categorical(df['append_label'], categories=character_order, ordered=True)
df_sorted = df.sort_values('append_label')

# Group the sorted frame, as the personality table does (previously `df_sorted`
# was computed and then ignored). observed=False is spelled out so every
# category in `character_order` gets a row regardless of the pandas default.
grouped = df_sorted.groupby('append_label', observed=False).agg(['mean', 'std'])

formatted_results_dict = {}

for label in grouped.index:
    stats = grouped.loc[label]  # Series indexed by (trait, statistic)
    formatted_results = {
        col: f"${stats[(col, 'mean')]:.2f}_{{\\pm {stats[(col, 'std')]:.2f}}}$"
        for col in trait_columns
    }
    # Character names are not keys of `my_dict`, so .get() returns them unchanged.
    mapped_label = my_dict.get(label, label)
    formatted_results_dict[mapped_label] = formatted_results

# Outer keys become columns, so transpose to get one row per label.
formatted_results_df = pd.DataFrame(formatted_results_dict).T

# escape=False keeps the $..$ math markup intact in the LaTeX output.
latex_output = formatted_results_df.to_latex(escape=False)
print(latex_output)

In [None]:
# LaTeX table of mean ± std for each Big Five trait, one row per environment
# label (rows follow `environment_order`).
trait_columns = ["Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Neuroticism"]

# .copy() makes the column subset an independent frame, so the assignment below
# does not raise SettingWithCopyWarning.
df = environment[["append_label", *trait_columns]].copy()
df['append_label'] = pd.Categorical(df['append_label'], categories=environment_order, ordered=True)
df_sorted = df.sort_values('append_label')

# Group the sorted frame, as the personality table does (previously `df_sorted`
# was computed and then ignored). observed=False is spelled out so every
# category in `environment_order` gets a row regardless of the pandas default.
grouped = df_sorted.groupby('append_label', observed=False).agg(['mean', 'std'])

formatted_results_dict = {}

for label in grouped.index:
    stats = grouped.loc[label]  # Series indexed by (trait, statistic)
    formatted_results = {
        col: f"${stats[(col, 'mean')]:.2f}_{{\\pm {stats[(col, 'std')]:.2f}}}$"
        for col in trait_columns
    }
    # Environment labels are not keys of `my_dict`, so .get() returns them unchanged.
    mapped_label = my_dict.get(label, label)
    formatted_results_dict[mapped_label] = formatted_results

# Outer keys become columns, so transpose to get one row per label.
formatted_results_df = pd.DataFrame(formatted_results_dict).T

# escape=False keeps the $..$ math markup intact in the LaTeX output.
latex_output = formatted_results_df.to_latex(escape=False)
print(latex_output)

In [None]:
# Compare the GPT-3.5 scores (`chatgpt_data`) with the personality-prompted
# scores for one "Minimum" and one "Maximum" personality label per trait.
# For each comparison this prints the heading, the result of
# `hypothesis_testing` (a helper defined outside this notebook section), and
# the two sample means.
categories = ["Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Neuroticism"]

# .copy() makes the column subset an independent frame, so the relabelling
# below does not raise SettingWithCopyWarning.
personality_df = personality[["append_label", *categories]].copy()
# Direct indexing (not .get) on purpose: a label missing from `my_dict` raises KeyError.
personality_df['append_label'] = personality_df['append_label'].apply(lambda label: my_dict[label])

# (trait, label printed under "Minimum", label printed under "Maximum")
comparisons = [
    ("Openness", "Routine", "Adventurous"),
    ("Conscientiousness", "Spontaneous", "Organized"),
]

for trait, low_label, high_label in comparisons:
    for heading, label in (("Minimum", low_label), ("Maximum", high_label)):
        baseline = chatgpt_data[trait].tolist()
        induced = personality_df.loc[personality_df["append_label"] == label, trait].tolist()
        print(heading)
        print(hypothesis_testing(baseline, induced))
        print(np.mean(baseline), np.mean(induced))
