# Setup

Use this notebook to analyze how well the psychometric test scores of an LLM's synthetic personality correspond to its downstream behavior, i.e., the personality expressed in the social media status updates it generates.

To run, update the parameters below for the specific model you wish to analyze.

In [None]:
# independent shaping scores (relative filepath)
# pickled personality test scores; read with pd.read_pickle in "Read in Data"
UNIDIMENSIONAL_SHAPING_SCORES = "scored_results/your_scores_here.pkl"

# raw downstream results (relative filepath)
# pickled status updates; replace only the file name after "../results/"
STATUS_UPDATES_DATA = "../results/" + "your_downstream_results_here.pkl"

# AMS results (relative filepath)
# CSV of AMS predictions; replace only the file name after "../results/"
AMS_DATA = "../results/" + "your_AMS_results.csv"

## Load Dependencies

In [237]:
import pandas as pd
from scipy.stats import pearsonr, spearmanr

## Constants

In [238]:
# Every ID list below follows the same domain order:
# extraversion, agreeableness, conscientiousness, neuroticism, openness.
# The correlation cells pair these lists element-wise with zip(), so the
# order must stay aligned across lists.

# columns holding the IPIP-300 test scores
IPIP_SCALE_IDS = [
    "IPIP300-EXT",
    "IPIP300-AGR",
    "IPIP300-CON",
    "IPIP300-NEU",
    "IPIP300-OPE"
]

# NOTE(review): not referenced by any cell visible in this notebook;
# presumably an alternative naming for the AMS columns — confirm before removing.
AMS_SCALE_IDS = [
    "ams-IPIP300-EXT",
    "ams-IPIP300-AGR",
    "ams-IPIP300-CON",
    "ams-IPIP300-NEU",
    "ams-IPIP300-OPE"
]

# AMS prediction columns, named after the `trait` values in the AMS results
AMS_SCALE_IDS_2 = [
    "BIG5_Extraversion",
    "BIG5_Agreeableness",
    "BIG5_Conscientiousness",
    "BIG5_Neuroticism",
    "BIG5_Openness"
]

## Read in Data

In [None]:
# independent shaping personality test scores
# 2250 profiles x 300 items = 675k rows
# NOTE(review): the joined frame further down shows one `ipip*` column per item,
# which suggests this file is wide (one row per profile) — confirm the row count.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
test_scores = pd.read_pickle(UNIDIMENSIONAL_SHAPING_SCORES)

# downstream task social media status updates
# 2250 profiles x 25 repeats = 56,250 rows
# NOTE: unpickling can execute arbitrary code; only load files you trust.
status_updates_raw = pd.read_pickle(STATUS_UPDATES_DATA)

# AMS personality predictions based on status updates
# long format, 7 trait rows per prediction (see the `// 7` grouping below);
# the preprocessing relies on `user_id`, `trait` and `value` columns and on the
# first CSV column being usable as an integer row index
ams_predictions_raw = pd.read_csv(AMS_DATA, index_col=0)

In [240]:
# pre-process status updates data

# Collapse the repeated generations of each prompted personality profile into
# one newline-separated string, giving one row per `item_preamble_id`.
status_updates = (
    status_updates_raw
    .groupby('item_preamble_id')['model_output']
    .agg(list)
    .map('\n'.join)
    .reset_index()
)
status_updates.head(1).model_output

0    1. "Spent the day in the garden planting some ...
Name: model_output, dtype: object

In [241]:
# sanity check: row count and non-null counts of the consolidated status updates
status_updates.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2250 entries, 0 to 2249
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   item_preamble_id  2250 non-null   object
 1   model_output      2250 non-null   object
dtypes: object(2)
memory usage: 35.3+ KB


In [242]:
# pre-process AMS data

# Tag each consecutive run of 7 rows with a shared ID so that the long-format
# trait rows of one prediction can be pivoted into a single wide row.
ams_predictions_raw['ID'] = ams_predictions_raw.index // 7

# long -> wide: one column per trait, one row per (ID, user_id) pair
ams_predictions_wide = ams_predictions_raw.pivot(
    index=['ID', 'user_id'],
    columns='trait',
    values='value',
)

# Average the AMS scores of all rows that share a prompt
# (56,250 rows / 25 repetitions -> 2,250 rows).
# `user_id` here carries what is called `item_preamble_id` elsewhere.
ams_predictions = ams_predictions_wide.groupby('user_id').mean()

In [243]:
# copy the index into a regular column so the join step can slice it with `.str`
ams_predictions['user_id'] = ams_predictions.index
ams_predictions

trait,Age,BIG5_Agreeableness,BIG5_Conscientiousness,BIG5_Extraversion,BIG5_Neuroticism,BIG5_Openness,Female,user_id
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ext0-agr0-con0-neu0-ope1-d1-su1,43.577030,0.710400,0.663244,0.288300,0.319632,0.554256,0.255863,ext0-agr0-con0-neu0-ope1-d1-su1
ext0-agr0-con0-neu0-ope1-d10-su1,37.493776,0.689204,0.628384,0.459060,0.325252,0.512500,0.577332,ext0-agr0-con0-neu0-ope1-d10-su1
ext0-agr0-con0-neu0-ope1-d11-su1,30.371848,0.421184,0.481988,0.244660,0.387648,0.496340,0.124126,ext0-agr0-con0-neu0-ope1-d11-su1
ext0-agr0-con0-neu0-ope1-d12-su1,34.881288,0.453956,0.540276,0.258296,0.469668,0.459368,0.465200,ext0-agr0-con0-neu0-ope1-d12-su1
ext0-agr0-con0-neu0-ope1-d13-su1,36.273739,0.664904,0.631584,0.424552,0.355420,0.512788,0.287927,ext0-agr0-con0-neu0-ope1-d13-su1
...,...,...,...,...,...,...,...,...
ext9-agr0-con0-neu0-ope0-d50-su1,37.217066,0.724972,0.657980,0.681044,0.346496,0.804928,0.865389,ext9-agr0-con0-neu0-ope0-d50-su1
ext9-agr0-con0-neu0-ope0-d6-su1,40.345907,0.703024,0.658444,0.652780,0.461776,0.787992,0.869430,ext9-agr0-con0-neu0-ope0-d6-su1
ext9-agr0-con0-neu0-ope0-d7-su1,34.066738,0.737880,0.628792,0.654608,0.320100,0.885108,0.159897,ext9-agr0-con0-neu0-ope0-d7-su1
ext9-agr0-con0-neu0-ope0-d8-su1,36.526568,0.737352,0.641612,0.720772,0.355876,0.837368,0.578221,ext9-agr0-con0-neu0-ope0-d8-su1


In [244]:
# # optional data checks:

# # show number of unique preambles
# test_scores['item_preamble_id'].nunique()

## Join Data

In [245]:
# attach ablation 01 scores to status updates

# Swap each frame's full ID column for a `partial_id` column used for matching.
# The partial ID is the full ID minus its last 4 characters (e.g. the `-su1`
# suffix of the status update IDs).
# NOTE(review): assumes every ID in all three frames ends in a 4-character
# suffix — confirm for the test score IDs.
#
# The frames are still modified in place, but the swap is skipped when the ID
# column is already gone, so re-running this cell no longer raises a KeyError.
for frame, id_col in (
    (status_updates, 'item_preamble_id'),
    (test_scores, 'item_preamble_id'),
    (ams_predictions, 'user_id'),
):
    if id_col in frame.columns:
        # create partial IDs for matching
        frame['partial_id'] = frame[id_col].str[:-4]
        # drop columns
        frame.drop(columns=id_col, inplace=True)

# index every frame by the partial ID and left-join onto the status updates
dfs = [status_updates, test_scores, ams_predictions]
dfs = [df.set_index('partial_id') for df in dfs]
df_grouped = dfs[0].join(dfs[1:])


In [246]:
# add intended personality level info
LVL_IDS = ["lvl-EXT", "lvl-AGR", "lvl-CON", "lvl-NEU", "lvl-OPE"]

# The index encodes the prompted levels as '-'-separated parts: keep a copy as
# a column, then split it into one column per domain plus the description ID.
df_grouped['level_info'] = df_grouped.index
level_parts = df_grouped['level_info'].str.split('-', expand=True)
df_grouped[LVL_IDS + ["description_id"]] = level_parts

In [247]:
def extract_integer(text):
    """
    Extract the level integer contained in a string.

    All digit characters of `text` are concatenated in order and parsed as a
    single integer, e.g. "ext0" -> 0, "d10" -> 10, "a1b2" -> 12.

    Args:
    text: The input string. Missing values (None / NaN) and values that are
        already integers are accepted as well, so the function can be mapped
        over columns that contain gaps or have already been converted.

    Returns:
    The integer found in the string, or None if no integer is found
    (this includes missing values). Integer inputs are returned as a plain int.

    Raises:
    TypeError: If `text` is any other non-iterable type (e.g. a non-NaN float).
    """
    import numbers  # local import: only needed for the integer pass-through

    # missing value: None, or NaN (the only float that differs from itself)
    if text is None or (isinstance(text, float) and text != text):
        return None

    # already an integer; bool is excluded because it is not a level value
    if isinstance(text, numbers.Integral) and not isinstance(text, bool):
        return int(text)

    digits = ''.join(char for char in text if char.isdigit())
    try:
        return int(digits)
    except ValueError:
        # no digits at all, or digit-like characters that int() cannot parse
        return None

In [None]:
# replace the values of each lvl- column with only the digits they contain
# The columns are re-derived from `level_info` instead of being mapped onto
# themselves, so the cell gives the same result however often it is run.
df_grouped[LVL_IDS + ["description_id"]] = (
    df_grouped['level_info'].str.split('-', expand=True).map(extract_integer)
)

In [250]:
# peek at new merged dataframe
# should be 2,250 rows (one per prompted profile)
df_grouped

Unnamed: 0_level_0,model_output,item_postamble_id,response_scale_id,response_choice_postamble_id,model_id,ipip1,ipip10,ipip100,ipip101,ipip102,...,BIG5_Neuroticism,BIG5_Openness,Female,level_info,lvl-EXT,lvl-AGR,lvl-CON,lvl-NEU,lvl-OPE,description_id
partial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ext0-agr0-con0-neu0-ope1-d1,"1. ""Spent the day in the garden planting some ...",plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,3,5,5,3,1,...,0.319632,0.554256,0.255863,ext0-agr0-con0-neu0-ope1-d1,0,0,0,0,1,1
ext0-agr0-con0-neu0-ope1-d10,"1. ""Well, folks, another day, another dollar. ...",plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,3,5,5,3,2,...,0.325252,0.512500,0.577332,ext0-agr0-con0-neu0-ope1-d10,0,0,0,0,1,10
ext0-agr0-con0-neu0-ope1-d11,"1. Well, another day at work where everyone el...",plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,1,5,5,3,3,...,0.387648,0.496340,0.124126,ext0-agr0-con0-neu0-ope1-d11,0,0,0,0,1,11
ext0-agr0-con0-neu0-ope1-d12,"1. ""Spent another long day at the restaurant. ...",plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,4,5,5,4,3,...,0.469668,0.459368,0.465200,ext0-agr0-con0-neu0-ope1-d12,0,0,0,0,1,12
ext0-agr0-con0-neu0-ope1-d13,"1. ""Another day at work. I enjoy the simple ro...",plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,3,5,5,3,2,...,0.355420,0.512788,0.287927,ext0-agr0-con0-neu0-ope1-d13,0,0,0,0,1,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ext9-agr0-con0-neu0-ope0-d50,1. 🌟 Just wrapped up the most exhilarating wor...,plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,1,2,2,1,5,...,0.346496,0.804928,0.865389,ext9-agr0-con0-neu0-ope0-d50,9,0,0,0,0,50
ext9-agr0-con0-neu0-ope0-d6,"1. 🌞 Good morning, fabulous friends and family...",plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,1,2,2,1,5,...,0.461776,0.787992,0.869430,ext9-agr0-con0-neu0-ope0-d6,9,0,0,0,0,6
ext9-agr0-con0-neu0-ope0-d7,"1. 🎉🚀 Hey, amazing people! Just finished an ex...",plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,2,3,3,1,5,...,0.320100,0.885108,0.159897,ext9-agr0-con0-neu0-ope0-d7,9,0,0,0,0,7
ext9-agr0-con0-neu0-ope0-d8,1. 🎉🌟 Hey everyone! Just got back from a spont...,plk-ipip-0,likert5_numeric,none,gpt-4o-2024-08-06,1,1,2,1,5,...,0.355876,0.837368,0.578221,ext9-agr0-con0-neu0-ope0-d8,9,0,0,0,0,8


In [251]:
# alias, not a copy: `group` and `df_grouped` refer to the same DataFrame
group = df_grouped

# Compute Correlations

In [252]:
# side-by-side view of the columns correlated below: IPIP test scores and AMS predictions
group[IPIP_SCALE_IDS + AMS_SCALE_IDS_2]

Unnamed: 0_level_0,IPIP300-EXT,IPIP300-AGR,IPIP300-CON,IPIP300-NEU,IPIP300-OPE,BIG5_Extraversion,BIG5_Agreeableness,BIG5_Conscientiousness,BIG5_Neuroticism,BIG5_Openness
partial_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ext0-agr0-con0-neu0-ope1-d1,2.383333,3.283333,3.400000,2.850000,1.300000,0.288300,0.710400,0.663244,0.319632,0.554256
ext0-agr0-con0-neu0-ope1-d10,2.500000,3.150000,3.083333,3.066667,1.066667,0.459060,0.689204,0.628384,0.325252,0.512500
ext0-agr0-con0-neu0-ope1-d11,2.283333,1.900000,2.000000,3.166667,1.033333,0.244660,0.421184,0.481988,0.387648,0.496340
ext0-agr0-con0-neu0-ope1-d12,2.733333,3.150000,3.433333,3.550000,1.083333,0.258296,0.453956,0.540276,0.469668,0.459368
ext0-agr0-con0-neu0-ope1-d13,2.516667,3.283333,3.483333,2.816667,1.100000,0.424552,0.664904,0.631584,0.355420,0.512788
...,...,...,...,...,...,...,...,...,...,...
ext9-agr0-con0-neu0-ope0-d50,5.000000,3.633333,3.600000,2.000000,4.233333,0.681044,0.724972,0.657980,0.346496,0.804928
ext9-agr0-con0-neu0-ope0-d6,5.000000,3.666667,3.783333,1.866667,4.183333,0.652780,0.703024,0.658444,0.461776,0.787992
ext9-agr0-con0-neu0-ope0-d7,4.983333,3.616667,3.850000,1.866667,4.200000,0.654608,0.737880,0.628792,0.320100,0.885108
ext9-agr0-con0-neu0-ope0-d8,5.000000,3.666667,3.333333,2.016667,4.016667,0.720772,0.737352,0.641612,0.355876,0.837368


In [269]:
# print for pasting (one value per line, in the order of IPIP_SCALE_IDS)

# correlate each domain's tested score with its AMS-predicted counterpart
pearson_pairs = [
    pearsonr(group[tested_col], group[downstream_col])
    for tested_col, downstream_col in zip(IPIP_SCALE_IDS, AMS_SCALE_IDS_2)
]
correlations = [statistic for statistic, _ in pearson_pairs]
p_values = [pvalue for _, pvalue in pearson_pairs]

for heading, values in (
    ("Pearson Correlation Coefficients (Tested Personality x Downstream Personality):", correlations),
    ("\np-values:", p_values),
):
    print(heading)
    for value in values:
        print(f"{value:.4f}")

Pearson Correlation Coefficients:
0.7785
0.8642
0.7705
0.6683
0.6593

p-values:
0.0000
0.0000
0.0000
0.0000
0.0000


In [270]:
# Spearman corrs
# print for pasting (one value per line, in the order of LVL_IDS)
spearman_pairs = []
for prompted_level, observed_level in zip(LVL_IDS, AMS_SCALE_IDS_2):
    # make sure to only use data where level information is present
    # (rows whose prompted level for this domain is above 0)
    has_level = group[prompted_level] > 0
    subset = group[has_level]
    spearman_pairs.append(spearmanr(subset[prompted_level], subset[observed_level]))

correlations = [statistic for statistic, _ in spearman_pairs]
p_values = [pvalue for _, pvalue in spearman_pairs]

for heading, values in (
    ("Spearman Correlation Coefficients (Prompted Personality Level x Downstream Personality):", correlations),
    ("\np-values:", p_values),
):
    print(heading)
    for value in values:
        print(f"{value:.4f}")

Spearman Correlation Coefficients:
0.8259
0.8853
0.8114
0.7394
0.8199

p-values:
0.0000
0.0000
0.0000
0.0000
0.0000
