In [1]:
import pandas as pd
import numpy as np
import pingouin as pg

In [2]:
# Folder that holds the per-question score CSVs read by this notebook.
DATA_ROOT = './supplementary_data/seed_runs'

# SST variant of the two active statements below (kept as a manual dataset switch):
#base_df = pd.read_csv(f'{DATA_ROOT}/sst_gpt_41_mini_text_per_question_scores.csv')
#sub_order = base_df.subject.values.flatten()

# AAPECS baseline scores. The participant IDs, in file order, become the
# reference ordering used wherever `sub_order` is consumed.
base_scores_csv = f'{DATA_ROOT}/aapecs_gpt_41_mini_text_per_question_scores.csv'
base_df = pd.read_csv(base_scores_csv)
sub_order = base_df['participantID'].values.flatten()

In [3]:
# Dataset switch: each commented-out line is the SST counterpart of the active
# AAPECS setting directly below it.
#cols_of_interest = ['neuroticism', 'extraversion', 'openness', 'agreeableness', 'conscientiousness']
cols_of_interest = ['neoNeuroticism', 'neoExtraversion', 'neoOpenness', 'neoAgreeableness', 'neoConscientiousness']
#index_col = 'subject'
index_col = 'participantID'
#dataset = 'sst'
dataset = 'aapecs'

# Repeat-run reliability: the five files share seed 0 and differ only in the
# run counter (1..5). Each run is treated as one "rater" of the same participants.
seed = 0

# Read every run exactly once (instead of once per trait) and align its rows to
# the baseline participant order so scores line up across runs.
run_scores = []
for counter in range(1, 6):
    run_df = pd.read_csv(f'{DATA_ROOT}/seed_runs/{dataset}_gpt_41_mini_text_per_question_scores_seed{seed}_{counter}.csv')
    run_df = run_df[[index_col] + cols_of_interest]
    run_scores.append(run_df.set_index(index_col).reindex(sub_order))

results = []

for col in cols_of_interest:
    # Long format expected by pingouin: one row per (subject, rater) pair.
    data = pd.concat(
        [
            pd.DataFrame({"subject": sub_order, "rater": rater_idx, "score": run[col].values})
            for rater_idx, run in enumerate(run_scores, start=1)
        ],
        ignore_index=True,
    )

    icc_table = pg.intraclass_corr(data=data, targets='subject', raters='rater', ratings='score')
    # Keep the ICC3 row only; this is the value reported as ICC(3,1) below.
    icc3 = icc_table.loc[(icc_table['Type'] == 'ICC3') & (icc_table['CI95%'].notna()), :]

    results.append((col, icc3["ICC"].iloc[0], icc3["pval"].iloc[0], icc3["CI95%"].iloc[0]))


# `df` is the formatted summary table (one row per trait).
df = pd.DataFrame(results, columns=["Trait", "ICC(3,1)", "p-value", "95% CI"])

# NOTE(review): str.capitalize() lowercases everything after the first letter,
# so 'neoNeuroticism' is printed as 'Neoneuroticism'.
df["Trait"] = df["Trait"].map(lambda x: str(x).capitalize())
df["ICC(3,1)"] = df["ICC(3,1)"].map("{:.3f}".format)
# NOTE(review): p-values that underflow to 0.0 show up as 0 in the printed
# table; quote them as an upper bound rather than as exactly zero.
df["p-value"] = df["p-value"].map("{:.2e}".format)
df["95% CI"] = df["95% CI"].apply(lambda x: f"[{x[0]:.2f}, {x[1]:.2f}]")

print(df.to_markdown(index=False))

| Trait                |   ICC(3,1) |   p-value | 95% CI       |
|:---------------------|-----------:|----------:|:-------------|
| Neoneuroticism       |      0.992 | 0         | [0.99, 0.99] |
| Neoextraversion      |      0.987 | 0         | [0.98, 0.99] |
| Neoopenness          |      0.966 | 5.51e-280 | [0.96, 0.97] |
| Neoagreeableness     |      0.963 | 1.01e-271 | [0.95, 0.97] |
| Neoconscientiousness |      0.974 | 8.71e-306 | [0.97, 0.98] |


In [4]:
# Dataset switch: each commented-out line is the SST counterpart of the active
# AAPECS setting directly below it.
#cols_of_interest = ['neuroticism', 'extraversion', 'openness', 'agreeableness', 'conscientiousness']
cols_of_interest = ['neoNeuroticism', 'neoExtraversion', 'neoOpenness', 'neoAgreeableness', 'neoConscientiousness']
#index_col = 'subject'
index_col = 'participantID'
#dataset = 'sst'
dataset = 'aapecs'

# Across-seed reliability: five runs, each produced with a different seed, are
# treated as five "raters" of the same participants.
# The seed values are drawn from the legacy global NumPy RNG seeded with 0 and
# are interpolated into the CSV file names below, so the draw must stay exactly
# as written to keep resolving the same files.
np.random.seed(0)
seeds = [np.random.randint(0, 1000) for _ in range(5)]

# Read every run exactly once (instead of once per trait) and align its rows to
# the baseline participant order so scores line up across runs.
run_scores = []
for seed in seeds:
    run_df = pd.read_csv(f'{DATA_ROOT}/seed_runs/{dataset}_gpt_41_mini_text_per_question_scores_seed{seed}_999.csv')
    run_df = run_df[[index_col] + cols_of_interest]
    run_scores.append(run_df.set_index(index_col).reindex(sub_order))

results = []

for col in cols_of_interest:
    # Long format expected by pingouin: one row per (subject, rater) pair.
    data = pd.concat(
        [
            pd.DataFrame({"subject": sub_order, "rater": rater_idx, "score": run[col].values})
            for rater_idx, run in enumerate(run_scores, start=1)
        ],
        ignore_index=True,
    )

    icc_table = pg.intraclass_corr(data=data, targets='subject', raters='rater', ratings='score')
    # Keep the ICC3 row only; this is the value reported as ICC(3,1) below.
    icc3 = icc_table.loc[(icc_table['Type'] == 'ICC3') & (icc_table['CI95%'].notna()), :]

    results.append((col, icc3["ICC"].iloc[0], icc3["pval"].iloc[0], icc3["CI95%"].iloc[0]))


# `df` is the formatted summary table (one row per trait).
df = pd.DataFrame(results, columns=["Trait", "ICC(3,1)", "p-value", "95% CI"])

# NOTE(review): str.capitalize() lowercases everything after the first letter,
# so 'neoNeuroticism' is printed as 'Neoneuroticism'.
df["Trait"] = df["Trait"].map(lambda x: str(x).capitalize())
df["ICC(3,1)"] = df["ICC(3,1)"].map("{:.3f}".format)
# NOTE(review): p-values that underflow to 0.0 show up as 0 in the printed
# table; quote them as an upper bound rather than as exactly zero.
df["p-value"] = df["p-value"].map("{:.2e}".format)
df["95% CI"] = df["95% CI"].apply(lambda x: f"[{x[0]:.2f}, {x[1]:.2f}]")

print(df.to_markdown(index=False))

| Trait                |   ICC(3,1) |   p-value | 95% CI       |
|:---------------------|-----------:|----------:|:-------------|
| Neoneuroticism       |      0.992 | 0         | [0.99, 0.99] |
| Neoextraversion      |      0.986 | 0         | [0.98, 0.99] |
| Neoopenness          |      0.967 | 4.24e-283 | [0.96, 0.98] |
| Neoagreeableness     |      0.959 | 1.09e-263 | [0.95, 0.97] |
| Neoconscientiousness |      0.969 | 1.25e-289 | [0.96, 0.98] |
