# Survey Analysis

## Examine Steering Effect

In [9]:
# Import dependencies and list all JSON result files
import json
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats


# Directory that holds one cleaned survey result (JSON) per participant session.
data_dir = './clean_results'
# os.listdir returns entries in arbitrary order; sort the names so that the row
# order of every table built from all_files is reproducible across runs.
all_files = sorted(f for f in os.listdir(data_dir) if f.endswith('.json'))
print(f"Found {len(all_files)} JSON files.")

Found 23 JSON files.


In [None]:
# Load every survey JSON file and collect the task 1 / group 1 ratings in one table.
# task1_group1 gets one row per successfully parsed file and contains the Bach-style,
# Mozart-style, quality and coherence scores, first for the original clip and then
# for the steered clip (stored under the "sample1" key in the survey data).
task1_group1_columns = [
    't1_g1_Bach_style_original',
    't1_g1_Mozart_style_original',
    't1_g1_Quality_original',
    't1_g1_Coherent_original',
    't1_g1_Bach_style_steering',
    't1_g1_Mozart_style_steering',
    't1_g1_Quality_steering',
    't1_g1_Coherent_steering',
]

# Accumulate plain dicts and build the DataFrame once at the end. Growing a
# DataFrame with pd.concat inside the loop copies all previous rows on every
# iteration and concatenates onto an empty frame.
task1_group1_records = []
for file in all_files:
    try:
        with open(os.path.join(data_dir, file), 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # A malformed file is reported and skipped; the remaining files are still loaded.
        print(f"❌ JSONDecodeError in {file}: {e}")
        continue

    # Direct indexing is intentional: a file without these keys raises KeyError
    # instead of silently producing an incomplete row.
    survey_data = data['survey_data']
    original = survey_data['task1_group1_original']
    steering = survey_data['task1_group1_sample1']

    task1_group1_records.append({
        # ratings of the original clip
        't1_g1_Bach_style_original': original['style_ratings']['bach'],
        't1_g1_Mozart_style_original': original['style_ratings']['mozart'],
        't1_g1_Quality_original': original['quality_ratings']['overall_quality'],
        't1_g1_Coherent_original': original['quality_ratings']['coherence'],
        # ratings of the steered clip
        't1_g1_Bach_style_steering': steering['style_ratings']['bach'],
        't1_g1_Mozart_style_steering': steering['style_ratings']['mozart'],
        't1_g1_Quality_steering': steering['quality_ratings']['overall_quality'],
        't1_g1_Coherent_steering': steering['quality_ratings']['coherence'],
    })

# Passing columns= keeps the expected schema even when no file could be parsed.
task1_group1 = pd.DataFrame(task1_group1_records, columns=task1_group1_columns)

In [11]:

# Display the task 1 / group 1 ratings table (one row per parsed survey file).
task1_group1

Unnamed: 0,Bach_style_original,Mozart_style_original,Quality_original,Coherent_original,Bach_style_steering,Mozart_style_steering,Quality_steering,Coherent_steering
0,5,2,5,5,2,5,4,3
1,5,2,5,5,2,5,4,3
2,3,3,5,5,2,5,5,5
3,3,3,5,5,2,5,5,5
4,5,2,5,5,2,5,5,5
5,4,2,3,3,2,3,3,3
6,4,2,4,3,4,2,5,4
7,4,2,4,3,4,2,5,4
8,4,2,3,3,4,3,4,4
9,5,2,4,4,3,4,3,4


In [20]:
# %%
# Now run the wide table build again
import os
import json
import glob
import pandas as pd
import numpy as np
from pathlib import Path
# import caas_jupyter_tools

# Directories that are scanned for survey result files.
search_dirs = [
    "./clean_results",
]

# Collect every *.json path found in the search directories. A set removes
# duplicate paths, and the final sort gives a stable processing order.
matched_paths = set()
for d in search_dirs:
    pattern = os.path.join(d, "*.json")
    matched_paths.update(glob.glob(pattern))
all_files = sorted(matched_paths)

def get_nested(d, keys, default=None):
    """Follow a sequence of keys through nested dictionaries.

    Args:
        d: Object to start from; anything that is not a dict yields ``default``
            as soon as a key has to be looked up in it.
        keys: Iterable of keys, applied from the outermost level inwards.
        default: Value returned when a level is not a dict or lacks the key.

    Returns:
        The value stored at the end of the key path, or ``default`` if the path
        cannot be followed completely. With an empty ``keys`` the starting
        object itself is returned.
    """
    node = d
    for key in keys:
        if isinstance(node, dict) and key in node:
            node = node[key]
        else:
            # Path is broken at this level: stop descending.
            return default
    return node

# Task identifiers as they appear in the survey keys ("task1" ... "task3").
TASKS = [f"task{number}" for number in range(1, 4)]
# Group numbers available within each task.
GROUPS = list(range(1, 4))

def colnames_for(task, g):
    """Return the wide-table column names for one task/group combination.

    Every name starts with ``t<last character of task>_g<g>``. The four composer
    style ratings plus quality and coherence are listed first for the
    ``original`` clip, then for the ``steering`` clip. For ``"task3"`` the
    integration and balance columns are appended, again original before steering.

    Args:
        task: Task identifier such as ``"task1"``; its last character is used
            in the prefix.
        g: Group number used in the prefix.

    Returns:
        List of column-name strings in table order.
    """
    prefix = f"t{task[-1]}_g{g}"
    conditions = ("original", "steering")
    composers = ("Bach", "Mozart", "Beethoven", "Chopin")
    measures = [f"{composer}_style" for composer in composers] + ["Quality", "Coherent"]

    names = [
        f"{prefix}_{measure}_{condition}"
        for condition in conditions
        for measure in measures
    ]
    if task == "task3":
        # Only task 3 has the two additional fusion measures.
        names.extend(
            f"{prefix}_{measure}_{condition}"
            for condition in conditions
            for measure in ("Integration", "Balance")
        )
    return names

# Full column order of the wide table: the session id first, followed by the
# column names of every task/group combination (tasks outermost, groups inside).
all_columns = ["session_id"] + [
    name
    for t in TASKS
    for g in GROUPS
    for name in colnames_for(t, g)
]

# (column label, rating section, rating key) for the ratings present in every task.
base_fields = [
    ("Bach_style", "style_ratings", "bach"),
    ("Mozart_style", "style_ratings", "mozart"),
    ("Beethoven_style", "style_ratings", "beethoven"),
    ("Chopin_style", "style_ratings", "chopin"),
    ("Quality", "quality_ratings", "overall_quality"),
    ("Coherent", "quality_ratings", "coherence"),
]
# (column label, rating key) for the extra fusion ratings that only task 3 has.
fusion_fields = [
    ("Integration", "integration"),
    ("Balance", "balance"),
]

# Build one flat record per survey file; a rating that is absent becomes NaN.
rows = []
for fp in all_files:
    try:
        with open(fp, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        # Report the malformed file and move on to the next one.
        print(f"❌ JSONDecodeError in {fp}: {e}")
        continue

    survey = data.get("survey_data", {})
    # The file name without its extension serves as the session identifier.
    row = {"session_id": Path(fp).stem}

    for task in TASKS:
        for g in GROUPS:
            prefix = f"t{task[-1]}_g{g}"
            key_orig = f"{task}_group{g}_original"
            key_samp = f"{task}_group{g}_sample1"
            orig = survey.get(key_orig, {})
            samp = survey.get(key_samp, {})
            # Original clip first, then the steered sample, for every rating.
            conditions = (("original", orig), ("steering", samp))

            for condition, ratings in conditions:
                for label, section, item in base_fields:
                    row[f"{prefix}_{label}_{condition}"] = get_nested(ratings, [section, item], np.nan)

            if task == "task3":
                for condition, ratings in conditions:
                    for label, item in fusion_fields:
                        row[f"t3_g{g}_{label}_{condition}"] = get_nested(ratings, ["fusion_ratings", item], np.nan)

    rows.append(row)

# Assemble the wide table from the collected records, with the columns arranged
# in the order given by all_columns.
big_df = pd.DataFrame(data=rows, columns=all_columns)

# Write the table as CSV (without the index column) into ./analysis, creating
# the directory first when it does not exist yet.
out_dir = Path("./analysis")
out_dir.mkdir(parents=True, exist_ok=True)
csv_path = out_dir.joinpath("all_tasks_wide_table.csv")
big_df.to_csv(csv_path, index=False)

# Last expression of the cell: show where the file was written, as a POSIX-style path.
csv_path.as_posix()


'analysis/all_tasks_wide_table.csv'