## Demographic information

In this script, I am calculating the demographic information
First, before exclusion:

In [10]:
import os
import glob
import json
import pandas as pd

# Define the directory path
directory_path = "/Users/born/Downloads/results (27)"

# Find all JSON files
json_files = glob.glob(os.path.join(directory_path, "*.json"))
print(f"Found {len(json_files)} JSON files.")

data_list = []

for json_file in json_files:
    print(f"Processing file: {json_file}")

    # Load JSON data
    with open(json_file, 'r') as file:
        json_data = json.load(file)
    
    participant_id = os.path.splitext(os.path.basename(json_file))[0]
    age = None
    gender = None
    
    # Iterate through each trial to find the response with demographic info
    for trial in json_data:
        if 'response' in trial:
            response_data = trial['response']
            if 'Gender' in response_data and 'Age' in response_data:
                gender = response_data['Gender']
                age = response_data['Age']
                break
    
    # Append the extracted data to the list
    data_list.append({'participant_id': participant_id, 'age': age, 'gender': gender})

# Convert the list to a DataFrame
df_demographics = pd.DataFrame(data_list)

# Convert age to numeric, coercing errors to NaN
df_demographics['age'] = pd.to_numeric(df_demographics['age'], errors='coerce')

# Calculate the mean age and standard deviation
mean_age = df_demographics['age'].mean()
std_age = df_demographics['age'].std()

# Count the number of male, female, and diverse participants
gender_counts = df_demographics['gender'].value_counts()

print(f"Mean Age: {mean_age}")
print(f"Standard Deviation of Age: {std_age}")
print("\nGender Counts:")
print(gender_counts)

# Display the demographics DataFrame
print(df_demographics.head())



Found 501 JSON files.
Processing file: /Users/born/Downloads/results (27)/2024-04-16 14_57_03.json
Processing file: /Users/born/Downloads/results (27)/2024-04-15 12_47_36.json
Processing file: /Users/born/Downloads/results (27)/2024-04-16 10_33_14.json
Processing file: /Users/born/Downloads/results (27)/2024-04-11 12_21_59.json
Processing file: /Users/born/Downloads/results (27)/2024-04-15 09_25_50.json
Processing file: /Users/born/Downloads/results (27)/2024-04-16 17_29_54.json
Processing file: /Users/born/Downloads/results (27)/2024-04-15 09_06_49.json
Processing file: /Users/born/Downloads/results (27)/2024-04-16 12_16_55.json
Processing file: /Users/born/Downloads/results (27)/2024-04-15 09_32_03.json
Processing file: /Users/born/Downloads/results (27)/2024-04-15 13_48_57.json
Processing file: /Users/born/Downloads/results (27)/2024-04-16 13_49_57.json
Processing file: /Users/born/Downloads/results (27)/2024-04-12 10_33_43.json
Processing file: /Users/born/Downloads/results (27)/20


### Next, after exclusion


In [20]:
import os
import glob
import json
import pandas as pd

# Define the directory path
directory_path = "/Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers"

# Find all JSON files
json_files = glob.glob(os.path.join(directory_path, "*.json"))
print(f"Found {len(json_files)} JSON files.")

data_list = []

for json_file in json_files:
    print(f"Processing file: {json_file}")

    # Load JSON data
    with open(json_file, 'r') as file:
        file_content = file.read()
        # Split the file content into lines and parse each line as JSON
        lines = file_content.splitlines()
        json_data = [json.loads(line) for line in lines]
    
    participant_id = os.path.splitext(os.path.basename(json_file))[0]
    age = None
    gender = None
    
    # Iterate through each trial to find the survey trial with demographic info
    for trial in json_data:
        if trial.get('trial_type') == 'survey':
            if 'response.Gender' in trial and 'response.Age' in trial:
                gender = trial['response.Gender']
                age = trial['response.Age']
                break
    
    # Append the extracted data to the list
    data_list.append({'participant_id': participant_id, 'age': age, 'gender': gender})

# Convert the list to a DataFrame
df_demographics = pd.DataFrame(data_list)

# Convert age to numeric, coercing errors to NaN
df_demographics['age'] = pd.to_numeric(df_demographics['age'], errors='coerce')

# Calculate the mean age and standard deviation
mean_age = df_demographics['age'].mean()
std_age = df_demographics['age'].std()

# Count the number of male, female, and diverse participants
gender_counts = df_demographics['gender'].value_counts()

print(f"Mean Age: {mean_age}")
print(f"Standard Deviation of Age: {std_age}")
print("\nGender Counts:")
print(gender_counts)

# Display the demographics DataFrame
print(df_demographics.head())


Found 473 JSON files.
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-16 14_57_03.json
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-15 12_47_36.json
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-16 10_33_14.json
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-11 12_21_59.json
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-15 09_25_50.json
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-16 17_29_54.json
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-15 09_06_49.json
Processing file: /Users/born/Downloads/nonidentifiable_results_exp_final_exc_perf_outliers/2024-04-16 12_16_55.json
Processing file: /Users/born/Downloads/nonidentifi