In [None]:
import pandas as pd


In [None]:
# Load survey data.
# The workbook path is relative, so it is resolved against the notebook's
# current working directory.
file_path = 'data.xlsx'

# Passing a list of sheet names makes read_excel return a dict that maps each
# sheet name to its DataFrame, so both sheets come from a single call.
sheets = pd.read_excel(
    file_path,
    sheet_name=['index_2', 'Pollfish_Survey_Work_behaviours'],
)
index2 = sheets['index_2']                        # code index sheet
df = sheets['Pollfish_Survey_Work_behaviours']    # survey responses sheet


In [None]:
# Identify cognitive inputs (I) and performance targets (T) from the 'xy'
# column of the index sheet.
input_codes = index2.loc[index2['xy'] == 'I', 'Code'].tolist()
target_codes = index2.loc[index2['xy'] == 'T', 'Code'].tolist()

# Task questions are the codes Q47–Q76 (both ends inclusive).
FIRST_TASK_QUESTION = 47
LAST_TASK_QUESTION = 76


def _question_number(code):
    """Return the integer N for a code of the form 'Q<N>', else None.

    Non-string values (e.g. NaN from a blank cell in the index sheet) and
    codes whose suffix is not an integer (e.g. 'Q47a') yield None instead of
    raising, so one malformed row cannot abort the whole selection.
    """
    if not isinstance(code, str) or not code.startswith('Q'):
        return None
    try:
        return int(code[1:])
    except ValueError:
        return None


# Keep only task-range codes that are actually present as columns in the
# survey data, preserving the order of the index sheet.
task_codes = []
for code in index2['Code']:
    number = _question_number(code)
    if number is None or not FIRST_TASK_QUESTION <= number <= LAST_TASK_QUESTION:
        continue
    if code in df.columns:
        task_codes.append(code)

print(f"Found {len(input_codes)} cognitive inputs, {len(task_codes)} tasks, {len(target_codes)} performance targets")


In [None]:
# Export correlations to Excel: one sheet per (task question, response value).
# Each sheet holds the correlations of the cognitive inputs and task questions
# (rows) against the performance targets (columns), computed only over the
# respondents who gave that response to that task question.
output_file = 'task_group_correlations.xlsx'

# Excel rejects these characters in worksheet names; map each of them to '_'.
_INVALID_SHEET_CHARS = str.maketrans({ch: '_' for ch in '[]:*?/\\'})


def _sheet_name(task, resp):
    """Build a valid worksheet name of the form '<task>_<response>'.

    Integer-valued responses are written without a decimal part (2.0 -> '2').
    Any other value keeps its own text, so fractional responses such as 1.2
    and 1.7 get separate sheets and non-numeric answers do not raise.
    The result is truncated to Excel's 31-character limit.
    """
    try:
        as_float = float(resp)
    except (TypeError, ValueError):
        label = str(resp)
    else:
        label = str(int(as_float)) if as_float.is_integer() else str(resp)
    return f"{task}_{label}".translate(_INVALID_SHEET_CHARS)[:31]


# Filtering rows with a boolean mask keeps column dtypes, so the set of usable
# columns is the same for every subset and can be resolved once up front.
numeric_cols = set(df.select_dtypes(include='number').columns)

# Keep only codes that exist in df as numeric columns (anything else cannot be
# correlated and would raise KeyError on selection). dict.fromkeys drops
# repeated codes while preserving their first-seen order.
row_codes = [c for c in dict.fromkeys(input_codes + task_codes) if c in numeric_cols]
col_codes = [c for c in dict.fromkeys(target_codes) if c in numeric_cols]
# A code listed on both sides must be selected only once, otherwise the frame
# would carry duplicate column labels.
analysis_cols = list(dict.fromkeys(row_codes + col_codes))

with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    for task in task_codes:
        # One sheet per distinct non-missing response to this task question.
        for resp in sorted(df[task].dropna().unique()):
            subset = df.loc[df[task] == resp, analysis_cols]
            # Full pairwise correlation, then slice out inputs/tasks x targets.
            corr_df = subset.corr().loc[row_codes, col_codes]
            corr_df.to_excel(writer, sheet_name=_sheet_name(task, resp))
print(f"Exported task-based correlations to {output_file}")