In [73]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from scipy.stats import kruskal, mannwhitneyu, chi2_contingency
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import seaborn as sns
import os


In [74]:
# Configuration
pd.set_option('display.max_columns', None)
# The bundled seaborn styles were renamed to 'seaborn-v0_8*' in matplotlib 3.6
# and the old 'seaborn' alias was removed afterwards. Try the current name
# first and fall back to the legacy one on older matplotlib installations.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')


  plt.style.use('seaborn')


In [75]:
# Load data: both CSV files are read from ../data/performance
data_path_correctness, data_path_time = (
    os.path.join('..', 'data', 'performance', file_name)
    for file_name in ('SpringCorrectness.csv', 'SpringTime.csv')
)

correctness = pd.read_csv(data_path_correctness)
time = pd.read_csv(data_path_time)

In [76]:
# Preprocessing functions
def preprocess_data(df):
    """Reshape a wide per-session table into long format and drop missing values.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table containing ``Participant_ID`` and ``Group_ID`` columns;
        every other column is treated as one session. The input frame is not
        modified.

    Returns
    -------
    pandas.DataFrame
        One row per participant and session with the columns
        ``Participant_ID``, ``Group_ID``, ``Session``, ``Value``,
        ``Session_Type`` and ``Session_Num``. Rows whose ``Value`` is missing
        are removed. ``Session_Type`` / ``Session_Num`` are NaN when the
        session label does not contain ``P`` or ``T`` followed by digits.
    """
    long_df = df.melt(id_vars=['Participant_ID', 'Group_ID'],
                      var_name='Session',
                      value_name='Value')
    # Raw string: '\d' inside a normal literal is an invalid escape sequence
    # and triggers a warning on recent Python versions.
    session_parts = long_df['Session'].str.extract(r'([PT])(\d+)')
    long_df['Session_Type'] = session_parts[0]
    long_df['Session_Num'] = session_parts[1].astype(float)
    return long_df.dropna(subset=['Value'])

# Reshape the time table first, then the correctness table, with the same helper
time_long, correctness_long = (
    preprocess_data(wide_table) for wide_table in (time, correctness)
)

In [77]:
# Inspect the long-format time table returned by preprocess_data
time_long

Unnamed: 0,Participant_ID,Group_ID,Session,Value,Session_Type,Session_Num
0,1,1,T0,44.4200,T,0.0
1,2,2,T0,20.2200,T,0.0
2,3,3,T0,29.2700,T,0.0
3,4,4,T0,28.3800,T,0.0
4,5,1,T0,55.4400,T,0.0
...,...,...,...,...,...,...
413,39,3,T5,11.5000,T,5.0
414,41,1,T5,42.1559,T,5.0
415,42,4,T5,6.4667,T,5.0
416,46,2,T5,16.0500,T,5.0


In [78]:
# Restrict both metrics to the baseline + weekly tests (T0-T4); T5 is for RQ3
test_sessions = ['T0', 'T1', 'T2', 'T3', 'T4']


def _only_test_sessions(long_table):
    """Return the rows of ``long_table`` whose Session is listed in test_sessions."""
    return long_table[long_table['Session'].isin(test_sessions)]


time_tests = _only_test_sessions(time_long)
correctness_tests = _only_test_sessions(correctness_long)


In [79]:
def _kruskal_error_result(session, metric_name, message):
    """Build the result record returned when no test could be computed."""
    return {
        'session': session,
        'metric': metric_name,
        'kw_p': np.nan,
        'posthoc_pairs': [],
        'posthoc_p_raw': [],
        'posthoc_p_corrected': [],
        'error': message
    }


def kruskal_posthoc(data, session, metric_name, group_ids=(1, 2, 3, 4)):
    """Run a Kruskal-Wallis test across groups plus pairwise Mann-Whitney U tests.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format table with ``Session``, ``Group_ID`` and ``Value`` columns.
    session : str
        Session label to analyse (compared against the ``Session`` column).
    metric_name : str
        Label copied into the ``metric`` field of the result.
    group_ids : iterable, optional
        Group identifiers to compare, in the order used for pairing.
        Defaults to ``(1, 2, 3, 4)``.

    Returns
    -------
    dict
        Keys ``session``, ``metric``, ``kw_p``, ``posthoc_pairs``,
        ``posthoc_p_raw``, ``posthoc_p_corrected`` and ``error``.
        When fewer than two groups have data, or ``kruskal`` raises
        ``ValueError``, ``kw_p`` is NaN, the post-hoc lists are empty and
        ``error`` holds the reason; otherwise ``error`` is ``None``.
    """
    in_scope = data[(data['Session'] == session) &
                    (data['Group_ID'].isin(list(group_ids)))]

    # Collect the non-missing values of every requested group once.
    # A group only counts as available when it has at least one observed
    # value; counting rows instead would let an all-NaN group reach the tests
    # as an empty sample and inflate the Bonferroni factor with NaN p-values.
    samples = {}
    for group_id in group_ids:
        observed = in_scope.loc[in_scope['Group_ID'] == group_id, 'Value'].dropna().values
        if len(observed) >= 1:
            samples[group_id] = observed
    valid_groups = list(samples)

    if len(valid_groups) < 2:
        return _kruskal_error_result(
            session, metric_name,
            f"Only {len(valid_groups)} group(s) with data"
        )

    # Kruskal-Wallis test
    try:
        kw_result = kruskal(*samples.values())
    except ValueError as e:
        return _kruskal_error_result(session, metric_name, str(e))

    # Posthoc tests for every unordered pair of available groups
    pairs = [(first, second)
             for position, first in enumerate(valid_groups)
             for second in valid_groups[position + 1:]]
    # The alternative is spelled out because the default differs between
    # SciPy releases.
    pvals = [
        mannwhitneyu(samples[first], samples[second], alternative='two-sided').pvalue
        for first, second in pairs
    ]

    # Bonferroni correction (only the adjusted p-values are reported)
    _, corrected_p, _, _ = multipletests(pvals, alpha=0.05, method='bonferroni')

    return {
        'session': session,
        'metric': metric_name,
        'kw_p': kw_result.pvalue,
        'posthoc_pairs': pairs,
        'posthoc_p_raw': pvals,
        'posthoc_p_corrected': corrected_p,
        'error': None
    }

# Keep original session filter: this restates, unchanged, the T0-T4 list that
# was used earlier to build time_tests / correctness_tests.
test_sessions = ['T0','T1','T2','T3','T4']

In [80]:
def calculate_effect_size(g1, g2):
    """Calculate Cohen's d effect size using the pooled standard deviation.

    Parameters
    ----------
    g1, g2 : array-like of numbers
        The two samples to compare. Missing values (NaN) are ignored.

    Returns
    -------
    float
        ``(mean(g1) - mean(g2)) / sqrt(pooled variance)``, where each sample
        variance uses ``ddof=1``. NaN is returned when the statistic is
        undefined: either sample has fewer than two observed values, or the
        pooled variance is zero.
    """
    sample1 = np.asarray(g1, dtype=float)
    sample2 = np.asarray(g2, dtype=float)
    # Drop NaNs up front so the sample sizes, variances and means are all
    # computed from the same observations (pandas reductions skip NaN while
    # len() does not).
    sample1 = sample1[~np.isnan(sample1)]
    sample2 = sample2[~np.isnan(sample2)]

    n1, n2 = len(sample1), len(sample2)
    if n1 < 2 or n2 < 2:
        # The ddof=1 variance needs at least two observations per sample
        return np.nan

    s1, s2 = np.var(sample1, ddof=1), np.var(sample2, ddof=1)
    pooled_var = ((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2)
    if pooled_var == 0:
        # No spread in either sample: the standardised difference is undefined
        return np.nan

    return (np.mean(sample1) - np.mean(sample2)) / np.sqrt(pooled_var)


In [81]:
# RQ2 Analysis
def analyze_rq2(sessions=('T0', 'T1', 'T2', 'T3', 'T4')):
    """Run the Kruskal-Wallis / post-hoc analysis for every session and metric.

    Parameters
    ----------
    sessions : iterable of str, optional
        Session labels to analyse, in output order. Defaults to T0-T4.

    Returns
    -------
    pandas.DataFrame
        One row per (session, metric) built from the dicts returned by
        ``kruskal_posthoc``; within each session the ``time`` row precedes the
        ``correctness`` row. Reads the module-level ``time_tests`` and
        ``correctness_tests`` tables.
    """
    # Metric label -> table, in the order the rows are emitted per session
    metric_tables = (
        ('time', time_tests),
        ('correctness', correctness_tests),
    )

    results = [
        kruskal_posthoc(table, session, metric_name)
        for session in sessions
        for metric_name, table in metric_tables
    ]

    return pd.DataFrame(results)

# Generate results
rq2_results = analyze_rq2()

# Create the output folder before writing so the notebook also runs on a
# fresh checkout where ../data/rqs-results does not exist yet.
rq2_results_path = "../data/rqs-results/RQ2_results.csv"
os.makedirs(os.path.dirname(rq2_results_path), exist_ok=True)
rq2_results.to_csv(rq2_results_path)
rq2_results


Unnamed: 0,session,metric,kw_p,posthoc_pairs,posthoc_p_raw,posthoc_p_corrected,error
0,T0,time,0.647596,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.30743416592739536, 0.37722461666745344, 0.6...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]",
1,T0,correctness,0.973,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.8072501679320072, 1.0, 0.9193330052953381, ...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0]",
2,T1,time,0.058525,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.4807075277663513, 0.8784770784770783, 0.014...","[1.0, 1.0, 0.08857808857808858, 1.0, 0.2784039...",
3,T1,correctness,0.02763,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.9539596927720386, 0.35870524478134846, 0.01...","[1.0, 1.0, 0.09725724845535264, 1.0, 0.0607114...",
4,T2,time,0.404051,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.7750513736700824, 0.859819205054158, 0.0941...","[1.0, 1.0, 0.5649982526272784, 1.0, 1.0, 1.0]",
5,T2,correctness,0.258767,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.8504362683123465, 0.6761931257289725, 0.108...","[1.0, 1.0, 0.6528565802432741, 1.0, 0.38173163...",
6,T3,time,0.023192,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.6588432549492806, 0.42677673653298365, 0.01...","[1.0, 1.0, 0.11978671983129627, 1.0, 0.0960659...",
7,T3,correctness,0.436336,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.683846186890438, 0.6761931257289725, 0.2876...","[1.0, 1.0, 1.0, 1.0, 0.7822189704648379, 1.0]",
8,T4,time,0.04599,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[0.7910815129207817, 0.42677673653298365, 0.02...","[1.0, 1.0, 0.1484680322783936, 1.0, 1.0, 0.047...",
9,T4,correctness,0.195181,"[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]","[1.0, 0.683846186890438, 0.10880943004054568, ...","[1.0, 1.0, 0.6528565802432741, 1.0, 0.65285658...",
