In [9]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy.stats import mannwhitneyu, kruskal, chi2_contingency
from statsmodels.stats.contingency_tables import mcnemar


In [10]:
# Load data
# Both CSVs live in ../data/performance; build the two paths together, then read.
# The variable names (including `time`) are kept as-is because the run cell
# further down passes `correctness` and `time` to analyze_skill_retention.
data_path_correctness, data_path_time = (
    os.path.join('..', 'data', 'performance', csv_name)
    for csv_name in ('SpringCorrectness.csv', 'SpringTime.csv')
)

correctness = pd.read_csv(data_path_correctness)
time = pd.read_csv(data_path_time)

In [11]:
def prepare_data_for_analysis(correctness_df, time_df):
    """
    Reshape the wide-format data into long format for T4 and T5 sessions.

    Parameters
    ----------
    correctness_df : pandas.DataFrame
        Wide table containing at least the columns ``Participant_ID``,
        ``Group_ID``, ``T4`` and ``T5``; the session columns hold the
        correctness values.
    time_df : pandas.DataFrame
        Table with exactly the same column labels (same order) as
        ``correctness_df``; the session columns hold the completion times.

    Returns
    -------
    pandas.DataFrame
        Long table with the columns ``Participant_ID``, ``Group_ID``,
        ``session`` ('T4' or 'T5'), ``correctness``, ``completion_time`` and
        ``group`` (``'G'`` followed by the string form of ``Group_ID``).

    Raises
    ------
    AssertionError
        If the two inputs do not carry identical column labels.
    KeyError
        If one of the four required columns is missing.
    """
    # Index.equals copes with frames that have a different number of columns;
    # the element-wise `==` comparison raises an unrelated ValueError in that
    # case, so the intended message below would never be shown.
    assert correctness_df.columns.equals(time_df.columns), "Dataframes must have same columns"

    # Select only T4 and T5 columns plus ID columns
    id_columns = ['Participant_ID', 'Group_ID']
    session_columns = ['T4', 'T5']
    columns_of_interest = id_columns + session_columns
    correctness_subset = correctness_df[columns_of_interest].copy()
    time_subset = time_df[columns_of_interest].copy()

    # Reshape to long format: one row per participant row and session
    correctness_long = pd.melt(
        correctness_subset,
        id_vars=id_columns,
        value_vars=session_columns,
        var_name='session',
        value_name='correctness'
    )

    time_long = pd.melt(
        time_subset,
        id_vars=id_columns,
        value_vars=session_columns,
        var_name='session',
        value_name='completion_time'
    )

    # Merge the reshaped dataframes.
    # NOTE(review): the join key is (Participant_ID, session) only, so this
    # assumes Participant_ID is unique across groups -- repeated IDs would
    # multiply rows. Confirm against the source CSVs.
    merged_data = pd.merge(
        correctness_long,
        time_long[['Participant_ID', 'session', 'completion_time']],
        on=['Participant_ID', 'session']
    )

    # Add a proper group label
    merged_data['group'] = 'G' + merged_data['Group_ID'].astype(str)

    return merged_data


In [12]:
def analyze_skill_retention(correctness_df, time_df):
    """
    Analyze skill retention between T4 and T5 sessions across different instruction groups.

    The wide inputs are reshaped with ``prepare_data_for_analysis`` and then:

    * between groups at T5: Kruskal-Wallis on completion time, chi-square on
      the group x correctness table, and pairwise two-sided Mann-Whitney U
      tests when the Kruskal-Wallis p-value is below 0.05;
    * within each group (T4 vs T5): Wilcoxon signed-rank on completion time,
      exact McNemar on correctness, and mean-based change metrics;
    * a two-panel figure (box plot of completion times, bar plot of mean
      correctness per group and session).

    Parameters
    ----------
    correctness_df : pandas.DataFrame
        Wide correctness table, as accepted by ``prepare_data_for_analysis``.
    time_df : pandas.DataFrame
        Wide completion-time table with the same columns.

    Returns
    -------
    tuple
        ``(results, analysis_data)``. ``results`` is a dict with the keys
        ``'between_group_t5'``, ``'within_group'`` (one entry per group
        label) and ``'figures'``; ``analysis_data`` is the long-format
        DataFrame the tests were run on.
    """
    # Prepare data in the required format
    analysis_data = prepare_data_for_analysis(correctness_df, time_df)
    results = {}

    # 1. Between-group analysis at T5
    def between_group_analysis(df_t5):
        """Compare the groups at T5 on completion time and correctness."""
        # Kruskal-Wallis test for completion time
        groups_time = [group_data['completion_time'].values
                       for _, group_data in df_t5.groupby('group')]
        kw_stat, kw_p = kruskal(*groups_time)

        # Chi-square test for correctness
        contingency_table = pd.crosstab(df_t5['group'], df_t5['correctness'])
        chi2_stat, chi2_p, dof, expected = chi2_contingency(contingency_table)

        # Pairwise Mann-Whitney U tests if Kruskal-Wallis is significant.
        # NOTE(review): these p-values are not adjusted for multiple
        # comparisons; apply a correction before interpreting them.
        pairwise_tests = {}
        if kw_p < 0.05:
            groups = sorted(df_t5['group'].unique())
            for i in range(len(groups)):
                for j in range(i + 1, len(groups)):
                    g1, g2 = groups[i], groups[j]
                    stat, p = mannwhitneyu(
                        df_t5[df_t5['group'] == g1]['completion_time'],
                        df_t5[df_t5['group'] == g2]['completion_time'],
                        alternative='two-sided'
                    )
                    pairwise_tests[f'{g1}_vs_{g2}'] = {'statistic': stat, 'p_value': p}

        return {
            'kruskal_wallis': {'statistic': kw_stat, 'p_value': kw_p},
            'chi_square': {'statistic': chi2_stat, 'p_value': chi2_p},
            'pairwise_tests': pairwise_tests
        }

    # 2. Within-group analysis (T4 vs T5)
    def within_group_analysis(df_group):
        """Compare T4 against T5 inside one group (paired tests + change metrics)."""
        t4_rows = df_group[df_group['session'] == 'T4']
        t5_rows = df_group[df_group['session'] == 'T5']

        # Wilcoxon signed-rank test for completion time.
        # Pairing is positional: it relies on the T4 and T5 rows being in the
        # same participant order, which is how the melt in
        # prepare_data_for_analysis lays them out -- keep that in mind if the
        # data preparation changes.
        t4_time = t4_rows['completion_time']
        t5_time = t5_rows['completion_time']
        wilcoxon_stat, wilcoxon_p = stats.wilcoxon(t4_time, t5_time)

        # McNemar's test for correctness
        t4_correctness = t4_rows['correctness']
        t5_correctness = t5_rows['correctness']

        # Create contingency table for McNemar's test
        contingency = pd.crosstab(t4_correctness.values, t5_correctness.values)

        # crosstab only lists the outcomes that actually occurred, so a group
        # where one session has a single outcome (e.g. everybody correct at
        # T4) yields a 1x2 / 2x1 / 1x1 table. The discordant pairs are still
        # well defined in that case, so expand the table onto the outcome
        # levels seen in either session instead of skipping the test.
        outcome_levels = contingency.index.union(contingency.columns)

        try:
            if len(outcome_levels) == 2:
                mcnemar_table = contingency.reindex(
                    index=outcome_levels, columns=outcome_levels, fill_value=0
                ).values
                mcnemar_result = mcnemar(mcnemar_table, exact=True)
                mcnemar_stat = mcnemar_result.statistic
                mcnemar_p = mcnemar_result.pvalue
            else:
                # One outcome overall (no variation at all) or more than two
                # outcome levels: McNemar's test does not apply.
                mcnemar_stat, mcnemar_p = np.nan, np.nan
        except (ValueError, AttributeError):
            mcnemar_stat, mcnemar_p = np.nan, np.nan

        # Calculate retention metrics
        mean_t4_time = t4_time.mean()
        mean_t5_time = t5_time.mean()
        mean_t4_correctness = t4_correctness.mean()
        mean_t5_correctness = t5_correctness.mean()

        time_change = ((mean_t5_time - mean_t4_time) / mean_t4_time) * 100
        correctness_change = (mean_t5_correctness - mean_t4_correctness) * 100

        return {
            'wilcoxon': {'statistic': wilcoxon_stat, 'p_value': wilcoxon_p},
            'mcnemar': {'statistic': mcnemar_stat, 'p_value': mcnemar_p},
            'retention_metrics': {
                'time_change_percent': time_change,
                'correctness_change_percent': correctness_change,
                'mean_t4_time': mean_t4_time,
                'mean_t5_time': mean_t5_time,
                'mean_t4_correctness': mean_t4_correctness,
                'mean_t5_correctness': mean_t5_correctness
            }
        }

    # Run analyses
    results['between_group_t5'] = between_group_analysis(analysis_data[analysis_data['session'] == 'T5'])

    results['within_group'] = {}
    for group in sorted(analysis_data['group'].unique()):
        group_data = analysis_data[analysis_data['group'] == group]
        results['within_group'][group] = within_group_analysis(group_data)

    # Create visualizations
    fig1, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

    # Completion Time Box Plot
    sns.boxplot(x='group', y='completion_time', hue='session',
                data=analysis_data, ax=ax1)
    ax1.set_title('Completion Time Comparison (T4 vs T5)')
    ax1.set_ylabel('Time (seconds)')

    # Correctness Bar Plot
    correctness_data = analysis_data.groupby(['group', 'session'])['correctness'].mean().unstack()
    correctness_data.plot(kind='bar', ax=ax2)
    ax2.set_title('Correctness Rate Comparison (T4 vs T5)')
    ax2.set_ylabel('Correctness Rate')
    ax2.set_ylim(0, 1)
    plt.tight_layout()

    results['figures'] = {'retention_comparison': fig1}

    return results, analysis_data


In [13]:
def print_results(results):
    """Write a human-readable summary of the analysis results to stdout.

    ``results`` is the dictionary produced by ``analyze_skill_retention``:
    the ``'between_group_t5'`` and ``'within_group'`` entries are read; any
    other key is ignored.
    """
    between = results['between_group_t5']

    print("\nBetween-Group Analysis at T5:")
    print(f"Kruskal-Wallis test (completion time): p = {between['kruskal_wallis']['p_value']:.4f}")
    print(f"Chi-square test (correctness): p = {between['chi_square']['p_value']:.4f}")

    pairwise = between['pairwise_tests']
    if pairwise:
        print("\nPairwise Mann-Whitney U tests:")
        for pair_label in pairwise:
            print(f"{pair_label}: p = {pairwise[pair_label]['p_value']:.4f}")

    # (label, key in retention_metrics, format spec, suffix) for each summary line
    metric_lines = (
        ("Time change", 'time_change_percent', ".1f", "%"),
        ("Correctness change", 'correctness_change_percent', ".1f", "%"),
        ("Mean T4 time", 'mean_t4_time', ".1f", ""),
        ("Mean T5 time", 'mean_t5_time', ".1f", ""),
        ("Mean T4 correctness", 'mean_t4_correctness', ".2f", ""),
        ("Mean T5 correctness", 'mean_t5_correctness', ".2f", ""),
    )

    print("\nWithin-Group Analysis (T4 vs T5):")
    for group_name, group_result in results['within_group'].items():
        print(f"\n{group_name}:")
        print(f"Wilcoxon test (completion time): p = {group_result['wilcoxon']['p_value']:.4f}")

        mcnemar_p = group_result['mcnemar']['p_value']
        if np.isnan(mcnemar_p):
            print("McNemar test (correctness): Not applicable (insufficient data variation)")
        else:
            print(f"McNemar test (correctness): p = {mcnemar_p:.4f}")

        metrics = group_result['retention_metrics']
        for label, key, spec, suffix in metric_lines:
            print(f"{label}: {metrics[key]:{spec}}{suffix}")

In [14]:
import csv
import numpy as np
import pandas as pd
import os

def save_results_to_csv(results, filename="skill_retention_analysis.csv"):
    """
    Write the analysis results to a single sectioned CSV file.

    The file holds up to three headed sections, each a small CSV table:
    the between-group tests at T5, the pairwise tests (only when there are
    any), and one row per group for the within-group analysis. NaN test
    statistics and p-values are written as the text ``N/A``.

    Parameters:
    -----------
    results : dict
        Dictionary containing analysis results from analyze_skill_retention function
    filename : str
        Path to the output CSV file; a missing parent directory is created

    Returns:
    --------
    str
        Path to the saved CSV file
    """
    def na_if_nan(value):
        # NaN results are shown as the text 'N/A' in the output file
        return 'N/A' if np.isnan(value) else value

    # Make sure the target directory is there before writing
    target_dir = os.path.dirname(filename)
    if target_dir and not os.path.exists(target_dir):
        os.makedirs(target_dir)

    between = results['between_group_t5']
    per_group = results['within_group']

    # Section 1: between-group tests
    between_tests = [
        ('Kruskal-Wallis (time)', between['kruskal_wallis']),
        ('Chi-square (correctness)', between['chi_square']),
    ]
    between_df = pd.DataFrame({
        'Analysis_Type': ['Between_Group'] * len(between_tests),
        'Test': [label for label, _ in between_tests],
        'Statistic': [na_if_nan(test['statistic']) for _, test in between_tests],
        'P_Value': [na_if_nan(test['p_value']) for _, test in between_tests],
    })

    # Section 2: pairwise tests (may be absent)
    pairwise = between['pairwise_tests']
    if pairwise:
        pairwise_df = pd.DataFrame({
            'Analysis_Type': ['Pairwise_Tests'] * len(pairwise),
            'Group_Pair': list(pairwise),
            'P_Value': [na_if_nan(test['p_value']) for test in pairwise.values()],
        })
    else:
        pairwise_df = pd.DataFrame()

    # Section 3: one row per group with test p-values and retention metrics
    within_rows = []
    for group_label, group_result in per_group.items():
        wilcoxon_p = na_if_nan(group_result['wilcoxon']['p_value'])
        mcnemar_p = na_if_nan(group_result['mcnemar']['p_value'])
        metrics = group_result['retention_metrics']
        within_rows.append({
            'Analysis_Type': 'Within_Group',
            'Group': group_label,
            'Wilcoxon_P_Value': wilcoxon_p,
            'McNemar_P_Value': mcnemar_p,
            'T4_Time': metrics['mean_t4_time'],
            'T5_Time': metrics['mean_t5_time'],
            'Time_Change_Percent': metrics['time_change_percent'],
            'T4_Correctness': metrics['mean_t4_correctness'],
            'T5_Correctness': metrics['mean_t5_correctness'],
            'Correctness_Change_Percent': metrics['correctness_change_percent'],
        })
    within_df = pd.DataFrame(within_rows)

    # Heading + table pairs, in the order they appear in the file
    sections = [("BETWEEN-GROUP ANALYSIS AT T5\n", between_df)]
    if not pairwise_df.empty:
        sections.append(("\nPAIRWISE TESTS\n", pairwise_df))
    sections.append(("\nWITHIN-GROUP ANALYSIS (T4 vs T5)\n", within_df))

    with open(filename, 'w', newline='') as out_file:
        for heading, table in sections:
            out_file.write(heading)
            table.to_csv(out_file, index=False)

    print(f"Results saved to {filename}")
    return filename



In [15]:
# Run analysis
# Execute the retention analysis on the loaded tables, then persist the
# results; the save call stays last so the cell still displays the output path.
results, analysis_data = analyze_skill_retention(correctness_df=correctness, time_df=time)
save_results_to_csv(results, filename="../data/rqs-results/RQ3_results.csv")

Results saved to ../data/rqs-results/RQ3_results.csv


'../data/rqs-results/RQ3_results.csv'