![Logo AIED26](../assets/logo_AIED26.png)

# **Notebook#02 : Analysis on RQ1**
## [impact on in-game progression and learning gain]

## 1/ Imports

In [1]:
# Internal
import sys
sys.path.append("../src")
import students_constants as stu_const
import interaction_constants as int_const
import tests_constants  as tes_const

# External
import pandas as pd
from scipy import stats
import numpy as np

## 2/ Data importation

In [2]:
# Load the interim interaction, pre-test and post-test datasets.
# NOTE: unpickling can execute arbitrary code -- only load files produced by this project.
interaction_data, pre_test_data, post_test_data = map(
    pd.read_pickle,
    (
        "../data/interim/interaction_data.pkl",
        "../data/interim/pre_test_data.pkl",
        "../data/interim/post_test_data.pkl",
    ),
)

## 3/ In-game progression [A/B/C]

### 3.1/ Calculate game progression (index 0-100)

In [3]:
# Keep only the STARTED actions: they show which levels each game reached
started_actions = interaction_data.loc[
    interaction_data[int_const.ACTION_DATA_KEY] == int_const.STARTED_ACTION
]

# Highest level started in each game (one entry per game id)
max_level_per_student = (
    started_actions
    .groupby(int_const.GAME_ID_DATA_KEY)[int_const.LEVEL_DATA_KEY]
    .max()
)

print(f"Number of games detected: {len(max_level_per_student)}")


def _summarise_game(game_id, max_level):
    """Build the progress record of one game from all its traces at its highest level."""
    traces_at_max_level = interaction_data.loc[
        (interaction_data[int_const.GAME_ID_DATA_KEY] == game_id)
        & (interaction_data[int_const.LEVEL_DATA_KEY] == max_level)
    ]
    return {
        'game_id': game_id,
        # Group id is read from the first trace found at that level
        'group_id': traces_at_max_level[int_const.GROUP_ID_DATA_KEY].iloc[0],
        'max_level': max_level,
        # Furthest progression recorded inside the highest level
        'max_progression': traces_at_max_level[int_const.GAME_PROGRESSION_DATA_KEY].max(),
    }


# One record per game, in the order of the per-game maximum levels
student_progress_data = [
    _summarise_game(game_id, max_level_per_student[game_id])
    for game_id in max_level_per_student.index
]

# Create progress dataframe
progress_df = pd.DataFrame(student_progress_data)

# Calculate progress index (0-100 points)
# - Each completed level gives 12.5 points (100/8)
# - For current level, progression percentage gives fraction of 12.5 points
def calculate_progress_index(max_level, max_progression, total_levels=8):
    """Convert a (level, in-level progression) pair into a 0-100 progress index.

    Every level is worth ``100 / total_levels`` points: the levels below
    ``max_level`` count in full, and the level in progress contributes the
    ``max_progression`` percentage of one level's worth.

    Args:
        max_level: 1-based number of the highest level reached.
        max_progression: progression inside that level, as a percentage.
        total_levels: number of levels the index is spread over
            (default 8, the value that used to be hard-coded).

    Returns:
        The progress index, capped at 100.
    """
    points_per_level = 100 / total_levels
    completed_levels_points = (max_level - 1) * points_per_level
    current_level_points = (max_progression / 100) * points_per_level
    return min(completed_levels_points + current_level_points, 100)

# Derive the progress index of every game from its (max level, max progression) pair
progress_df['progress_index'] = [
    calculate_progress_index(level, progression)
    for level, progression in zip(
        progress_df['max_level'], progress_df['max_progression']
    )
]

print(f"Number of progress entries computed: {len(progress_df)}")
# Export to Excel for debug
progress_df.to_excel("../debug/debug_students_game_progression.xlsx")


Number of games detected: 248
Number of progress entries computed: 248


### 3.2/ Test A VS (B+C)

Is there a difference in game progression between students from group A and students from group B+C?

In [4]:
# Progress index of each experimental group
group_a_progress, group_b_progress, group_c_progress = (
    progress_df.loc[progress_df['group_id'] == label, 'progress_index']
    for label in ('A', 'B', 'C')
)
# Pool B and C into a single comparison group
group_bc_progress = pd.concat([group_b_progress, group_c_progress])

# Descriptive statistics: mean and standard deviation of each side
average_progress_A, std_progress_A = group_a_progress.mean(), group_a_progress.std()
average_progress_BC, std_progress_BC = group_bc_progress.mean(), group_bc_progress.std()

# Gap between the two means, in points and relative to group A's mean
mean_diff = average_progress_BC - average_progress_A
percent_increase = (mean_diff / average_progress_A) * 100

print(
    "=== DESCRIPTIVE STATISTICS ===",
    f"Group A: n = {len(group_a_progress)}, mean = {average_progress_A:.2f}, std = {std_progress_A:.2f}",
    f"Group B+C: n = {len(group_bc_progress)}, mean = {average_progress_BC:.2f}, std = {std_progress_BC:.2f}",
    f".   Difference in mean (B+C - A): {mean_diff:.2f}",
    f".   Percentage increase from Group A: {percent_increase:.2f}%",
    sep="\n",
)

=== DESCRIPTIVE STATISTICS ===
Group A: n = 81, mean = 63.83, std = 23.74
Group B+C: n = 167, mean = 67.41, std = 23.08
.   Difference in mean (B+C - A): 3.58
.   Percentage increase from Group A: 5.61%


In [5]:
# TODO SL

### 3.3/ Test A VS B VS C

Is there a difference in game progression between students from groups A, B, and C?

In [6]:
# Mean progress index per group (group A is the reference)
average_progress_A, average_progress_B, average_progress_C = (
    progress.mean()
    for progress in (group_a_progress, group_b_progress, group_c_progress)
)

# Gap to group A, in points and as a percentage of A's mean
diff_B = average_progress_B - average_progress_A
diff_C = average_progress_C - average_progress_A
percent_increase_B = (diff_B / average_progress_A) * 100
percent_increase_C = (diff_C / average_progress_A) * 100

print(
    "=== DESCRIPTIVE STATISTICS ===",
    f"Group A: n = {len(group_a_progress)}, mean = {average_progress_A:.2f}, std = {group_a_progress.std():.2f}",
    f"Group B: n = {len(group_b_progress)}, mean = {average_progress_B:.2f}, std = {group_b_progress.std():.2f}",
    f".   diff vs A = {diff_B:.2f}, increase = {percent_increase_B:.2f}%",
    f"Group C: n = {len(group_c_progress)}, mean = {average_progress_C:.2f}, std = {group_c_progress.std():.2f}",
    f".   diff vs A = {diff_C:.2f}, increase = {percent_increase_C:.2f}%",
    sep="\n",
)

=== DESCRIPTIVE STATISTICS ===
Group A: n = 81, mean = 63.83, std = 23.74
Group B: n = 94, mean = 65.88, std = 22.19
.   diff vs A = 2.04, increase = 3.20%
Group C: n = 73, mean = 69.40, std = 24.19
.   diff vs A = 5.56, increase = 8.71%


In [7]:
# TODO SL

## 4/ Learning gain [A/B/C]

### 4.1/ Learning gain calculation

The learning gain calculation is based on the `ANSWERS_SCORES` dictionary (see `src/tests_constants.py`).

In [8]:
# Highest reachable total: the best-scoring answer of every question, summed
best_score_per_question = [
    max(points_by_answer.values())
    for points_by_answer in tes_const.ANSWERS_SCORES.values()
]
max_score_general = sum(best_score_per_question)

# Score calculation function
def calculate_score(student, max_score, answers_scores=None):
    """Return a student's test score as a rounded percentage of ``max_score``.

    Args:
        student: mapping of question -> given answer exposing ``.items()``
            (a dict, or a DataFrame row when used with ``apply(axis=1)``).
            Entries whose key is not a scored question, or whose answer is
            not a scored answer for that question, contribute nothing.
        max_score: total that corresponds to 100 %.
        answers_scores: scoring table ``{question: {answer: points}}``.
            Defaults to ``tes_const.ANSWERS_SCORES``.

    Returns:
        ``round(score / max_score * 100)``.
    """
    if answers_scores is None:
        answers_scores = tes_const.ANSWERS_SCORES

    score = 0
    for question, answer in student.items():
        # Short-circuit keeps non-question entries (ids, group, ...) from being looked up
        if question in answers_scores and answer in answers_scores[question]:
            score += answers_scores[question][answer]
    return round((score / max_score) * 100)

def _scored_tests(test_data, group, score_column):
    """Score every test of ``group`` and return the student id / score columns."""
    group_tests = test_data[test_data[tes_const.GROUP_ID_KEY] == group].copy()
    group_tests[score_column] = group_tests.apply(
        calculate_score, axis=1, max_score=max_score_general
    )
    return group_tests[[tes_const.STUDENT_ID_KEY, score_column]]


groups = [tes_const.GROUP_A, tes_const.GROUP_B, tes_const.GROUP_C]
scores_data = {}

for group in groups:
    # Inner join: the student must have a pre-test score AND a post-test score
    scores_df = _scored_tests(pre_test_data, group, 'pre_score').merge(
        _scored_tests(post_test_data, group, 'post_score'),
        on=tes_const.STUDENT_ID_KEY,
        how='inner',
    )
    scores_df['learning_gain'] = scores_df['post_score'] - scores_df['pre_score']
    scores_data[group] = scores_df

    # Number of students listed for this group, used as the denominator of the report
    expected_count = sum(
        1 for s in stu_const.ALL_STUDENTS if s[stu_const.GROUP_ID] == group
    )
    print(f"Group {group}: {len(scores_df)}/{expected_count} students with both pre and post tests")

    # Export
    scores_df.to_excel(f"../debug/debug_learning_gain_{group}.xlsx", index=False)


Group A: 73/81 students with both pre and post tests
Group B: 90/94 students with both pre and post tests
Group C: 72/73 students with both pre and post tests


### 4.2/ Intra-group learning gain analysis

Do students demonstrate learning gains using Pyrates in their respective groups?

In [9]:
# Cut-offs used to label an effect as negligible / small / medium / large
COHENS_D_THRESHOLDS = (0.2, 0.5, 0.8)
WILCOXON_R_THRESHOLDS = (0.1, 0.3, 0.5)


def _interpret_effect_size(value, thresholds):
    """Label the magnitude of an effect size.

    Args:
        value: effect size; only its absolute value is used.
        thresholds: three ascending cut-offs ``(small, medium, large)``.

    Returns:
        "negligible", "small", "medium" or "large".
    """
    small, medium, large = thresholds
    magnitude = abs(value)
    if magnitude < small:
        return "negligible"
    if magnitude < medium:
        return "small"
    if magnitude < large:
        return "medium"
    return "large"


alpha = 0.05
groups = [tes_const.GROUP_A, tes_const.GROUP_B, tes_const.GROUP_C]
for group in groups:
    scores_df = scores_data[group]
    pre_scores = scores_df['pre_score']
    post_scores = scores_df['post_score']
    learning_gains = scores_df['learning_gain']
    print("\n=======================================")
    print(f"GROUP {group} INTRA-GROUP ANALYSIS")
    print("========================================")

    print("\n=== DESCRIPTIVE STATISTICS ===")

    print(f"Pre-test: M = {pre_scores.mean():.2f}, SD = {pre_scores.std():.2f}")
    print(f"Post-test: M = {post_scores.mean():.2f}, SD = {post_scores.std():.2f}")
    print(f"Learning gain: M = {learning_gains.mean():.2f}, SD = {learning_gains.std():.2f}")

    print("\n=== NORMALITY TESTS (Shapiro-Wilk) ===")
    # Shapiro-Wilk test on the pre-post differences: it decides which paired test is reported
    shapiro_gain = stats.shapiro(learning_gains)
    print(f"Shapiro-Wilk test for learning gains: p-value = {shapiro_gain.pvalue:.4f}")

    # Check normality
    normal_distribution = shapiro_gain.pvalue >= alpha
    if normal_distribution:
        print("Learning gains are normally distributed -> Paired T-test is applicable.")
    else:
        print("Learning gains are not normally distributed -> Prefer Wilcoxon test.")

    print("\n=== TEST RESULTS ===")
    # Both tests are always printed; only one of them is used for the conclusion.
    # Paired T-test: the argument order is (pre, post), so t is negative when scores increase.
    t_test = stats.ttest_rel(pre_scores, post_scores)
    print(f"Paired T-test: t = {t_test.statistic:.4f}, p-value = {t_test.pvalue:.4f}")

    # Wilcoxon signed-rank test
    wilcoxon_test = stats.wilcoxon(pre_scores, post_scores)
    print(f"Wilcoxon test: W = {wilcoxon_test.statistic:.4f}, p-value = {wilcoxon_test.pvalue:.4f}")

    # Calculate effect sizes
    n = len(learning_gains)

    # Cohen's d for paired samples (using learning gains)
    cohens_d = learning_gains.mean() / learning_gains.std()

    # r effect size for Wilcoxon, from the z-score matching the two-sided p-value.
    # isf(p / 2) equals ppf(1 - p / 2) but stays finite for very small p-values,
    # where 1 - p / 2 rounds to exactly 1.0 and ppf would return inf.
    # The z-score obtained this way is never negative, so r is an unsigned magnitude.
    z_wilcoxon = stats.norm.isf(wilcoxon_test.pvalue / 2)
    r_wilcoxon = z_wilcoxon / np.sqrt(n)

    # Choose the reported test and effect size based on normality
    if normal_distribution:
        recommended_pvalue = t_test.pvalue
        test_used = "Paired T-test"
        effect_size = cohens_d
        effect_label = "Cohen's d"
        effect_interpretation = _interpret_effect_size(cohens_d, COHENS_D_THRESHOLDS)
    else:
        recommended_pvalue = wilcoxon_test.pvalue
        test_used = "Wilcoxon test"
        effect_size = r_wilcoxon
        effect_label = "r"
        effect_interpretation = _interpret_effect_size(r_wilcoxon, WILCOXON_R_THRESHOLDS)

    # Interpretation
    print(f"Results (using {test_used}) :")

    if recommended_pvalue < alpha:
        print(f"SIGNIFICANT: p-value = {recommended_pvalue:.4f} < alpha = {alpha}")
        if learning_gains.mean() > 0:
            print(f"   Significant learning gain observed (+{learning_gains.mean():.2f} points)")
        else:
            print(f"   Significant learning loss observed ({learning_gains.mean():.2f} points)")
    else:
        print(f"NOT SIGNIFICANT: p-value = {recommended_pvalue:.4f} > alpha = {alpha}")
        print("   No significant learning gain observed")

    # Display effect size
    print("Effect Size")
    print(f"{effect_label} = {effect_size:.4f}")
    print(f"Effect size interpretation: {effect_interpretation}")



GROUP A INTRA-GROUP ANALYSIS

=== DESCRIPTIVE STATISTICS ===
Pre-test: M = 45.14, SD = 16.84
Post-test: M = 55.26, SD = 20.65
Learning gain: M = 10.12, SD = 18.10

=== NORMALITY TESTS (Shapiro-Wilk) ===
Shapiro-Wilk test for learning gains: p-value = 0.0457
Learning gains are not normally distributed -> Prefer Wilcoxon test.

=== TEST RESULTS ===
Paired T-test: t = -4.7788, p-value = 0.0000
Wilcoxon test: W = 320.0000, p-value = 0.0001
Results (using Wilcoxon test) :
SIGNIFICANT: p-value = 0.0001 < alpha = 0.05
   Significant learning gain observed (+10.12 points)
Effect Size
r = 0.4739
Effect size interpretation: medium

GROUP B INTRA-GROUP ANALYSIS

=== DESCRIPTIVE STATISTICS ===
Pre-test: M = 42.89, SD = 19.79
Post-test: M = 52.88, SD = 22.44
Learning gain: M = 9.99, SD = 17.79

=== NORMALITY TESTS (Shapiro-Wilk) ===
Shapiro-Wilk test for learning gains: p-value = 0.0166
Learning gains are not normally distributed -> Prefer Wilcoxon test.

=== TEST RESULTS ===
Paired T-test: t = -5

### 4.3/ Inter-group learning gain comparison

#### 4.3.1/ Test A VS (B+C)

Is there a difference in learning gains between students from group A and those from group B+C?

In [10]:
# Learning gains of group A and of the pooled B+C group
learning_gains_A = scores_data['A']['learning_gain']
learning_gains_BC = pd.concat(
    [scores_data[label]['learning_gain'] for label in ('B', 'C')]
)

# Descriptive statistics: mean and standard deviation of each side
mean_A, std_A = learning_gains_A.mean(), learning_gains_A.std()
mean_BC, std_BC = learning_gains_BC.mean(), learning_gains_BC.std()

# How much lower the B+C mean gain is than A's, in points and relative to A
diff_A_BC = mean_A - mean_BC
percent_reduction_BC = (diff_A_BC / mean_A) * 100

print(
    "=== DESCRIPTIVE STATISTICS ===",
    f"Group A (n={len(learning_gains_A)}): M = {mean_A:.2f}, SD = {std_A:.2f}",
    f"Group B+C (n={len(learning_gains_BC)}): M = {mean_BC:.2f}, SD = {std_BC:.2f}",
    f".   Difference (A - B+C) = {diff_A_BC:.2f}",
    f".   Percentage reduction compared to A = {percent_reduction_BC:.2f}%",
    sep="\n",
)

=== DESCRIPTIVE STATISTICS ===
Group A (n=73): M = 10.12, SD = 18.10
Group B+C (n=162): M = 8.86, SD = 17.47
.   Difference (A - B+C) = 1.27
.   Percentage reduction compared to A = 12.50%


In [11]:
# TODO SL

#### 4.3.2/ Test A VS B VS C

Is there a difference in learning gains between students from groups A, B, and C?

In [12]:
# Learning gains of each group
learning_gains_A, learning_gains_B, learning_gains_C = (
    scores_data[label]['learning_gain'] for label in ('A', 'B', 'C')
)

# Mean gain per group (group A is the reference)
average_gain_A, average_gain_B, average_gain_C = (
    gains.mean() for gains in (learning_gains_A, learning_gains_B, learning_gains_C)
)

# How much lower each mean gain is than A's, in points and relative to A
diff_B_vs_A = average_gain_A - average_gain_B
diff_C_vs_A = average_gain_A - average_gain_C
percent_reduction_B = (diff_B_vs_A / average_gain_A) * 100
percent_reduction_C = (diff_C_vs_A / average_gain_A) * 100

print(
    "\n=== DESCRIPTIVE STATISTICS ===",
    f"Group A (n={len(learning_gains_A)}): M = {average_gain_A:.2f}, SD = {learning_gains_A.std():.2f}",
    f"Group B (n={len(learning_gains_B)}): M = {average_gain_B:.2f}, SD = {learning_gains_B.std():.2f}",
    f".   B vs A: Mean difference = {diff_B_vs_A:.2f}, Percentage reduction = {percent_reduction_B:.2f}%",
    f"Group C (n={len(learning_gains_C)}): M = {average_gain_C:.2f}, SD = {learning_gains_C.std():.2f}",
    f".   C vs A: Mean difference = {diff_C_vs_A:.2f}, Percentage reduction = {percent_reduction_C:.2f}%",
    sep="\n",
)


=== DESCRIPTIVE STATISTICS ===
Group A (n=73): M = 10.12, SD = 18.10
Group B (n=90): M = 9.99, SD = 17.79
.   B vs A: Mean difference = 0.13, Percentage reduction = 1.33%
Group C (n=72): M = 7.44, SD = 17.06
.   C vs A: Mean difference = 2.68, Percentage reduction = 26.46%


In [13]:
# TODO SL