![Logo Pyrates LLM](../assets/pyratesllm_logo_500.png)

# **Notebook#03 : Analysis on axis 2**
## [Usage of the digital assistant by learners]

## 1/ Imports

In [None]:
# Internal
import sys
sys.path.append("../src")
import students_constants as stu_const
import interaction_constants as int_const
import tests_constants  as tes_const
import session_date_constants as ses_const

# External
import pandas as pd
from scipy import stats
import numpy as np

## 2/ Data importation

In [None]:
# Load the three interim datasets (interaction logs, pre-test and post-test
# results) produced earlier in the pipeline. The files are pickles: only load
# them from a trusted, project-generated source.
interaction_data, pre_test_data, post_test_data = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "../data/interim/interaction_data.pkl",
        "../data/interim/pre_test_data.pkl",
        "../data/interim/post_test_data.pkl",
    )
)

## 3/ Assistant usage (Q2.1) [B/C]

### Number of assistant calls per game

On average, how many times per game do students call the digital assistant, and does this differ between groups?

In [None]:
# Compare how often the digital assistant is called per game in group B vs
# group C: build a per-game call count (including games with zero calls),
# print descriptive statistics, check test assumptions, then run either an
# independent t-test or a Mann-Whitney U test.

# Local import: only needed to make sure the debug export directory exists.
from pathlib import Path

# Filter only groups B and C
groups_BC_interaction = interaction_data[
    interaction_data[int_const.GROUP_ID_DATA_KEY].isin([int_const.GROUP_B, int_const.GROUP_C])
]

# Filter only assistant help received events
assistant_calls = groups_BC_interaction[
    (groups_BC_interaction[int_const.ACTION_DATA_KEY] == int_const.RECEIVED_ACTION) &
    (groups_BC_interaction[int_const.OBJECT_DATA_KEY] == int_const.ASSISTANT_HELP_OBJECT)
].copy()

# Get all game_id for groups B and C (one row per group/game pair)
all_games = groups_BC_interaction[[int_const.GROUP_ID_DATA_KEY, int_const.GAME_ID_DATA_KEY]].drop_duplicates()

# Count number of assistant calls per game
calls_count = (
    assistant_calls
    .groupby([int_const.GROUP_ID_DATA_KEY, int_const.GAME_ID_DATA_KEY])
    .size()
    .reset_index(name='num_calls')
)

# Left-merge with all games so that games without any call are kept
calls_per_game_group = pd.merge(
    all_games,
    calls_count,
    on=[int_const.GROUP_ID_DATA_KEY, int_const.GAME_ID_DATA_KEY],
    how='left',
)

# Replace NaN (games with no calls) by 0
calls_per_game_group['num_calls'] = calls_per_game_group['num_calls'].fillna(0).astype(int)

# Export to Excel for debug; create the target directory first so the export
# does not fail on a fresh checkout where it does not exist yet.
debug_export_path = Path("../debug/debug_calls_to_assistant_per_game.xlsx")
debug_export_path.parent.mkdir(parents=True, exist_ok=True)
calls_per_game_group.to_excel(debug_export_path)


# Separate B and C, using the same constants as the filter above rather than
# hard-coded column/group literals so both selections always agree.
calls_B = calls_per_game_group[
    calls_per_game_group[int_const.GROUP_ID_DATA_KEY] == int_const.GROUP_B
]['num_calls']
calls_C = calls_per_game_group[
    calls_per_game_group[int_const.GROUP_ID_DATA_KEY] == int_const.GROUP_C
]['num_calls']

# Combine B + C if desired
calls_BC = pd.concat([calls_B, calls_C])

# Descriptive statistics
mean_B = calls_B.mean()
std_B = calls_B.std()
mean_C = calls_C.mean()
std_C = calls_C.std()

difference_BC = mean_B - mean_C
# The relative reduction is undefined when group B never called the assistant.
percent_reduction = (difference_BC / mean_B) * 100 if mean_B != 0 else float("nan")

print("=== DESCRIPTIVE STATISTICS ===")
print(f"Group B (n={len(calls_B)}): M = {mean_B:.2f}, SD = {std_B:.2f}")
print(f"Group C (n={len(calls_C)}): M = {mean_C:.2f}, SD = {std_C:.2f}")
print(f".   Difference (B - C): {difference_BC:.2f}")
print(f".   Percent reduction from B to C: {percent_reduction:.2f}%")

# Significance threshold shared by every test below
alpha = 0.05

# Normality tests
print("\n=== NORMALITY TESTS (Shapiro-Wilk) ===")
shapiro_B = stats.shapiro(calls_B)
shapiro_C = stats.shapiro(calls_C)
normal_B = shapiro_B.pvalue >= alpha
normal_C = shapiro_C.pvalue >= alpha
print(f"Group B: p-value = {shapiro_B.pvalue:.4f} -> {'normally distributed' if normal_B else 'not normally distributed'}")
print(f"Group C: p-value = {shapiro_C.pvalue:.4f} -> {'normally distributed' if normal_C else 'not normally distributed'}")

# Levene test for equal variances
print("\n=== HOMOGENEITY OF VARIANCES (Levene) ===")
levene_p = stats.levene(calls_B, calls_C).pvalue
equal_var = levene_p >= alpha
print(f"Levene p-value = {levene_p:.4f} -> {'variances are similar' if equal_var else 'variances differ'}")

print("\n=== TEST RESULTS ===")
# Choose statistical test: parametric only when both groups look normal and
# their variances are similar, otherwise fall back to a rank-based test.
# NOTE(review): normal data with unequal variances also ends up in the
# Mann-Whitney branch; a Welch t-test could be considered there - confirm.
if normal_B and normal_C and equal_var:
    print("Using independent t-test (parametric)")
    stat, p_value = stats.ttest_ind(calls_B, calls_C, equal_var=True)
else:
    print("Using Mann-Whitney U test (non-parametric)")
    stat, p_value = stats.mannwhitneyu(calls_B, calls_C, alternative='two-sided')

print(f"Test statistic = {stat:.4f}")
print(f"P-value = {p_value:.4f}")

if p_value < alpha:
    print(f"SIGNIFICANT difference: p = {p_value:.4f} < {alpha}")
else:
    print(f"No significant difference: p = {p_value:.4f} >= {alpha}")
