## Step 2 – Questionnaire Summary (SUS, TLX, IMI, UES)

This notebook produces clean tables, figures, and a short numeric summary comparing **Interface A vs B** on the main questionnaire scores: SUS, NASA-TLX, UES engagement, the IMI autonomy and competence subscales, and intention to reuse.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

# One shared seaborn look for every figure drawn in this notebook.
sns.set_theme(style="whitegrid", font_scale=1.2)

# The CSV exports sit one directory above this notebook
# (parent folder: Understanding User Behavior).
BASE = Path("..")

# Questionnaire measures that the later cells tabulate and compare.
psych_cols = [
    "SUS_score",
    "NASA_TLX",
    "UES_engagement",
    "IMI_autonomy",
    "IMI_competence",
    "intention_reuse",
]

# Load the three raw tables in a fixed order: logs, sessions, questionnaires.
logs, sessions, questionnaires = (
    pd.read_csv(BASE / csv_name)
    for csv_name in ("logs.csv", "sessions.csv", "questionnaires.csv")
)

# Distinct (user, interface) pairs observed in the session table.
sessions_unique = sessions.loc[:, ["user_id", "interface_version"]].drop_duplicates()

# Attach interface_version to every questionnaire row, then keep the first row
# for each user x interface_version pair.
# A left join keeps users without any session; their interface_version is NaN,
# and pandas groupby drops NaN keys by default.
# NOTE(review): if a user has sessions in both interface versions, the same
# questionnaire scores end up under both A and B — confirm the study design.
df_q = (
    questionnaires
    .merge(sessions_unique, how="left", on="user_id")
    .drop_duplicates(subset=["user_id", "interface_version"])
    .reset_index(drop=True)
)

df_q[psych_cols].head()

In [None]:
# Pooled descriptives over all rows of df_q (interfaces not separated):
# one row per measure, with columns Mean / SD / N rounded to two decimals.
overall_desc = (
    df_q[psych_cols]
    .agg(["mean", "std", "count"])
    .transpose()
    .rename(columns={"mean": "Mean", "std": "SD", "count": "N"})
    .round(2)
)
overall_desc

In [None]:
# Mean / SD / N of every measure, computed separately per interface version.
by_interface = df_q.groupby("interface_version")[psych_cols].agg(["mean", "std", "count"])

# Collapse the (measure, stat) column MultiIndex into flat "measure_stat" labels.
by_interface.columns = by_interface.columns.map("_".join)

by_interface = by_interface.round(2)
by_interface

In [None]:
# Short numeric summary of the A vs B difference in mean score for each scale.
# One sentence per scale is collected in `summary_lines` and then printed.
scales_for_text = ["SUS_score", "NASA_TLX", "UES_engagement", "IMI_autonomy", "IMI_competence", "intention_reuse"]

# Group once rather than once per scale: rows are interface versions, columns are scales.
means_by_interface = df_q.groupby("interface_version")[scales_for_text].mean()

summary_lines = []
for col in scales_for_text:
    means = means_by_interface[col]
    # .get() falls back to NaN when an interface has no rows in df_q.
    a_mean = means.get("A", float("nan"))
    b_mean = means.get("B", float("nan"))
    diff = b_mean - a_mean

    # NaN compares False against everything, so it must be handled before the
    # sign checks; otherwise a missing interface would be reported as "lower".
    if pd.isna(diff):
        comparison = "difference unavailable: no data for at least one interface"
    elif diff > 0:
        comparison = f"B is {abs(diff):.2f} points higher than A"
    elif diff < 0:
        comparison = f"B is {abs(diff):.2f} points lower than A"
    else:
        comparison = "A and B have the same mean"

    summary_lines.append(
        f"{col}: A = {a_mean:.2f}, B = {b_mean:.2f} ({comparison})."
    )

print("Short numeric summary (A vs B):\n")
for line in summary_lines:
    print("- " + line)

In [None]:
# Bar plots of the mean score (with 95% bootstrap CI error bars) for A vs B,
# one panel per scale listed in `plot_scales`.
plot_scales = ["SUS_score", "NASA_TLX", "UES_engagement", "IMI_autonomy"]
n_plots = len(plot_scales)
fig, axes = plt.subplots(1, n_plots, figsize=(4 * n_plots, 4), sharey=False)

# With a single panel plt.subplots returns a bare Axes rather than an array.
if n_plots == 1:
    axes = [axes]

for ax, col in zip(axes, plot_scales):
    sns.barplot(
        data=df_q,
        x="interface_version",
        y=col,
        # Passing `palette` without `hue` is deprecated in seaborn 0.13, so the
        # x variable is also mapped to hue; dodge=False keeps bars full width.
        hue="interface_version",
        dodge=False,
        errorbar=("ci", 95),
        # Fixed seed so the bootstrapped intervals are identical on every run.
        seed=0,
        palette="Set2",
        ax=ax,
    )
    # The hue legend only repeats the x tick labels; drop it if one was drawn.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()

    ax.set_title(col.replace("_", " "))
    ax.set_xlabel("Interface version")
    ax.set_ylabel("Score")

fig.suptitle("Questionnaire scores by interface (A vs B)", y=1.05)
fig.tight_layout()
plt.show()