In [5]:
import pandas as pd
from pathlib import Path

# Root directory for the input files: one level above this notebook
# (the "Understanding User Behavior" folder, per the original note).
BASE = Path("..")

# Load the three CSV inputs, each into its own DataFrame.
logs = pd.read_csv(BASE.joinpath("logs.csv"))
sessions = pd.read_csv(BASE.joinpath("sessions.csv"))
questionnaires = pd.read_csv(BASE.joinpath("questionnaires.csv"))

In [6]:
# Attach interface_version to the questionnaire scores.
#
# `sessions` is reduced to its distinct (user_id, interface_version) pairs and
# left-joined onto `questionnaires` by user_id. The result is one row per
# (user, interface_version) pair, NOT one row per user: a user with sessions
# on more than one version appears once per version, with the same
# questionnaire scores repeated on each of those rows.
# NOTE(review): this assumes `questionnaires` holds one row per user -- confirm.

sessions_unique = sessions[["user_id", "interface_version"]].drop_duplicates()

df_q = questionnaires.merge(
    sessions_unique,
    on="user_id",
    how="left"
)

# Make the fan-out visible instead of silent: repeated users contribute
# identical scores to several interface_version groups, which pulls any
# per-version statistics computed from df_q towards each other.
n_multi_version_users = int(sessions_unique["user_id"].value_counts().gt(1).sum())
if n_multi_version_users:
    print(
        f"Note: {n_multi_version_users} user(s) have sessions on more than one "
        f"interface_version; their questionnaire rows are repeated in df_q "
        f"({len(questionnaires)} -> {len(df_q)} rows)."
    )

df_q.head()

Unnamed: 0,user_id,SUS_score,NASA_TLX,UES_engagement,IMI_autonomy,IMI_competence,intention_reuse,interface_version
0,1,66.07639,41.211478,4.832314,6.001214,4.962365,4.087211,A
1,1,66.07639,41.211478,4.832314,6.001214,4.962365,4.087211,B
2,2,71.015473,26.283252,3.751513,4.391925,6.103302,4.312298,A
3,2,71.015473,26.283252,3.751513,4.391925,6.103302,4.312298,B
4,3,67.652882,69.48617,6.051152,2.761875,5.114228,4.985865,B


In [7]:
# Descriptive statistics of the questionnaire measures, overall and per
# interface version.
psych_cols = ["SUS_score", "NASA_TLX", "UES_engagement",
              "IMI_autonomy", "IMI_competence", "intention_reuse"]

# Overall: count, mean, std, min, quartiles, max over all rows of df_q.
overall_stats = df_q[psych_cols].describe()

# Grouped by interface version
stats_by_version = df_q.groupby("interface_version")[psych_cols].agg(["mean", "std", "min", "max"])

# A cell only renders its last expression automatically, so the overall table
# has to be displayed explicitly; otherwise it is computed and thrown away.
# `display` is provided by the IPython kernel, no import needed in a notebook.
display(overall_stats)
stats_by_version

Unnamed: 0_level_0,SUS_score,SUS_score,SUS_score,SUS_score,NASA_TLX,NASA_TLX,NASA_TLX,NASA_TLX,UES_engagement,UES_engagement,...,IMI_autonomy,IMI_autonomy,IMI_competence,IMI_competence,IMI_competence,IMI_competence,intention_reuse,intention_reuse,intention_reuse,intention_reuse
Unnamed: 0_level_1,mean,std,min,max,mean,std,min,max,mean,std,...,min,max,mean,std,min,max,mean,std,min,max
interface_version,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
A,68.561829,10.018537,35.587327,95.0,45.907796,13.53234,13.141564,77.847044,4.433699,0.993034,...,2.103113,7.0,5.047417,0.904166,3.159126,7.0,4.87313,0.948802,2.276121,7.0
B,68.274227,9.991502,35.587327,95.0,46.179705,13.463582,13.141564,77.847044,4.417067,1.003302,...,2.103113,7.0,5.049131,0.914793,3.159126,7.0,4.876591,0.959273,2.276121,7.0


In [None]:
import matplotlib.pyplot as plt

# Measures to plot, in display order; one figure is drawn per measure.
psych_cols = [
    "SUS_score", "NASA_TLX",
    "IMI_autonomy", "IMI_competence",
    "UES_engagement", "intention_reuse",
]

for col in psych_cols:
    # Mean and standard deviation per interface version, rows ordered A then B.
    stats = df_q.groupby("interface_version")[col].agg(["mean", "std"]).reindex(["A", "B"])

    # Bar height = group mean, error bar = one standard deviation.
    fig, ax = plt.subplots(figsize=(4, 4))
    ax.bar(
        stats.index,
        stats["mean"],
        yerr=stats["std"],
        capsize=5,
        color=["#4C72B0", "#55A868"],
    )
    ax.set_ylabel(col)
    ax.set_xlabel("Interface version")
    ax.set_title(f"{col} by interface version (mean ± SD)")
    fig.tight_layout()
    plt.show()