In [1]:
import os
import json
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import pingouin as pg

In [2]:
# Direction of the one-sided alternative hypothesis for each metric.
# The value is passed as `alternative` to pg.ttest(x, y, ...) in the t-test
# cell, where x is the experiment group and y is the baseline group; so
# "less" tests experiment < baseline and "greater" tests experiment > baseline.
# NOTE(review): the meaning of the C1 ... C3.5 keys is not visible in this
# notebook -- confirm against the study material.
METRIC_ALTERNATIVE_DICT = {
    "Total Time": "less",
    "Consistency": "greater",
    "Cognitive Load": "less",
    "Confidence": "greater",
    "C1": "greater",
    "C1.1": "greater",
    "C1.2": "greater",
    "C1.3": "greater",
    "C2": "less",
    "C2.1": "less",
    "C2.2": "greater",
    "C2.3": "greater",
    "C3.1": "greater",
    "C3.2": "less",
    "C3.3": "greater",
    "C3.4": "greater",
    "C3.5": "greater"
}

# Metrics analysed by this notebook. Every entry must also be a key of
# METRIC_ALTERNATIVE_DICT, because the t-test cell looks the direction up there.
METRIC_COLUMNS = [
    "Total Time"
]

In [3]:
# Baseline participants carry the odd-numbered IDs U01, U03, ..., U41.
baseline_user_ids = pd.Series(
    [f"U{number:02d}" for number in range(1, 42, 2)],
    name="User ID",
)
baseline_df = pd.DataFrame(columns=METRIC_COLUMNS, index=baseline_user_ids)

# Fill the metric column(s) from the "sum" column of the per-user time stats.
# NOTE(review): the values are copied positionally (`.values`), so this assumes
# the CSV rows are already in the same order as `baseline_user_ids` -- confirm.
baseline_time_stats = pd.read_csv(
    "time_analysis/time/baseline_time_stats.csv"
).set_index("user")
baseline_df.loc[:, METRIC_COLUMNS] = baseline_time_stats.loc[:, ["sum"]].values

In [4]:
# Save the baseline group's metric table (indexed by "User ID") as CSV.
baseline_df.to_csv("time_analysis/baseline.csv")

In [5]:
# Experiment participants carry the even-numbered IDs U02, U04, ..., U42.
experiment_user_ids = pd.Series(
    [f"U{number:02d}" for number in range(2, 43, 2)],
    name="User ID",
)
experiment_df = pd.DataFrame(columns=METRIC_COLUMNS, index=experiment_user_ids)

# Fill the metric column(s) from the "sum" column of the per-user time stats.
# NOTE(review): the values are copied positionally (`.values`), so this assumes
# the CSV rows are already in the same order as `experiment_user_ids` -- confirm.
experiment_time_stats = pd.read_csv(
    "time_analysis/time/experiment_time_stats.csv"
).set_index("user")
experiment_df.loc[:, METRIC_COLUMNS] = experiment_time_stats.loc[:, ["sum"]].values

In [6]:
# Save the experiment group's metric table (indexed by "User ID") as CSV.
experiment_df.to_csv("time_analysis/experiment.csv")

In [7]:
# t-test assumptions
#
# For every analysed metric, print diagnostics for the two assumptions of an
# independent-samples t-test:
#   1. normality of each group (Shapiro-Wilk, and a KS test against a normal
#      distribution fitted to that same group), and
#   2. homogeneity of variance between the groups (Levene and Bartlett).
# Output order per metric: experiment group first, then baseline group.
#
# NOTE(review): the KS test is run against a normal whose mean/std are
# estimated from the very sample being tested (np.std is the population std,
# ddof=0). Its p-value is therefore conservative; the Lilliefors variant is the
# usual correction -- confirm whether that matters for the write-up.

for key in METRIC_COLUMNS:
    # Cast explicitly so scipy receives float arrays regardless of how the
    # frames were filled.
    y = baseline_df.loc[:, key].astype(float).values
    x = experiment_df.loc[:, key].astype(float).values

    print()
    print(key)
    print()

    # 1. normality
    print("normality")
    for sample in (x, y):
        print(stats.shapiro(sample))
        fitted_normal = stats.norm(loc=np.mean(sample), scale=np.std(sample))
        print(stats.kstest(sample, fitted_normal.cdf))
    print()

    # 2. homogeneity of variance
    print("homogeneity of variance")
    print(stats.levene(x, y))
    print(stats.bartlett(x, y))
    print()


Total Time

normality
ShapiroResult(statistic=0.9385712742805481, pvalue=0.20415765047073364)
KstestResult(statistic=0.1580516519430466, pvalue=0.6148846234460839, statistic_location=1890.245, statistic_sign=1)
ShapiroResult(statistic=0.9131842851638794, pvalue=0.06348296254873276)
KstestResult(statistic=0.15453214007436983, pvalue=0.6423118249041351, statistic_location=1087.536, statistic_sign=1)

homogeneity of variance
LeveneResult(statistic=0.9482251468017145, pvalue=0.33602375683725305)
BartlettResult(statistic=2.173988148966523, pvalue=0.1403618076328265)



In [8]:
# Welch's independent-samples t-test (correction=True) of experiment (x) vs.
# baseline (y) for every analysed metric. One row per metric is stored in
# `result_df`: the eight columns returned by pg.ttest plus a `significant` flag.
SIGNIFICANCE_LEVEL = 0.05

result_df = pd.DataFrame(
    index=METRIC_COLUMNS,
    # Must match the column order of pg.ttest's result, followed by the flag:
    # T  dof alternative  p-val  CI95%  cohen-d  BF10  power  significant
    columns=[
        "T",
        "dof",
        "alternative",
        "p-val",
        "CI95%",
        "cohen-d",
        "BF10",
        "power",
        "significant",
    ],
)

for key in METRIC_COLUMNS:
    y = baseline_df.loc[:, key].values.tolist()
    x = experiment_df.loc[:, key].values.tolist()

    hypothesized = METRIC_ALTERNATIVE_DICT[key]
    opposite = "less" if hypothesized == "greater" else "greater"

    # Test the hypothesized direction first; only when that is not significant
    # is the opposite direction tested, and its row then replaces the first.
    # NOTE(review): running both one-sided tests at 0.05 is equivalent to a
    # two-sided test at 0.10 -- confirm this is the intended error rate.
    for alternative in (hypothesized, opposite):
        result = pg.ttest(
            x, y, paired=False, alternative=alternative, correction=True
        )
        sig = result["p-val"].values[0] < SIGNIFICANCE_LEVEL
        print(key, result)
        # Positional copy: relies on pg.ttest returning its columns in the same
        # order as the first eight columns of result_df.
        result_df.loc[key, :] = result.values[0].tolist() + [sig]
        if sig:
            break

Total Time                T        dof alternative     p-val          CI95%   cohen-d  \
T-test  3.127275  36.184874        less  0.998263  [-inf, 810.9]  0.965098   

         BF10     power  
T-test  0.042  0.000001  
Total Time                T        dof alternative     p-val          CI95%   cohen-d  \
T-test  3.127275  36.184874     greater  0.001737  [242.36, inf]  0.965098   

          BF10     power  
T-test  23.716  0.923476  


  return np.clip(_boost._nct_cdf(x, df, nc), 0, 1)
  return np.clip(_boost._nct_sf(x, df, nc), 0, 1)


In [9]:
# Display the per-metric t-test table built in the previous cell.
result_df

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power,significant
Total Time,3.127275,36.184874,greater,0.001737,"[242.36, inf]",0.965098,23.716,0.923476,True


In [10]:
# Keep result/metric_alternative.json in sync with METRIC_ALTERNATIVE_DICT:
# the file is (re)written only when it is missing or its content differs.
metric_alternative_path = "result/metric_alternative.json"

# open(..., "w") does not create parent directories, so make sure the output
# directory exists before anything is written into it.
os.makedirs(os.path.dirname(metric_alternative_path), exist_ok=True)

# None when the file does not exist yet, which always differs from the dict
# below and therefore triggers the initial write.
METRIC_ALTERNATIVE_DICT_FILE = None
if os.path.exists(metric_alternative_path):
    with open(metric_alternative_path, "r") as f:
        METRIC_ALTERNATIVE_DICT_FILE = json.load(f)

if METRIC_ALTERNATIVE_DICT != METRIC_ALTERNATIVE_DICT_FILE:
    with open(metric_alternative_path, "w") as f:
        json.dump(METRIC_ALTERNATIVE_DICT, f, indent=4)

In [11]:
# Save the per-metric t-test table as CSV (the metric name is the index column).
result_df.to_csv("result/t-test.csv")

In [12]:
# Keep only the metrics whose stored test came out significant.
# The explicit comparison with True is kept on purpose instead of using the
# column directly as a mask.
is_significant = result_df["significant"] == True
sig_result = result_df.loc[is_significant]

In [13]:
# Display only the rows of the t-test table flagged as significant.
sig_result

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power,significant
Total Time,3.127275,36.184874,greater,0.001737,"[242.36, inf]",0.965098,23.716,0.923476,True
