# Start 

We begin with a leave-one-out participant to get for every participant a number of wrong selections and how many it would be with the help of TCNAE.

In [1]:
# save folder
import os
import utils

save_folder = utils.create_unique_folder('Results/PowerMeasurements')

In [2]:
import utils
from TCN import *
import torch
import numpy as np
import json
import train

model_folder = "Results/LOSO_a200_b300_f44_seq43_finalRounds/Models"
model_base_name = "TCNAE_1"
conditions = ["gaze", "headAndGaze", "nod"]

fps = 90
f = 44
b = 300
a = 200

for cond in conditions:
    print(f"Condition: {cond}")
    model_name = f"{model_base_name}_{cond}"
    with open(os.path.join(model_folder, model_name + "_info.json"), 'r') as file:
        model_info = json.load(file)

    angles_correct = np.load(f"../Data/Dataset_Prepare/angles_fps{fps}_{cond}_Correct_f{f}_b{b}_a{a}_finalRounds.npy")
    angles_incorrect = np.load(f"../Data/Dataset_Prepare/angles_fps{fps}_{cond}_Incorrect_f{f}_b{b}_a{a}_finalRounds.npy")
    names_correct = np.load(f"../Data/Dataset_Prepare/names_fps{fps}_{cond}_Correct_f{f}_b{b}_a{a}_finalRounds.npy")
    names_incorrect = np.load(f"../Data/Dataset_Prepare/names_fps{fps}_{cond}_Incorrect_f{f}_b{b}_a{a}_finalRounds.npy")

    
    pat_names = np.unique(names_correct)
    save_folder_condition = utils.create_unique_folder(os.path.join(save_folder, cond))
    for k, name in enumerate(pat_names):
        train_fold = angles_correct[names_correct != name]
        test_fold_correct = angles_correct[names_correct == name]
        test_fold_incorrect = angles_incorrect[names_incorrect == name]

        model = TCNAE(**model_info["model_parameter"])
        model, _ = train.train_autoencoder(
            model=model,
            train_data=train_fold,
            batch_size=model_info["train_parameter"]["batch_size"],
            num_epochs=model_info["train_parameter"]["num_epochs"],
            learning_rate=model_info["train_parameter"]["learning_rate"],
            criterion=torch.nn.MSELoss(),
            use_gpu=model_info["train_parameter"]["use_gpu"],
            desc_tqdm=f"Participant {k+1}/{len(pat_names)} {cond}"
        )

        mse_train, mse_correct, mse_incorrect = train.test_autoencoder(
            train_samples=train_fold,
            correct_samples=test_fold_correct,
            incorrect_samples=test_fold_incorrect,
            model=model,
            use_gpu=model_info["train_parameter"]["use_gpu"],
            batch_size=model_info["train_parameter"]["batch_size"]
        )

        t = np.percentile(mse_train.cpu().numpy(), model_info["th_perc"])

        results = {
            "participant": name,
            "mse_correct": mse_correct.cpu().numpy().tolist(),
            "mse_incorrect": mse_incorrect.cpu().numpy().tolist() if mse_incorrect is not None else [],
            "threshold": t
        }
        with open(os.path.join(save_folder_condition, f"Results_{name}.json"), 'w') as file:
            json.dump(results, file, indent=4)

    

Condition: gaze


  param_schemas = callee.param_schemas()
  param_schemas = callee.param_schemas()
Participant 1/52 gaze: 100%|██████████| 400/400 [00:27<00:00, 14.39it/s, Loss=0.0406]
Participant 2/52 gaze: 100%|██████████| 400/400 [00:20<00:00, 19.13it/s, Loss=0.0404]
Participant 3/52 gaze: 100%|██████████| 400/400 [00:18<00:00, 21.69it/s, Loss=0.052] 
Participant 4/52 gaze: 100%|██████████| 400/400 [00:18<00:00, 22.11it/s, Loss=0.0567]
Participant 5/52 gaze: 100%|██████████| 400/400 [00:17<00:00, 22.66it/s, Loss=0.0515]
Participant 6/52 gaze: 100%|██████████| 400/400 [00:18<00:00, 22.04it/s, Loss=0.0592]
Participant 7/52 gaze: 100%|██████████| 400/400 [00:17<00:00, 22.22it/s, Loss=0.0516]
Participant 8/52 gaze: 100%|██████████| 400/400 [00:17<00:00, 22.23it/s, Loss=0.043] 
Participant 9/52 gaze: 100%|██████████| 400/400 [00:18<00:00, 22.02it/s, Loss=0.0608]
Participant 10/52 gaze: 100%|██████████| 400/400 [00:17<00:00, 22.29it/s, Loss=0.0438]
Participant 11/52 gaze: 100%|██████████| 400/400 [00:19<0

Condition: headAndGaze


Participant 1/52 headAndGaze: 100%|██████████| 400/400 [00:19<00:00, 20.11it/s, Loss=0.0751]
Participant 2/52 headAndGaze: 100%|██████████| 400/400 [00:18<00:00, 21.29it/s, Loss=0.0973]
Participant 3/52 headAndGaze: 100%|██████████| 400/400 [00:20<00:00, 19.30it/s, Loss=0.0828]
Participant 4/52 headAndGaze: 100%|██████████| 400/400 [00:17<00:00, 22.46it/s, Loss=0.0775]
Participant 5/52 headAndGaze: 100%|██████████| 400/400 [00:17<00:00, 23.17it/s, Loss=0.0659]
Participant 6/52 headAndGaze: 100%|██████████| 400/400 [00:20<00:00, 19.46it/s, Loss=0.0771]
Participant 7/52 headAndGaze: 100%|██████████| 400/400 [00:18<00:00, 21.80it/s, Loss=0.0904]
Participant 8/52 headAndGaze: 100%|██████████| 400/400 [00:18<00:00, 21.40it/s, Loss=0.0742]
Participant 9/52 headAndGaze: 100%|██████████| 400/400 [00:20<00:00, 19.77it/s, Loss=0.0801]
Participant 10/52 headAndGaze: 100%|██████████| 400/400 [00:18<00:00, 21.07it/s, Loss=0.0786]
Participant 11/52 headAndGaze: 100%|██████████| 400/400 [00:19<00:00,

Condition: nod


Participant 1/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.64it/s, Loss=0.273]
Participant 2/52 nod: 100%|██████████| 400/400 [00:17<00:00, 23.08it/s, Loss=0.312]
Participant 3/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.50it/s, Loss=0.23] 
Participant 4/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.45it/s, Loss=0.228]
Participant 5/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.79it/s, Loss=0.267]
Participant 6/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.59it/s, Loss=0.229]
Participant 7/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.43it/s, Loss=0.228]
Participant 8/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.58it/s, Loss=0.23] 
Participant 9/52 nod: 100%|██████████| 400/400 [00:17<00:00, 23.10it/s, Loss=0.227]
Participant 10/52 nod: 100%|██████████| 400/400 [00:17<00:00, 23.18it/s, Loss=0.258]
Participant 11/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.73it/s, Loss=0.237]
Participant 12/52 nod: 100%|██████████| 400/400 [00:17<00:00, 22.53it/s, L

# Check errors and estimate effect size

In [3]:
import os
import json
import numpy as np


datapath = 'Results/PowerMeasurements'

conditions = ['gaze', 'headAndGaze', 'nod']
corrects = {x: {'with': [], 'without': []} for x in conditions}
incorrects = {x: {'with': [], 'without': []} for x in conditions}

# cond = conditions[0]
for cond in conditions:
    datapath_condition = os.path.join(datapath, cond)

    files = os.listdir(datapath_condition)

    file = files[2]
    for file in files:

        with open(os.path.join(datapath_condition, file), 'r') as f:
            data = json.load(f)

        incorrect_without = len(data['mse_incorrect'])
        incorrect_with = np.sum(np.array(data['mse_incorrect']) < data['threshold']) # less, because that is the number of incorrect selections even with the system, so the mse is smaller then the threshold even it is a incorrect selection

        incorrects[cond]['with'].append(incorrect_with)
        incorrects[cond]['without'].append(incorrect_without)

        corrects_without = len(data['mse_correct'])
        corrects_with = np.sum(np.array(data['mse_correct']) <= data['threshold'])

        corrects[cond]['with'].append(corrects_with)
        corrects[cond]['without'].append(corrects_without)

print(incorrects)


{'gaze': {'with': [0, 0, 1, 0, 2, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], 'without': [0, 0, 5, 1, 2, 1, 0, 5, 0, 3, 11, 4, 3, 1, 6, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 7, 3, 10, 0, 5, 2, 3, 2, 1, 0, 0, 4, 0, 0, 1, 3, 0, 2, 1, 0, 0, 2, 0, 0, 9, 0, 0]}, 'headAndGaze': {'with': [0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 1, 0, 3, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'without': [0, 1, 0, 0, 6, 0, 0, 2, 1, 2, 2, 0, 3, 0, 1, 0, 0, 2, 0, 1, 3, 0, 0, 0, 1, 6, 0, 0, 1, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 0, 12, 0, 0]}, 'nod': {'with': [0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 2, 0, 0, 1, 0, 2, 0, 3, 4, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0], 'without': [0, 3, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 0, 0, 0, 0, 1, 4, 7, 0, 4, 2, 0, 2, 0, 7, 3, 3, 8, 1,

In [4]:
cond = conditions[0]

calc_d = lambda a, b, c, d: (np.array(a) - np.array(b)) + (np.array(c) - np.array(d))

d = calc_d(corrects[cond]['with'], corrects[cond]['without'], incorrects[cond]['without'], incorrects[cond]['with'])
d

# np.array(corrects[cond]['without']) - np.array(corrects[cond]['with'])

array([ -1,  -1,  -6, -10,  -1,  -9, -15,   0,  -3,  -2,   6,  -1,   2,
         1,   3,   1,  -9,   0,  -2,  -8,  -4,  -9,   0,  -4, -10,  -6,
         2,   2,  -3,  -4,   0,   1,  -2,  -1,  -6,  -2,   1,  -2, -17,
        -4,   2,  -1,  -8,  -1, -24,  -3,  -4,   0,  -3,   2,  -2, -33])

In [5]:
import numpy as np
from scipy.stats import ttest_ind, ttest_rel, wilcoxon
conditions = ["gaze", "headAndGaze", "nod"]
# cond = conditions[0]
for cond in conditions:
    print(f"Condition: {cond}")
    # Example data for independent samples
    x = np.array(incorrects[cond]['with'])
    y = np.array(incorrects[cond]['without'])

    # Perform one-sided t-test (x < y)
    # t_stat, p_value = ttest_rel(x, y, alternative='less')
    t_stat, p_value = wilcoxon(x, y, alternative='less')

    print(f"T-statistic: {t_stat:.3f}")
    print(f"P-value: {p_value:.3f}")

    if p_value < 0.05:
        print("Reject the null hypothesis: x is significantly less than y")
    else:
        print("Fail to reject the null hypothesis: x is not significantly less than y")
    
    print()

Condition: gaze
T-statistic: 0.000
P-value: 0.000
Reject the null hypothesis: x is significantly less than y

Condition: headAndGaze
T-statistic: 0.000
P-value: 0.000
Reject the null hypothesis: x is significantly less than y

Condition: nod
T-statistic: 0.000
P-value: 0.000
Reject the null hypothesis: x is significantly less than y



In [6]:
p_value

3.145332252154177e-05

In [7]:
# Estimate effectsize for power analysis
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats.power import TTestIndPower

for cond in conditions:
    print(f"Condition: {cond}")
    # Example data
    group1 = np.array(incorrects[cond]['without'])
    group2 = np.array(incorrects[cond]['with'])

    # Calculate means and standard deviations
    mean1 = np.mean(group1)
    mean2 = np.mean(group2)
    std1 = np.std(group1, ddof=1)
    std2 = np.std(group2, ddof=1)

    # Calculate pooled standard deviation
    n1 = len(group1)
    n2 = len(group2)
    pooled_std = np.sqrt(((n1 - 1) * std1**2 + (n2 - 1) * std2**2) / (n1 + n2 - 2))

    # Calculate Cohen's d
    cohen_d = (mean1 - mean2) / pooled_std
    print(f"Mean incorrect with: {mean2:.3f}")
    print(f"Mean incorrect without: {mean1:.3f}")
    print(f"Cohen's d: {cohen_d:.3f}")

    # Perform power analysis
    alpha = 0.05  # significance level
    power = 0.80  # desired power
    analysis = TTestIndPower()
    sample_size = analysis.solve_power(effect_size=cohen_d, alpha=alpha, power=power, alternative='larger')
    print(f"Required sample size per group: {sample_size:.0f}")
    print()

Condition: gaze
Mean incorrect with: 0.288
Mean incorrect without: 1.923
Cohen's d: 0.836
Required sample size per group: 18

Condition: headAndGaze
Mean incorrect with: 0.269
Mean incorrect without: 1.115
Cohen's d: 0.486
Required sample size per group: 53

Condition: nod
Mean incorrect with: 0.558
Mean incorrect without: 1.308
Cohen's d: 0.489
Required sample size per group: 52



In [8]:
sample_size

52.392351556853164

In [9]:
for cond in conditions:
    x = np.array(incorrects[cond]['with'])
    y = np.array(incorrects[cond]['without'])
    print(x)
    print(y)

[0 0 1 0 2 1 0 0 0 1 1 1 0 0 2 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 0
 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0]
[ 0  0  5  1  2  1  0  5  0  3 11  4  3  1  6  1  1  0  0  0  0  0  0  1
  0  7  3 10  0  5  2  3  2  1  0  0  4  0  0  1  3  0  2  1  0  0  2  0
  0  9  0  0]
[0 0 0 0 1 0 0 1 0 2 1 0 3 0 0 0 0 1 0 0 2 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
[ 0  1  0  0  6  0  0  2  1  2  2  0  3  0  1  0  0  2  0  1  3  0  0  0
  1  6  0  0  1  0  0  0  0  9  0  0  0  0  0  0  4  0  0  0  1  0  0  0
  0 12  0  0]
[0 3 0 0 1 0 0 0 0 0 1 0 0 0 0 0 2 2 0 0 0 0 0 0 0 1 2 0 2 0 0 1 0 2 0 3 4
 0 0 1 0 0 0 0 0 1 0 1 0 2 0 0]
[0 3 0 1 1 1 0 1 0 0 1 0 0 0 0 0 3 4 0 1 0 0 0 0 1 4 7 0 4 2 0 2 0 7 3 3 8
 1 0 2 0 1 0 0 0 2 0 1 0 2 0 2]


In [10]:
d

array([ -1,  -1,  -6, -10,  -1,  -9, -15,   0,  -3,  -2,   6,  -1,   2,
         1,   3,   1,  -9,   0,  -2,  -8,  -4,  -9,   0,  -4, -10,  -6,
         2,   2,  -3,  -4,   0,   1,  -2,  -1,  -6,  -2,   1,  -2, -17,
        -4,   2,  -1,  -8,  -1, -24,  -3,  -4,   0,  -3,   2,  -2, -33])

In [11]:
import numpy as np
from scipy.stats import wilcoxon, norm
from statsmodels.stats.power import NormalIndPower


for cond in conditions:
    print(f"Condition: {cond}")

    # Example data for paired samples
    x = np.array(incorrects[cond]['with'])
    y = np.array(incorrects[cond]['without'])

    # Perform one-sided Wilcoxon signed-rank test (x < y)
    # d = x - y
    # stat, p_value = wilcoxon(d, alternative='less')
    stat, p_value = wilcoxon(x, y, alternative='less')

    print(f"Wilcoxon statistic: {stat:.3f}")
    print(f"P-value: {p_value:.3f}")

    if p_value < 0.05:
        print("Reject the null hypothesis: x is significantly less than y")
    else:
        print("Fail to reject the null hypothesis: x is not significantly less than y")

    # Calculate effect size (rank-biserial correlation)
    n = len(x)
    rank_biserial = (stat - (n * (n + 1) / 4)) / np.sqrt((n * (n + 1) * (2 * n + 1)) / 24)
    print(f"Rank-biserial correlation: {rank_biserial:.3f}")

    # Calculate standardized test statistic (r)
    z = (stat - (n * (n + 1) / 4)) / np.sqrt((n * (n + 1) * (2 * n + 1)) / 24)
    r = z / np.sqrt(n)
    print(f"Standardized test statistic (r): {r:.3f}")

    # Perform power analysis to estimate required sample size
    alpha = 0.05  # significance level
    power = 0.80   # desired power
    effect_size = np.abs(r)  # use rank-biserial correlation as effect size

    analysis = NormalIndPower()
    sample_size = analysis.solve_power(effect_size=effect_size, alpha=alpha, power=power, nobs1=None, ratio=1.0, alternative='larger')
    print(f"Required sample size per group: {sample_size:.0f}")

    # Compare power for a fixed sample size
    fixed_sample_size = 50
    power_estimate = analysis.solve_power(effect_size=effect_size, alpha=alpha, nobs1=fixed_sample_size, alternative='larger')
    print(f"Power for sample size {fixed_sample_size}: {power_estimate:.3f}")

    print()

Condition: gaze
Wilcoxon statistic: 0.000
P-value: 0.000
Reject the null hypothesis: x is significantly less than y
Rank-biserial correlation: -6.275
Standardized test statistic (r): -0.870
Required sample size per group: 16
Power for sample size 50: 0.997

Condition: headAndGaze
Wilcoxon statistic: 0.000
P-value: 0.000
Reject the null hypothesis: x is significantly less than y
Rank-biserial correlation: -6.275
Standardized test statistic (r): -0.870
Required sample size per group: 16
Power for sample size 50: 0.997

Condition: nod
Wilcoxon statistic: 0.000
P-value: 0.000
Reject the null hypothesis: x is significantly less than y
Rank-biserial correlation: -6.275
Standardized test statistic (r): -0.870
Required sample size per group: 16
Power for sample size 50: 0.997



In [32]:
import statsmodels.stats.power as smp

cond = conditions[0]

for cond in conditions:
    print(f"Condition: {cond}")

    errors_with = np.array(incorrects[cond]['with'])
    errors_without = np.array(incorrects[cond]['without'])

    not_zero_errors_idx = np.where(errors_without != 0)
    errors_with = errors_with[not_zero_errors_idx]
    errors_without = errors_without[not_zero_errors_idx]

    diff = errors_without - errors_with

    r = np.median(diff) / np.std(diff)
    print(f"Median difference: {np.median(diff):.3f}")
    print(f"Effect size: {r:.3f}")

    # Define parameters
    effect_size = r    
    alpha = 0.05       
    power = 0.95        
    alternative = 'larger'  # Because we expect fewer incorrect selections

    # Compute sample size
    for power in [0.8, 0.9, 0.95]:
        sample_size = smp.TTestIndPower().solve_power(effect_size, alpha=alpha, power=power, alternative=alternative)

        sample_size = int(round(sample_size))
        print(f"Required sample size for power {power:.2f}: {sample_size}")
    print()
    print()

Condition: gaze
Median difference: 2.000
Effect size: 0.707
Required sample size for power 0.80: 25
Required sample size for power 0.90: 35
Required sample size for power 0.95: 44


Condition: headAndGaze
Median difference: 1.000
Effect size: 0.311
Required sample size for power 0.80: 129
Required sample size for power 0.90: 178
Required sample size for power 0.95: 225


Condition: nod
Median difference: 1.000
Effect size: 0.704
Required sample size for power 0.80: 26
Required sample size for power 0.90: 35
Required sample size for power 0.95: 44




In [31]:
errors_with = np.array(incorrects[cond]['with'])
errors_without = np.array(incorrects[cond]['without'])
print(errors_with)
print(errors_without)

diff = errors_without - errors_with
print(diff)

[0 3 0 0 1 0 0 0 0 0 1 0 0 0 0 0 2 2 0 0 0 0 0 0 0 1 2 0 2 0 0 1 0 2 0 3 4
 0 0 1 0 0 0 0 0 1 0 1 0 2 0 0]
[0 3 0 1 1 1 0 1 0 0 1 0 0 0 0 0 3 4 0 1 0 0 0 0 1 4 7 0 4 2 0 2 0 7 3 3 8
 1 0 2 0 1 0 0 0 2 0 1 0 2 0 2]
[0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 1 2 0 1 0 0 0 0 1 3 5 0 2 2 0 1 0 5 3 0 4
 1 0 1 0 1 0 0 0 1 0 0 0 0 0 2]
