In [10]:
import os
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

matplotlib.use("Qt5Agg")

study = "data/adhd"
subject_id = 'P5'
subject = os.path.join(study, subject_id)

# Task folders are named "t<1-3>_<yellow|green>..." and score files start with
# the prefix below. (The previous patterns ended in a glob-style "*", which in
# a regex only makes the last letter optional/repeatable.)
TASK_DIR_PATTERN = re.compile(r"t[1-3]_(yellow|green)")
METRICS_FILE_PREFIX = "output_PerformanceMetrics_score"
CONDITIONS = ("standard", "gamified")


def _sorted_entries(path, want_dirs):
    """Return the directory (or file) entries of ``path`` sorted by name.

    ``os.scandir`` yields entries in arbitrary order, so sorting keeps the
    row order of the loaded tables reproducible between runs and machines.
    """
    with os.scandir(path) as entries:
        selected = [e for e in entries if (e.is_dir() if want_dirs else e.is_file())]
    return sorted(selected, key=lambda e: e.name)


def load_performance_metrics(study_dir):
    """Load every performance-metrics score file found under ``study_dir``.

    The expected layout is ``study_dir/<subject>/<group>/<task>/<score file>``.
    Only groups whose name starts with "standard" or "gamified" and tasks
    matching ``TASK_DIR_PATTERN`` are read. Each loaded table is annotated
    with ``subject``, ``task``, ``group`` and ``session`` columns.

    Returns:
        (data_standard, data_game): one DataFrame per condition, each with a
        fresh 0..n-1 index. A condition without any file yields an empty
        DataFrame.
    """
    frames = {condition: [] for condition in CONDITIONS}

    for subject_dir in _sorted_entries(study_dir, want_dirs=True):
        for group_dir in _sorted_entries(subject_dir.path, want_dirs=True):
            condition = next((c for c in CONDITIONS if group_dir.name.startswith(c)), None)
            if condition is None:
                continue  # not one of the two experimental conditions
            for task_dir in _sorted_entries(group_dir.path, want_dirs=True):
                if not TASK_DIR_PATTERN.match(task_dir.name):
                    continue
                for score_file in _sorted_entries(task_dir.path, want_dirs=False):
                    if not score_file.name.startswith(METRICS_FILE_PREFIX):
                        continue
                    data = pd.read_csv(score_file.path)
                    data['subject'] = subject_dir.name
                    data['task'] = task_dir.name
                    data['group'] = group_dir.name
                    data['session'] = data['n_classes'].astype(str) + '_class'
                    frames[condition].append(data)

    # Concatenate once per condition instead of growing a DataFrame in the loop.
    def _combine(parts):
        return pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()

    return _combine(frames['standard']), _combine(frames['gamified'])


data_standard, data_game = load_performance_metrics(study)

data_standard

Unnamed: 0,start_ts,stop_ts,n_classes,corr_classified,miss_classified,itr,accuracy,elapsed_time_s,subject,task,group,session
0,17-5-2024 12:35:18,17-5-2024 12:35:46,2,8,0,17.1,1.0,28.0,P10,t1_yellow,standard,2_class
0,17-5-2024 12:37:08,17-5-2024 12:37:37,4,8,3,12.0,0.727,29.0,P10,t2_green,standard,4_class
0,17-5-2024 12:41:38,17-5-2024 12:42:16,8,8,3,17.3,0.727,38.6,P10,t3_green,standard,8_class
0,17-5-2024 14:43:13,17-5-2024 14:43:39,2,8,1,9.02,0.889,26.4,P11,t1_green,standard,2_class
0,17-5-2024 14:46:34,17-5-2024 14:47:27,4,8,2,8.7,0.8,53.0,P11,t2_green,standard,4_class
0,17-5-2024 14:49:25,17-5-2024 14:49:59,8,8,2,24.4,0.8,33.8,P11,t3_green,standard,8_class
0,15-5-2024 14:29:14,15-5-2024 14:30:36,4,8,1,7.74,0.889,81.9,P5,t2_yellow,standard,4_class
0,15-5-2024 16:08:29,15-5-2024 16:09:26,2,8,1,4.22,0.889,56.5,P6,t1_yellow,standard,2_class
0,15-5-2024 16:10:45,15-5-2024 16:12:01,4,8,7,1.65,0.533,76.6,P6,t2_green,standard,4_class
0,15-5-2024 16:15:44,15-5-2024 16:16:56,8,8,1,14.5,0.889,72.2,P6,t3_green,standard,8_class


## Create tables for statistical analysis

In [46]:
# Analysis table covering both conditions: the gamified rows come first,
# followed by the standard rows, with columns renamed for the statistics step.
source_columns = ['subject', 'session', 'accuracy', 'task', 'elapsed_time_s', 'group']
renamed_columns = {
    'session': 'condition',
    'task': 'classifier',
    'elapsed_time_s': 'elapsed_time',
}

# Select the needed columns in their final order, then rename them.
table = data_game[source_columns].rename(columns=renamed_columns)
table2 = data_standard[source_columns].rename(columns=renamed_columns)

# Stack both conditions and renumber the rows from 0.
complete_table = pd.concat([table, table2]).reset_index(drop=True)

complete_table.to_csv('data/adhd/gamified_validation_table.csv', index=False)
complete_table

Unnamed: 0,subject,condition,accuracy,classifier,elapsed_time,group
0,P10,8_class,0.889,t3_green,45.1,gamified
1,P11,2_class,1.0,t1_green,23.7,gamified
2,P11,4_class,0.8,t2_yellow,31.0,gamified
3,P11,8_class,0.667,t3_green,39.6,gamified
4,P5,2_class,0.889,t1_yellow,26.1,gamified
5,P5,4_class,0.8,t2_green,111.0,gamified
6,P5,8_class,0.667,t3_yellow,150.0,gamified
7,P6,2_class,1.0,t1_yellow,42.5,gamified
8,P6,8_class,0.444,t3_green,110.0,gamified
9,P7,2_class,1.0,t1_green,27.5,gamified


In [80]:
# Summary table for classifier quality:
# for each condition and task, count how many runs used a green vs. a yellow
# classifier (task folders are named "<task>_green" / "<task>_yellow").
dataset = {'standard': data_standard, 'game': data_game}
groups = ['standard', 'game']
tasks = ['t1', 't2', 't3']

# Collect one record per (group, task) and build the DataFrame once, rather
# than concatenating single-row frames onto an empty frame inside the loop.
rows = []
for group_name in groups:
    task_names = dataset[group_name]['task']
    for task_name in tasks:
        rows.append({
            'group': group_name,
            'task': task_name,
            'green': int((task_names == task_name + '_green').sum()),
            'yellow': int((task_names == task_name + '_yellow').sum()),
        })

classifiers = pd.DataFrame(rows, columns=['group', 'task', 'green', 'yellow'])
classifiers.to_csv('data/adhd/classifier_quality.csv', index=False)
classifiers

Unnamed: 0,group,task,green,yellow
0,standard,t1,2,4
0,standard,t2,5,1
0,standard,t3,4,1
0,game,t1,3,3
0,game,t2,2,2
0,game,t3,4,2


## Plotting the accuracy for each group

In [43]:
# Side-by-side bar charts of the mean task accuracy per number of classes,
# one panel per condition (standard on the left, gamified on the right).
dataset = {'standard': data_standard, 'game': data_game}
# Values of n_classes used to split the data. This list also positions the
# bars and labels the printout, so it must match the filters and the x-ticks
# (it was previously [2, 4, 6], which misplaced and mislabelled the 8-class bar).
classes = [2, 4, 8]
metric = 'accuracy'
colors = ['red', 'orange', 'pink']  # Define colors for each bar
labels = ['2 Classes', '4 Classes', '8 classes']  # Define labels for each bar


def plot_accuracy_panel(ax, group, condition_label, classes=classes, metric=metric):
    """Draw one bar per class count on ``ax`` and print the mean ± std.

    Args:
        ax: matplotlib Axes to draw on.
        group: DataFrame with an ``n_classes`` column and a ``metric`` column.
        condition_label: text appended to the panel title, e.g. "Standard".
        classes: values of ``n_classes`` to summarise, in plotting order.
        metric: name of the column to average.

    Returns:
        (measures, stds): lists with the mean and the standard deviation
        (as computed by ``np.std``) of ``metric`` for each class count.
    """
    subsets = [group[group['n_classes'] == n] for n in classes]
    measures = [np.mean(subset[metric]) for subset in subsets]
    stds = [np.std(subset[metric]) for subset in subsets]

    for n, mean, std in zip(classes, measures, stds):
        print(f"Mean accuracy for {n} classes: {mean:.2f} ± {std:.2f}")

    # Clamp the error bars so that mean + std never exceeds an accuracy of 100%.
    cstds = [min(std, 1 - mean) for mean, std in zip(measures, stds)]

    # One bar per class count, then a line connecting the means.
    for i in range(len(classes)):
        ax.bar(classes[i], measures[i], yerr=cstds[i], color=colors[i], alpha=0.95,
               label=labels[i], width=2, capsize=5)
    ax.errorbar(classes, measures, yerr=cstds, marker='x', color='black', capsize=5)

    ax.set_title('Average task ' + metric + ' (' + condition_label + ')')
    ax.set_xlabel('N classes', fontsize=16)
    ax.set_ylabel('Task ' + metric, fontsize=16)
    ax.set_ylim(0.2, 1)
    ax.set_yticks([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
    ax.set_xticks(classes)
    ax.legend(loc='lower right', bbox_to_anchor=(1, 0))
    # Grid on every panel (plt.grid() would only affect the last axes created).
    ax.grid(True)
    return measures, stds


plt.rcParams['figure.figsize'] = [10, 5]
fig, (ax1, ax2) = plt.subplots(1, 2, gridspec_kw={'width_ratios': [5, 5]})

plot_accuracy_panel(ax1, dataset['standard'], 'Standard')
measures, stds = plot_accuracy_panel(ax2, dataset['game'], 'Gamified')

plt.tight_layout()
plt.show()

Mean accuracy for 2 classes: 0.91 ± 0.07
Mean accuracy for 4 classes: 0.69 ± 0.14
Mean accuracy for 6 classes: 0.74 ± 0.12
Mean accuracy for 2 classes: 0.98 ± 0.04
Mean accuracy for 4 classes: 0.76 ± 0.12
Mean accuracy for 6 classes: 0.64 ± 0.16


In [17]:

# Grouped bar chart: mean task accuracy per number of classes, with the
# standard and gamified conditions drawn next to each other.
dataset = {'standard': data_standard, 'game': data_game}
classes = [2, 4, 8]
metric = 'accuracy'

# Figure
plt.rcParams['figure.figsize'] = [12, 10]
fig, ax = plt.subplots()

# Per-class colors and labels
colors = ['red', 'orange', 'pink']
labels = ['2 Classes', '4 Classes', '8 Classes']

# Bar width and bar-group positions
width = 0.35
x = np.arange(len(classes))


def _split_by_class(frame):
    """Return the rows of ``frame`` for 2, 4 and 8 classes, in that order."""
    return tuple(frame[frame['n_classes'] == n] for n in (2, 4, 8))


# Standard condition: mean, std, and std clamped so mean + std stays <= 1
group = dataset['standard']
t1, t2, t3 = _split_by_class(group)
measures_standard = [np.mean(subset[metric]) for subset in (t1, t2, t3)]
stds_standard = [np.std(subset[metric]) for subset in (t1, t2, t3)]
cstds_standard = [
    min(std, 1 - mean) for mean, std in zip(measures_standard, stds_standard)
]

# Gamified condition: same statistics
group = dataset['game']
t1, t2, t3 = _split_by_class(group)
measures_game = [np.mean(subset[metric]) for subset in (t1, t2, t3)]
stds_game = [np.std(subset[metric]) for subset in (t1, t2, t3)]
cstds_game = [
    min(std, 1 - mean) for mean, std in zip(measures_game, stds_game)
]

# Bars: standard to the left of each tick, gamified to the right
rects1 = ax.bar(
    x - width/2, measures_standard, width,
    yerr=cstds_standard, label='Standard', color='sandybrown', capsize=5,
)
rects2 = ax.bar(
    x + width/2, measures_game, width,
    yerr=cstds_game, label='Gamified', color='indigo', alpha=1, capsize=5,
)

# Axis labels, title, ticks and legend
ax.set_xlabel('N classes', fontsize=16)
ax.set_ylabel('Task ' + metric, fontsize=16)
ax.set_title('Average task ' + metric + ' by condition', fontsize=16)
ax.set_xticks(x)
ax.set_xticklabels(classes)
ax.set_ylim(0.2, 1.05)
ax.set_yticks([0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0])
ax.legend()

# Larger tick labels
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

plt.grid(False)
plt.tight_layout()
plt.savefig('data/adhd/task_accuracy.png', dpi=300)
# Display the figure
plt.show()


## Plotting the elapsed time for each group

In [27]:
# Grouped bar chart and summary table of the task completion time per number
# of classes, for the standard and gamified conditions.

# Defined here so the cell does not depend on a variable left over from an
# earlier cell.
dataset = {'standard': data_standard, 'game': data_game}

# Prepare figure
plt.rcParams['figure.figsize'] = [12, 10]
fig, ax = plt.subplots()

# Width for bars
width = 0.35
classes = [2, 4, 8]
x = np.arange(len(classes))
metric = 'elapsed_time_s'


def _elapsed_time_stats(group):
    """Return (means, stds) of ``metric`` for each entry of ``classes``."""
    subsets = [group[group['n_classes'] == n] for n in classes]
    means = [np.mean(subset[metric]) for subset in subsets]
    stds = [np.std(subset[metric]) for subset in subsets]
    return means, stds


def _print_elapsed_time(title, means, stds):
    """Print the total and the per-class mean ± std time for each class count."""
    print(title)
    for n, mean, std in zip(classes, means, stds):
        print(f"Mean elapsed time for {n} classes: {mean:.2f} ± {std:.2f}")
        print(f"Mean time for each class: {mean / n:.2f} ± {std / n:.2f}")


# Standard condition
group = dataset['standard']
measures_std, stds_std = _elapsed_time_stats(group)
_print_elapsed_time("Standard condition", measures_std, stds_std)

# Gamified condition
group = dataset['game']
measures_game, stds_game = _elapsed_time_stats(group)
_print_elapsed_time("Gamified condition", measures_game, stds_game)

# Create a summary table with both conditions. Every column is rounded to two
# decimals (previously only 'time_standard' was; the others were rounded to
# whole numbers).
summary = pd.DataFrame({
    'n_classes': classes,
    'time_standard': np.round(measures_std, 2),
    'std_standard': np.round(stds_std, 2),
    'time_game': np.round(measures_game, 2),
    'std_game': np.round(stds_game, 2),
})
summary.to_csv('data/adhd/elapsed_time_summary.csv', index=False)

# Plotting bars
rects1 = ax.bar(x - width/2, measures_std, width, yerr=stds_std, label='Standard', color='sandybrown', capsize=5)
rects2 = ax.bar(x + width/2, measures_game, width, yerr=stds_game, label='Gamified', color='indigo', alpha=1, capsize=5)

ax.set_title('Average task completion time (s)')
ax.set_xlabel('N classes', fontsize=16)
ax.set_ylabel('Time (s)', fontsize=16)
ax.set_ylim(0, 150)
ax.set_xticks(x)
ax.set_xticklabels(classes)
# Apply the tick font size once the final ticks and labels are in place.
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
ax.legend(loc='upper left', bbox_to_anchor=(0, 1))

# Adjust the layout before saving so the PNG matches what is shown on screen.
plt.tight_layout()
plt.savefig('data/adhd/task_time.png', dpi=300)
plt.show()


Standard condition
Mean elapsed time for 2 classes: 40.03 ± 10.98
Mean time for each class: 20.02 ± 5.49
Mean elapsed time for 4 classes: 61.72 ± 17.92
Mean time for each class: 15.43 ± 4.48
Mean elapsed time for 8 classes: 70.32 ± 30.23
Mean time for each class: 8.79 ± 3.78
Gamified condition
Mean elapsed time for 2 classes: 38.30 ± 16.32
Mean time for each class: 19.15 ± 8.16
Mean elapsed time for 4 classes: 64.60 ± 31.50
Mean time for each class: 16.15 ± 7.88
Mean elapsed time for 8 classes: 102.95 ± 45.62
Mean time for each class: 12.87 ± 5.70
