In [1]:
import numpy as np
from scipy.stats import ttest_rel

# STATISTICS BY READER GROUP (ADVANCED VS. INTERMEDIARY)

# Accuracy

In [2]:
#Data obtained from Visual Turing Test

In [3]:
# Accuracy values from the Visual Turing Test, split by reader experience.
# NOTE(review): 0.416 below appears as 41.7 in the pooled overall_acc array —
# confirm which value matches the raw data.
r_advanced = np.array(
    [0.416, 0.50, 0.40, 0.483]  # experts with advanced experience
)
r_interm = np.array(
    [0.50, 0.467, 0.467, 0.450]  # experts with intermediary experience
)

In [4]:
# Mean accuracy per group (advanced first, then intermediary), 3 decimals,
# one value per output line.
print(
    round(r_advanced.mean(), 3),
    round(r_interm.mean(), 3),
    sep="\n",
)

0.45
0.471


In [5]:
# Paired t-test on accuracy: advanced vs. intermediary readers.
# NOTE(review): ttest_rel treats element i of both arrays as a matched pair.
# If the two groups consist of different readers, an independent-samples test
# (scipy.stats.ttest_ind) may be the appropriate choice — confirm study design.
t_test = ttest_rel(a=r_advanced, b=r_interm)

In [6]:
# Show the t-test result (statistic and p-value) for accuracy.
print(t_test)

Ttest_relResult(statistic=-0.6743271460355288, pvalue=0.5484152049582721)


# Sensitivity

In [7]:
# Sensitivity values, split by reader experience.
# NOTE(review): 0.4333 carries four decimals while every other entry has
# three, and 0.633 appears as 63 in the pooled overall_sen array — confirm
# against the raw data.
r_advanced_sens = np.array(
    [0.433, 0.40, 0.367, 0.4333]  # experts with advanced experience
)
r_interm_sens = np.array(
    [0.433, 0.40, 0.40, 0.633]  # experts with intermediary experience
)

In [8]:
# Paired t-test on sensitivity: advanced vs. intermediary readers.
# NOTE(review): ttest_rel assumes matched pairs; if the groups are different
# readers, scipy.stats.ttest_ind may be the appropriate test — confirm.
t_test_sens = ttest_rel(a=r_advanced_sens, b=r_interm_sens)

In [9]:
# Show the t-test result (statistic and p-value) for sensitivity.
print(t_test_sens)

Ttest_relResult(statistic=-1.2167465384108442, pvalue=0.3106925737415292)


In [10]:
# Mean sensitivity per group (advanced first, then intermediary), 3 decimals,
# one value per output line.
print(
    round(r_advanced_sens.mean(), 3),
    round(r_interm_sens.mean(), 3),
    sep="\n",
)

0.408
0.466


# Specificity

In [11]:
# Specificity values, split by reader experience.
# NOTE(review): 0.566 and the second 0.533 appear as 56.7 and 53 in the pooled
# overall_spe array — confirm against the raw data.
r_advanced_spe = np.array(
    [0.40, 0.60, 0.433, 0.533]  # experts with advanced experience
)
r_interm_spe = np.array(
    [0.566, 0.533, 0.533, 0.267]  # experts with intermediary experience
)

In [12]:
# Paired t-test on specificity: advanced vs. intermediary readers.
# NOTE(review): ttest_rel assumes matched pairs; if the groups are different
# readers, scipy.stats.ttest_ind may be the appropriate test — confirm.
t_test_spe = ttest_rel(a=r_advanced_spe, b=r_interm_spe)

In [13]:
# Show the t-test result (statistic and p-value) for specificity.
print(t_test_spe)

Ttest_relResult(statistic=0.17362815973508886, pvalue=0.8732124397897068)


In [14]:
# Mean specificity per group (advanced first, then intermediary), 3 decimals,
# one value per output line.
print(
    round(r_advanced_spe.mean(), 3),
    round(r_interm_spe.mean(), 3),
    sep="\n",
)

0.492
0.475


# Confidence intervals (95%, via scipy.stats.bayes_mvs)

In [15]:
#ACCURACY

In [16]:
from scipy.stats import bayes_mvs

In [17]:
# Advanced readers: 95% interval for the mean accuracy.
# bayes_mvs returns three results (mean, variance, standard deviation); only
# the mean result is printed.
(
    r_adv_acc_mean,
    r_adv_acc_var,
    r_adv_std,
) = bayes_mvs(r_advanced, alpha=0.95)
print(r_adv_acc_mean)

Mean(statistic=0.44975, minmax=(0.37155442749635426, 0.5279455725036457))


In [18]:
# Intermediary readers: 95% interval for the mean accuracy.
# bayes_mvs returns three results (mean, variance, standard deviation); only
# the mean result is printed. The standard-deviation result is bound to
# r_int_std so that it no longer overwrites r_adv_std, which holds the
# advanced-reader result.
r_int_acc_mean, r_int_acc_var, r_int_std = bayes_mvs(r_interm, alpha=0.95)
print(r_int_acc_mean)

Mean(statistic=0.47100000000000003, minmax=(0.4376981665486882, 0.5043018334513119))


In [19]:
#SENSITIVITY

In [20]:
# Advanced readers: 95% interval for the mean sensitivity.
# The variance and standard-deviation results are discarded into `_`.
r_adv_sen, _, _ = bayes_mvs(
    r_advanced_sens,
    alpha=0.95,
)
print(r_adv_sen)

Mean(statistic=0.408325, minmax=(0.35792509461424143, 0.45872490538575855))


In [21]:
# Intermediary readers: 95% interval for the mean sensitivity.
# The variance and standard-deviation results are discarded into `_`.
r_int_sen, _, _ = bayes_mvs(
    r_interm_sens,
    alpha=0.95,
)
print(r_int_sen)

Mean(statistic=0.4665, minmax=(0.28814808815305815, 0.6448519118469419))


In [22]:
#SPECIFICITY 

In [23]:
# Advanced readers: 95% interval for the mean specificity.
# The variance and standard-deviation results are discarded into `_`.
r_adv_spe, _, _ = bayes_mvs(
    r_advanced_spe,
    alpha=0.95,
)
print(r_adv_spe)

Mean(statistic=0.49150000000000005, minmax=(0.34540468468413754, 0.6375953153158626))


In [24]:
# Intermediary readers: 95% interval for the mean specificity.
# The variance and standard-deviation results are discarded into `_`.
r_int_spe, _, _ = bayes_mvs(
    r_interm_spe,
    alpha=0.95,
)
print(r_int_spe)

Mean(statistic=0.47475, minmax=(0.25297978436705904, 0.696520215632941))


# STATISTICS FOR THE OVERALL EXPERIMENT

In [25]:
# Pooled metrics for all eight readers (advanced and intermediary combined),
# expressed in percent rather than as fractions.
# NOTE(review): several entries differ from the per-group arrays when scaled
# by 100 (41.7 vs 0.416 in accuracy, 63 vs 0.633 in sensitivity, 56.7 vs 0.566
# and 53 vs 0.533 in specificity) — confirm against the raw data.
overall_acc = np.array([50.0, 46.7, 46.7, 45.0, 41.7, 50.0, 40.0, 48.3])
overall_sen = np.array([43.3, 40.0, 40.0, 63.0, 43.3, 40.0, 36.7, 43.3])
overall_spe = np.array([56.7, 53.3, 53.0, 26.7, 40.0, 60.0, 43.3, 53.3])

# Means rounded to 2 decimals, one per line: accuracy, sensitivity, specificity.
print(
    round(overall_acc.mean(), 2),
    round(overall_sen.mean(), 2),
    round(overall_spe.mean(), 2),
    sep="\n",
)

46.05
43.7
48.29


In [26]:
# 95% intervals for the mean of each pooled metric. Only the mean result of
# bayes_mvs is kept; the variance and standard-deviation results go to `_`.
CI_acc, _, _ = bayes_mvs(overall_acc, alpha=0.95)
CI_sen, _, _ = bayes_mvs(overall_sen, alpha=0.95)
CI_spe, _, _ = bayes_mvs(overall_spe, alpha=0.95)

# One result per line: accuracy, sensitivity, specificity.
print(CI_acc, CI_sen, CI_spe, sep="\n")

Mean(statistic=46.05, minmax=(42.99063988566235, 49.10936011433765))
Mean(statistic=43.7, minmax=(36.900493699968116, 50.49950630003189))
Mean(statistic=48.287499999999994, minmax=(39.12941098290388, 57.44558901709611))


# T-Test for readers vs random guess

In [27]:
# Baseline for the comparisons against random guessing: the value 50 repeated
# eight times, matching the length of the pooled reader arrays.
# NOTE(review): this name shadows the standard-library `random` module name;
# harmless as long as that module is not imported in this notebook.
random = np.array([50] * 8)

In [28]:
# Pooled accuracy vs. the constant 50 baseline.
# Pairing against a constant array is equivalent to a one-sample t-test of the
# mean against 50 (cf. scipy.stats.ttest_1samp).
vs = ttest_rel(a=overall_acc, b=random)
print(vs)

Ttest_relResult(statistic=-3.053012866225735, pvalue=0.018504538111167413)


In [29]:
# Constant 50 baseline vs. pooled specificity. The baseline is the first
# argument here, so the statistic is positive when mean specificity is below 50.
# NOTE(review): the variable name has a typo ("gettting"); it is kept unchanged
# in case later cells reference it.
gettting_fake = ttest_rel(a=random, b=overall_spe)
print(gettting_fake)

Ttest_relResult(statistic=0.44216855965210383, pvalue=0.6717060020452486)


In [30]:
# Constant 50 baseline vs. pooled sensitivity. The baseline is the first
# argument here, so the statistic is positive when mean sensitivity is below 50.
getting_real = ttest_rel(a=random, b=overall_sen)
print(getting_real)

Ttest_relResult(statistic=2.1909138875707828, pvalue=0.06459157295194755)
