In [19]:
import numpy as np
import matplotlib.pyplot as plt
import pylab
from scipy import stats
import glob
import math

In [2]:
#Loading events text file to read information
this_file = "./Humans_T_test_data.txt"
# Read the tab-separated file inside a context manager so the handle is closed
# as soon as parsing is done (it was previously left open for the whole session).
with open(this_file,"r") as file: #open file in read mode
    lines = [line.rstrip().split("\t") for line in file] #strip trailing whitespace, split on tabs
lines = [[st.replace("\"","") for st in line] for line in lines] #remove quotation marks in the different strings
cats = lines[0]   # first row: column names
data = lines[1:]  # remaining rows: one record per line
print(cats)
print(data[56])   # spot-check a single record

['name', 'Strategy', 'Measure', 'Value']
['A19X8IA9EKC3XH', 'streamed', 'Right_Regularity', '0.114278710097203']


In [3]:
# Lookup tables: measure name -> column index, strategy name -> row index.
Measures = {
    "Score": 0,
    "Entropy": 1,
    "LogCV": 2,
    "Shot_Periodicity": 3,
    "Shot_Regularity": 4,
    "Right_Regularity": 5,
    "Right_Periodicity": 6,
    "Resets": 7,
    "Deflations": 8,
    "Misses": 9,
}
Strategy = {"streamed": 0, "coupled": 1}  # Strategy mapping
Measure_List = [*Measures]
Strategy_List = [*Strategy]
Measure_Nb, Strategy_Nb = len(Measure_List), len(Strategy_List)

# Human_Data[strategy index][measure index] -> list of float values
Human_Data = [[[] for _ in range(Measure_Nb)] for _ in range(Strategy_Nb)]

# Each record is indexed as [name, strategy, measure, value]; file the value
# under its strategy/measure bucket.
for record in data:
    bucket = Human_Data[Strategy[record[1]]][Measures[record[2]]]
    bucket.append(float(record[3]))

In [6]:
def cohend(d1, d2):
    """Return Cohen's d for two independent samples, using the pooled SD.

    Parameters
    ----------
    d1, d2 : array-like
        The two samples. Anything ``np.asarray`` can turn into a numeric
        array is accepted (lists, tuples, NumPy arrays).

    Returns
    -------
    float
        ``(mean(d1) - mean(d2)) / s_pooled``, where ``s_pooled`` is built
        from the unbiased (``ddof=1``) variances of each sample.

    Notes
    -----
    There is no guard against a zero pooled standard deviation or samples
    with fewer than two observations; such inputs propagate through the
    NumPy arithmetic unchanged.
    """
    # Coerce up front so plain Python sequences work too (``.size`` only
    # exists on NumPy arrays).
    d1 = np.asarray(d1, dtype=float)
    d2 = np.asarray(d2, dtype=float)
    n1, n2 = d1.size, d2.size  #sample sizes
    s1, s2 = np.var(d1, ddof=1), np.var(d2, ddof=1) #unbiased variances for each sample
    s = np.sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2))  #pooled standard deviation
    u1, u2 = np.mean(d1), np.mean(d2) #Means
    return (u1 - u2) / s

In [8]:
#Print all t-test results
def _format_p(p_value):
    """Format a p-value for the report without collapsing tiny values to 0.0.

    Values that survive rounding to 10 decimals are printed exactly as
    before; a non-zero p-value that would round to zero is shown in
    scientific notation instead.
    """
    rounded = round(p_value, 10)
    if rounded == 0 and p_value != 0:
        return "{:.2e}".format(p_value)
    return str(rounded)

# Look the strategy rows up by name rather than relying on literal 0/1.
streamed_idx = Strategy["streamed"]
coupled_idx = Strategy["coupled"]

for mm, c_Mea in enumerate(Measure_List):
    print("Measure: "+str(c_Mea))

    streamed_dat = np.array(Human_Data[streamed_idx][mm])
    coupled_dat = np.array(Human_Data[coupled_idx][mm])
    # Mean difference is streamed minus coupled, so a positive value means
    # the streamed group has the larger mean.
    M_D = np.mean(streamed_dat) - np.mean(coupled_dat)

    (t,p)=stats.ttest_ind(streamed_dat, coupled_dat, equal_var=False) #Welch t-test which assumes unequal variances
    c = cohend(streamed_dat, coupled_dat)  # effect size (Cohen's d, pooled SD)

    ToPrint2 = "M\tt\tp\td\n"+str(round(M_D,2))+"\t"+str(round(t,2))+"\t"+_format_p(p)+"\t"+str(round(c,2))+"\n"
    print(ToPrint2)

Measure: Score
M	t	p	d
1121.65	8.24	7e-09	2.18

Measure: Entropy
M	t	p	d
-0.01	-0.11	0.9126889468	-0.02

Measure: LogCV
M	t	p	d
-0.13	-2.39	0.0219854007	-0.5

Measure: Shot_Periodicity
M	t	p	d
-129.76	-12.16	3.86e-08	-4.81

Measure: Shot_Regularity
M	t	p	d
-0.02	-0.49	0.6356136736	-0.18

Measure: Right_Regularity
M	t	p	d
-0.25	-9.32	0.0	-2.06

Measure: Right_Periodicity
M	t	p	d
-86.94	-9.04	1.6437e-06	-3.79

Measure: Resets
M	t	p	d
3.2	1.65	0.1139495049	0.47

Measure: Deflations
M	t	p	d
-15.67	-5.29	5.3336e-06	-1.21

Measure: Misses
M	t	p	d
-5.13	-0.8	0.4302219764	-0.2



In [16]:
#Compute correlation
# Pool both strategy groups (rows 0 and 1 of Human_Data) for the two measures
# of interest, addressing the measures by name instead of by column number.
# NOTE(review): the correlation pairs values by position, so it assumes both
# measures list participants in the same order - confirm against the data file.
All_shot_per = [val for grp in (0, 1) for val in Human_Data[grp][Measures["Shot_Periodicity"]]]
All_right_reg = [val for grp in (0, 1) for val in Human_Data[grp][Measures["Right_Regularity"]]]
r_size = len(All_shot_per)
print(r_size)
print(len(All_right_reg))

res = stats.pearsonr(All_shot_per, All_right_reg)
print(res)
# NOTE: res.confidence_interval() is left disabled; it presumably needs a
# SciPy version whose pearsonr returns a result object rather than a tuple.
#print(res.confidence_interval())

41
41
(0.7592651673513323, 8.704100311247966e-09)


In [20]:
#Confidence interval - reference
#https://onlinestatbook.com/2/estimation/correlation_ci.html
#https://stackoverflow.com/questions/33176049/how-do-you-compute-the-confidence-interval-for-pearsons-r-in-python
#
def r_to_z(r):
    """Fisher z-transform of a correlation coefficient: 0.5 * ln((1+r)/(1-r))."""
    ratio = (1 + r) / (1 - r)
    return 0.5 * math.log(ratio)

def z_to_r(z):
    """Inverse Fisher transform: map a z value back to a correlation in [-1, 1].

    Mathematically this is (exp(2z) - 1) / (exp(2z) + 1), i.e. tanh(z).
    Using ``math.tanh`` directly avoids the OverflowError that
    ``math.exp(2 * z)`` raises for large positive ``z``; such inputs now
    saturate at 1.0.
    """
    return math.tanh(z)

def r_confidence_interval(r, alpha, n):
    """Confidence interval for a Pearson correlation via the Fisher z-transform.

    r     : sample correlation coefficient
    alpha : two-tailed significance level (0.05 gives a 95% interval)
    n     : number of paired observations; the standard error is
            1/sqrt(n - 3), so n must be greater than 3

    Returns a (lower, upper) tuple expressed on the correlation scale.
    """
    centre = r_to_z(r)
    std_err = 1.0 / math.sqrt(n - 3)
    critical = stats.norm.ppf(1 - alpha/2)  # 2-tailed z critical value
    margin = critical * std_err

    # Build the interval in z-space, then map both ends back to r.
    z_bounds = (centre - margin, centre + margin)
    return tuple(z_to_r(bound) for bound in z_bounds)

In [21]:
# 95% confidence interval (alpha = 0.05) for the Pearson r computed above,
# using the number of paired observations as the sample size.
CI = r_confidence_interval(r=res[0], alpha=0.05, n=r_size)
print(CI)

(0.5892586229706419, 0.8648879379348754)
