In [97]:
import pandas as pd
import numpy as np
import scipy.io
import matplotlib.pyplot as plt


from scipy.stats import f_oneway, mannwhitneyu, shapiro, kstest, norm, kruskal

import statsmodels.stats.proportion as proportion

PATH_UPV = "./DatosExperimento1/UPV"
PATH_UGR = "./DatosExperimento1/UGR"
PATH_VARIABLES = "./variables"

# Leer el archivo Excel en un DataFrame de pandas
df_upv = pd.read_excel(PATH_UPV+'/Exp1_Observadores_UPV.xlsx')
df_ugr = pd.read_excel(PATH_UGR+'/Exp1_Observadores_UGR.xlsx')

# Array de ficheros .mat, nos da igual el volunatrio
upv = []
ugr = []

# Iterar sobre cada fila del DataFrame
for idx, row in df_upv.iterrows():
    result = scipy.io.loadmat(PATH_UPV+"/"+row['Fichero'])
    upv.append(np.array(result['Results'])[:,0])
    
for idx, row in df_ugr.iterrows():
    result = scipy.io.loadmat(PATH_UGR+"/"+row['Fichero'])
    ugr.append(np.array(result['Results'])[:,0])

In [10]:
len(upv)

60

In [11]:
len(ugr)

73

In [18]:
# Calcular intervalos de confianza para cada variable utilizando el método de Wilson
conf_intervals = []
for rate1, rate2 in zip(ugr, upv):
    conf_int = proportion.proportion_confint(rate1, 1, alpha=0.05, method='wilson'), proportion.proportion_confint(rate2, 1, alpha=0.05, method='wilson')
    new_conf_int = {"ugr": [], "upv":[]}
    for i in range(160):
        new_conf_int["ugr"].append("[" + str(conf_int[0][0][i]) + ", " + str(conf_int[0][1][i]) + "]")
        new_conf_int["upv"].append("[" + str(conf_int[1][0][i]) + ", " + str(conf_int[1][1][i]) + "]")
    conf_intervals.append(new_conf_int)

In [19]:
conf_intervals

[{'ugr': ['[0.02533624364650111, 0.9058139848944199]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.2065493143772374, 1.0]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.2065493143772374, 1.0]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.02533624364650111, 0.9058139848944199]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.02533624364650111, 0.9058139848944199]',
   '[0.2065493143772374, 1.0]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.2065493143772374, 1.0]',
   '[0.2065493143772374, 1.0]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.09418601510558011, 0.9746637563534989]',
   '[0.2065493143772374, 1.0]',
   '[0.2065493143772374, 1.0]',
 

## Test de normalidad,  ANOVA y no paramétricos en las 160 variables

In [103]:
p_valores_anova = []
normalidad_upv = []
normalidad_ugr = []
p_valores_mannwhitneyu = []
p_valores_kruskal = []
for i in range(160):
    upv_in_i = [sesion[i] for sesion in upv]
    ugr_in_i = [sesion[i] for sesion in ugr]
    
    _, p_value_norm_upv = kstest(upv_in_i, 'norm')
    normalidad_upv.append(p_value_norm_upv)
    _, p_value_norm_ugr = kstest(ugr_in_i, 'norm')
    normalidad_ugr.append(p_value_norm_ugr)
    
    _, p_value_anova = f_oneway(upv_in_i, ugr_in_i)
    p_valores_anova.append(p_value_anova)
    
    _, p_value_mannwhitneyu = mannwhitneyu(upv_in_i, ugr_in_i)
    p_valores_mannwhitneyu.append(p_value_mannwhitneyu)
    
    _, p_value_kruskal = kruskal(upv_in_i, ugr_in_i)
    p_valores_kruskal.append(p_value_kruskal)

print("ANOVA")
for row in [p_valores_anova[i:i+10] for i in range(0, len(p_valores_anova), 10)]:
    for value in row:
        if value < 0.05:
            print('\033[91m'+format(value, ".4f")+'\033[0m', end=', ')
        else:
            print(format(value, ".4f"), end=', ')
    print("\n")
print(f"Número de variables no independientes con una confianza del 95%: '{sum(p < 0.05 for p in p_valores_anova)}'\n")

print("Test de normalidad para UPV")
for row in [normalidad_upv[i:i+10] for i in range(0, len(normalidad_upv), 10)]:
    for value in row:
        if value < 0.05:
            print('\033[91m'+format(value, ".4f")+'\033[0m', end=', ')
        else:
            print(format(value, ".4f"), end=', ')
    print("\n")
print(f"Número de variables no normales con una confianza del 95%: '{sum(p < 0.05 for p in normalidad_upv)}'\n")


print("Test de normalidad para UGR")
for row in [normalidad_ugr[i:i+10] for i in range(0, len(normalidad_ugr), 10)]:
    for value in row:
        if value < 0.05:
            print('\033[91m'+format(value, ".4f")+'\033[0m', end=', ')
        else:
            print(format(value, ".4f"), end=', ')
    print("\n")
print(f"Número de variables no normales con una confianza del 95%: '{sum(p < 0.05 for p in normalidad_ugr)}'\n")


print("Test de Mannwhiteney\n")
for row in [p_valores_mannwhitneyu[i:i+10] for i in range(0, len(p_valores_mannwhitneyu), 10)]:
    for value in row:
        if value < 0.01:
            print('\033[91m'+format(value, ".4f")+'\033[0m', end=', ')
        elif value < 0.05:
            print('\033[93m'+format(value, ".4f")+'\033[0m', end=', ')
        else:
            print(format(value, ".4f"), end=', ')
    print("\n")
print(f"Número de variables no independientes con una confianza del 95%: \033[93m'{sum(p < 0.05 for p in p_valores_mannwhitneyu)}'\033[0m")
print(f"Número de variables no independientes con una confianza del 99%: \033[91m'{sum(p < 0.01 for p in p_valores_mannwhitneyu)}'\033[0m\n")


print("Test de Kruskal")
for row in [p_valores_kruskal[i:i+10] for i in range(0, len(p_valores_kruskal), 10)]:
    for value in row:
        if value < 0.01:
            print('\033[91m'+format(value, ".4f")+'\033[0m', end=', ')
        elif value < 0.05:
            print('\033[93m'+format(value, ".4f")+'\033[0m', end=', ')
        else:
            print(format(value, ".4f"), end=', ')
    print("\n")
print(f"Número de variables no independientes con una confianza del 95%: \033[93m'{sum(p < 0.05 for p in p_valores_kruskal)}'\033[0m")
print(f"Número de variables no independientes con una confianza del 99%: \033[91m'{sum(p < 0.01 for p in p_valores_kruskal)}'\033[0m\n")

ANOVA
0.6602, 0.9668, 0.1720, [91m0.0212[0m, 0.1262, 0.2358, [91m0.0309[0m, [91m0.0009[0m, [91m0.0090[0m, [91m0.0007[0m, 

0.3481, 0.5980, [91m0.0238[0m, 0.0551, [91m0.0049[0m, 0.1186, [91m0.0128[0m, [91m0.0005[0m, [91m0.0005[0m, [91m0.0048[0m, 

0.5999, 0.0576, 0.1115, 0.5465, [91m0.0007[0m, [91m0.0188[0m, 0.5387, 0.0998, [91m0.0186[0m, [91m0.0080[0m, 

0.7271, 0.5969, 0.2238, [91m0.0149[0m, [91m0.0118[0m, 0.1320, 0.5558, [91m0.0307[0m, [91m0.0046[0m, 0.0602, 

0.5049, [91m0.0233[0m, 0.2825, 0.1527, 0.2024, [91m0.0458[0m, [91m0.0478[0m, [91m0.0020[0m, [91m0.0001[0m, 0.0540, 

0.2342, 0.6445, 0.2172, 0.2323, [91m0.0442[0m, 0.0741, [91m0.0199[0m, [91m0.0458[0m, [91m0.0035[0m, [91m0.0004[0m, 

0.7567, 0.0537, 0.0920, [91m0.0023[0m, [91m0.0168[0m, [91m0.0360[0m, [91m0.0021[0m, [91m0.0238[0m, [91m0.0041[0m, [91m0.0003[0m, 

0.0657, [91m0.0100[0m, 0.0701, [91m0.0027[0m, [91m0.0202[0m, [91m0.0034[0m, [91m0.0000

## Test de normalidad, ANOVA y no paramétricos en los 16 grupos, promediando las 10 iteraciones

In [117]:
p_valores_anova = []
normalidad_upv = []
normalidad_ugr = []
p_valores_mannwhitneyu = []
p_valores_kruskal = []
for i in range(16):
    upv_in_i = [sum(sesion[(i*10):((i*10)+10)])/10 for sesion in upv]
    ugr_in_i = [sum(sesion[(i*10):((i*10)+10)])/10 for sesion in ugr]
    _, p_value_norm_upv = kstest(upv_in_i, 'norm')
    normalidad_upv.append(p_value_norm_upv)
    _, p_value_norm_ugr = kstest(ugr_in_i, 'norm')
    normalidad_ugr.append(p_value_norm_ugr)
    
    _, p_value_anova = f_oneway(upv_in_i, ugr_in_i)
    p_valores_anova.append(p_value_anova)
    
    _, p_value_mannwhitneyu = mannwhitneyu(upv_in_i, ugr_in_i)
    p_valores_mannwhitneyu.append(p_value_mannwhitneyu)
    
    _, p_value_kruskal = kruskal(upv_in_i, ugr_in_i)
    p_valores_kruskal.append(p_value_kruskal)

print("ANOVA")
for value in p_valores_anova:
    if value < 0.05:
        print('\033[91m'+format(value, ".4f")+'\033[0m')
    else:
        print(format(value, ".4f"))
print(f"Número de variables no independientes con una confianza del 95%: '{sum(p < 0.05 for p in p_valores_anova)}'\n")

print("Test de normalidad para UPV")
for value in normalidad_upv:
    if value < 0.05:
        print('\033[91m'+format(value, ".4f")+'\033[0m')
    else:
        print(format(value, ".4f"))
print(f"Número de variables no normales con una confianza del 95%: '{sum(p < 0.05 for p in normalidad_upv)}'\n")


print("Test de normalidad para UGR")
for value in normalidad_ugr:
    if value < 0.05:
        print('\033[91m'+format(value, ".4f")+'\033[0m')
    else:
        print(format(value, ".4f"))
print(f"Número de variables no normales con una confianza del 95%: '{sum(p < 0.05 for p in normalidad_ugr)}'\n")


print("Test de Mannwhiteney\n")
for value in p_valores_mannwhitneyu:
    if value < 0.01:
        print('\033[91m'+format(value, ".4f")+'\033[0m')
    elif value < 0.05:
        print('\033[93m'+format(value, ".4f")+'\033[0m')
    else:
        print(format(value, ".4f"))
print(f"Número de variables no independientes con una confianza del 95%: \033[93m'{sum(p < 0.05 for p in p_valores_mannwhitneyu)}'\033[0m")
print(f"Número de variables no independientes con una confianza del 99%: \033[91m'{sum(p < 0.01 for p in p_valores_mannwhitneyu)}'\033[0m\n")


print("Test de Kruskal")
for value in p_valores_kruskal:
    if value < 0.01:
        print('\033[91m'+format(value, ".4f")+'\033[0m')
    elif value < 0.05:
        print('\033[93m'+format(value, ".4f")+'\033[0m')
    else:
        print(format(value, ".4f"))
print(f"Número de variables no independientes con una confianza del 95%: \033[93m'{sum(p < 0.05 for p in p_valores_kruskal)}'\033[0m")
print(f"Número de variables no independientes con una confianza del 99%: \033[91m'{sum(p < 0.01 for p in p_valores_kruskal)}'\033[0m\n")

ANOVA
[91m0.0075[0m
[91m0.0019[0m
[91m0.0143[0m
[91m0.0383[0m
[91m0.0014[0m
[91m0.0079[0m
[91m0.0015[0m
[91m0.0001[0m
[91m0.0005[0m
[91m0.0003[0m
[91m0.0003[0m
[91m0.0014[0m
0.0786
[91m0.0059[0m
[91m0.0000[0m
[91m0.0004[0m
Número de variables no independientes con una confianza del 95%: '15'

Test de normalidad para UPV
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
Número de variables no normales con una confianza del 95%: '16'

Test de normalidad para UGR
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
[91m0.0000[0m
Número de variables no normales con una con

In [None]:
# Umbrales

In [119]:
mean_std_tmp_ugr = np.load(PATH_VARIABLES+"/mean_std_ugr.npy")
mean_std_ugr = mean_std_tmp_ugr[:,1] - mean_std_tmp_ugr[:,0]
mean_std_tmp_upv = np.load(PATH_VARIABLES+"/mean_std_upv.npy")
mean_std_upv = mean_std_tmp_upv[:,1] - mean_std_tmp_upv[:,0]

In [120]:
print(mean_std_ugr)
print(mean_std_upv)

[0.000867 0.002187 0.004682 0.007834 0.001123 0.002106 0.002921 0.003683
 0.001551 0.000899 0.000758 0.000668 0.00439  0.004073 0.00334  0.003154]
[0.001607 0.005164 0.009522 0.013994 0.002196 0.00368  0.006786 0.009028
 0.002747 0.002552 0.002933 0.003185 0.004579 0.007725 0.007835 0.006935]


In [121]:
f_value, p_value = mannwhitneyu(mean_std_upv, mean_std_ugr)

In [122]:
p_value

0.008808980932423759

Se rechaza la hipótesis nula. Hay diferencias significativas entre los grupos.