# Difficulty and personal data

### Is there a statistically significant difference in the assessment of task difficulty depending on the personal characteristics reported in the survey?

In [1]:
import os

import pandas as pd

from functions.tests.independent_groups.two_groups import mann_whitney_test
from functions.tests.independent_groups.more_than_two_groups import kruskal_wallis_test
from functions.tests.posthoc import posthocNemenyi2_test
from functions.others import choose_dominant_kind, change_to_ranges_roz, change_to_ranges_podst

In [2]:
# Input locations. The defaults are the original absolute paths; set the
# THESIS_CORRECTNESS_CSV / THESIS_SURVEY_CSV environment variables to run the
# notebook on another machine without editing this cell.
CORRECTNESS_CSV = os.environ.get(
    'THESIS_CORRECTNESS_CSV',
    '/Users/martasolarz/Studies/Thesis/Master_thesis/prepare_datasets/sets/correctness.csv',
)
SURVEY_CSV = os.environ.get(
    'THESIS_SURVEY_CSV',
    '/Users/martasolarz/Studies/Thesis/data/survey.csv',
)

# Participants excluded from the analysis.
# ID 6: rejected person due to eye tracking metrics.
REJECTED_IDS = [6]

df_answers = pd.read_csv(CORRECTNESS_CSV)

# Every missing survey value is replaced with the 'NO_VAL' marker, which the
# later cells compare against.
df_quest = pd.read_csv(SURVEY_CSV).fillna('NO_VAL')

df_quest = df_quest[~df_quest.ID.isin(REJECTED_IDS)]

In [3]:
# Flag columns: keep 'NO_VAL' where the extended-level result is missing and
# write 'VAL' otherwise. These must be derived before the *_roz columns are
# overwritten further down.
for flag_col, result_col in [('MAT', 'MAT_roz'), ('POL', 'JP_roz'), ('HIST', 'HIST_roz')]:
    df_quest[flag_col] = df_quest[result_col].apply(lambda x: x if x == 'NO_VAL' else 'VAL')

# Row-wise helper from functions.others; it receives the full survey row.
df_quest['Typ_uczenia'] = df_quest.apply(choose_dominant_kind, axis=1)

# Shorten the long field-of-study label; every other value is left as is.
study_field_aliases = {'Geografia, w ramach studiów międzyobszarowych': 'MISMaP Geografia'}
df_quest['Kierunek_studiów'] = df_quest['Kierunek_studiów'].map(
    lambda x: study_field_aliases.get(x, x)
)

# change values to ranges: basic-level results first, then extended-level ones
for col in ('JP_podst', 'MAT_podst'):
    df_quest[col] = df_quest[col].apply(change_to_ranges_podst)
for col in ('JP_roz', 'MAT_roz', 'HIST_roz', 'FIZ_roz'):
    df_quest[col] = df_quest[col].apply(change_to_ranges_roz)

df_quest.head()

Unnamed: 0,ID,Płeć,Rok_urodzenia,Kierunek_studiów,Rok_studiów,Specjalność,Wady_wzroku,Daltonizm,Ilość_snu,JP_podst,...,Rank_map,Rank_txt,Rank_plt,Rank_tab,Data,Godzina,MAT,POL,HIST,Typ_uczenia
0,1,M,2002,MISMaP Geografia,2,SPOŁ-EKO,0,1,6,4,...,3,4,1,2,2023-05-24,11:44:20,VAL,VAL,NO_VAL,Kinestetyk
1,2,M,2003,Gospodarka przestrzenna,1,NO_VAL,1,1,<6,2,...,2,3,1,4,2023-05-24,12:21:03,VAL,VAL,NO_VAL,Brak dominującego
2,3,M,2001,Geografia,3,GEOINF,0,1,8,2,...,1,4,3,2,2023-05-24,12:41:09,VAL,NO_VAL,NO_VAL,Wzrokowiec
3,4,K,2000,Geografia,3,GEOINF,0,1,8,2,...,2,4,3,1,2023-05-24,13:15:20,NO_VAL,VAL,NO_VAL,Brak dominującego
4,5,K,2002,Geografia,3,GEOINF,0,1,7,2,...,2,4,1,3,2023-05-24,13:35:17,NO_VAL,NO_VAL,NO_VAL,Wzrokowiec


In [4]:
# Join the answers with the survey on participant ID, then index the result by ID.
df_merge = pd.merge(df_answers, df_quest, on='ID').set_index('ID')
df_merge.tail()

Unnamed: 0_level_0,1a_trud,1b_trud,2a_trud,2b_trud,3a_trud,3b_trud,Trud_all,Corr_all,1a_corr,1b_corr,...,Rank_map,Rank_txt,Rank_plt,Rank_tab,Data,Godzina,MAT,POL,HIST,Typ_uczenia
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42,2,1,2,1,2,2,10,5,1,1,...,2,4,1,3,2023-06-07,11:42:55,NO_VAL,NO_VAL,NO_VAL,Wzrokowiec
43,1,2,2,2,3,3,13,5,1,1,...,2,3,1,2,2023-06-07,13:04:51,NO_VAL,NO_VAL,NO_VAL,Brak dominującego
44,1,1,3,1,2,3,11,3,1,1,...,2,1,2,1,2023-06-07,14:13:38,VAL,NO_VAL,NO_VAL,Brak dominującego
45,2,1,2,2,2,2,11,3,1,1,...,2,4,3,1,2023-06-07,14:27:37,VAL,NO_VAL,NO_VAL,Słuchowiec
46,1,3,2,2,2,3,13,4,0,1,...,1,3,3,3,2023-06-07,15:19:52,NO_VAL,NO_VAL,NO_VAL,Brak dominującego


Preparation of data for the analysis:

In [5]:
# Each frame below is an independent renamed copy of df_merge in which the
# selected score pair is exposed under the common names 'corr' and 'diff',
# so the test cells can address every scope the same way.

# all tasks
df_all = df_merge.rename(columns={'Corr_all': 'corr', 'Trud_all': 'diff'})

# tasks without redundancy
df_sumA = df_merge.rename(columns={'SumCorr_a': 'corr', 'SumTrud_a': 'diff'})

# tasks with redundancy
df_sumB = df_merge.rename(columns={'SumCorr_b': 'corr', 'SumTrud_b': 'diff'})

# task 1
df_sum1 = df_merge.rename(columns={'SumCorr_1': 'corr', 'SumTrud_1': 'diff'})

# task 2
df_sum2 = df_merge.rename(columns={'SumCorr_2': 'corr', 'SumTrud_2': 'diff'})

# task 3
df_sum3 = df_merge.rename(columns={'SumCorr_3': 'corr', 'SumTrud_3': 'diff'})

##### H0: The assessed task difficulty does not differ between the groups defined by the values of feature X.

##### H1: The assessed task difficulty differs between at least two of the groups defined by the values of feature X.

##### Significance level: $\alpha=0.05$

In [6]:
# Significance level; passed as alpha=ALPHA to every test call in this notebook.
ALPHA = 0.05

In [7]:
# Show the survey column names; the test cells below pick their `var` from these.
df_quest.columns

Index(['ID', 'Płeć', 'Rok_urodzenia', 'Kierunek_studiów', 'Rok_studiów',
       'Specjalność', 'Wady_wzroku', 'Daltonizm', 'Ilość_snu', 'JP_podst',
       'MAT_podst', 'JP_roz', 'MAT_roz', 'FIZ_roz', 'HIST_roz', 'Samopocz',
       'Stres', 'Zmęcz', 'Czyt_map', 'Podziel_uwg', 'Analit_myśl', 'Spostrzeg',
       'Czyt_ze_zroz', 'Wzrokowiec', 'Słuchowiec', 'Kinestetyk',
       'Quest1_after', 'Quest2_after', 'Rank_map', 'Rank_txt', 'Rank_plt',
       'Rank_tab', 'Data', 'Godzina', 'MAT', 'POL', 'HIST', 'Typ_uczenia'],
      dtype='object')

In [8]:
# Frames to test, in the order they are reported.
dfs = [df_all, df_sumA, df_sumB, df_sum1, df_sum2, df_sum3]

# Printed heading for each frame, keyed 1..6 (position in `dfs` plus one).
names = dict(enumerate(
    [
        'Zsumowane wszystkie grafiki:',
        'Zsumowane grafiki A:',
        'Zsumowane grafiki B:',
        'Zsumowane grafiki 1:',
        'Zsumowane grafiki 2:',
        'Zsumowane grafiki 3:',
    ],
    start=1,
))

In [9]:
# sex: compare the 'diff' score of the 'M' group against the 'K' group
var = 'Płeć'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    samples = [df.loc[df[var] == level, 'diff'] for level in ('M', 'K')]
    mann_whitney_test(samples, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
U-statistic: 143.0
p-value: 0.1528053578197892
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
U-statistic: 161.5
p-value: 0.34328943875180506
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
U-statistic: 135.5
p-value: 0.09861224981647494
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
U-statistic: 128.5
p-value: 0.05953075986836803
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
U-statistic: 148.0
p-value: 0.18919273653641733
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
U-statistic: 198.0
p-value: 0.9558883028260049
No reason to reject H0, accept H0.


In [10]:
# eye defects: compare the 'diff' score of the group coded 1 against the group coded 0
var = 'Wady_wzroku'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    samples = [df.loc[df[var] == level, 'diff'] for level in (1, 0)]
    mann_whitney_test(samples, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
U-statistic: 223.0
p-value: 0.39745334029926815
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
U-statistic: 197.0
p-value: 0.8977880121734001
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
U-statistic: 237.0
p-value: 0.21259837851487695
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
U-statistic: 215.5
p-value: 0.5107889940850671
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
U-statistic: 222.5
p-value: 0.39773647561841163
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
U-statistic: 206.5
p-value: 0.6960106723132915
No reason to reject H0, accept H0.


In [11]:
# people who wrote the extended matura exam ('VAL') vs those who did not ('NO_VAL')
# Polish language
var = 'POL'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    samples = [df.loc[df[var] == level, 'diff'] for level in ('VAL', 'NO_VAL')]
    mann_whitney_test(samples, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
U-statistic: 167.5
p-value: 0.37074835068742207
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
U-statistic: 171.0
p-value: 0.29919196701555795
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
U-statistic: 153.5
p-value: 0.6573159950447007
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
U-statistic: 129.5
p-value: 0.7499844035445999
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
U-statistic: 161.5
p-value: 0.47707165972839605
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
U-statistic: 186.0
p-value: 0.1320455730740121
No reason to reject H0, accept H0.


In [12]:
# Mathematics: extended exam written ('VAL') vs not written ('NO_VAL')
var = 'MAT'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    samples = [df.loc[df[var] == level, 'diff'] for level in ('VAL', 'NO_VAL')]
    mann_whitney_test(samples, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
U-statistic: 187.0
p-value: 0.7742198640107305
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
U-statistic: 198.0
p-value: 1.0
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
U-statistic: 180.5
p-value: 0.6391392858005757
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
U-statistic: 186.5
p-value: 0.7567837669083153
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
U-statistic: 187.5
p-value: 0.7813299413851593
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
U-statistic: 205.0
p-value: 0.858226662752887
No reason to reject H0, accept H0.


In [13]:
# History: extended exam written ('VAL') vs not written ('NO_VAL')
var = 'HIST'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    samples = [df.loc[df[var] == level, 'diff'] for level in ('VAL', 'NO_VAL')]
    mann_whitney_test(samples, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
U-statistic: 92.0
p-value: 0.3769942885834122
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
U-statistic: 100.0
p-value: 0.19987230251814758
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
U-statistic: 82.0
p-value: 0.6639008035149362
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
U-statistic: 63.5
p-value: 0.7087609305285979
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
U-statistic: 53.5
p-value: 0.40733946149215483
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
U-statistic: 118.5
p-value: 0.03604845847024687
[31mThere are grounds to reject H0, accept H1.[0m


In [14]:
# Post-hoc comparison for task 3 grouped by the 'HIST' flag.
# The column is named explicitly instead of reusing the global `var`, which is
# reassigned by almost every cell; running this cell out of order would
# otherwise silently test a different feature.
posthocNemenyi2_test(df_sum3, 'HIST')

+--------+----------+----------+
|        |   NO_VAL |      VAL |
| NO_VAL | 1        | [31m0.034076[0m |
+--------+----------+----------+
| VAL    | [31m0.034076[0m | 1        |
+--------+----------+----------+


In [15]:
# year of birth: one 'diff' sample per distinct value, passed to kruskal_wallis_test
var = 'Rok_urodzenia'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 3.4166517116250956
p-value: 0.4906642580694025
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 4.260339610538997
p-value: 0.371917750000771
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 2.3099202159795817
p-value: 0.6789635084273522
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 7.59297049569217
p-value: 0.10767887743137851
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 10.525382274492355
p-value: 0.03244914610404347
[31mThere are grounds to reject H0, accept H1.[0m
-------------------------------------
Zsumowane grafiki 3:
H: 2.951840292159447
p-value: 0.5659168934292141
No reason to reject H0, accept H0.


In [16]:
# Post-hoc comparison for task 2 grouped by year of birth.
# The column is named explicitly instead of reusing the global `var`, which is
# reassigned by almost every cell; running this cell out of order would
# otherwise silently test a different feature.
posthocNemenyi2_test(df_sum2, 'Rok_urodzenia')

+------+----------+----------+----------+----------+----------+
|      |     2002 |     2003 |     2001 |     2000 |     2004 |
| 2002 | 1        | 0.914119 | 0.748524 | 0.665974 | 0.066324 |
+------+----------+----------+----------+----------+----------+
| 2003 | 0.914119 | 1        | 0.984088 | 0.838973 | 0.189499 |
+------+----------+----------+----------+----------+----------+
| 2001 | 0.748524 | 0.984088 | 1        | 0.934406 | 0.461229 |
+------+----------+----------+----------+----------+----------+
| 2000 | 0.665974 | 0.838973 | 0.934406 | 1        | 0.999279 |
+------+----------+----------+----------+----------+----------+
| 2004 | 0.066324 | 0.189499 | 0.461229 | 0.999279 | 1        |
+------+----------+----------+----------+----------+----------+


In [17]:
# field of study: one 'diff' sample per distinct value, passed to kruskal_wallis_test
var = 'Kierunek_studiów'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 2.5027362992610716
p-value: 0.28611308345059944
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 3.3599698500651836
p-value: 0.18637678564220178
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 0.8429373672267334
p-value: 0.6560825342061488
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 0.5887171118824773
p-value: 0.7450093160309443
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 1.047612469437648
p-value: 0.5922619642960079
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 3.1191370302914057
p-value: 0.21022676129712384
No reason to reject H0, accept H0.


In [18]:
# year of study: one 'diff' sample per distinct value, passed to kruskal_wallis_test
var = 'Rok_studiów'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 3.1760447901049726
p-value: 0.2043292950644148
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 4.706034002229639
p-value: 0.09508186694981491
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 2.4638558442630805
p-value: 0.2917296032113091
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 1.9432665163293257
p-value: 0.37846440296929207
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 2.91261427660253
p-value: 0.2330954766542771
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 3.0356595092024667
p-value: 0.2191870608540109
No reason to reject H0, accept H0.


In [19]:
# specialization: one 'diff' sample per distinct value, passed to kruskal_wallis_test
var = 'Specjalność'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 2.0108374384236676
p-value: 0.5701604033777508
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 4.307132768361599
p-value: 0.23015235183000057
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 1.042499114844859
p-value: 0.790969930838497
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 0.8983470000335592
p-value: 0.82582669760532
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 2.6561373268133797
p-value: 0.44773283160484845
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 1.6402239902863025
p-value: 0.6503040781249882
No reason to reject H0, accept H0.


In [20]:
# hours of sleep: one 'diff' sample per distinct value, passed to kruskal_wallis_test
var = 'Ilość_snu'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 4.172812173458719
p-value: 0.3831231689897035
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 6.519485599146643
p-value: 0.1635667660747716
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 1.0571810222737408
p-value: 0.9010050334441351
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 4.267387426940692
p-value: 0.37102668025039726
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 1.3761805586426838
p-value: 0.8483242028540706
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 6.016921999907062
p-value: 0.19788808747849176
No reason to reject H0, accept H0.


In [21]:
# matura exam, Polish language, basic level: one 'diff' sample per distinct range
var = 'JP_podst'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 5.568155272719159
p-value: 0.13461853277974856
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 3.7015922646262496
p-value: 0.2955419642804597
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 4.140798173266982
p-value: 0.24665635348832698
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 1.1406757692512821
p-value: 0.7672649594961272
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 5.430865525672392
p-value: 0.14283259732888848
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 3.1870500270660456
p-value: 0.363675059983752
No reason to reject H0, accept H0.


In [22]:
# matura exam, Mathematics, basic level: one 'diff' sample per distinct range
var = 'MAT_podst'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 3.930572660098535
p-value: 0.26905927901740023
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 3.3767926988266104
p-value: 0.3370967437423226
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 3.2113207319428545
p-value: 0.36017706954147777
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 4.308481738605507
p-value: 0.23002274970820727
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 2.5822799511002366
p-value: 0.46060457258483767
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 1.306059480444788
p-value: 0.7276946979844789
No reason to reject H0, accept H0.


In [23]:
# matura exam, Polish language, extended level: one 'diff' sample per distinct range
var = 'JP_roz'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 4.328746225965354
p-value: 0.22808390213431287
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 3.111797114858893
p-value: 0.374707384833796
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 3.671954547153586
p-value: 0.29913537529937223
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 2.807647942773506
p-value: 0.42224247647945623
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 1.086377159081951
p-value: 0.7803636678451314
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 5.91081288343558
p-value: 0.11603092159336194
No reason to reject H0, accept H0.


In [24]:
# matura exam, Mathematics, extended level: one 'diff' sample per distinct range
var = 'MAT_roz'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 5.061371100164221
p-value: 0.28105820631988676
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 5.290286831812276
p-value: 0.2587875807612174
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 3.8392847946440107
p-value: 0.42819299796579535
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 4.792477445752438
p-value: 0.3092608567590565
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 3.100738386308093
p-value: 0.5411108824414991
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 1.7398757029652356
p-value: 0.783462049367232
No reason to reject H0, accept H0.


In [25]:
# matura exam, History, extended level: one 'diff' sample per distinct range
var = 'HIST_roz'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 2.360426929392467
p-value: 0.3072131524389508
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 2.548457192524996
p-value: 0.27964660547566594
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 1.9063988670014085
p-value: 0.38550564857167036
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 0.9359132374149152
p-value: 0.6262806919341646
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 3.052033414832965
p-value: 0.21739991236141346
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 4.896029045245403
p-value: 0.08646509066565125
No reason to reject H0, accept H0.


In [26]:
# well-being: one 'diff' sample per distinct answer, passed to kruskal_wallis_test
var = 'Samopocz'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 5.857132594417089
p-value: 0.11877173846793515
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 7.094611038678855
p-value: 0.06894256035247275
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 3.896171301660884
p-value: 0.27289622392344237
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 0.12284099674683249
p-value: 0.9889621286520118
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 13.768515077424624
p-value: 0.003237790960749848
[31mThere are grounds to reject H0, accept H1.[0m
-------------------------------------
Zsumowane grafiki 3:
H: 11.339442740286295
p-value: 0.010025099161899045
[31mThere are grounds to reject H0, accept H1.[0m


In [27]:
# Post-hoc comparisons grouped by well-being, for task 2 and then task 3.
# The column is named explicitly instead of reusing the global `var`, which is
# reassigned by almost every cell; running this cell out of order would
# otherwise silently test a different feature.
posthocNemenyi2_test(df_sum2, 'Samopocz')
print('-----------------------')
posthocNemenyi2_test(df_sum3, 'Samopocz')

+----+----------+----------+----------+----------+
|    |        4 |        3 |        5 |        2 |
|  4 | 1        | [31m0.013977[0m | 0.866504 | 0.125671 |
+----+----------+----------+----------+----------+
|  3 | [31m0.013977[0m | 1        | 0.174463 | 0.963799 |
+----+----------+----------+----------+----------+
|  5 | 0.866504 | 0.174463 | 1        | 0.504105 |
+----+----------+----------+----------+----------+
|  2 | 0.125671 | 0.963799 | 0.504105 | 1        |
+----+----------+----------+----------+----------+
-----------------------
+----+----------+----------+----------+----------+
|    |        4 |        3 |        5 |        2 |
|  4 | 1        | 0.907294 | [31m0.011354[0m | 0.650858 |
+----+----------+----------+----------+----------+
|  3 | 0.907294 | 1        | 0.317472 | 0.971625 |
+----+----------+----------+----------+----------+
|  5 | [31m0.011354[0m | 0.317472 | 1        | 0.669576 |
+----+----------+----------+----------+----------+
|  2 | 0.650858 | 0.97

In [28]:
# stress: one 'diff' sample per distinct answer, passed to kruskal_wallis_test
var = 'Stres'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 4.573050082101809
p-value: 0.333973292094693
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 4.0041720990873735
p-value: 0.4054415119607423
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 3.1660615424230887
p-value: 0.5304299662800964
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 0.6871029949357902
p-value: 0.9529098095149333
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 8.499160554197232
p-value: 0.07491267696507058
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 2.187353016359918
p-value: 0.7013453269492258
No reason to reject H0, accept H0.


In [29]:
# tiredness: one 'diff' sample per distinct answer, passed to kruskal_wallis_test
var = 'Zmęcz'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 12.216923234811162
p-value: 0.015809001818138834
[31mThere are grounds to reject H0, accept H1.[0m
-------------------------------------
Zsumowane grafiki A:
H: 13.559343763581056
p-value: 0.008842746866751508
[31mThere are grounds to reject H0, accept H1.[0m
-------------------------------------
Zsumowane grafiki B:
H: 6.9963499420625865
p-value: 0.13608124001885877
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 5.456418318408968
p-value: 0.24358698824116382
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 3.1813977180114055
p-value: 0.5279407645298922
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 6.800190918967282
p-value: 0.1468315564869646
No reason to reject H0, accept H0.


In [30]:
# Post-hoc comparisons grouped by tiredness, for all tasks and then the A tasks.
# The column is named explicitly instead of reusing the global `var`, which is
# reassigned by almost every cell; running this cell out of order would
# otherwise silently test a different feature.
posthocNemenyi2_test(df_all, 'Zmęcz')
# Separator between the two tables, matching the other two-table post-hoc cell.
print('-----------------------')
posthocNemenyi2_test(df_sumA, 'Zmęcz')

+----+----------+----------+----------+----------+----------+
|    |        3 |        2 |        4 |        1 |        5 |
|  3 | 1        | 0.997688 | 0.286521 | 0.81744  | 0.986519 |
+----+----------+----------+----------+----------+----------+
|  2 | 0.997688 | 1        | 0.122895 | 0.919459 | 0.948749 |
+----+----------+----------+----------+----------+----------+
|  4 | 0.286521 | 0.122895 | 1        | [31m0.039471[0m | 0.921473 |
+----+----------+----------+----------+----------+----------+
|  1 | 0.81744  | 0.919459 | [31m0.039471[0m | 1        | 0.712216 |
+----+----------+----------+----------+----------+----------+
|  5 | 0.986519 | 0.948749 | 0.921473 | 0.712216 | 1        |
+----+----------+----------+----------+----------+----------+
+----+----------+----------+----------+----------+----------+
|    |        3 |        2 |        4 |        1 |        5 |
|  3 | 1        | 0.977702 | 0.365277 | 0.64931  | 0.986399 |
+----+----------+----------+----------+----------+--

In [31]:
# map-reading skills: one 'diff' sample per distinct answer, passed to kruskal_wallis_test
var = 'Czyt_map'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 0.46507389162562046
p-value: 0.7925204684930018
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 0.4934159061277776
p-value: 0.7813688565931339
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 0.46742757821554387
p-value: 0.7915883446829433
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 4.070808934500467
p-value: 0.130627637216422
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 0.309978484107582
p-value: 0.8564243908015836
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 1.0687615030674826
p-value: 0.5860320772705431
No reason to reject H0, accept H0.


In [32]:
# divided attention: one 'diff' sample per distinct answer, passed to kruskal_wallis_test
var = 'Podziel_uwg'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 7.0765745953553845
p-value: 0.13189651989755047
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 4.314195691314323
p-value: 0.365150879133989
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 6.945082445879101
p-value: 0.13881898376755455
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 4.013376836799728
p-value: 0.4041985188094168
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 4.473394749097701
p-value: 0.3457138416204418
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 7.007946017747593
p-value: 0.1354689100986426
No reason to reject H0, accept H0.


In [33]:
# analytical thinking: one 'diff' sample per distinct answer, passed to kruskal_wallis_test
var = 'Analit_myśl'
param = 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    levels = df[var].unique()
    data = [df.loc[df[var] == level, param] for level in levels]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 2.838675843213051
p-value: 0.41717269483928365
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 1.8118770910920852
p-value: 0.6123537646450705
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 5.082227409177463
p-value: 0.16587412380439587
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 2.7703697498922084
p-value: 0.42840083909083393
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 0.10231575827766066
p-value: 0.9915580830634744
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 4.17039369722411
p-value: 0.24364280357259796
No reason to reject H0, accept H0.


In [34]:
# Perceptiveness (Spostrzeg): Kruskal-Wallis test of `diff` between the
# groups of respondents sharing the same answer, for every frame in `dfs`.
var, param = 'Spostrzeg', 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    # one sample of `param` values per distinct value of `var`
    data = [df.loc[df[var] == category, param] for category in df[var].unique()]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 4.735775285819684
p-value: 0.19219939593349217
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 5.931322727621361
p-value: 0.11499975196608873
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 3.244465639542453
p-value: 0.35544708446883766
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 2.8481624607969582
p-value: 0.4156327635309951
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 1.5490686492423287
p-value: 0.6709924815985111
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 10.451229319852947
p-value: 0.015095412780192365
[31mThere are grounds to reject H0, accept H1.[0m


In [35]:
# Post-hoc Nemenyi pairwise comparison for perceptiveness on `df_sum3`
# (presumably the "Zsumowane grafiki 3" set, the only one where the
# Kruskal-Wallis test above rejected H0 for this variable).
# The column is named explicitly so this cell does not depend on whatever
# value `var` happens to hold from a previously executed cell.
posthocNemenyi2_test(df_sum3, 'Spostrzeg')

+----+----------+----------+----------+----------+
|    |        4 |        5 |        3 |        2 |
|  4 | 1        | 0.141555 | 0.999961 | 0.502265 |
+----+----------+----------+----------+----------+
|  5 | 0.141555 | 1        | 0.184509 | [31m0.027199[0m |
+----+----------+----------+----------+----------+
|  3 | 0.999961 | 0.184509 | 1        | 0.565824 |
+----+----------+----------+----------+----------+
|  2 | 0.502265 | [31m0.027199[0m | 0.565824 | 1        |
+----+----------+----------+----------+----------+


In [36]:
# Reading with understanding (Czyt_ze_zroz): Kruskal-Wallis test of `diff`
# between the groups of respondents sharing the same answer, for every frame in `dfs`.
var, param = 'Czyt_ze_zroz', 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    # one sample of `param` values per distinct value of `var`
    data = [df.loc[df[var] == category, param] for category in df[var].unique()]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 2.910394114542352
p-value: 0.5729309752769332
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 1.3710626362670004
p-value: 0.8492085493255179
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 5.052637045548233
p-value: 0.28193912294368495
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 5.955244058082035
p-value: 0.20251569921610246
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 3.8357632368350885
p-value: 0.42868892545586057
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 1.707288820436153
p-value: 0.78939317825957
No reason to reject H0, accept H0.


In [37]:
# Type of learning, visual (Wzrokowiec): Kruskal-Wallis test of `diff`
# between the groups of respondents sharing the same answer, for every frame in `dfs`.
var, param = 'Wzrokowiec', 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    # one sample of `param` values per distinct value of `var`
    data = [df.loc[df[var] == category, param] for category in df[var].unique()]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 4.684517945109088
p-value: 0.19641048387003973
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 8.748407524678726
p-value: 0.032829895839042555
[31mThere are grounds to reject H0, accept H1.[0m
-------------------------------------
Zsumowane grafiki B:
H: 0.6903253692361342
p-value: 0.8754771266264871
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 4.1435767706821265
p-value: 0.2463719820310401
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 0.622368378158121
p-value: 0.8912930912867831
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 5.343033112401401
p-value: 0.14833428439008087
No reason to reject H0, accept H0.


In [38]:
# Post-hoc Nemenyi pairwise comparison for the visual learning type on
# `df_sumA` (presumably the "Zsumowane grafiki A" set, the only one where the
# Kruskal-Wallis test above rejected H0 for this variable).
# The column is named explicitly so this cell does not depend on whatever
# value `var` happens to hold from a previously executed cell.
posthocNemenyi2_test(df_sumA, 'Wzrokowiec')

+----+----------+----------+----------+----------+
|    |        4 |        3 |        5 |        2 |
|  4 | 1        | 0.524298 | 0.339003 | 0.481383 |
+----+----------+----------+----------+----------+
|  3 | 0.524298 | 1        | 0.999562 | 0.146025 |
+----+----------+----------+----------+----------+
|  5 | 0.339003 | 0.999562 | 1        | 0.107862 |
+----+----------+----------+----------+----------+
|  2 | 0.481383 | 0.146025 | 0.107862 | 1        |
+----+----------+----------+----------+----------+


In [39]:
# Type of learning, auditory (Słuchowiec): Kruskal-Wallis test of `diff`
# between the groups of respondents sharing the same answer, for every frame in `dfs`.
var, param = 'Słuchowiec', 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    # one sample of `param` values per distinct value of `var`
    data = [df.loc[df[var] == category, param] for category in df[var].unique()]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 4.314140570021123
p-value: 0.3651577553065407
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 2.2611985472155256
p-value: 0.6878420923355089
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 6.886481094465612
p-value: 0.14201017286940953
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 3.3811949319420016
p-value: 0.4961712296765247
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 5.422673244848071
p-value: 0.24661064179628575
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 1.260205035330874
p-value: 0.868090236562957
No reason to reject H0, accept H0.


In [40]:
# Type of learning, kinesthetic (Kinestetyk): Kruskal-Wallis test of `diff`
# between the groups of respondents sharing the same answer, for every frame in `dfs`.
var, param = 'Kinestetyk', 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    # one sample of `param` values per distinct value of `var`
    data = [df.loc[df[var] == category, param] for category in df[var].unique()]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 4.962024292475618
p-value: 0.2912162827694289
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 9.128198737121961
p-value: 0.057973791835403965
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 2.9011367853437333
p-value: 0.5745038972373079
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 0.4117657909325229
p-value: 0.9815023180959477
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 3.017879668248743
p-value: 0.5548377518135508
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 7.188967111000838
p-value: 0.12623283269593288
No reason to reject H0, accept H0.


In [41]:
# Dominant type of learning (Typ_uczenia): Kruskal-Wallis test of `diff`
# between the groups of respondents sharing the same value, for every frame in `dfs`.
var, param = 'Typ_uczenia', 'diff'
for i, df in enumerate(dfs):
    print('-------------------------------------', names[i+1], sep='\n')
    # one sample of `param` values per distinct value of `var`
    data = [df.loc[df[var] == category, param] for category in df[var].unique()]
    kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)

-------------------------------------
Zsumowane wszystkie grafiki:
H: 1.6168383340797354
p-value: 0.6555778958722058
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki A:
H: 1.9000829678795972
p-value: 0.5934015337211782
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki B:
H: 2.2907712700586482
p-value: 0.5142911918238899
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 1:
H: 1.4502510770244166
p-value: 0.6937990125656261
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 2:
H: 2.197418981996001
p-value: 0.532456930184467
No reason to reject H0, accept H0.
-------------------------------------
Zsumowane grafiki 3:
H: 0.33805075292805103
p-value: 0.9527202618718195
No reason to reject H0, accept H0.


### Summary:

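Statistically significant (the summed-graphics set is given in parentheses, followed by the groups that differ in the post-hoc test):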

- matura exam, history, wrote / did not write (3)
- year of birth (2 - weakly differentiates)
- well-being (2 - 3 vs 4, 3 - 5 vs 4)
- perceptiveness (3 - 2 vs 5)
- type of learning: visual (A - weakly differentiates)

Each graphic separately:

In [42]:
# Survey variables to be tested against the difficulty rating of each graphic.
columns = [
    'Płeć', 'Rok_urodzenia', 'Kierunek_studiów', 'Rok_studiów', 'Specjalność',
    'Wady_wzroku', 'Ilość_snu',
    'JP_podst', 'MAT_podst', 'JP_roz', 'MAT_roz', 'HIST_roz',
    'Samopocz', 'Stres', 'Zmęcz',
    'Czyt_map', 'Podziel_uwg', 'Analit_myśl', 'Spostrzeg', 'Czyt_ze_zroz',
    'Wzrokowiec', 'Słuchowiec', 'Kinestetyk',
    'MAT', 'POL', 'HIST', 'Typ_uczenia',
]

# Identifiers of the six individual graphics: 1a, 1b, 2a, 2b, 3a, 3b.
prefix = [f'{number}{letter}' for number in (1, 2, 3) for letter in 'ab']

In [43]:
# Per-graphic analysis: for every survey variable and every graphic, compare
# the `<prefix>_trud` values between the groups defined by that variable.
for var in columns:
    print('##############################################################', var, sep='\n')
    for pref in prefix:
        print('-------------------------------------', pref, sep='\n')
        param = pref + '_trud'
        # one sample of `param` values per distinct value of `var`
        data = [df_merge.loc[df_merge[var] == category, param] for category in df_merge[var].unique()]

        # The variables listed below go through the Mann-Whitney test
        # (presumably because they split respondents into two groups);
        # every other variable goes through the Kruskal-Wallis test.
        if var not in ['Płeć', 'Wady_wzroku', 'MAT', 'POL', 'HIST']:
            kruskal_wallis_test(data, print_flag=True, alpha=ALPHA)
        else:
            mann_whitney_test(data, print_flag=True, alpha=ALPHA)

##############################################################
Płeć
-------------------------------------
1a
U-statistic: 170.0
p-value: 0.472984485873845
No reason to reject H0, accept H0.
-------------------------------------
1b
U-statistic: 123.0
p-value: 0.03300139133560838
[31mThere are grounds to reject H0, accept H1.[0m
-------------------------------------
2a
U-statistic: 144.0
p-value: 0.1450988351231113
No reason to reject H0, accept H0.
-------------------------------------
2b
U-statistic: 172.5
p-value: 0.5195266959385567
No reason to reject H0, accept H0.
-------------------------------------
3a
U-statistic: 222.5
p-value: 0.4530956659255446
No reason to reject H0, accept H0.
-------------------------------------
3b
U-statistic: 167.5
p-value: 0.4357450402677764
No reason to reject H0, accept H0.
##############################################################
Rok_urodzenia
-------------------------------------
1a
H: 3.199817612214409
p-value: 0.5249604060568736
No reason 

In [44]:
# Mean of SumCorr_3 and Trud_all for each value of Spostrzeg (perceptiveness).
df_merge.groupby('Spostrzeg')[['SumCorr_3', 'Trud_all']].mean()

Unnamed: 0_level_0,SumCorr_3,Trud_all
Spostrzeg,Unnamed: 1_level_1,Unnamed: 2_level_1
2,1.25,17.0
3,0.909091,15.0
4,1.117647,14.882353
5,0.75,12.625


In [45]:
# Post-hoc Nemenyi pairwise comparisons on the merged frame for four
# (grouping variable, `<graphic>_trud` column) pairs: year of birth on 2b and
# the JP_roz / MAT_roz / HIST_roz ranges on 3b, 1b and 3a respectively.
# The bare print() calls only put a blank line between the printed tables.
posthocNemenyi2_test(df_merge, 'Rok_urodzenia', '2b_trud')
print()
posthocNemenyi2_test(df_merge, 'JP_roz', '3b_trud')
print()
posthocNemenyi2_test(df_merge, 'MAT_roz', '1b_trud')
print()
posthocNemenyi2_test(df_merge, 'HIST_roz', '3a_trud')

+------+----------+----------+----------+----------+----------+
|      |     2002 |     2003 |     2001 |     2000 |     2004 |
| 2002 | 1        | 0.889256 | 0.488802 | 0.575202 | 0.113956 |
+------+----------+----------+----------+----------+----------+
| 2003 | 0.889256 | 1        | 0.894945 | 0.779299 | 0.312899 |
+------+----------+----------+----------+----------+----------+
| 2001 | 0.488802 | 0.894945 | 1        | 0.945276 | 0.765875 |
+------+----------+----------+----------+----------+----------+
| 2000 | 0.575202 | 0.779299 | 0.945276 | 1        | 1        |
+------+----------+----------+----------+----------+----------+
| 2004 | 0.113956 | 0.312899 | 0.765875 | 1        | 1        |
+------+----------+----------+----------+----------+----------+

+----+----------+----------+----------+----------+
|    |        4 |        3 |        0 |        2 |
|  4 | 1        | 0.644832 | 0.969223 | 0.757358 |
+----+----------+----------+----------+----------+
|  3 | 0.644832 | 1        

In [46]:
# Post-hoc Nemenyi pairwise comparisons between the Samopocz (well-being)
# groups, run on the merged frame for the `_trud` columns of graphics 2a, 2b
# and 3a. The bare print() calls only put a blank line between the tables.
posthocNemenyi2_test(df_merge, 'Samopocz', '2a_trud')
print()
posthocNemenyi2_test(df_merge, 'Samopocz', '2b_trud')
print()
posthocNemenyi2_test(df_merge, 'Samopocz', '3a_trud')

+----+----------+----------+----------+----------+
|    |        4 |        3 |        5 |        2 |
|  4 | 1        | [31m0.046144[0m | 0.860614 | 0.717672 |
+----+----------+----------+----------+----------+
|  3 | [31m0.046144[0m | 1        | 0.337881 | 0.688035 |
+----+----------+----------+----------+----------+
|  5 | 0.860614 | 0.337881 | 1        | 0.98177  |
+----+----------+----------+----------+----------+
|  2 | 0.717672 | 0.688035 | 0.98177  | 1        |
+----+----------+----------+----------+----------+

+----+----------+----------+----------+----------+
|    |        4 |        3 |        5 |        2 |
|  4 | 1        | 0.300598 | 0.983318 | 0.084164 |
+----+----------+----------+----------+----------+
|  3 | 0.300598 | 1        | 0.591272 | 0.933431 |
+----+----------+----------+----------+----------+
|  5 | 0.983318 | 0.591272 | 1        | 0.252931 |
+----+----------+----------+----------+----------+
|  2 | 0.084164 | 0.933431 | 0.252931 | 1        |
+----+------

In [47]:
# Post-hoc Nemenyi pairwise comparisons on the merged frame: Stres (stress)
# on 2b, Spostrzeg (perceptiveness) on 3a and Słuchowiec (auditory learner)
# on 2b. The bare print() calls only put a blank line between the tables.
posthocNemenyi2_test(df_merge, 'Stres', '2b_trud')
print()
posthocNemenyi2_test(df_merge, 'Spostrzeg', '3a_trud')
print()
posthocNemenyi2_test(df_merge, 'Słuchowiec', '2b_trud')

+----+----------+----------+----------+----------+----------+
|    |        2 |        1 |        3 |        4 |        5 |
|  2 | 1        | 0.707825 | 0.982759 | 0.237241 | 0.928157 |
+----+----------+----------+----------+----------+----------+
|  1 | 0.707825 | 1        | 0.967318 | [31m0.028951[0m | 0.581118 |
+----+----------+----------+----------+----------+----------+
|  3 | 0.982759 | 0.967318 | 1        | 0.13391  | 0.819844 |
+----+----------+----------+----------+----------+----------+
|  4 | 0.237241 | [31m0.028951[0m | 0.13391  | 1        | 0.984722 |
+----+----------+----------+----------+----------+----------+
|  5 | 0.928157 | 0.581118 | 0.819844 | 0.984722 | 1        |
+----+----------+----------+----------+----------+----------+

+----+----------+----------+----------+----------+
|    |        4 |        5 |        3 |        2 |
|  4 | 1        | 0.224518 | 0.964116 | 0.212392 |
+----+----------+----------+----------+----------+
|  5 | 0.224518 | 1        | 0.13

### Summary:
Statistically significant:
- sex (1b)
- year of birth (2b) - weakly differentiates
- JP_roz (3b) - weakly differentiates
- MAT_roz (1b) - 1 vs 4 !!!
- HIST_roz (3a) - weakly differentiates
- Well-being (2a - 3 vs 4, 2b - weakly differentiates, 3a - 5 vs 4)
- Stress (2b) - 1 vs 4
- Perceptiveness (3a) - 2 vs 5
- Auditory (2b) - weakly differentiates
- POL - wrote/did not write (2a)
- HIST - wrote/did not write (3a)