In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import kruskal, mannwhitneyu

from itertools import product
 

In [2]:
df = pd.read_csv('data/IST_corrected_clean.csv',  index_col= [0])

In [4]:
def print_stats(stat, p):
    print('stat = %.3f, p = %.3f' % (stat, p))
    if p > 0.05:
	    print('Probably the same distribution')
    else:
	    print('Probably different distributions')

# Hypothesis testing

+ aspirin vs non-aspirin
+ heparin vs non-heparin
    + non vs low vs medium heparin
+ combination of aspirin and heparin
    + (significantly) different outcome than separate effects of aspirin and heparin

## Aspirin vs  No aspirin



### Outcome after 14 days

The endpoint after 14 days is 'DDEAD'. It includes information whether a patient has passed away within 14 days or not.

Group patients and their status concerning Aspirin treatment (Y/N) - and their outcome after 14 days.

In [5]:
list_asp14 = df.groupby('DASP14')['DDEAD'].apply(list)

In [6]:
list_asp14

DASP14
N    [N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, ...
Y    [N, N, N, N, N, N, Y, N, N, N, N, N, N, N, N, ...
Name: DDEAD, dtype: object

In [7]:
stat, p = kruskal(list_asp14[0], list_asp14[1])
print('Kruskal Wallis')
print_stats(stat, p)

Kruskal Wallis
stat = 10.229, p = 0.001
Probably different distributions


In [8]:
group_asp = df.groupby('DASP14')
group_asp['DDEAD'].value_counts()

DASP14  DDEAD
N       N        8318
        Y        1102
Y       N        7429
        Y         843
Name: DDEAD, dtype: int64

In [9]:
asp14_nn = group_asp['DDEAD'].value_counts()[0]
asp14_ny = group_asp['DDEAD'].value_counts()[1]
asp14_yn = group_asp['DDEAD'].value_counts()[2]
asp14_yy = group_asp['DDEAD'].value_counts()[3]

In [10]:
print(f"--- No aspirin during study ---")
print(f'{round((asp14_nn / (asp14_nn + asp14_ny) * 100),1)} % of patients living 14 days after their stroke')
print()
print(f"--- Aspirin during study ---")
print(f'{round((asp14_yn/ (asp14_yn + asp14_yy) * 100),1)} % of patients living 14 days after their stroke')

--- No aspirin during study ---
88.3 % of patients living 14 days after their stroke

--- Aspirin during study ---
89.8 % of patients living 14 days after their stroke


When comparing the groups by implementing the Kruskal Wallis Test, we see that there is a significant difference between patients who were given Aspirin in comparison to those who didn't, after 14 days. More patients that did not receive Aspirin passed away compared to those who received Aspirin.


### Outcome after 6 months

There are two endpoints that are interesting after 6 months, that is 'FRECOVER', which is the information of full recovery, as well as 'FDEAD', which is information on patients who have passed away.

In [11]:
# FRECOVER (Y) is not equal to "recovered" in OCCODE
r_group = df.groupby('FRECOVER')
r_group.OCCODE.value_counts()

FRECOVER  OCCODE       
N         Dependent        6594
          Not recovered    3562
          Dead                1
Y         Recovered        2999
          Dependent         374
Name: OCCODE, dtype: int64

FRECOVER (Y) is not equal to "recovered" in OCCODE. So we create a new variable based on OCCODE.

In [12]:
df['RECO'] = (df.OCCODE == 'Recovered')

In [13]:
list_asp6 = df.groupby('DASP14')['RECO'].apply(list)
print(list_asp6)
print()
stat, p = kruskal(list_asp6[0], list_asp6[1])
print('Kruskal Wallis')
print_stats(stat, p)

DASP14
N    [False, True, False, False, False, True, False...
Y    [False, False, False, False, False, False, Fal...
Name: RECO, dtype: object

Kruskal Wallis
stat = 17.712, p = 0.000
Probably different distributions


In [14]:
group_asp['RECO'].value_counts()

DASP14  RECO 
N       False    7928
        True     1492
Y       False    6765
        True     1507
Name: RECO, dtype: int64

In [15]:
asp6_nf = group_asp['RECO'].value_counts()[0]
asp6_nt = group_asp['RECO'].value_counts()[1]
asp6_yf = group_asp['RECO'].value_counts()[2]
asp6_yt = group_asp['RECO'].value_counts()[3]

In [16]:
print(f"--- No aspirin during study ---")
print(f'{round((asp6_nf / (asp6_nf + asp6_nt) * 100),1)} % of patients living 14 days after their stroke')
print()
print(f"--- Aspirin during study ---")
print(f'{round((asp6_yf / (asp6_yf + asp6_yt) * 100),1)} % of patients living 14 days after their stroke')

--- No aspirin during study ---
84.2 % of patients living 14 days after their stroke

--- Aspirin during study ---
81.8 % of patients living 14 days after their stroke


Patients who did not receive aspirin during the study were more likely to recover within 6 months after their stroke. In slight contrast, there were more patients alive after 6 months who did receive aspirin.

In [17]:
list_asp6 = df.groupby('DASP14')['FDEAD'].apply(list)
print(list_asp6)
print()
stat, p = kruskal(list_asp6[0], list_asp6[1])
print('Kruskal Wallis')
print_stats(stat, p)

DASP14
N    [N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, ...
Y    [N, N, N, N, N, N, Y, N, N, N, N, N, N, N, N, ...
Name: FDEAD, dtype: object

Kruskal Wallis
stat = 20.170, p = 0.000
Probably different distributions


In [18]:
group_asp['FDEAD'].value_counts()

DASP14  FDEAD
N       N        7077
        Y        2343
Y       N        6452
        Y        1820
Name: FDEAD, dtype: int64

In [19]:
asp6_nn = group_asp['FDEAD'].value_counts()[0]
asp6_ny = group_asp['FDEAD'].value_counts()[1]
asp6_yn = group_asp['FDEAD'].value_counts()[2]
asp6_yy = group_asp['FDEAD'].value_counts()[3]

In [20]:
print(f"--- No aspirin during study ---")
print(f'{round((asp6_nn / (asp6_nn + asp6_ny) * 100),1)} % of patients living 6 months after their stroke')
print()
print(f"--- Aspirin during study ---")
print(f'{round((asp6_yn/ (asp6_yn + asp6_yy) * 100),1)} % of patients living 6 months after their stroke')

--- No aspirin during study ---
75.1 % of patients living 6 months after their stroke

--- Aspirin during study ---
78.0 % of patients living 6 months after their stroke


## Heparin vs non-heparin


### Outcome after 14 days

In [21]:
list_hep14 = df.groupby(by = ['RXHEP14'])['DDEAD'].apply(list)

In [22]:
list_hep14

RXHEP14
N    [N, N, N, N, N, N, Y, N, N, N, N, N, N, N, N, ...
Y    [N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, ...
Name: DDEAD, dtype: object

In [23]:
stat, p = kruskal(list_hep14[0], list_hep14[1])
print_stats(stat, p)

stat = 0.657, p = 0.418
Probably the same distribution


After 14 days: No difference between heparin and no heparin.

### Outcome after 6 months

In [24]:
list_hep6 = df.groupby(by = ['RXHEP14'])['RECO'].apply(list)
stat, p = kruskal(list_hep6[0] , list_hep6[1])
print_stats(stat, p)

stat = 5.983, p = 0.014
Probably different distributions


In [25]:
group_hep = df.groupby('RXHEP14')
group_hep['RECO'].value_counts()

RXHEP14  RECO 
N        False    8153
         True     1591
Y        False    6540
         True     1408
Name: RECO, dtype: int64

In [26]:
hep6_nf = group_hep['RECO'].value_counts()[0]
hep6_nt = group_hep['RECO'].value_counts()[1]
hep6_yf = group_hep['RECO'].value_counts()[2]
hep6_yt = group_hep['RECO'].value_counts()[3]

In [27]:
print(f"--- No heparin during study ---")
print(f'{round((hep6_nf / (hep6_nf + hep6_nt) * 100),1)} % of patients living 6 months after their stroke')
print()
print(f"--- Heparin during study ---")
print(f'{round((hep6_yf / (hep6_yf + hep6_yt) * 100),1)} % of patients living 6 months after their stroke')

--- No heparin during study ---
83.7 % of patients living 6 months after their stroke

--- Heparin during study ---
82.3 % of patients living 6 months after their stroke


Patients who did not receive heparin during the study were more likely to be alive after 6 months. Though, the difference seems to be minor. Heparin treatment did not have an effect on the outcome 'FDEAD'.

In [28]:
list_hep6 = df.groupby(by = ['RXHEP14'])['FDEAD'].apply(list)
stat, p = kruskal(list_hep6[0] , list_hep6[1])
print_stats(stat, p)

stat = 0.683, p = 0.409
Probably the same distribution


## No vs low vs medium heparin


## Outcome after 14 days

In [29]:
list2_hep14 = df.groupby(by = ['HEP14'])['DDEAD'].apply(list)

In [30]:
list2_hep14 

HEP14
L    [N, N, N, N, N, N, N, N, Y, N, N, N, N, N, N, ...
M    [N, N, N, N, N, N, N, N, Y, N, N, N, N, N, N, ...
N    [N, N, N, N, N, N, Y, N, N, N, N, N, N, N, N, ...
Name: DDEAD, dtype: object

In [31]:
stat, p = kruskal(list2_hep14[0], list2_hep14[1], list2_hep14[2])
print_stats(stat, p)

stat = 1.507, p = 0.471
Probably the same distribution


After 14 days: No difference between low, medium and no heparin.

### Outcome after 6 months

In [32]:
list2_hep6 = df.groupby(by = ['HEP14'])['RECO'].apply(list)
stat, p = kruskal(list2_hep6[0], list2_hep6[1], list2_hep6[2])
print_stats(stat, p)

stat = 6.073, p = 0.048
Probably different distributions


In [33]:
group_hep2 = df.groupby('HEP14')
group_hep2['RECO'].value_counts()

HEP14  RECO 
L      False    3354
       True      716
M      False    3186
       True      692
N      False    8153
       True     1591
Name: RECO, dtype: int64

In [34]:
hep6_lf = group_hep2['RECO'].value_counts()[0]
hep6_lt = group_hep2['RECO'].value_counts()[1]
hep6_mf = group_hep2['RECO'].value_counts()[2]
hep6_mt = group_hep2['RECO'].value_counts()[3]
hep6_nf = group_hep2['RECO'].value_counts()[4]
hep6_nt = group_hep2['RECO'].value_counts()[5]

In [35]:
print(f"--- No heparin during study ---")
print(f'{round((hep6_nf / (hep6_nf + hep6_nt) * 100),1)} % of patients living 6 months after their stroke')
print()
print(f"--- Low heparin during study ---")
print(f'{round((hep6_lf / (hep6_lf + hep6_lt) * 100),1)} % of patients living 6 months after their stroke')
print()
print(f"--- Medium heparin during study ---")
print(f'{round((hep6_mf / (hep6_mf + hep6_mt) * 100),1)} % of patients living 6 months after their stroke')

--- No heparin during study ---
83.7 % of patients living 6 months after their stroke

--- Low heparin during study ---
82.4 % of patients living 6 months after their stroke

--- Medium heparin during study ---
82.2 % of patients living 6 months after their stroke


Slightly more patients were alive after 6 months when they received neither low nor medium heparin. Again, the different heparin treatments did not have an effect on outcome 'FDEAD'.

In [36]:
list2_hep6 = df.groupby(by = ['HEP14'])['FDEAD'].apply(list)
stat, p = kruskal(list2_hep6[0], list2_hep6[1], list2_hep6[2])
print_stats(stat, p)

stat = 0.724, p = 0.696
Probably the same distribution


## Combination of aspirin and heparin
(significantly) different outcome than separate effects of aspirin and heparin

In [37]:
df.TREAT14.unique()

array(['Aspirin', 'Low Heparin', 'Medium Heparin',
       'Aspirin + Medium Heparin', 'Control', 'Aspirin + Low Heparin'],
      dtype=object)

### Outcome after 14 days

In [38]:
list_treat14 = df.groupby(by = ['TREAT14'])['DDEAD'].apply(list)

In [39]:
list_treat14

TREAT14
Aspirin                     [N, N, Y, N, N, N, N, N, N, N, N, Y, N, N, N, ...
Aspirin + Low Heparin       [N, N, N, N, N, N, N, N, N, N, N, N, N, N, Y, ...
Aspirin + Medium Heparin    [N, N, N, N, N, N, N, N, N, N, N, Y, N, N, N, ...
Control                     [N, N, N, N, N, N, N, N, N, N, N, Y, N, N, N, ...
Low Heparin                 [N, N, N, Y, N, N, N, N, N, N, N, N, N, N, Y, ...
Medium Heparin              [N, N, N, Y, N, N, N, N, N, N, N, N, Y, N, N, ...
Name: DDEAD, dtype: object

In [40]:
stat, p = kruskal(list_treat14[0], list_treat14[1], list_treat14[2], list_treat14[3],list_treat14[4], list_treat14[5])
print_stats(stat, p)

stat = 13.481, p = 0.019
Probably different distributions


In [41]:
iterator = product(enumerate(list_treat14), enumerate(list_treat14))
for (first_idx, first_group), (second_idx, second_group) in iterator:
    print(first_idx, second_idx)
    stat, p = mannwhitneyu(first_group, second_group)
    print_stats(stat, p)
    print()

0 0
stat = 9741698.000, p = 0.500
Probably the same distribution

0 1
stat = 4385251.000, p = 0.331
Probably the same distribution

0 2
stat = 4078360.000, p = 0.153
Probably the same distribution

0 3
stat = 11530134.000, p = 0.001
Probably different distributions

0 4
stat = 4531618.000, p = 0.089
Probably the same distribution

0 5
stat = 4392024.000, p = 0.074
Probably the same distribution

1 0
stat = 4385251.000, p = 0.331
Probably the same distribution

1 1
stat = 1988018.000, p = 0.500
Probably the same distribution

1 2
stat = 1835828.000, p = 0.108
Probably the same distribution

1 3
stat = 5189949.000, p = 0.003
Probably different distributions

1 4
stat = 2039840.000, p = 0.066
Probably the same distribution

1 5
stat = 1976997.000, p = 0.056
Probably the same distribution

2 0
stat = 4078360.000, p = 0.153
Probably the same distribution

2 1
stat = 1835828.000, p = 0.108
Probably the same distribution

2 2
stat = 1737248.000, p = 0.500
Probably the same distribution

2 3
s

In [42]:
group_treat = df.groupby(by = ['TREAT14'])
group_treat['DDEAD'].value_counts()

TREAT14                   DDEAD
Aspirin                   N        3969
                          Y         445
Aspirin + Low Heparin     N        1800
                          Y         194
Aspirin + Medium Heparin  N        1660
                          Y         204
Control                   N        4687
                          Y         643
Low Heparin               N        1844
                          Y         232
Medium Heparin            N        1787
                          Y         227
Name: DDEAD, dtype: int64

In [43]:
treat14_a_n = group_treat['DDEAD'].value_counts()[0]
treat14_a_y = group_treat['DDEAD'].value_counts()[1]

treat14_alh_n = group_treat['DDEAD'].value_counts()[2]
treat14_alh_y = group_treat['DDEAD'].value_counts()[3]

treat14_amh_n = group_treat['DDEAD'].value_counts()[4]
treat14_amh_y = group_treat['DDEAD'].value_counts()[5]

treat14_c_n = group_treat['DDEAD'].value_counts()[6]
treat14_c_y = group_treat['DDEAD'].value_counts()[7]

treat14_lh_n = group_treat['DDEAD'].value_counts()[8]
treat14_lh_y = group_treat['DDEAD'].value_counts()[9]

treat14_mh_n = group_treat['DDEAD'].value_counts()[10]
treat14_mh_y = group_treat['DDEAD'].value_counts()[11]


In [44]:
print(f"--- Aspirin during study ---")
print(f'{round((treat14_a_n / (treat14_a_n + treat14_a_y) * 100),1)} % of patients living 14 days after their stroke')
print(f'-- Significantly different compared to the control group (p < 0.05)')
print()
print(f"--- Aspirin + Low Heparin during study ---")
print(f'{round((treat14_alh_n / (treat14_alh_n + treat14_alh_y) * 100),1)} % of patients living 14 days after their stroke')
print(f'-- Significantly different compared to both Heparin groups and the control group (p < 0.05)')
print()
print(f"--- Aspirin + Medium Heparin during study ---")
print(f'{round((treat14_amh_n / (treat14_amh_n + treat14_amh_y) * 100),1)} % of patients living 14 days after their stroke')
print()
print(f"--- Low Heparin during study ---")
print(f'{round((treat14_lh_n / (treat14_lh_n + treat14_lh_y) * 100),1)} % of patients living 14 days after their stroke')
print()
print(f"--- Medium Heparin during study ---")
print(f'{round((treat14_mh_n / (treat14_mh_n + treat14_mh_y) * 100),1)} % of patients living 14 days after their stroke')
print()
print(f"--- Control (neither Aspirin nor Heparin during study) ---")
print(f'{round((treat14_c_n / (treat14_c_n + treat14_c_y) * 100),1)} % of patients living 14 days after their stroke')
print()

--- Aspirin during study ---
89.9 % of patients living 14 days after their stroke
-- Significantly different compared to the control group (p < 0.05)

--- Aspirin + Low Heparin during study ---
90.3 % of patients living 14 days after their stroke
-- Significantly different compared to both Heparin groups and the control group (p < 0.05)

--- Aspirin + Medium Heparin during study ---
89.1 % of patients living 14 days after their stroke

--- Low Heparin during study ---
88.8 % of patients living 14 days after their stroke

--- Medium Heparin during study ---
88.7 % of patients living 14 days after their stroke

--- Control (neither Aspirin nor Heparin during study) ---
87.9 % of patients living 14 days after their stroke



### Outcome after 6 months

In [45]:
list_treat6 = df.groupby(by = ['TREAT14'])['RECO'].apply(list)

In [46]:
stat, p = kruskal(list_treat6[0], list_treat6[1], list_treat6[2], list_treat6[3],list_treat6[4], list_treat6[5])
print_stats(stat, p)

stat = 26.771, p = 0.000
Probably different distributions


In [47]:
iterator = product(enumerate(list_treat6), enumerate(list_treat6))
for (first_idx, first_group), (second_idx, second_group) in iterator:
    print(first_idx, second_idx)
    stat, p = mannwhitneyu(first_group, second_group)
    print_stats(stat, p)
    print()

0 0
stat = 9741698.000, p = 0.500
Probably the same distribution

0 1
stat = 4395471.000, p = 0.454
Probably the same distribution

0 2
stat = 4091412.000, p = 0.305
Probably the same distribution

0 3
stat = 11377047.000, p = 0.000
Probably different distributions

0 4
stat = 4539231.000, p = 0.181
Probably the same distribution

0 5
stat = 4398506.000, p = 0.155
Probably the same distribution

1 0
stat = 4395471.000, p = 0.454
Probably the same distribution

1 1
stat = 1988018.000, p = 0.500
Probably the same distribution

1 2
stat = 1846040.000, p = 0.297
Probably the same distribution

1 3
stat = 5145902.000, p = 0.000
Probably different distributions

1 4
stat = 2053059.000, p = 0.249
Probably the same distribution

1 5
stat = 1989413.000, p = 0.221
Probably the same distribution

2 0
stat = 4091412.000, p = 0.305
Probably the same distribution

2 1
stat = 1846040.000, p = 0.297
Probably the same distribution

2 2
stat = 1737248.000, p = 0.500
Probably the same distribution

2 3
s

In [48]:
group_treat['RECO'].value_counts()

TREAT14                   RECO 
Aspirin                   False    3614
                          True      800
Aspirin + Low Heparin     False    1635
                          True      359
Aspirin + Medium Heparin  False    1516
                          True      348
Control                   False    4539
                          True      791
Low Heparin               False    1719
                          True      357
Medium Heparin            False    1670
                          True      344
Name: RECO, dtype: int64

In [49]:
treat6_a_f = group_treat['RECO'].value_counts()[0]
treat6_a_t = group_treat['RECO'].value_counts()[1]

treat6_alh_f = group_treat['RECO'].value_counts()[2]
treat6_alh_t = group_treat['RECO'].value_counts()[3]

treat6_amh_f = group_treat['RECO'].value_counts()[4]
treat6_amh_t = group_treat['RECO'].value_counts()[5]

treat6_c_f = group_treat['RECO'].value_counts()[6]
treat6_c_t = group_treat['RECO'].value_counts()[7]

treat6_lh_f = group_treat['RECO'].value_counts()[8]
treat6_lh_t = group_treat['RECO'].value_counts()[9]

treat6_mh_f = group_treat['RECO'].value_counts()[10]
treat6_mh_t = group_treat['RECO'].value_counts()[11]

In [50]:
print(f"--- Aspirin during study ---")
print(f'{round((treat6_a_f / (treat6_a_f + treat6_a_t) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to control (p < 0.01)')
print()
print(f"--- Aspirin + Low Heparin during study ---")
print(f'{round((treat6_alh_f / (treat6_alh_f + treat6_alh_t) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to control (p < 0.01)')
print()
print(f"--- Aspirin + Medium Heparin during study ---")
print(f'{round((treat6_amh_f / (treat6_amh_f + treat6_amh_t) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to control (p < 0.01)')
print()
print(f"--- Low Heparin during study ---")
print(f'{round((treat6_lh_f / (treat6_lh_f + treat6_lh_t) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to control (p < 0.01)')
print()
print(f"--- Medium Heparin during study ---")
print(f'{round((treat6_mh_f / (treat6_mh_f + treat6_mh_t) * 100),1)} % of patients living 6 monthsafter their stroke')
print(f'-- Significantly different compared to control (p < 0.01)')
print()
print(f"--- Control (neither Aspirin nor Heparin during study) ---")
print(f'{round((treat6_c_f / (treat6_c_f + treat6_c_t) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to all treatments (p < 0.01)')
print()

--- Aspirin during study ---
81.9 % of patients living 6 months after their stroke
-- Significantly different compared to control (p < 0.01)

--- Aspirin + Low Heparin during study ---
82.0 % of patients living 6 months after their stroke
-- Significantly different compared to control (p < 0.01)

--- Aspirin + Medium Heparin during study ---
81.3 % of patients living 6 months after their stroke
-- Significantly different compared to control (p < 0.01)

--- Low Heparin during study ---
82.8 % of patients living 6 months after their stroke
-- Significantly different compared to control (p < 0.01)

--- Medium Heparin during study ---
82.9 % of patients living 6 monthsafter their stroke
-- Significantly different compared to control (p < 0.01)

--- Control (neither Aspirin nor Heparin during study) ---
85.2 % of patients living 6 months after their stroke
-- Significantly different compared to all treatments (p < 0.01)



Patients who did receive neither of the treatments were more likely to recover within 6 months. Though, the group itself was much larger (over 5000 patients, compared to about 2000 patients in the other groups), which may have an impact on the outcome. 

When looking at the 'FDEAD' outcome, we see the same as before. The patients who did not receive either treatment were more likely to be dead after 6 months.

In [51]:
list_treat6 = df.groupby(by = ['TREAT14'])['FDEAD'].apply(list)
stat, p = kruskal(list_treat6[0], list_treat6[1], list_treat6[2], list_treat6[3],list_treat6[4], list_treat6[5])
print_stats(stat, p)

stat = 21.883, p = 0.001
Probably different distributions


In [52]:
list_treat6

TREAT14
Aspirin                     [N, N, Y, N, N, N, N, N, N, N, N, Y, Y, N, N, ...
Aspirin + Low Heparin       [N, N, N, N, N, Y, N, N, N, N, N, Y, N, N, Y, ...
Aspirin + Medium Heparin    [N, N, N, N, N, N, N, N, N, N, N, Y, N, N, N, ...
Control                     [N, N, N, N, N, N, N, N, N, N, N, Y, Y, N, N, ...
Low Heparin                 [N, N, N, Y, N, Y, N, N, N, N, N, N, N, N, Y, ...
Medium Heparin              [N, N, N, Y, N, N, N, N, N, N, N, N, Y, N, N, ...
Name: FDEAD, dtype: object

In [53]:
iterator = product(enumerate(list_treat6), enumerate(list_treat6))
for (first_idx, first_group), (second_idx, second_group) in iterator:
    print(first_idx, second_idx)
    stat, p = mannwhitneyu(first_group, second_group)
    print_stats(stat, p)
    print()

0 0
stat = 9741698.000, p = 0.500
Probably the same distribution

0 1
stat = 4385882.000, p = 0.381
Probably the same distribution

0 2
stat = 4090016.000, p = 0.307
Probably the same distribution

0 3
stat = 11372866.000, p = 0.000
Probably different distributions

0 4
stat = 4459777.000, p = 0.009
Probably different distributions

0 5
stat = 4350286.000, p = 0.029
Probably different distributions

1 0
stat = 4385882.000, p = 0.381
Probably the same distribution

1 1
stat = 1988018.000, p = 0.500
Probably the same distribution

1 2
stat = 1841360.000, p = 0.246
Probably the same distribution

1 3
stat = 5119666.000, p = 0.001
Probably different distributions

1 4
stat = 2007683.000, p = 0.012
Probably different distributions

1 5
stat = 1958430.000, p = 0.031
Probably different distributions

2 0
stat = 4090016.000, p = 0.307
Probably the same distribution

2 1
stat = 1841360.000, p = 0.246
Probably the same distribution

2 2
stat = 1737248.000, p = 0.500
Probably the same distributio

In [54]:
group_treat['FDEAD'].value_counts()

TREAT14                   FDEAD
Aspirin                   N        3445
                          Y         969
Aspirin + Low Heparin     N        1563
                          Y         431
Aspirin + Medium Heparin  N        1444
                          Y         420
Control                   N        3983
                          Y        1347
Low Heparin               N        1565
                          Y         511
Medium Heparin            N        1529
                          Y         485
Name: FDEAD, dtype: int64

In [55]:
treat6_a_n = group_treat['FDEAD'].value_counts()[0]
treat6_a_y = group_treat['FDEAD'].value_counts()[1]

treat6_alh_n = group_treat['FDEAD'].value_counts()[2]
treat6_alh_y = group_treat['FDEAD'].value_counts()[3]

treat6_amh_n = group_treat['FDEAD'].value_counts()[4]
treat6_amh_y = group_treat['FDEAD'].value_counts()[5]

treat6_c_n = group_treat['FDEAD'].value_counts()[6]
treat6_c_y = group_treat['FDEAD'].value_counts()[7]

treat6_lh_n = group_treat['FDEAD'].value_counts()[8]
treat6_lh_y = group_treat['FDEAD'].value_counts()[9]

treat6_mh_n = group_treat['FDEAD'].value_counts()[10]
treat6_mh_y = group_treat['FDEAD'].value_counts()[11]

In [56]:
print(f"--- Aspirin during study ---")
print(f'{round((treat6_a_n / (treat6_a_n + treat6_a_y) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to both Heparin groups and the control group (p < 0.05)')
print()
print(f"--- Aspirin + Low Heparin during study ---")
print(f'{round((treat6_alh_n / (treat6_alh_n + treat6_alh_y) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to both Heparin groups and the control group(p < 0.05)')
print()
print(f"--- Aspirin + Medium Heparin during study ---")
print(f'{round((treat6_amh_n / (treat6_amh_n + treat6_amh_y) * 100),1)} % of patients living 6 months after their stroke')
print(f'-- Significantly different compared to the control group (p < 0.05)')
print()
print(f"--- Low Heparin during study ---")
print(f'{round((treat6_lh_n / (treat6_lh_n + treat6_lh_y) * 100),1)} % of patients living 6 months after their stroke')
print()
print(f"--- Medium Heparin during study ---")
print(f'{round((treat6_mh_n / (treat6_mh_n + treat6_mh_y) * 100),1)} % of patients living 6 monthsafter their stroke')
print()
print(f"--- Control (neither Aspirin nor Heparin during study) ---")
print(f'{round((treat6_c_n / (treat6_c_n + treat6_c_y) * 100),1)} % of patients living 6 months after their stroke')
print()

--- Aspirin during study ---
78.0 % of patients living 6 months after their stroke
-- Significantly different compared to both Heparin groups and the control group (p < 0.05)

--- Aspirin + Low Heparin during study ---
78.4 % of patients living 6 months after their stroke
-- Significantly different compared to both Heparin groups and the control group(p < 0.05)

--- Aspirin + Medium Heparin during study ---
77.5 % of patients living 6 months after their stroke
-- Significantly different compared to the control group (p < 0.05)

--- Low Heparin during study ---
75.4 % of patients living 6 months after their stroke

--- Medium Heparin during study ---
75.9 % of patients living 6 monthsafter their stroke

--- Control (neither Aspirin nor Heparin during study) ---
74.7 % of patients living 6 months after their stroke

