# ⭐️ Data Analysis

A comprehensive analysis pertaining to the statistics presented below can be found in Section 4.7 (Data Analysis).

Import all the required packages.

In [21]:
import sys

sys.path.append("./preprocessing_utils")
sys.path.append("./feature_selection_utils")
sys.path.append("./visual_utils")
sys.path.append("./experiment_utils")

In [22]:
import pandas as pd
import numpy as np
import experiments_utils

# configure pandas settings for data display
pd.options.mode.chained_assignment = None
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

Load the dataset.

In [23]:
df_endo = pd.read_csv(experiments_utils.ENDO_DATA_PREDICTION_PATH)

In [24]:
df_endo = df_endo[
    experiments_utils.ALL_AVAILABLE_FEATURES
    + ["country", "ethnicity", "7plus_days_period_duration"]
]

In [25]:
(
    endo_df,
    no_endo_df,
    pain_frequency_map,
    PELVIC_PAIN_DURATION_CATEGORIES,
    LOWER_BACK_PAIN_DURATION_CATEGORIES,
    HEADACHE_DURATION_CATEGORIES,
) = experiments_utils.prep_data_for_analysis(df_endo)

In [26]:
endo_cases_num = endo_df.shape[0]
no_endo_cases_num = no_endo_df.shape[0]

## 🧍‍♀️ Demographics

### 👵🧑👶 Age Distribution

In [27]:
for i in df_endo["age_group"].unique():
    text = f"Age of {i.lower()} for women"
    experiments_utils.print_stats(endo_df, no_endo_df, text, "age_group", cat=i)

Age of 18-24 for women with endometriosis: 52 (22.7%), without endometriosis: 79 (24.5%).
Age of 25-34 for women with endometriosis: 94 (41.0%), without endometriosis: 124 (38.4%).
Age of 35-44 for women with endometriosis: 63 (27.5%), without endometriosis: 79 (24.5%).
Age of under 18 for women with endometriosis: 7 (3.1%), without endometriosis: 19 (5.9%).
Age of 45-54 for women with endometriosis: 12 (5.2%), without endometriosis: 19 (5.9%).
Age of unknown for women with endometriosis: 1 (0.4%), without endometriosis: 3 (0.9%).


### 🗺️ Country Distribution

In [28]:
table = pd.crosstab(
    df_endo["country"], df_endo["has_endometriosis"], margins=True
).sort_values(by="All", ascending=False)
print(table)

has_endometriosis         0    1  All
country                              
All                     318  224  542
United States           131   77  208
United Kingdom           42   48   90
Canada                   15   34   49
Ireland                  21    9   30
Australia                10   14   24
Germany                  18    4   22
Ukraine                   5    5   10
Netherlands               4    3    7
India                     4    2    6
New Zealand               3    3    6
France                    4    1    5
Finland                   2    3    5
Brazil                    4    1    5
Poland                    4    1    5
Hungary                   4    0    4
Denmark                   2    2    4
Romania                   3    1    4
Sweden                    4    0    4
Nigeria                   0    3    3
Portugal                  1    2    3
Pakistan                  2    1    3
Italy                     3    0    3
Austria                   3    0    3
Croatia     

In [106]:
print(
    f"Participants come from {len(df_endo['country'].unique()) - 1} distinct countries."
)  # - 1 because of None

Participants come from 52 distinct countries.


### 🌍 Ethnicity Distribution

In [30]:
table = pd.crosstab(
    df_endo["ethnicity"], df_endo["has_endometriosis"], margins=True
).sort_values(by="All", ascending=False)

print(table)

has_endometriosis                                     0    1  All
ethnicity                                                        
All                                                 323  229  552
White                                               274  181  455
Asian or Asian British                               15    8   23
Black, Black British, Caribbean or African            9   11   20
Mixed or multiple ethnic groups                      12    8   20
Hispanic                                              0    3    3
Mixed or multiple ethnic groups, White                0    2    2
South Asian                                           2    0    2
Black, Black British, Caribbean or African, White     1    1    2
Pashtun (Pakistan)                                    1    0    1
South Asian Indian                                    0    1    1
West Asian                                            1    0    1
White, Hispanic                                       0    1    1
White, His

### 📏 BMI Distribution

In [31]:
for i in df_endo["bmi_category"].unique():
    text = f"BMI category of {i} for women"
    experiments_utils.print_stats(endo_df, no_endo_df, text, "bmi_category", cat=i)

BMI category of Healthy Weight for women with endometriosis: 97 (42.4%), without endometriosis: 153 (47.4%).
BMI category of Overweight for women with endometriosis: 57 (24.9%), without endometriosis: 67 (20.7%).
BMI category of Class 3 Obesity for women with endometriosis: 12 (5.2%), without endometriosis: 25 (7.7%).
BMI category of Underweight for women with endometriosis: 26 (11.4%), without endometriosis: 25 (7.7%).
BMI category of Class 2 Obesity for women with endometriosis: 14 (6.1%), without endometriosis: 16 (5.0%).
BMI category of Class 1 Obesity for women with endometriosis: 23 (10.0%), without endometriosis: 37 (11.5%).


In [108]:
print(
    f"Mean BMI values: {df_endo['BMI'].mean():.2f}; Median BMI values: {df_endo['BMI'].median():.2f}"
)

Mean BMI values: 25.33; Median BMI values: 23.84


### 🤰 Pregnancy

In [109]:
experiments_utils.print_stats(endo_df, no_endo_df, "Pregnant", "was_pregnant")

Pregnant with endometriosis: 76.0 (33.2%), without endometriosis: 81.0 (25.1%).


### Infertility

In [34]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Experienced infertility", "experienced_infertility"
)

Experienced infertility with endometriosis: 71.0 (31.0%), without endometriosis: 34.0 (10.5%).


In [None]:
# distribution of infertility experience by age groups
table = pd.crosstab(
    df_endo["age_group"], df_endo["experienced_infertility"], margins=True
).sort_values(by="All", ascending=False)
print(table)

experienced_infertility  0.0  1.0  All
age_group                             
All                      443  105  548
25-34                    184   33  217
35-44                     91   50  141
18-24                    117   12  129
45-54                     21   10   31
Under 18                  26    0   26
Unknown                    4    0    4


### Thyroid disorders

In [37]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Has thyroid disorders", "has_thyroid_disorder"
)

Has thyroid disorders with endometriosis: 28.0 (12.2%), without endometriosis: 28.0 (8.7%).


### Anemia

In [38]:
experiments_utils.print_stats(endo_df, no_endo_df, "Has anemia", "has_anemia")

Has anemia with endometriosis: 81 (35.4%), without endometriosis: 84 (26.0%).


### 💊 Hormonal Contraceptives

In [40]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Use hormonal contraception",
    "takes_hormones_only_for_contracep",
)

Use hormonal contraception with endometriosis: 37 (16.2%), without endometriosis: 57 (17.6%).


In [112]:
print(
    f"{(df_endo['takes_hormones_only_for_contracep'].sum() / len(df_endo) * 100):.2f}% of all participants use hormonal contraception"
)

17.03% of all participants use hormonal contraception


In [42]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Use hormonal medication for pain relief",
    "takes_hormones_for_pain",
)

Use hormonal medication for pain relief with endometriosis: 101 (44.1%), without endometriosis: 50 (15.5%).


### Fibroids

In [43]:
experiments_utils.print_stats(endo_df, no_endo_df, "Has fibroids", "has_fibroids")

Has fibroids with endometriosis: 50 (21.8%), without endometriosis: 48 (14.9%).


## ❓ Symptoms

### Pelvic Pain

In [44]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experiences pelvic pain before period",
    "pelvic_pain_before_period",
)

Experiences pelvic pain before period with endometriosis: 177 (77.3%), without endometriosis: 182 (56.3%).


In [45]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experiences pelvic pain after period",
    "pelvic_pain_after_period",
)

Experiences pelvic pain after period with endometriosis: 124 (54.1%), without endometriosis: 62 (19.2%).


In [48]:
for i in PELVIC_PAIN_DURATION_CATEGORIES.values():
    text = (
        f"Experiences pelvic pain {i}"
        if i != "I do not experience pelvic pain during my period"
        else "Does not experience pelvic pain during period"
    )
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "pelvic_pain_days_during_period", cat=i
    )

Does not experience pelvic pain during period with endometriosis: 7 (3.1%), without endometriosis: 26 (8.0%).
Experiences pelvic pain Less than 1 day with endometriosis: 2 (0.9%), without endometriosis: 24 (7.4%).
Experiences pelvic pain 1-2 days with endometriosis: 34 (14.8%), without endometriosis: 126 (39.0%).
Experiences pelvic pain 3–4 days with endometriosis: 59 (25.8%), without endometriosis: 88 (27.2%).
Experiences pelvic pain 5 or more days with endometriosis: 124 (54.1%), without endometriosis: 58 (18.0%).


In [49]:
for i in pain_frequency_map.values():
    text = f"Experiences pelvic pain between periods as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "pelvic_pain_frequency_between_periods", cat=i
    )

Experiences pelvic pain between periods as often as never with endometriosis: 5 (2.2%), without endometriosis: 73 (22.6%).
Experiences pelvic pain between periods as often as rarely with endometriosis: 15 (6.6%), without endometriosis: 96 (29.7%).
Experiences pelvic pain between periods as often as sometimes with endometriosis: 50 (21.8%), without endometriosis: 77 (23.8%).
Experiences pelvic pain between periods as often as often with endometriosis: 80 (34.9%), without endometriosis: 56 (17.3%).
Experiences pelvic pain between periods as often as always with endometriosis: 79 (34.5%), without endometriosis: 20 (6.2%).


In [50]:
for i in range(0, 10):
    text = f"Overall average pelvic pain scored at {i} for women"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "pelvic_pain_average", cat=f"{i}"
    )

Overall average pelvic pain scored at 0 for women with endometriosis: 5 (2.2%), without endometriosis: 34 (10.5%).
Overall average pelvic pain scored at 1 for women with endometriosis: 14 (6.1%), without endometriosis: 32 (9.9%).
Overall average pelvic pain scored at 2 for women with endometriosis: 11 (4.8%), without endometriosis: 47 (14.6%).
Overall average pelvic pain scored at 3 for women with endometriosis: 34 (14.8%), without endometriosis: 62 (19.2%).
Overall average pelvic pain scored at 4 for women with endometriosis: 50 (21.8%), without endometriosis: 70 (21.7%).
Overall average pelvic pain scored at 5 for women with endometriosis: 47 (20.5%), without endometriosis: 44 (13.6%).
Overall average pelvic pain scored at 6 for women with endometriosis: 38 (16.6%), without endometriosis: 17 (5.3%).
Overall average pelvic pain scored at 7 for women with endometriosis: 13 (5.7%), without endometriosis: 10 (3.1%).
Overall average pelvic pain scored at 8 for women with endometriosis: 10

In [114]:
print(
    f"Mean average pelvic pain scores: {endo_df['pelvic_pain_average'].mean():.2f}; Median average pelvic pain scores: {endo_df['pelvic_pain_average'].median():.2f}"
)

Mean average pelvic pain scores: 4.52; Median average pelvic pain scores: 5.00


In [52]:
for i in range(0, 10):
    text = f"Overall worst pelvic pain scored at {i} for women"
    experiments_utils.print_stats(endo_df, no_endo_df, text, "pelvic_pain_worst", cat=i)

Overall worst pelvic pain scored at 0 for women with endometriosis: 3 (1.3%), without endometriosis: 22 (6.8%).
Overall worst pelvic pain scored at 1 for women with endometriosis: 0 (0.0%), without endometriosis: 5 (1.5%).
Overall worst pelvic pain scored at 2 for women with endometriosis: 3 (1.3%), without endometriosis: 18 (5.6%).
Overall worst pelvic pain scored at 3 for women with endometriosis: 5 (2.2%), without endometriosis: 18 (5.6%).
Overall worst pelvic pain scored at 4 for women with endometriosis: 4 (1.7%), without endometriosis: 22 (6.8%).
Overall worst pelvic pain scored at 5 for women with endometriosis: 7 (3.1%), without endometriosis: 42 (13.0%).
Overall worst pelvic pain scored at 6 for women with endometriosis: 27 (11.8%), without endometriosis: 72 (22.3%).
Overall worst pelvic pain scored at 7 for women with endometriosis: 60 (26.2%), without endometriosis: 71 (22.0%).
Overall worst pelvic pain scored at 8 for women with endometriosis: 54 (23.6%), without endometrio

In [115]:
print(
    f"Mean worst pelvic pain scores: {endo_df['pelvic_pain_worst'].mean():.2f}; Median worst pelvic pain scores: {endo_df['pelvic_pain_worst'].median():.2f}"
)

Mean worst pelvic pain scores: 7.34; Median worst pelvic pain scores: 8.00


### Lower Back Pain

In [54]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experiences lower back pain before period",
    "lower_back_pain_before_period",
)

Experiences lower back pain before period with endometriosis: 138 (60.3%), without endometriosis: 134 (41.5%).


In [55]:
(
    df_endo[df_endo["has_endometriosis"] == 1]["lower_back_pain_after_period"].sum(),
    df_endo[df_endo["has_endometriosis"] == 0]["lower_back_pain_after_period"].sum(),
)
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experiences lower back pain after period",
    "lower_back_pain_after_period",
)

Experiences lower back pain after period with endometriosis: 88 (38.4%), without endometriosis: 62 (19.2%).


In [56]:
for i in LOWER_BACK_PAIN_DURATION_CATEGORIES.values():
    text = (
        f"Experiences lower back pain {i}"
        if i != "I do not experience pelvic pain during my period"
        else "Does not experience pelvic pain during period"
    )
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "lower_back_pain_days_during_period", cat=i
    )

Experiences lower back pain I do not experience lower pain during my period with endometriosis: 24 (10.5%), without endometriosis: 83 (25.7%).
Experiences lower back pain Less than 1 day with endometriosis: 13 (5.7%), without endometriosis: 32 (9.9%).
Experiences lower back pain 1-2 days with endometriosis: 43 (18.8%), without endometriosis: 104 (32.2%).
Experiences lower back pain 3–4 days with endometriosis: 53 (23.1%), without endometriosis: 55 (17.0%).
Experiences lower back pain 5 or more days with endometriosis: 92 (40.2%), without endometriosis: 48 (14.9%).


In [57]:
for i in pain_frequency_map.values():
    text = f"Experiences lower back pain between periods as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "lower_back_pain_frequency_between_periods", cat=i
    )

Experiences lower back pain between periods as often as never with endometriosis: 23 (10.0%), without endometriosis: 94 (29.1%).
Experiences lower back pain between periods as often as rarely with endometriosis: 36 (15.7%), without endometriosis: 88 (27.2%).
Experiences lower back pain between periods as often as sometimes with endometriosis: 60 (26.2%), without endometriosis: 73 (22.6%).
Experiences lower back pain between periods as often as often with endometriosis: 56 (24.5%), without endometriosis: 45 (13.9%).
Experiences lower back pain between periods as often as always with endometriosis: 54 (23.6%), without endometriosis: 22 (6.8%).


In [58]:
for i in range(0, 10):
    text = f"Overall average lower back pain scored at {i} for women"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "lower_back_pain_average", cat=i
    )

Overall average lower back pain scored at 0 for women with endometriosis: 25 (10.9%), without endometriosis: 72 (22.3%).
Overall average lower back pain scored at 1 for women with endometriosis: 36 (15.7%), without endometriosis: 69 (21.4%).
Overall average lower back pain scored at 2 for women with endometriosis: 34 (14.8%), without endometriosis: 70 (21.7%).
Overall average lower back pain scored at 3 for women with endometriosis: 34 (14.8%), without endometriosis: 44 (13.6%).
Overall average lower back pain scored at 4 for women with endometriosis: 33 (14.4%), without endometriosis: 28 (8.7%).
Overall average lower back pain scored at 5 for women with endometriosis: 27 (11.8%), without endometriosis: 18 (5.6%).
Overall average lower back pain scored at 6 for women with endometriosis: 17 (7.4%), without endometriosis: 7 (2.2%).
Overall average lower back pain scored at 7 for women with endometriosis: 12 (5.2%), without endometriosis: 5 (1.5%).
Overall average lower back pain scored a

In [116]:
print(
    f"Mean average lower back pain scores: {endo_df['lower_back_pain_average'].mean():.2f}; Median average lower back pain scores: {endo_df['lower_back_pain_average'].median():.2f}"
)

Mean average lower back pain scores: 3.22; Median average lower back pain scores: 3.00


In [60]:
for i in range(0, 10):
    text = f"Overall worst lower back pain scored at {i} for women"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "lower_back_pain_worst", cat=i
    )

Overall worst lower back pain scored at 0 for women with endometriosis: 14 (6.1%), without endometriosis: 53 (16.4%).
Overall worst lower back pain scored at 1 for women with endometriosis: 10 (4.4%), without endometriosis: 18 (5.6%).
Overall worst lower back pain scored at 2 for women with endometriosis: 11 (4.8%), without endometriosis: 30 (9.3%).
Overall worst lower back pain scored at 3 for women with endometriosis: 20 (8.7%), without endometriosis: 52 (16.1%).
Overall worst lower back pain scored at 4 for women with endometriosis: 26 (11.4%), without endometriosis: 42 (13.0%).
Overall worst lower back pain scored at 5 for women with endometriosis: 30 (13.1%), without endometriosis: 45 (13.9%).
Overall worst lower back pain scored at 6 for women with endometriosis: 37 (16.2%), without endometriosis: 32 (9.9%).
Overall worst lower back pain scored at 7 for women with endometriosis: 38 (16.6%), without endometriosis: 30 (9.3%).
Overall worst lower back pain scored at 8 for women with

In [117]:
print(
    f"Mean worst lower back pain scores: {endo_df['lower_back_pain_worst'].mean():.2f}; Median worst lower back pain scores: {endo_df['lower_back_pain_worst'].median():.2f}"
)

Mean worst lower back pain scores: 5.24; Median worst lower back pain scores: 6.00


### Headache

In [62]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Experience headache before period", "headache_before_period"
)

Experience headache before period with endometriosis: 101 (44.1%), without endometriosis: 104 (32.2%).


In [63]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Experience headache after period", "headache_after_period"
)

Experience headache after period with endometriosis: 60 (26.2%), without endometriosis: 54 (16.7%).


In [64]:
for i in HEADACHE_DURATION_CATEGORIES.values():
    text = (
        f"Experiences headache {i}"
        if i != "I do not experience headaches during my period"
        else "Does not experience headache during period"
    )
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "headache_days_during_period", cat=i
    )

Does not experience headache during period with endometriosis: 53 (23.1%), without endometriosis: 110 (34.1%).
Experiences headache Less than 1 day with endometriosis: 27 (11.8%), without endometriosis: 67 (20.7%).
Experiences headache 1-2 days with endometriosis: 87 (38.0%), without endometriosis: 79 (24.5%).
Experiences headache 3–4 days with endometriosis: 32 (14.0%), without endometriosis: 34 (10.5%).
Experiences headache 5 or more days with endometriosis: 28 (12.2%), without endometriosis: 32 (9.9%).


In [65]:
for i in pain_frequency_map.values():
    text = f"Experiences headache between periods as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "headache_frequency_between_periods", cat=i
    )

Experiences headache between periods as often as never with endometriosis: 20 (8.7%), without endometriosis: 47 (14.6%).
Experiences headache between periods as often as rarely with endometriosis: 60 (26.2%), without endometriosis: 76 (23.5%).
Experiences headache between periods as often as sometimes with endometriosis: 90 (39.3%), without endometriosis: 113 (35.0%).
Experiences headache between periods as often as often with endometriosis: 43 (18.8%), without endometriosis: 70 (21.7%).
Experiences headache between periods as often as always with endometriosis: 16 (7.0%), without endometriosis: 16 (5.0%).


In [66]:
for i in range(0, 10):
    text = f"Overall average headache scored at {i} for women"
    experiments_utils.print_stats(endo_df, no_endo_df, text, "headache_average", cat=i)

Overall average headache scored at 0 for women with endometriosis: 37 (16.2%), without endometriosis: 63 (19.5%).
Overall average headache scored at 1 for women with endometriosis: 31 (13.5%), without endometriosis: 53 (16.4%).
Overall average headache scored at 2 for women with endometriosis: 46 (20.1%), without endometriosis: 56 (17.3%).
Overall average headache scored at 3 for women with endometriosis: 23 (10.0%), without endometriosis: 58 (18.0%).
Overall average headache scored at 4 for women with endometriosis: 41 (17.9%), without endometriosis: 44 (13.6%).
Overall average headache scored at 5 for women with endometriosis: 17 (7.4%), without endometriosis: 24 (7.4%).
Overall average headache scored at 6 for women with endometriosis: 16 (7.0%), without endometriosis: 14 (4.3%).
Overall average headache scored at 7 for women with endometriosis: 8 (3.5%), without endometriosis: 4 (1.2%).
Overall average headache scored at 8 for women with endometriosis: 2 (0.9%), without endometrios

In [119]:
print(
    f"Mean average headache scores: {endo_df['headache_average'].mean():.2f}; Median average headache pain scores: {endo_df['headache_average'].median():.2f}"
)

Mean average headache scores: 2.84; Median average headache pain scores: 2.00


In [68]:
for i in range(0, 10):
    text = f"Overall worst headache scored at {i} for women"
    experiments_utils.print_stats(endo_df, no_endo_df, text, "headache_worst", cat=i)

Overall worst headache scored at 0 for women with endometriosis: 18 (7.9%), without endometriosis: 39 (12.1%).
Overall worst headache scored at 1 for women with endometriosis: 7 (3.1%), without endometriosis: 8 (2.5%).
Overall worst headache scored at 2 for women with endometriosis: 24 (10.5%), without endometriosis: 24 (7.4%).
Overall worst headache scored at 3 for women with endometriosis: 21 (9.2%), without endometriosis: 45 (13.9%).
Overall worst headache scored at 4 for women with endometriosis: 35 (15.3%), without endometriosis: 35 (10.8%).
Overall worst headache scored at 5 for women with endometriosis: 27 (11.8%), without endometriosis: 47 (14.6%).
Overall worst headache scored at 6 for women with endometriosis: 31 (13.5%), without endometriosis: 51 (15.8%).
Overall worst headache scored at 7 for women with endometriosis: 24 (10.5%), without endometriosis: 33 (10.2%).
Overall worst headache scored at 8 for women with endometriosis: 20 (8.7%), without endometriosis: 24 (7.4%).
O

In [118]:
print(
    f"Mean worst headache scores: {endo_df['headache_worst'].mean():.2f}; Median worst headache pain scores: {endo_df['headache_worst'].median():.2f}"
)

Mean worst headache scores: 4.77; Median worst headache pain scores: 5.00


### Bloating

In [70]:
for i in pain_frequency_map.values():
    text = f"Experiences bloating during period as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "bloating_during_period", cat=i
    )

Experiences bloating during period as often as never with endometriosis: 7 (3.1%), without endometriosis: 23 (7.1%).
Experiences bloating during period as often as rarely with endometriosis: 7 (3.1%), without endometriosis: 27 (8.4%).
Experiences bloating during period as often as sometimes with endometriosis: 24 (10.5%), without endometriosis: 73 (22.6%).
Experiences bloating during period as often as often with endometriosis: 61 (26.6%), without endometriosis: 93 (28.8%).
Experiences bloating during period as often as always with endometriosis: 127 (55.5%), without endometriosis: 107 (33.1%).


In [71]:
for i in pain_frequency_map.values():
    text = f"Experiences bloating between periods as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "bloating_between_periods", cat=i
    )

Experiences bloating between periods as often as never with endometriosis: 8 (3.5%), without endometriosis: 54 (16.7%).
Experiences bloating between periods as often as rarely with endometriosis: 19 (8.3%), without endometriosis: 81 (25.1%).
Experiences bloating between periods as often as sometimes with endometriosis: 67 (29.3%), without endometriosis: 91 (28.2%).
Experiences bloating between periods as often as often with endometriosis: 85 (37.1%), without endometriosis: 65 (20.1%).
Experiences bloating between periods as often as always with endometriosis: 49 (21.4%), without endometriosis: 31 (9.6%).


### Pain in legs/hips

In [72]:
for i in pain_frequency_map.values():
    text = f"Experiences pain in legs or hips during period as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "pain_in_legs_hips_during_period", cat=i
    )

Experiences pain in legs or hips during period as often as never with endometriosis: 21 (9.2%), without endometriosis: 85 (26.3%).
Experiences pain in legs or hips during period as often as rarely with endometriosis: 18 (7.9%), without endometriosis: 43 (13.3%).
Experiences pain in legs or hips during period as often as sometimes with endometriosis: 36 (15.7%), without endometriosis: 58 (18.0%).
Experiences pain in legs or hips during period as often as often with endometriosis: 57 (24.9%), without endometriosis: 66 (20.4%).
Experiences pain in legs or hips during period as often as always with endometriosis: 94 (41.0%), without endometriosis: 69 (21.4%).


In [73]:
for i in pain_frequency_map.values():
    text = f"Experiences pain in legs or hips between periods as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "bloating_between_periods", cat=i
    )

Experiences pain in legs or hips between periods as often as never with endometriosis: 8 (3.5%), without endometriosis: 54 (16.7%).
Experiences pain in legs or hips between periods as often as rarely with endometriosis: 19 (8.3%), without endometriosis: 81 (25.1%).
Experiences pain in legs or hips between periods as often as sometimes with endometriosis: 67 (29.3%), without endometriosis: 91 (28.2%).
Experiences pain in legs or hips between periods as often as often with endometriosis: 85 (37.1%), without endometriosis: 65 (20.1%).
Experiences pain in legs or hips between periods as often as always with endometriosis: 49 (21.4%), without endometriosis: 31 (9.6%).


### Fatigue

In [74]:
for i in pain_frequency_map.values():
    text = f"Experiences fatigue during period as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "fatigue_during_period", cat=i
    )

Experiences fatigue during period as often as never with endometriosis: 6 (2.6%), without endometriosis: 15 (4.6%).
Experiences fatigue during period as often as rarely with endometriosis: 2 (0.9%), without endometriosis: 22 (6.8%).
Experiences fatigue during period as often as sometimes with endometriosis: 27 (11.8%), without endometriosis: 48 (14.9%).
Experiences fatigue during period as often as often with endometriosis: 48 (21.0%), without endometriosis: 94 (29.1%).
Experiences fatigue during period as often as always with endometriosis: 143 (62.4%), without endometriosis: 144 (44.6%).


In [75]:
for i in pain_frequency_map.values():
    text = f"Experiences fatigue between periods as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "fatigue_between_periods", cat=i
    )

Experiences fatigue between periods as often as never with endometriosis: 3 (1.3%), without endometriosis: 33 (10.2%).
Experiences fatigue between periods as often as rarely with endometriosis: 16 (7.0%), without endometriosis: 61 (18.9%).
Experiences fatigue between periods as often as sometimes with endometriosis: 57 (24.9%), without endometriosis: 96 (29.7%).
Experiences fatigue between periods as often as often with endometriosis: 70 (30.6%), without endometriosis: 76 (23.5%).
Experiences fatigue between periods as often as always with endometriosis: 83 (36.2%), without endometriosis: 57 (17.6%).


### 💊 Medication Intake

In [76]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Take over-the-counter pills women", "takes_over_cntr_pills"
)

Take over-the-counter pills women with endometriosis: 162 (70.7%), without endometriosis: 227 (70.3%).


In [77]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Take prescribed painkillers women", "takes_presc_painkillers"
)

Take prescribed painkillers women with endometriosis: 101 (44.1%), without endometriosis: 39 (12.1%).


In [78]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Take hormonal treatments for pain women",
    "takes_hormones_for_pain",
)

Take hormonal treatments for pain women with endometriosis: 101 (44.1%), without endometriosis: 50 (15.5%).


In [123]:
print(
    f"{(df_endo['takes_hormones_for_pain'].sum() / len(df_endo) * 100):.2f}% of all participants use hormonal treatments for pain"
)

27.36% of all participants use hormonal treatments for pain


### Intercourse-Related Symtoms

In [80]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Not sexually active", "not_sex_active"
)

Not sexually active with endometriosis: 41 (17.9%), without endometriosis: 97 (30.0%).


In [81]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experience pelvic pain during intercourse",
    "pelvic_pain_during_intercourse",
)

Experience pelvic pain during intercourse with endometriosis: 127 (55.5%), without endometriosis: 60 (18.6%).


In [82]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experience deep vaginal pain during intercourse",
    "deep_vaginal_pain_during_intercourse",
)

Experience deep vaginal pain during intercourse with endometriosis: 127 (55.5%), without endometriosis: 63 (19.5%).


In [83]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Experience pain after intercourse", "pain_after_sex"
)

Experience pain after intercourse with endometriosis: 113 (49.3%), without endometriosis: 61 (18.9%).


## 🚽 Bowel & Urinary Movements

In [84]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Has regular bowel movements", "regular_bowel_movements"
)

Has regular bowel movements with endometriosis: 188 (82.1%), without endometriosis: 284 (87.9%).


In [85]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Has painful bowel movements", "painful_bowel_movements"
)

Has painful bowel movements with endometriosis: 173.0 (75.5%), without endometriosis: 125.0 (38.7%).


In [86]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Has difficulty controlling urination",
    "difficulty_controlling_urination",
)

Has difficulty controlling urination with endometriosis: 80.0 (34.9%), without endometriosis: 52.0 (16.1%).


In [87]:
experiments_utils.print_stats(
    endo_df, no_endo_df, "Has pain during urination", "pain_during_urination"
)

Has pain during urination with endometriosis: 101.0 (44.1%), without endometriosis: 47.0 (14.6%).


## 🩸 Menses Characteristics & Blood Loss Approximation

### 🔄🗓️ Cycle Duration

In [88]:
for i in [
    "less24d_cycle_length",
    "24_31d_cycle_length",
    "32_38d_cycle_length",
    "39_50d_cycle_length",
    "more51d_cycle_length",
    "too_irregular_cycle_length",
]:
    experiments_utils.print_stats(endo_df, no_endo_df, f"{i}", i)

less24d_cycle_length with endometriosis: 11 (4.8%), without endometriosis: 8 (2.5%).
24_31d_cycle_length with endometriosis: 136 (59.4%), without endometriosis: 197 (61.0%).
32_38d_cycle_length with endometriosis: 29 (12.7%), without endometriosis: 46 (14.2%).
39_50d_cycle_length with endometriosis: 4 (1.7%), without endometriosis: 6 (1.9%).
more51d_cycle_length with endometriosis: 3 (1.3%), without endometriosis: 5 (1.5%).
too_irregular_cycle_length with endometriosis: 46 (20.1%), without endometriosis: 59 (18.3%).


### 🩸🗓️ Period Duration

In [89]:
for i in [
    "1_2days_period_duration",
    "3_4days_period_duration",
    "5_6days_period_duration",
    "7plus_days_period_duration",
]:
    experiments_utils.print_stats(endo_df, no_endo_df, f"{i}", i)

1_2days_period_duration with endometriosis: 6 (2.6%), without endometriosis: 13 (4.0%).
3_4days_period_duration with endometriosis: 48 (21.0%), without endometriosis: 101 (31.3%).
5_6days_period_duration with endometriosis: 109 (47.6%), without endometriosis: 141 (43.7%).
7plus_days_period_duration with endometriosis: 66 (28.8%), without endometriosis: 66 (20.4%).


### ⏱️ Bleeding Duration Changes

In [90]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experiences changes in bleeding duration",
    "bleeding_duration_changes",
)

Experiences changes in bleeding duration with endometriosis: 122.0 (53.3%), without endometriosis: 158.0 (48.9%).


### Large Blood Clots Frequency

In [91]:
for i in pain_frequency_map.values():
    text = f"Experiences large blood clots as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "large_blood_clots_frequency", cat=i
    )

Experiences large blood clots as often as never with endometriosis: 14 (6.1%), without endometriosis: 61 (18.9%).
Experiences large blood clots as often as rarely with endometriosis: 45 (19.7%), without endometriosis: 101 (31.3%).
Experiences large blood clots as often as sometimes with endometriosis: 87 (38.0%), without endometriosis: 105 (32.5%).
Experiences large blood clots as often as often with endometriosis: 83 (36.2%), without endometriosis: 54 (16.7%).
Experiences large blood clots as often as always with endometriosis: 0 (0.0%), without endometriosis: 0 (0.0%).


### Spotting Between Periods

In [92]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Experiences spotting between periods",
    "spotting_between_periods",
)

Experiences spotting between periods with endometriosis: 112.0 (48.9%), without endometriosis: 111.0 (34.4%).


### Heavy Bleeding Frequency

In [93]:
for i in pain_frequency_map.values():
    text = f"Experiences need to change sanitary products multiple times in several consecutive hours to prevent leakage as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "heavy_bleeding_frequency", cat=i
    )

Experiences need to change sanitary products multiple times in several consecutive hours to prevent leakage as often as never with endometriosis: 25 (10.9%), without endometriosis: 89 (27.6%).
Experiences need to change sanitary products multiple times in several consecutive hours to prevent leakage as often as rarely with endometriosis: 41 (17.9%), without endometriosis: 91 (28.2%).
Experiences need to change sanitary products multiple times in several consecutive hours to prevent leakage as often as sometimes with endometriosis: 58 (25.3%), without endometriosis: 55 (17.0%).
Experiences need to change sanitary products multiple times in several consecutive hours to prevent leakage as often as often with endometriosis: 60 (26.2%), without endometriosis: 56 (17.3%).
Experiences need to change sanitary products multiple times in several consecutive hours to prevent leakage as often as always with endometriosis: 45 (19.7%), without endometriosis: 30 (9.3%).


### 🌙 Night Time Changes

In [94]:
for i in pain_frequency_map.values():
    text = f"Experiences need to change sanitary in the night to prevent leakage as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "night_time_changes", cat=i
    )

Experiences need to change sanitary in the night to prevent leakage as often as never with endometriosis: 38 (16.6%), without endometriosis: 120 (37.2%).
Experiences need to change sanitary in the night to prevent leakage as often as rarely with endometriosis: 53 (23.1%), without endometriosis: 69 (21.4%).
Experiences need to change sanitary in the night to prevent leakage as often as sometimes with endometriosis: 71 (31.0%), without endometriosis: 71 (22.0%).
Experiences need to change sanitary in the night to prevent leakage as often as often with endometriosis: 36 (15.7%), without endometriosis: 42 (13.0%).
Experiences need to change sanitary in the night to prevent leakage as often as always with endometriosis: 31 (13.5%), without endometriosis: 19 (5.9%).


## 🧬 Family History

In [95]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Family member were diagnosed with endometriosis within women",
    "family_history_endometriosis",
)

Family member were diagnosed with endometriosis within women with endometriosis: 93 (40.6%), without endometriosis: 49 (15.2%).


In [96]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Family member experienced infertility within women",
    "family_history_infertility",
)

Family member experienced infertility within women with endometriosis: 48 (21.0%), without endometriosis: 34 (10.5%).


In [97]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Family member experienced heavy bleeding within women",
    "family_history_heavy_bleeding",
)

Family member experienced heavy bleeding within women with endometriosis: 115 (50.2%), without endometriosis: 114 (35.3%).


In [98]:
experiments_utils.print_stats(
    endo_df,
    no_endo_df,
    "Family member experienced pelvic pain within women",
    "family_history_pelvic_pain",
)

Family member experienced pelvic pain within women with endometriosis: 113 (49.3%), without endometriosis: 105 (32.5%).


## ✨ Quality-of-Life

In [99]:
qol_features = [
    "bleeding_impact_social_events",
    "bleeding_impact_home_jobs",
    "bleeding_impact_work",
    "bleeding_impact_physical_activity",
    "pain_impact_social_events",
    "pain_impact_home_jobs",
    "pain_impact_work",
    "pain_impact_physical_activity",
    "pain_impact_appetite",
    "pain_impact_sleep",
]

### Bleeding and Pain Impact on Daily Activity

In [100]:
for qol_feature in qol_features:
    for i in pain_frequency_map.values():
        text = f"Experiences {qol_feature} as often as {i.lower()}"
        experiments_utils.print_stats(endo_df, no_endo_df, text, qol_feature, cat=i)
    print()

Experiences bleeding_impact_social_events as often as never with endometriosis: 64 (27.9%), without endometriosis: 158 (48.9%).
Experiences bleeding_impact_social_events as often as rarely with endometriosis: 34 (14.8%), without endometriosis: 71 (22.0%).
Experiences bleeding_impact_social_events as often as sometimes with endometriosis: 66 (28.8%), without endometriosis: 56 (17.3%).
Experiences bleeding_impact_social_events as often as often with endometriosis: 55 (24.0%), without endometriosis: 28 (8.7%).
Experiences bleeding_impact_social_events as often as always with endometriosis: 9 (3.9%), without endometriosis: 7 (2.2%).

Experiences bleeding_impact_home_jobs as often as never with endometriosis: 59 (25.8%), without endometriosis: 142 (44.0%).
Experiences bleeding_impact_home_jobs as often as rarely with endometriosis: 26 (11.4%), without endometriosis: 62 (19.2%).
Experiences bleeding_impact_home_jobs as often as sometimes with endometriosis: 65 (28.4%), without endometriosis:

### Psychological Strain

In [101]:
for i in pain_frequency_map.values():
    text = f"Experiences mood swings as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "experiences_mood_swings", cat=i
    )

Experiences mood swings as often as never with endometriosis: 1 (0.4%), without endometriosis: 15 (4.6%).
Experiences mood swings as often as rarely with endometriosis: 19 (8.3%), without endometriosis: 44 (13.6%).
Experiences mood swings as often as sometimes with endometriosis: 86 (37.6%), without endometriosis: 124 (38.4%).
Experiences mood swings as often as often with endometriosis: 85 (37.1%), without endometriosis: 87 (26.9%).
Experiences mood swings as often as always with endometriosis: 37 (16.2%), without endometriosis: 53 (16.4%).


In [102]:
for i in pain_frequency_map.values():
    text = f"Unable to cope with pain as often as {i.lower()}"
    experiments_utils.print_stats(
        endo_df, no_endo_df, text, "unable_to_cope_with_pain", cat=i
    )

Unable to cope with pain as often as never with endometriosis: 4 (1.7%), without endometriosis: 70 (21.7%).
Unable to cope with pain as often as rarely with endometriosis: 20 (8.7%), without endometriosis: 96 (29.7%).
Unable to cope with pain as often as sometimes with endometriosis: 86 (37.6%), without endometriosis: 94 (29.1%).
Unable to cope with pain as often as often with endometriosis: 93 (40.6%), without endometriosis: 50 (15.5%).
Unable to cope with pain as often as always with endometriosis: 26 (11.4%), without endometriosis: 11 (3.4%).
