In [2]:
import ast

import pandas as pd
from tqdm import tqdm

from dash_website import DIMENSIONS, MAIN_CATEGORIES_TO_CATEGORIES


# Dimensions whose pairwise correlations are averaged into the "MainDimensions" rows.
MAIN_DIMENSIONS = [
    "Abdomen",
    "Musculoskeletal",
    "Lungs",
    "Eyes",
    "Heart",
    "Arterial",
    "Brain",
    "Biochemistry",
    "Hearing",
    "BloodCells",
    "PhysicalActivity",
]

# Every unordered pair of distinct main dimensions, listed once, in the order
# the dimensions appear in MAIN_DIMENSIONS.
_NUMBER_MAIN_DIMENSIONS = len(MAIN_DIMENSIONS)
PAIRS_MAIN_DIMENSIONS = [
    [MAIN_DIMENSIONS[idx_first], MAIN_DIMENSIONS[idx_second]]
    for idx_first in range(_NUMBER_MAIN_DIMENSIONS)
    for idx_second in range(idx_first + 1, _NUMBER_MAIN_DIMENSIONS)
]

# The Musculoskeletal sub-dimensions are paired exhaustively, so their pairs are
# generated instead of being written out by hand.
_MUSCULOSKELETAL_SUBDIMENSIONS = [
    "MusculoskeletalScalars",
    "MusculoskeletalFullBody",
    "MusculoskeletalSpine",
    "MusculoskeletalHips",
    "MusculoskeletalKnees",
]

# Pairs of sub-dimensions whose correlations are averaged into the "SubDimensions" rows.
PAIRS_SUBDIMENSIONS = [
    ["BrainMRI", "BrainCognitive"],
    ["EyesOCT", "EyesFundus"],
    ["HeartECG", "HeartMRI"],
    ["AbdomenLiver", "AbdomenPancreas"],
    ["BiochemistryBlood", "BiochemistryUrine"],
] + [
    [first_subdimension, second_subdimension]
    for idx_first, first_subdimension in enumerate(_MUSCULOSKELETAL_SUBDIMENSIONS)
    for second_subdimension in _MUSCULOSKELETAL_SUBDIMENSIONS[idx_first + 1 :]
]
# Each top-level label with the sub-dimensions nested under it (an empty list
# when it has none). Entry order matters only in that it fixes the key order of
# DIMENSIONS_TO_EXCLUDE below.
# NOTE(review): the meaning of the "set*" labels is not visible here - confirm.
_SUBDIMENSIONS_BY_DIMENSION = {
    "set": [],
    "set_instances01": [],
    "set_instances1.5x": [],
    "set_instances23": [],
    "Abdomen": ["AbdomenLiver", "AbdomenPancreas"],
    "Arterial": ["ArterialCarotids", "ArterialPulseWaveAnalysis"],
    "Biochemistry": ["BiochemistryBlood", "BiochemistryUrine"],
    "Brain": ["BrainCognitive", "BrainMRI"],
    "Eyes": ["EyesAll", "EyesFundus", "EyesOCT"],
    "Hearing": [],
    "Heart": ["HeartECG", "HeartMRI"],
    "BloodCells": [],
    "Lungs": [],
    "Musculoskeletal": [
        "MusculoskeletalFullBody",
        "MusculoskeletalHips",
        "MusculoskeletalKnees",
        "MusculoskeletalScalars",
        "MusculoskeletalSpine",
    ],
    "PhysicalActivity": [],
}

# For every dimension, the other labels that are dropped (together with the
# dimension itself) before its correlations are averaged: a parent excludes all
# of its sub-dimensions, and each sub-dimension excludes its parent.
DIMENSIONS_TO_EXCLUDE = {}
for _parent_dimension, _subdimensions in _SUBDIMENSIONS_BY_DIMENSION.items():
    DIMENSIONS_TO_EXCLUDE[_parent_dimension] = list(_subdimensions)
    for _subdimension in _subdimensions:
        DIMENSIONS_TO_EXCLUDE[_subdimension] = [_parent_dimension]

In [24]:
# Load the pairwise correlations and key the rows by (dimension_1, dimension_2, category).
correlations_raw = pd.read_feather(
    "../../data/xwas/multivariate_correlations/correlations/correlations.feather"
).set_index(["dimension_1", "dimension_2", "category"])

# The stored column labels are strings that parse into (algorithm, correlation_type)
# tuples. ast.literal_eval only accepts Python literals, whereas eval would execute
# any expression found in the data file.
correlations_raw.columns = pd.MultiIndex.from_tuples(
    [ast.literal_eval(column) for column in correlations_raw.columns],
    names=["algorithm", "correlation_type"],
)
every_category = correlations_raw.index.get_level_values("category").drop_duplicates()

# One row per (dimension, category), plus two aggregate pseudo-dimensions that
# hold the averages over the main-dimension pairs and the sub-dimension pairs.
indexes = pd.MultiIndex.from_product(
    [DIMENSIONS + ["MainDimensions", "SubDimensions"], every_category],
    names=["dimension", "category"],
)

# One column per (algorithm, correlation_type, observation).
columns = pd.MultiIndex.from_product(
    [["elastic_net", "light_gbm", "neural_network"], ["pearson", "spearman"], ["mean", "std"]],
    names=["algorithm", "correlation_type", "observation"],
)

# Start from an all-NaN float frame: every cell is meant to hold a mean or a
# standard deviation, so there is no reason to fall back to object dtype.
averages_correlations = pd.DataFrame(index=indexes, columns=columns, dtype=float)

In [27]:
def _store_mean_and_std(averages, row_key, column_prefix, values):
    """Write the mean and standard deviation of `values` into one row of `averages`.

    Parameters
    ----------
    averages : pd.DataFrame
        Frame updated in place; its columns are
        (algorithm, correlation_type, observation) tuples.
    row_key : tuple
        (dimension, category) label of the row to fill.
    column_prefix : tuple
        (algorithm, correlation_type) part of the column label.
    values : pd.Series
        Correlations to summarise.
    """
    averages.loc[row_key, (*column_prefix, "mean")] = values.mean()
    averages.loc[row_key, (*column_prefix, "std")] = values.std()


# Label-based selection on a MultiIndex is documented for lists of tuples.
main_dimension_pairs = [tuple(pair) for pair in PAIRS_MAIN_DIMENSIONS]
subdimension_pairs = [tuple(pair) for pair in PAIRS_SUBDIMENSIONS]

# Accumulate into a fresh frame instead of `averages_correlations` itself: the
# end of this cell flattens the column labels to strings, so filling the shared
# frame in place would make a second run of the cell fail on the tuple column keys.
averages_filled = pd.DataFrame(
    index=averages_correlations.index,
    columns=pd.MultiIndex.from_product(
        [["elastic_net", "light_gbm", "neural_network"], ["pearson", "spearman"], ["mean", "std"]],
        names=["algorithm", "correlation_type", "observation"],
    ),
    dtype=float,
)

for algorithm in ["elastic_net", "light_gbm", "neural_network"]:
    for correlation_type in ["pearson", "spearman"]:
        column_prefix = (algorithm, correlation_type)
        # Put "category" first so that one category can be selected with a single .loc.
        correlations = correlations_raw[algorithm, correlation_type].reorder_levels(
            ["category", "dimension_1", "dimension_2"]
        )

        for category in every_category:
            # Indexed by (dimension_1, dimension_2) once the category is selected.
            correlations_category = correlations.loc[category]

            _store_mean_and_std(
                averages_filled,
                ("MainDimensions", category),
                column_prefix,
                correlations_category.loc[main_dimension_pairs],
            )
            _store_mean_and_std(
                averages_filled,
                ("SubDimensions", category),
                column_prefix,
                correlations_category.loc[subdimension_pairs],
            )

            for dimension in DIMENSIONS:
                # Leave out the dimension itself and its parent / sub-dimensions
                # before averaging its correlations with the other dimensions.
                correlations_independant = correlations_category.loc[dimension].drop(
                    index=([dimension] + DIMENSIONS_TO_EXCLUDE[dimension])
                )
                _store_mean_and_std(
                    averages_filled, (dimension, category), column_prefix, correlations_independant
                )

# Feather needs string column names, so the tuples are flattened to their string
# form. Rebinding the name keeps the same final state as before: string-labelled columns.
averages_correlations = averages_filled.copy()
averages_correlations.columns = [str(column) for column in averages_filled.columns]
# NOTE(review): the input is read from "../../data/..." but this output path is
# relative to the current directory ("data/...") - confirm which one is intended.
averages_correlations.reset_index().to_feather("data/xwas/multivariate_correlations/averages_correlations.feather")

Unnamed: 0_level_0,Unnamed: 1_level_0,"('elastic_net', 'pearson', 'mean')","('elastic_net', 'pearson', 'std')","('elastic_net', 'spearman', 'mean')","('elastic_net', 'spearman', 'std')","('light_gbm', 'pearson', 'mean')","('light_gbm', 'pearson', 'std')","('light_gbm', 'spearman', 'mean')","('light_gbm', 'spearman', 'std')","('neural_network', 'pearson', 'mean')","('neural_network', 'pearson', 'std')","('neural_network', 'spearman', 'mean')","('neural_network', 'spearman', 'std')"
dimension,category,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Abdomen,Alcohol,0.589633,0.135176,0.516687,0.156497,0.953293,0.026715,0.868831,0.056289,0.662819,0.158285,0.56272,0.104691
Abdomen,Diet,0.540979,0.159024,0.303607,0.14938,0.928008,0.026649,0.834175,0.051694,0.637922,0.283737,0.484614,0.193321
Abdomen,Education,0.684668,0.232164,0.493342,0.154985,0.953526,0.042885,0.753642,0.11993,0.791374,0.180909,0.614602,0.138902
Abdomen,ElectronicDevices,0.605466,0.239317,0.479922,0.159897,0.911997,0.054766,0.813139,0.105502,0.763396,0.218262,0.547445,0.165323
Abdomen,Employment,0.673773,0.185456,0.562271,0.155138,0.93709,0.04325,0.788158,0.076812,0.749394,0.138959,0.484448,0.12066
...,...,...,...,...,...,...,...,...,...,...,...,...,...
SubDimensions,CognitiveProspectiveMemory,0.658322,0.225951,0.597765,0.127194,0.954199,0.0309,0.750881,0.105935,0.583366,0.373558,,
SubDimensions,CognitiveReactionTime,0.755864,0.114107,0.645017,0.1654,0.979029,0.012444,0.882669,0.04597,0.725264,0.206096,,
SubDimensions,CognitiveSymbolDigitSubstitution,0.672218,0.143121,0.602311,0.166639,0.946415,0.057696,0.876349,0.077321,0.676962,0.222827,,
SubDimensions,CognitiveTowerRearranging,0.667571,0.145429,0.562724,0.17765,0.93298,0.050963,0.857145,0.072003,0.667946,0.257131,,
