In [1]:
import pandas as pd 
import numpy as np

# Raw dimension labels -> labels used in the output table. The mapping is
# passed to DataFrame.replace on the whole frame, so any cell whose value
# equals a key is rewritten.
DICT_TO_CHANGE_DIMENSIONS = {
    "ImmuneSystem": "BloodCells",
    # Backslash followed by an asterisk. Spelled with an escaped backslash so
    # the key keeps exactly the same value without relying on an unrecognized
    # escape sequence (a SyntaxWarning on recent Python versions).
    # NOTE(review): replace() is called without regex=True, so this only
    # matches cells that literally contain the backslash. The other keys use a
    # bare "*" prefix -- confirm the raw CSVs really hold a backslash here.
    "\\*": "set",
    "*instances01": "set_instances01",
    "*instances1.5x": "set_instances1.5x",
    "*instances23": "set_instances23",
}

# Category labels to rename; applied after the category column has been
# reduced to a single "_"-separated token.
DICT_TO_CHANGE_CATEGORIES = {
    "HeartSize": "HeartFunction",
    "AnthropometryImpedance": "Impedance",
    "AnthropometryBodySize": "Anthropometry",
    "Claudification": "Claudication",
}

# Algorithm name as it appears in the CSV file names -> snake_case label
# stored in the "algorithm" column.
CAMEL_TO_SNAKE = {"ElasticNet": "elastic_net", "LightGbm": "light_gbm", "NeuralNetwork": "neural_network"}

# Read one CSV per (correlation type, algorithm) pair, harmonise its column
# names and labels, tag each frame with where it came from, and stack all of
# them into a single frame indexed by (algorithm, correlation_type).
list_correlations = []

for correlation_type in ["Pearson", "Spearman"]:
    for algorithm in ["ElasticNet", "LightGbm", "NeuralNetwork"]:
        correlations = (
            pd.read_csv(
                f"../data/page8_MultivariateXWASCorrelations/CorrelationsMultivariate/CorrelationsMultivariate_{correlation_type}_{algorithm}.csv",
                index_col=0,
            )
            .rename(
                columns={
                    "env_dataset": "category",
                    "organ_1": "dimension_1",
                    "organ_2": "dimension_2",
                    "corr": "correlation",
                    "sample_size": "number_variables",
                }
            )
            .replace(DICT_TO_CHANGE_DIMENSIONS)
        )

        # Keep the second "_"-separated token of each category when there is
        # one, otherwise the only token.
        correlations["category"] = [
            tokens[1] if len(tokens) > 1 else tokens[0]
            for tokens in correlations["category"].str.split("_")
        ]

        # Category renames run after the split so they see the reduced label.
        correlations = correlations.replace(DICT_TO_CHANGE_CATEGORIES)

        correlations["algorithm"] = CAMEL_TO_SNAKE[algorithm]
        correlations["correlation_type"] = correlation_type.lower()

        list_correlations.append(correlations)

every_correlation = pd.concat(list_correlations).set_index(["algorithm", "correlation_type"])

In [2]:
# Two-level column header: one [algorithm, measure] pair per output column,
# with the algorithm varying slowest.
list_columns = [
    [algorithm, measure]
    for algorithm in ["elastic_net", "light_gbm", "neural_network"]
    for measure in ["pearson", "spearman", "number_variables"]
]
columns = pd.MultiIndex.from_tuples(list_columns)

# One row per distinct (dimension_1, dimension_2, category) triple found in
# every_correlation; the cells start out empty.
row_index = every_correlation.set_index(["dimension_1", "dimension_2", "category"]).index.drop_duplicates()
correlations_raw = pd.DataFrame(None, index=row_index, columns=columns)

In [3]:
# Fill correlations_raw one column at a time. Each assigned Series is indexed
# by (dimension_1, dimension_2, category), so pandas aligns it on the rows of
# correlations_raw.
for algorithm in ["elastic_net", "light_gbm", "neural_network"]:
    for correlation_type in ["pearson", "spearman"]:
        # Select and re-index the rows of this (algorithm, correlation type)
        # pair once, then reuse the result for both columns written below.
        # reset_index(drop=True) discards the (algorithm, correlation_type)
        # index before the row keys are installed as the new index.
        correlations_by_key = (
            every_correlation.loc[(algorithm, correlation_type)]
            .reset_index(drop=True)
            .set_index(["dimension_1", "dimension_2", "category"])
        )

        correlations_raw[(algorithm, correlation_type)] = correlations_by_key["correlation"]

        # Written on every pass of the inner loop, so the values that remain
        # are those of the last correlation type iterated ("spearman").
        correlations_raw[(algorithm, "number_variables")] = correlations_by_key["number_variables"]



In [5]:
# Replace the two-level column header by the string form of each tuple,
# e.g. "('elastic_net', 'pearson')".
# NOTE(review): presumably done because the disabled feather export below
# needs string column names -- confirm before re-enabling it.
correlations_raw.columns = [str(column) for column in correlations_raw.columns.tolist()]

# Show the table with the row keys as ordinary columns; the export stays disabled.
correlations_raw.reset_index()  # .to_feather("../data/xwas/multivariate_correlations/correlations.feather")

Unnamed: 0,dimension_1,dimension_2,category,"('elastic_net', 'pearson')","('elastic_net', 'spearman')","('elastic_net', 'number_variables')","('light_gbm', 'pearson')","('light_gbm', 'spearman')","('light_gbm', 'number_variables')","('neural_network', 'pearson')","('neural_network', 'spearman')","('neural_network', 'number_variables')"
0,set,set,Alcohol,1.000000,1.000000,41,1.000000,1.000000,41,1.000000,1.000000,41
1,set,set_instances01,Alcohol,0.417034,0.320990,41,0.813815,0.812489,41,0.803148,0.594180,41
2,set,set_instances1.5x,Alcohol,0.994724,0.929577,41,0.967495,0.946538,41,0.966550,0.907317,41
3,set,set_instances23,Alcohol,0.206256,0.247385,41,0.809886,0.649891,41,0.139374,0.520872,41
4,set,Abdomen,Alcohol,0.505619,0.320144,41,0.865277,0.893658,41,0.740531,0.520906,41
...,...,...,...,...,...,...,...,...,...,...,...,...
60979,PhysicalActivity,MusculoskeletalHips,CognitiveTrailMaking,0.710257,0.744529,30,0.954860,0.939878,30,0.716040,0.694849,30
60980,PhysicalActivity,MusculoskeletalKnees,CognitiveTrailMaking,0.593993,0.597667,30,0.964185,0.885759,30,0.711686,0.501724,30
60981,PhysicalActivity,MusculoskeletalScalars,CognitiveTrailMaking,0.729431,0.398608,30,0.886786,0.823686,30,0.190002,0.579375,30
60982,PhysicalActivity,MusculoskeletalSpine,CognitiveTrailMaking,0.610758,0.706087,30,0.966147,0.998736,30,0.746994,0.597397,30
