In [1]:
import pandas as pd

# Suffix of the exported feather file for each residual data type.
DATA_TYPE_NAMING = {
    "instances": "all_samples_per_participant",
    "eids": "average_per_participant",
    "*": "all_samples_when_possible_otherwise_average",
}
# Column of the performances CSV -> column name used in the exported table.
# NOTE(review): "transformation" and "architecture" are assumed to be the CSV headers
# holding the sub-subdimension and the algorithm. The mapping previously omitted them,
# so indexing the scores by all four model levels raised a KeyError. Confirm the headers.
COLUMNS_TO_TAKE = {
    "organ": "dimension",
    "view": "subdimension",
    "transformation": "sub_subdimension",
    "architecture": "algorithm",
    "R-Squared_all": "r2",
    "R-Squared_sd_all": "r2_std",
}
DICT_TO_CHANGE_DIMENSIONS = {"ImmuneSystem": "BloodCells"}
# Names given to the first four underscore-separated parts of a residual label, in order.
MODEL_LEVELS = ["dimension", "subdimension", "sub_subdimension", "algorithm"]
# Columns identifying one pair of residual labels in a melted correlation matrix.
LABEL_COLUMNS = ["dimensions_1", "dimensions_2"]
# Columns the output frame is created with.
OUTPUT_COLUMNS = [
    f"{name}_{dimension_index}" for dimension_index in ["1", "2"] for name in MODEL_LEVELS + ["r2", "r2_std"]
] + ["correlation", "correlation_std"]


def _read_long_correlations(path):
    """Read a correlation-matrix CSV and melt it to one row per pair of labels.

    The "Unnamed: 0" column of the CSV supplies the first label and every other column
    header supplies the second label.

    Returns a DataFrame with columns "dimensions_1", "dimensions_2" and "correlation".
    """
    matrix = pd.read_csv(path)
    return matrix.melt(id_vars=["Unnamed: 0"]).rename(
        columns={"Unnamed: 0": "dimensions_1", "variable": "dimensions_2", "value": "correlation"}
    )


for data_type in ["instances", "eids"]:
    correlations_raw = _read_long_correlations(
        f"page4_correlations/ResidualsCorrelations/ResidualsCorrelations_{data_type}_Age_test.csv"
    )
    correlations_std_raw = _read_long_correlations(
        f"page4_correlations/ResidualsCorrelations/ResidualsCorrelations_sd_{data_type}_Age_test.csv"
    )

    # "correlation_std" is copied row by row below, so both files must list the same
    # pairs in the same order; fail loudly instead of silently misaligning values.
    if not correlations_raw[LABEL_COLUMNS].equals(correlations_std_raw[LABEL_COLUMNS]):
        raise ValueError(
            f"Correlation and standard-deviation files for '{data_type}' do not list the same pairs in the same order"
        )

    correlations = pd.DataFrame(None, columns=OUTPUT_COLUMNS)

    for dimension_index in ["1", "2"]:
        label_parts = correlations_raw[f"dimensions_{dimension_index}"].str.split("_")

        # `.str[position]` yields NaN for a missing part, so reject short or missing labels explicitly.
        if not (label_parts.str.len() >= len(MODEL_LEVELS)).all():
            raise ValueError(
                f"Every label in dimensions_{dimension_index} must have at least {len(MODEL_LEVELS)} '_'-separated parts"
            )

        for position, level in enumerate(MODEL_LEVELS):
            correlations[f"{level}_{dimension_index}"] = label_parts.str[position]

    correlations["correlation"] = correlations_raw["correlation"]
    correlations["correlation_std"] = correlations_std_raw["correlation"]

    # Select columns with a list: pandas >= 2.0 raises TypeError for a dict indexer.
    scores_raw = (
        pd.read_csv(
            f"page2_predictions/Performances/PERFORMANCES_withEnsembles_alphabetical_{data_type}_Age_test.csv"
        )[list(COLUMNS_TO_TAKE)]
        .rename(columns=COLUMNS_TO_TAKE)
        .set_index(MODEL_LEVELS)
    )

    # Attach r2 / r2_std to each side of the pair by aligning on the four model levels.
    for dimension_index in ["1", "2"]:
        correlations = correlations.set_index([f"{level}_{dimension_index}" for level in MODEL_LEVELS])
        correlations[[f"r2_{dimension_index}", f"r2_std_{dimension_index}"]] = scores_raw[["r2", "r2_std"]]
        correlations = correlations.reset_index()

    # Relabel Musculoskeletal "MRI" as "DXA". This runs after the scores were attached,
    # so the alignment above used the labels exactly as they appear in the CSV files.
    for dimension_index in ["1", "2"]:
        is_musculoskeletal_mri = (correlations[f"dimension_{dimension_index}"] == "Musculoskeletal") & (
            correlations[f"sub_subdimension_{dimension_index}"] == "MRI"
        )
        correlations.loc[is_musculoskeletal_mri, f"sub_subdimension_{dimension_index}"] = "DXA"

    correlations.replace(DICT_TO_CHANGE_DIMENSIONS).to_feather(
        f"all_data/correlation_between_accelerated_aging_dimensions/all_dimensions_{DATA_TYPE_NAMING[data_type]}.feather"
    )


In [30]:
# Inspect the scores stored for the "Hearing" dimension.
# Columns are selected with a list because pandas >= 2.0 raises TypeError when a dict
# is passed as an indexer; the dict is still used for the rename.
scores_raw = pd.read_csv(
    "../../all_data/page2_predictions/Performances/PERFORMANCES_withEnsembles_alphabetical_instances_Age_test.csv"
)[list(COLUMNS_TO_TAKE)].rename(columns=COLUMNS_TO_TAKE)
scores_raw.set_index(["dimension", "subdimension"]).loc["Hearing"]

Unnamed: 0_level_0,r2,r2_std
subdimension,Unnamed: 1_level_1,Unnamed: 2_level_1
*,0.314345,0.001659
HearingTest,0.314335,0.001657
HearingTest,0.314416,0.001653
HearingTest,0.255066,0.002363
HearingTest,0.311409,0.001718
HearingTest,0.309865,0.001655


In [6]:
import pandas as pd 

# Column of the performances CSV -> column name used in this notebook.
COLUMNS_TO_TAKE = {
    "organ": "dimension",
    "view": "subdimension",
    "R-Squared_all": "r2",
    "R-Squared_sd_all": "r2_std",
}


# Scores of the best models. Columns are selected with a list because pandas >= 2.0
# raises TypeError when a dict is passed as an indexer.
scores_raw = pd.read_csv(
    "../../all_data/page2_predictions/Performances/PERFORMANCES_bestmodels_alphabetical_instances_Age_test.csv"
)[list(COLUMNS_TO_TAKE)].rename(columns=COLUMNS_TO_TAKE)
scores_raw

Unnamed: 0,dimension,subdimension,r2,r2_std
0,*,*,0.660751,0.001829
1,*instances01,*,0.869851,0.003602
2,*instances1.5x,*,0.635053,0.001931
3,*instances23,*,0.900849,0.003993
4,Abdomen,*,0.763341,0.002007
5,Abdomen,Liver,0.714937,0.002142
6,Abdomen,Pancreas,0.702804,0.002529
7,Arterial,*,0.671142,0.006004
8,Arterial,Carotids,0.648211,0.005648
9,Arterial,PulseWaveAnalysis,0.412802,0.001641


In [9]:
# Join dimension and subdimension into a single label; a subdimension equal to "*"
# contributes nothing, so such rows keep the bare dimension name.
subdimension_suffix = scores_raw["subdimension"].replace("*", "")
squeeze_dimensions = scores_raw["dimension"] + subdimension_suffix

In [32]:
# Distinct first labels of the correlation table that have no counterpart in `squeeze_dimensions`.
# NOTE(review): the displayed result looks like it came from the best-models table that is
# loaded in a later cell — confirm the intended cell order.
first_labels = correlations_raw["dimensions_1"]
first_labels[~first_labels.isin(squeeze_dimensions)].drop_duplicates()

20    Hearing
25      Lungs
Name: dimensions_1, dtype: object

In [17]:
# Load the best-models residual correlation matrix and reshape it to one row per pair of labels.
correlations_raw_ = pd.read_csv(
    "../../all_data/page4_correlations/ResidualsCorrelations/ResidualsCorrelations_bestmodels_instances_Age_test.csv"
)
correlations_raw = correlations_raw_.melt(
    id_vars=["Unnamed: 0"],
    value_vars=correlations_raw_.columns.drop("Unnamed: 0"),
).rename(columns={"Unnamed: 0": "dimensions_1", "variable": "dimensions_2", "value": "correlation"})
# Display the long table keyed by its first label.
correlations_raw.set_index("dimensions_1")

Unnamed: 0_level_0,dimensions_2,correlation
dimensions_1,Unnamed: 1_level_1,Unnamed: 2_level_1
*,*,1.000000
*instances01,*,0.838413
*instances1.5x,*,0.998501
*instances23,*,0.774242
Abdomen,*,0.375136
...,...,...
MusculoskeletalHips,PhysicalActivity,
MusculoskeletalKnees,PhysicalActivity,
MusculoskeletalFullBody,PhysicalActivity,
MusculoskeletalScalars,PhysicalActivity,


In [33]:
# Distinct first labels, in order of first appearance.
# NOTE(review): this is not the last expression of the cell, so its value is presumably
# discarded rather than displayed — confirm whether it is still needed.
correlations_raw["dimensions_1"].drop_duplicates().tolist()

# Subdimensions listed explicitly per dimension; a dimension absent from this mapping
# is paired with "*" only.
CUSTOM_SUBDIMENSION = {
    "Abdomen": ["*", "Liver", "Pancreas"],
    "Arterial": ["*", "PulseWaveAnalysis", "Carotids"],
    "Biochemistry": ["*", "Urine", "Blood"],
    "Brain": ["*", "Cognitive", "MRI"],
    "Eyes": ["*", "All", "Fundus", "OCT"],
    "Heart": ["*", "ECG", "MRI"],
    "Musculoskeletal": ["*", "Spine", "Hips", "Knees", "FullBody", "Scalars"],
}
# Dimensions in the order they should appear in the table.
CUSTOM_DIMENSION = [
    "*",
    "*instances01",
    "*instances1.5x",
    "*instances23",
    "Abdomen",
    "Arterial",
    "Biochemistry",
    "Brain",
    "Eyes",
    "Hearing",
    "Heart",
    "ImmuneSystem",
    "Lungs",
    "Musculoskeletal",
    "PhysicalActivity",
]

# One [dimension, subdimension] pair per row, following the two orderings above.
data_custom_dimensions = [
    [dimension, subdimension]
    for dimension in CUSTOM_DIMENSION
    for subdimension in CUSTOM_SUBDIMENSION.get(dimension, ["*"])
]

pd.DataFrame(data_custom_dimensions, columns=["dimension", "subdimension"])

Unnamed: 0,dimension,subdimension
0,*,*
1,*instances01,*
2,*instances1.5x,*
3,*instances23,*
4,Abdomen,*
5,Abdomen,Liver
6,Abdomen,Pancreas
7,Arterial,*
8,Arterial,PulseWaveAnalysis
9,Arterial,Carotids
