In [27]:
import pandas as pd 

COLUMNS_TO_TAKE = {
    "organ": "dimension",
    "view": "subdimension",
    "R-Squared_all": "r2",
    "R-Squared_sd_all": "r2_std",
}
DICT_TO_CHANGE_DIMENSIONS = {"ImmuneSystem": "BloodCells"}

scores_raw = (
    pd.read_csv(
        f"../../all_data/page2_predictions/Performances/PERFORMANCES_bestmodels_alphabetical_instances_Age_test.csv"
    )[COLUMNS_TO_TAKE]
    .rename(columns=COLUMNS_TO_TAKE)
    .set_index("dimension")
)

ensembles_scores_raw = (
    pd.read_csv(
        f"../../all_data/page2_predictions/Performances/PERFORMANCES_withEnsembles_alphabetical_instances_Age_test.csv"
    )[COLUMNS_TO_TAKE]
    .rename(columns=COLUMNS_TO_TAKE)
    .set_index(["dimension", "subdimension"])
)
ensembles_scores_raw["subdimension"] = ensembles_scores_raw.index.get_level_values("subdimension")

for dimension_to_correct in ["Hearing", "Lungs"]:
    scores_raw.loc[dimension_to_correct, ["subdimension", "r2", "r2_std"]] = ensembles_scores_raw.loc[
        (dimension_to_correct, "*"), ["subdimension", "r2", "r2_std"]
    ].values[0]

scores = scores_raw.reset_index()
scores["squeezed_dimensions"] = scores["dimension"] + scores["subdimension"].replace("*", "")
scores.set_index("squeezed_dimensions", inplace=True)

correlations_raw_ = pd.read_csv("../../all_data/page17_GWASCorrelations/GWAS_correlations_Age.csv")
correlations_raw = correlations_raw_.melt(id_vars=["Unnamed: 0"], value_vars=correlations_raw_.columns.drop("Unnamed: 0")).rename(
            columns={
                "Unnamed: 0": "squeezed_dimensions_1",
                "variable": "squeezed_dimensions_2",
                "value": "correlation",
            }
        )

correlations_std_raw_ = pd.read_csv("../../all_data/page17_GWASCorrelations/GWAS_correlations_sd_Age.csv")
correlations_std_raw = correlations_std_raw_.melt(id_vars=["Unnamed: 0"], value_vars=correlations_raw_.columns.drop("Unnamed: 0")).rename(
    columns={
        "Unnamed: 0": "squeezed_dimensions_1",
        "variable": "squeezed_dimensions_2",
        "value": "correlation_std",
    }
)

heritability = pd.read_csv("../../all_data/page11_GWASHeritability/Heritability/GWAS_heritabilities_Age.csv").rename(columns={"Organ": "squeezed_dimensions", "h2_sd": "h2_std"})
heritability.drop(index=heritability["squeezed_dimensions"][heritability["squeezed_dimensions"].isna()].index, inplace=True)
heritability.set_index("squeezed_dimensions", inplace=True)

correlations = pd.DataFrame(
    None,
    columns=[
        "squeezed_dimensions_1",
        "dimension_1",
        "subdimension_1",
        "r2_1",
        "r2_std_1",
        "heritability_1",
        "heritability_std_1",
        "squeezed_dimensions_2",
        "dimension_2",
        "subdimension_2",
        "r2_2",
        "r2_std_2",
        "heritability_2",
        "heritability_std_2",
        "correlation",
        "correlation_std",
    ],
)

for idx_dimensions in ["1", "2"]:
    correlations[f"squeezed_dimensions_{idx_dimensions}"] = correlations_raw[
        f"squeezed_dimensions_{idx_dimensions}"
    ]
    correlations.set_index(f"squeezed_dimensions_{idx_dimensions}", inplace=True)
    correlations[f"dimension_{idx_dimensions}"] = scores["dimension"]
    correlations[f"subdimension_{idx_dimensions}"] = scores["subdimension"]
    correlations[f"r2_{idx_dimensions}"] = scores["r2"]
    correlations[f"r2_std_{idx_dimensions}"] = scores["r2_std"]
    correlations[f"h2_{idx_dimensions}"] = heritability["h2"]
    correlations[f"h2_std_{idx_dimensions}"] = heritability["h2_std"]
    correlations.reset_index(inplace=True)

correlations_raw.set_index(["squeezed_dimensions_1", "squeezed_dimensions_2"], inplace=True)
correlations_std_raw.set_index(["squeezed_dimensions_1", "squeezed_dimensions_2"], inplace=True)
correlations.set_index(["squeezed_dimensions_1", "squeezed_dimensions_2"], inplace=True)
correlations["correlation"] = correlations_raw["correlation"]
correlations["correlation_std"] = correlations_std_raw["correlation_std"]

correlations.reset_index(drop=True).replace(DICT_TO_CHANGE_DIMENSIONS)

Unnamed: 0,dimension_1,subdimension_1,r2_1,r2_std_1,heritability_1,heritability_std_1,dimension_2,subdimension_2,r2_2,r2_std_2,heritability_2,heritability_std_2,correlation,correlation_std,h2_1,h2_std_1,h2_2,h2_std_2
0,*,*,0.660751,0.001829,,,*,*,0.660751,0.001829,,,,,,,,
1,*instances01,*,0.869851,0.003602,,,*,*,0.660751,0.001829,,,,,,,,
2,*instances1.5x,*,0.635053,0.001931,,,*,*,0.660751,0.001829,,,,,0.124556,0.00874,,
3,*instances23,*,0.900849,0.003993,,,*,*,0.660751,0.001829,,,,,0.200518,0.22711,,
4,Brain,*,0.763918,0.002534,,,*,*,0.660751,0.001829,,,,,0.359140,0.02580,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1019,PhysicalActivity,*,0.635326,0.001947,,,BloodCells,*,0.124663,0.000862,,,,,0.123365,0.00874,0.180777,0.00202
1020,Biochemistry,*,0.507093,0.002615,,,BloodCells,*,0.124663,0.000862,,,0.094461,0.02889,0.255124,0.00952,0.180777,0.00202
1021,Biochemistry,Urine,0.104010,0.001485,,,BloodCells,*,0.124663,0.000862,,,-0.030999,0.02528,0.105166,0.00474,0.180777,0.00202
1022,Biochemistry,Blood,0.485563,0.001488,,,BloodCells,*,0.124663,0.000862,,,0.084629,0.01137,0.262295,0.00347,0.180777,0.00202
