In [1]:
import pandas as pd
import numpy as np
from scipy import stats


from dash_website.utils.graphs import heatmap_by_sorted_dimensions, add_custom_legend_axis
from dash_website.utils.graphs import heatmap_by_sorted_dimensions, add_custom_legend_axis
from dash_website import (
    ORDER_DIMENSIONS,
    CUSTOM_ORDER,
)

subset_method = "union"
correlation_type = "pearson"
first_category = "Genetics"
second_category = "Phenotypic"

data_comparison_upper = pd.read_feather(f"../../all_data/xwas/univariate_correlations/correlations/categories/correlations_{first_category}.feather").to_dict()
data_comparison_lower = pd.read_feather(f"../../all_data/xwas/univariate_correlations/correlations/categories/correlations_{second_category}.feather").to_dict()

In [2]:
def get_table_and_customdata(data_comparison, subset_method, correlation_type):
    correlations_raw = pd.DataFrame(data_comparison).set_index(
        ["dimension_1", "subdimension_1", "r2_1", "r2_std_1", "dimension_2", "subdimension_2", "r2_2", "r2_std_2"]
    )
    correlations_raw.columns = pd.MultiIndex.from_tuples(
        list(map(eval, correlations_raw.columns.tolist())), names=["subset_method", "correlation_type"]
    )
    correlations = correlations_raw[[(subset_method, correlation_type), (subset_method, "number_variables")]]
    correlations.columns = ["correlation", "number_variables"]
    correlations.reset_index(inplace=True)

    table_correlations = correlations.pivot(
        index=["dimension_1", "subdimension_1"],
        columns=["dimension_2", "subdimension_2"],
        values="correlation",
    ).loc[ORDER_DIMENSIONS, ORDER_DIMENSIONS]

    customdata_list = []
    for customdata_item in ["r2_1", "r2_std_1", "r2_2", "r2_std_2", "number_variables"]:
        customdata_list.append(
            correlations.pivot(
                index=["dimension_1", "subdimension_1"],
                columns=["dimension_2", "subdimension_2"],
                values=customdata_item,
            )
            .loc[ORDER_DIMENSIONS, ORDER_DIMENSIONS]
            .values
        )
    stacked_customdata = list(map(list, np.dstack(customdata_list)))

    customdata = pd.DataFrame(None, index=ORDER_DIMENSIONS, columns=ORDER_DIMENSIONS)
    customdata[customdata.columns] = stacked_customdata

    return table_correlations, customdata, correlations

In [3]:
import plotly.graph_objs as go

table_correlations_upper, customdata_upper, correlations_upper = get_table_and_customdata(
    data_comparison_upper, subset_method, correlation_type
)
np.fill_diagonal(table_correlations_upper.values, np.nan)

table_correlations_lower, customdata_lower, correlations_lower = get_table_and_customdata(
    data_comparison_lower, subset_method, correlation_type
)
np.fill_diagonal(table_correlations_lower.values, np.nan)

fig_points = go.Figure()

hovertemplate_points = "Dimension 1: %{customdata[0]}, Subdimension 1: %{customdata[1]}<br>Dimenions 2: %{customdata[2]}, Subdimension 2: %{customdata[3]}<Br>Correlation first category %{x:.3f}, Correlation second category %{y:.3f}<extra></extra>"

y_points = correlations_lower.set_index(["dimension_1", "subdimension_1", "dimension_2", "subdimension_2"])[
    "correlation"
]
y_points[
    (y_points.index.get_level_values("dimension_1") == y_points.index.get_level_values("dimension_2"))
    & (y_points.index.get_level_values("subdimension_1") == y_points.index.get_level_values("subdimension_2"))
] = np.nan

x_points = correlations_upper.set_index(["dimension_1", "subdimension_1", "dimension_2", "subdimension_2"]).loc[
    y_points.index, "correlation"
]
x_points[
    (x_points.index.get_level_values("dimension_1") == x_points.index.get_level_values("dimension_2"))
    & (x_points.index.get_level_values("subdimension_1") == x_points.index.get_level_values("subdimension_2"))
] = np.nan

customdata_points = list(map(list, y_points.index.values))

fig_points = go.Figure()
fig_points.add_scatter(x=[-1.1, 1.1], y=[-1.1, 1.1], mode="lines", name="perfect correlation line")

fig_points.add_scatter(
    x=x_points.values,
    y=y_points.values,
    mode="markers",
    customdata=customdata_points,
    hovertemplate=hovertemplate_points,
    marker={"size": 5},
    name=f"{first_category} vs. {second_category} correlation for one aging dimension",
)
correlation_of_correlations, p_value_of_correlations = stats.pearsonr(
    x_points[x_points.notna() & y_points.notna()], y_points[x_points.notna() & y_points.notna()]
)

fig_points.update_layout(
    yaxis={"title": f"{second_category} correlation", "range": [-1.1, 1.1], "showgrid": False},
    xaxis={"title": f"{first_category} correlation", "range": [-1.1, 1.1], "showgrid": False},
    width=1500,
    height=1500,
)

sorted_dimensions = (
    correlations_upper.set_index(["dimension_1", "subdimension_1"]).loc[CUSTOM_ORDER].index.drop_duplicates()
)

triangular_heatmap_values = np.triu(table_correlations_upper)
triangular_heatmap_values += np.tril(table_correlations_lower, k=1)
triangular_heatmap = pd.DataFrame(
    triangular_heatmap_values, index=table_correlations_upper.index, columns=table_correlations_upper.columns
)

customdata_triangular_values = np.triu(customdata_upper)
customdata_triangular_values += np.tril(customdata_lower, k=1)
customdata_triangular = pd.DataFrame(
    customdata_triangular_values, index=table_correlations_upper.index, columns=table_correlations_upper.columns
)

sorted_triangular_heatmap = triangular_heatmap.loc[sorted_dimensions, sorted_dimensions]
sorted_customdata_triangular = customdata_triangular.loc[sorted_dimensions, sorted_dimensions]

hovertemplate_triangular = "Correlation: %{z:.3f} <br><br>Dimensions 1: %{x} <br>R2: %{customdata[0]:.3f} +- %{customdata[1]:.3f} <br>Dimensions 2: %{y}<br>R2: %{customdata[2]:.3f} +- %{customdata[3]:.3f} <br>Number variables: %{customdata[4]}<br><extra></extra>"

fig_triangular = heatmap_by_sorted_dimensions(
    sorted_triangular_heatmap, hovertemplate_triangular, sorted_customdata_triangular
)
fig_triangular = add_custom_legend_axis(
    fig_triangular, sorted_triangular_heatmap, size_dimension=14, size_subdimension=12
)
fig_triangular.update_layout(
    yaxis={"title": f"{second_category} correlation", "showgrid": False, "zeroline": False},
    xaxis={"title": f"{first_category} correlation", "showgrid": False, "zeroline": False},
    width=1500,
    height=1500,
)

difference_heatmap = table_correlations_upper - table_correlations_lower

sorted_difference_heatmap = difference_heatmap.loc[sorted_dimensions, sorted_dimensions]
sorted_customdata_upper = customdata_upper.loc[sorted_dimensions, sorted_dimensions]

hovertemplate_difference = "Difference in correlation: %{z:.3f} <br><br>Dimensions 1: %{x} <br>R2: %{customdata[0]:.3f} +- %{customdata[1]:.3f} <br>Dimensions 2: %{y}<br>R2: %{customdata[2]:.3f} +- %{customdata[3]:.3f}<br><extra></extra>"

fig_difference = heatmap_by_sorted_dimensions(
    sorted_difference_heatmap, hovertemplate_difference, sorted_customdata_upper, zmin=-2, zmax=2
)
fig_difference = add_custom_legend_axis(
    fig_difference, sorted_difference_heatmap, size_dimension=14, size_subdimension=12
)
fig_difference.update_layout(
    yaxis={"showgrid": False, "zeroline": False},
    xaxis={"showgrid": False, "zeroline": False},
    width=1500,
    height=1500,
)

None

In [18]:
sorted_triangular_heatmap["Brain"]

Unnamed: 0_level_0,subdimension_1,*,Cognitive,MRI
dimension_1,subdimension_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
*,*,,,
*instances01,*,,,
*instances1.5x,*,-0.191328,-0.087364,-0.396896
*instances23,*,0.710316,0.397386,0.768987
Brain,*,,0.714376,0.957501
Brain,Cognitive,0.41092,,0.130928
Brain,MRI,0.948826,0.112195,
Eyes,*,0.099258,0.015905,0.093376
Eyes,All,,,
Eyes,Fundus,0.084013,0.041154,0.083549


In [21]:
triangular_heatmap #.loc[("Arterial", "*"), "Brain"]

Unnamed: 0_level_0,Unnamed: 1_level_0,*,*instances01,*instances1.5x,*instances23,Abdomen,Abdomen,Abdomen,Arterial,Arterial,Arterial,...,Heart,Heart,Lungs,Musculoskeletal,Musculoskeletal,Musculoskeletal,Musculoskeletal,Musculoskeletal,Musculoskeletal,PhysicalActivity
Unnamed: 0_level_1,Unnamed: 1_level_1,*,*,*,*,*,Liver,Pancreas,*,Carotids,PulseWaveAnalysis,...,ECG,MRI,*,*,FullBody,Hips,Knees,Scalars,Spine,*
*,*,,,,,,,,,,,...,,,,,,,,,,
*instances01,*,0.838413,,0.995208,,,,,,,,...,0.996617,-1.0,,,,,,,,0.986415
*instances1.5x,*,0.998501,0.008787,,,,-0.197093,0.671523,1.0,,0.108788,...,0.451731,0.2371,0.102739,0.054674,0.016591,-0.067616,0.098903,0.068212,-0.371051,0.999015
*instances23,*,0.774242,0.551438,0.180503,,1.502254,1.0,0.96622,0.016989,-0.217423,0.357011,...,1.0,0.786852,0.664485,1.0,0.792833,1.0,,0.125398,0.031988,
Abdomen,*,0.375136,0.368087,0.15509,0.502254,,1.85143,0.960883,,,,...,,,,0.57271,0.520912,0.249912,0.234045,0.094929,0.553833,
Abdomen,Liver,0.342234,0.339481,0.134956,0.447073,0.883755,,1.389253,,,,...,,,,0.540991,0.50014,0.245894,0.250905,0.097693,0.512137,-0.177173
Abdomen,Pancreas,0.320634,0.31948,0.143712,0.428799,0.86037,0.525806,,,,,...,,,,0.540319,0.512409,0.205965,0.267465,0.072721,0.484023,0.656232
Arterial,*,0.356929,0.283191,0.109654,0.423142,0.200691,0.178282,0.173652,,,0.784125,...,0.310173,0.195234,,0.195871,0.187398,0.183354,0.081748,-0.030638,0.323916,1.0
Arterial,Carotids,0.338114,0.255188,0.084547,0.394605,0.207675,0.187425,0.177024,0.94078,,,...,,,,,,,,,,
Arterial,PulseWaveAnalysis,0.222601,0.330039,0.12803,0.189965,0.084459,0.070353,0.074557,0.404531,0.163522,,...,0.248931,0.192065,,0.063606,0.120481,-0.034109,0.024407,0.316102,0.017941,0.109895
