In [22]:
import pandas as pd
import numpy as np
from scipy import stats


from dash_website.utils.graphs import heatmap_by_sorted_dimensions, add_custom_legend_axis
from dash_website.utils.graphs import heatmap_by_sorted_dimensions, add_custom_legend_axis
from dash_website import (
    ORDER_DIMENSIONS,
    CUSTOM_ORDER,
)

# Parameters of the comparison: which subset/correlation column to select and
# which two categories are compared against each other.
subset_method, correlation_type = "union", "pearson"
first_category, second_category = "Phenotypic", "Genetics"

# One feather file per category. Each frame is converted to a plain dict
# (DataFrame.to_dict()), which get_table_and_customdata turns back into a
# DataFrame.
data_comparison_upper = pd.read_feather(
    f"../../all_data/xwas/univariate_correlations/correlations/categories/correlations_{first_category}.feather"
).to_dict()
data_comparison_lower = pd.read_feather(
    f"../../all_data/xwas/univariate_correlations/correlations/categories/correlations_{second_category}.feather"
).to_dict()

In [23]:
def get_table_and_customdata(data_comparison, subset_method, correlation_type):
    """Build the correlation table, its per-cell customdata and the long-format correlations.

    Parameters
    ----------
    data_comparison : dict
        Dict accepted by ``pd.DataFrame(...)`` that contains the eight key
        columns ``dimension_1``, ``subdimension_1``, ``r2_1``, ``r2_std_1``,
        ``dimension_2``, ``subdimension_2``, ``r2_2``, ``r2_std_2``. Every
        remaining column label must be the string form of a
        ``(subset_method, correlation_type)`` tuple.
    subset_method : str
        First element of the column tuple to select.
    correlation_type : str
        Second element of the column tuple to select as the correlation value.
        The ``(subset_method, "number_variables")`` column is always selected
        alongside it.

    Returns
    -------
    table_correlations : pandas.DataFrame
        ``correlation`` pivoted to (dimension_1, subdimension_1) rows by
        (dimension_2, subdimension_2) columns, reordered with ``ORDER_DIMENSIONS``.
    customdata : pandas.DataFrame
        Frame indexed by ``ORDER_DIMENSIONS`` on both axes, filled from the
        depth-wise stack of r2_1, r2_std_1, r2_2, r2_std_2 and number_variables.
    correlations : pandas.DataFrame
        Long-format frame: the eight key columns plus ``correlation`` and
        ``number_variables``.

    Raises
    ------
    ValueError, SyntaxError
        If a column label is not a valid Python literal.
    KeyError
        If the requested columns or any ``ORDER_DIMENSIONS`` entry is missing.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    key_columns = [
        "dimension_1",
        "subdimension_1",
        "r2_1",
        "r2_std_1",
        "dimension_2",
        "subdimension_2",
        "r2_2",
        "r2_std_2",
    ]
    pivot_rows = ["dimension_1", "subdimension_1"]
    pivot_columns = ["dimension_2", "subdimension_2"]

    correlations_raw = pd.DataFrame(data_comparison).set_index(key_columns)
    # The labels are stringified tuples. literal_eval only parses Python
    # literals, so nothing coming from the data file can be executed (the
    # previous implementation used eval here).
    correlations_raw.columns = pd.MultiIndex.from_tuples(
        [ast.literal_eval(label) for label in correlations_raw.columns.tolist()],
        names=["subset_method", "correlation_type"],
    )

    # Work on an explicit copy so that renaming the columns never touches (or
    # warns about) the frame the selection was taken from.
    selected = correlations_raw[
        [(subset_method, correlation_type), (subset_method, "number_variables")]
    ].copy()
    selected.columns = ["correlation", "number_variables"]
    correlations = selected.reset_index()

    def pivot_in_display_order(value_column):
        # Wide (rows x columns) view of one value column, reordered on both
        # axes with ORDER_DIMENSIONS (imported from dash_website; presumably
        # the (dimension, subdimension) display order - TODO confirm).
        return correlations.pivot(
            index=pivot_rows,
            columns=pivot_columns,
            values=value_column,
        ).loc[ORDER_DIMENSIONS, ORDER_DIMENSIONS]

    table_correlations = pivot_in_display_order("correlation")

    customdata_list = [
        pivot_in_display_order(customdata_item).values
        for customdata_item in ["r2_1", "r2_std_1", "r2_2", "r2_std_2", "number_variables"]
    ]
    # dstack puts the five matrices along a third axis, so each (row, column)
    # position carries the five values of that pair.
    stacked_customdata = list(map(list, np.dstack(customdata_list)))

    customdata = pd.DataFrame(None, index=ORDER_DIMENSIONS, columns=ORDER_DIMENSIONS)
    customdata[customdata.columns] = stacked_customdata

    return table_correlations, customdata, correlations

In [40]:
import plotly.graph_objs as go

# Scatter plot comparing, pair by pair, the correlation found in the first
# category (x axis) with the one found in the second category (y axis).

PAIR_INDEX_LEVELS = ["dimension_1", "subdimension_1", "dimension_2", "subdimension_2"]


def _with_nan_diagonal(table):
    """Return a copy of ``table`` whose main diagonal is NaN.

    Uses DataFrame.mask instead of writing into ``table.values``: the latter
    only works while ``.values`` is a writable view, which is not the case
    under pandas Copy-on-Write.
    """
    return table.mask(np.eye(*table.shape, dtype=bool))


def _mask_self_pairs(points):
    """Return ``points`` with NaN wherever both sides of the pair are the same.

    ``points`` must be indexed by the four PAIR_INDEX_LEVELS; an entry is a
    self pair when dimension_1 == dimension_2 and subdimension_1 == subdimension_2.
    """
    pair_index = points.index
    is_self_pair = (
        pair_index.get_level_values("dimension_1") == pair_index.get_level_values("dimension_2")
    ) & (pair_index.get_level_values("subdimension_1") == pair_index.get_level_values("subdimension_2"))
    return points.mask(is_self_pair)


table_correlations_upper, customdata_upper, correlations_upper = get_table_and_customdata(
    data_comparison_upper, subset_method, correlation_type
)
table_correlations_upper = _with_nan_diagonal(table_correlations_upper)

table_correlations_lower, customdata_lower, correlations_lower = get_table_and_customdata(
    data_comparison_lower, subset_method, correlation_type
)
table_correlations_lower = _with_nan_diagonal(table_correlations_lower)

hovertemplate_points = "Dimension 1: %{customdata[0]}, Subdimension 1: %{customdata[1]}<br>Dimension 2: %{customdata[2]}, Subdimension 2: %{customdata[3]}<Br>Correlation first category %{x:.3f}, Correlation second category %{y:.3f}<extra></extra>"

# y: second category; x: first category, aligned on the pairs (and order) of y.
y_points = _mask_self_pairs(correlations_lower.set_index(PAIR_INDEX_LEVELS)["correlation"])
x_points = _mask_self_pairs(
    correlations_upper.set_index(PAIR_INDEX_LEVELS).loc[y_points.index, "correlation"]
)

# One [dimension_1, subdimension_1, dimension_2, subdimension_2] entry per point,
# consumed by the hover template.
customdata_points = list(map(list, y_points.index.values))

fig_points = go.Figure()
fig_points.add_scatter(x=[-1.1, 1.1], y=[-1.1, 1.1], mode="lines", name="perfect correlation line")

fig_points.add_scatter(
    x=x_points.values,
    y=y_points.values,
    mode="markers",
    customdata=customdata_points,
    hovertemplate=hovertemplate_points,
    marker={"size": 5},
    name=f"{first_category} vs. {second_category} correlation for one aging dimension",
)

# Pearson correlation between the two sets of correlations, restricted to the
# pairs available in both. The result is kept in notebook variables; this cell
# does not display it.
both_present = x_points.notna() & y_points.notna()
correlation_of_correlations, p_value_of_correlations = stats.pearsonr(
    x_points[both_present], y_points[both_present]
)

# NOTE(review): the x axis starts at 0 while the y axis and the reference line
# span -1.1..1.1 - confirm this asymmetry is intentional.
fig_points.update_layout(
    yaxis={"title": f"{second_category} correlation", "range": [-1.1, 1.1], "showgrid": False, "title_font": {"size": 25}},
    xaxis={"title": f"{first_category} correlation", "range": [0, 1.1], "showgrid": False, "title_font": {"size": 25}},
    width=1500,
    height=1500,
)

fig_points.show(config={"toImageButtonOptions": {"format": "svg"}})