In [1]:
import ast

import numpy as np
import pandas as pd
import plotly.graph_objects as go

from dash_website import (
    ALL_BIOMARKERS,
    ALL_PHENOTYPES,
    ALL_DISEASES,
    ALL_ENVIRONMENTAL,
    ALL_SOCIOECONOMICS,
    ALL_CATEGORIES,
)

# Load the univariate XWAS linear correlations stored for "Abdomen".
# NOTE(review): the relative path only resolves when the kernel's working directory is
# the notebook's own folder - confirm, or move the data root into a config constant.
all_correlations = pd.read_feather(
    "../../data/xwas/univariate_results/linear_correlations_Abdomen.feather"
).set_index("index")

# Each entry of the "index" column is the string form of a (category, variable) tuple.
# ast.literal_eval only accepts Python literals, so a tampered or corrupted data file
# cannot execute arbitrary code the way the previous eval() call could.
all_correlations.index = pd.MultiIndex.from_tuples(
    [ast.literal_eval(key) for key in all_correlations.index],
    names=["category", "variable"],
)

# Selection used by the filtering cell below. Any value containing "All" combines with
# `main_category`; any other value (e.g. "FamilyHistory") selects that single category.
category = "All_Biomarkers"
main_category = "Biomarkers"

In [2]:
# Correlations computed on fewer samples than this are discarded.
MIN_SAMPLE_SIZE = 10

# Category lists that an "All..." selection can expand to, keyed by main category.
# NOTE(review): only "Biomarkers" was handled originally; the other imported ALL_* lists
# can be added here once their main-category labels are confirmed.
CATEGORIES_BY_MAIN_CATEGORY = {"Biomarkers": ALL_BIOMARKERS}

if "All" in category:
    if main_category not in CATEGORIES_BY_MAIN_CATEGORY:
        # Previously this path left `correlations` unassigned, so the rest of the cell
        # either raised NameError or silently reused a frame from an earlier run.
        raise ValueError(f"Unsupported main category {main_category!r} for selection {category!r}")
    selected_categories = CATEGORIES_BY_MAIN_CATEGORY[main_category]
else:
    selected_categories = [category]

correlations = all_correlations.loc[
    all_correlations.index.get_level_values(0).isin(selected_categories)
].copy()

# Same rows dropped as before, but without in-place mutation so re-running is harmless.
correlations = correlations.drop(
    index=correlations.index[correlations["sample_size"] < MIN_SAMPLE_SIZE]
)

# Columns consumed by the volcano plot: y value plus the two index levels as plain columns.
correlations["neg_log_p_value"] = -np.log10(correlations["p_value"])
correlations["category"] = correlations.index.get_level_values(0)
correlations["variable"] = correlations.index.get_level_values(1)

In [10]:
import plotly.express as px

# Volcano plot: correlation on x, -log10(p-value) on y, one colour per category.
axis_labels = {
    "correlation": "Partial Correlation",
    "neg_log_p_value": "-log(p-value)",
    "category": "Categories:",
}

# One hover line per entry; customdata follows the order given to `custom_data` below.
hover_lines = [
    "Variable: %{customdata[0]}",
    "Partial Correlation: %{x:.3f}",
    "p-value: %{customdata[1]:.3E}",
    "Samples size: %{customdata[2]}",
]

fig = px.scatter(
    correlations,
    x="correlation",
    y="neg_log_p_value",
    color="category",
    custom_data=["variable", "p_value", "sample_size"],
    labels=axis_labels,
    title="Volcano plot",
)
fig.update_traces(hovertemplate="<br>".join(hover_lines))
fig.show()

In [9]:
import plotly.graph_objects as go

# Horizontal significance thresholds, extended one standard deviation of the
# correlations beyond the smallest and largest correlation.
correlation_values = correlations["correlation"]
margin = correlation_values.std()
line_start = correlation_values.min() - margin
line_end = correlation_values.max() + margin

# p = 0.05 without correction, and 0.05 divided by the number of rows in `correlations`.
uncorrected_level = -np.log10(0.05)
corrected_level = -np.log10(0.05 / correlations.shape[0])

fig.add_trace(
    go.Scatter(
        x=[line_start, line_end],
        y=[uncorrected_level, uncorrected_level],
        mode="lines",
        name="No Correction",
    )
)

# Kept as the last expression of the cell so its result is still what the cell displays.
fig.add_trace(
    go.Scatter(
        x=[line_start, line_end],
        y=[corrected_level, corrected_level],
        mode="lines",
        name="With Bonferoni Correction",
    )
)

In [None]:
        # NOTE(review): legacy fragment - the leading indentation suggests it was pasted
        # from inside a function that is not part of this notebook, and Python rejects an
        # indented first statement, so this cell cannot run as-is.
        # `res` and `Scatter` are not defined in any visible cell, and `num_tests` is only
        # assigned in a later cell. The cell above (fig.add_trace with `correlations`)
        # draws the same two lines - presumably this cell is superseded by it; confirm
        # before deleting.
        #
        # First line: y = -log10(5/100), spanning one standard deviation of
        # res['corr_value'] beyond its min and max.
        fig['data'].append(
            Scatter(x = [res['corr_value'].min() - res['corr_value'].std(), res['corr_value'].max() + res['corr_value'].std()],
                       y = [-np.log10((5/100)), -np.log10((5/100))],
                       name = 'No Correction',
                       mode = 'lines'))
        # Second line: same x span, y = -log10((5/100) / num_tests).
        fig['data'].append(
            Scatter(x = [res['corr_value'].min() - res['corr_value'].std(), res['corr_value'].max() + res['corr_value'].std()],
                       y = [-np.log10((5/100)/num_tests), -np.log10((5/100)/num_tests)],
                       name = 'With Bonferoni Correction',
                       mode = 'lines'))

In [None]:
import plotly.express as px

# Draft of a single-category volcano trace.
#
# Fixes relative to the previous version of this cell:
#   * the selection line had an unbalanced "]" and did not parse;
#   * rows/columns now follow the layout built in the first cell - rows indexed by
#     (category, variable), with "correlation", "p_value" and "sample_size" as columns;
#   * the p-value hover format had two format specs chained (":.3E:.3f");
#   * `Scatter` was never imported; `go.Scatter` is used instead.
#
# NOTE(review): `value_category` is not defined in any visible cell - set it to a single
# category name before running this cell.
MIN_SAMPLE_SIZE = 10

enough_samples = all_correlations["sample_size"] >= MIN_SAMPLE_SIZE
in_category = all_correlations.index.get_level_values(0) == value_category
correlations = all_correlations.loc[enough_samples & in_category]

hovertemplate = (
    "Variable : %{customdata[0]}"
    "<br> X subcategory : %{customdata[1]}"
    "<br>p-value : %{customdata[2]:.3E}"
    "<br>Partial correlation : %{x:.3f}"
    "<br>Sample size : %{customdata[3]}"
)

# One row per point; column order must match the customdata[...] indices above.
customdata = np.stack(
    [
        correlations.index.get_level_values(1),  # variable
        correlations.index.get_level_values(0),  # category
        correlations["p_value"],
        correlations["sample_size"],
    ],
    axis=-1,
)

go.Scatter(
    x=correlations["correlation"],
    y=-np.log10(correlations["p_value"]),
    mode="markers",
    name=value_category,
    hovertemplate=hovertemplate,
    customdata=customdata,
)

In [2]:
# Skeleton of the volcano figure as a plain dict. The following cell appends its traces
# to fig["data"], so the list has to exist up front (it was missing, giving a KeyError).
fig = {
    "data": [],
    "layout": dict(
        title="Volcano plot", xaxis_title="Partial correlation", yaxis_title="-log(p_value)"
    ),
}

In [3]:
# Assemble the single-category volcano figure (markers + two significance lines) into
# the `fig` dict, and build the records for a results table.
#
# Fixes relative to the previous version of this cell:
#   * the selection line had an unbalanced "]" and did not parse;
#   * rows/columns now follow the layout built in the first cell - rows indexed by
#     (category, variable), with "correlation", "p_value" and "sample_size" as columns;
#   * the p-value hover format had two format specs chained (":.3E:.3f");
#   * `Scatter` was never imported; `go.Scatter` is used instead;
#   * `res` was never defined; the threshold lines and the table now read from
#     `correlations`, mirroring the working threshold-line cell earlier in the notebook.
#
# NOTE(review): `value_category` is not defined in any visible cell - set it to a single
# category name before running this cell.
MIN_SAMPLE_SIZE = 10
SIGNIFICANCE_LEVEL = 5 / 100

enough_samples = all_correlations["sample_size"] >= MIN_SAMPLE_SIZE
in_category = all_correlations.index.get_level_values(0) == value_category
correlations = all_correlations.loc[enough_samples & in_category]

hovertemplate = (
    "Variable : %{customdata[0]}"
    "<br> X subcategory : %{customdata[1]}"
    "<br>p-value : %{customdata[2]:.3E}"
    "<br>Partial correlation : %{x:.3f}"
    "<br>Sample size : %{customdata[3]}"
)

# One row per point; column order must match the customdata[...] indices above.
customdata = np.stack(
    [
        correlations.index.get_level_values(1),  # variable
        correlations.index.get_level_values(0),  # category
        correlations["p_value"],
        correlations["sample_size"],
    ],
    axis=-1,
)

# setdefault keeps the cell working whether or not `fig` was created with a "data" list.
traces = fig.setdefault("data", [])
traces.append(
    go.Scatter(
        x=correlations["correlation"],
        y=-np.log10(correlations["p_value"]),
        mode="markers",
        name=value_category,
        hovertemplate=hovertemplate,
        customdata=customdata,
    )
)

num_tests = correlations.shape[0]
# Not used in the visible cells; kept in case later cells rely on them.
shapes = []
line = dict(color="Black", width=0.5)

# Threshold lines extend one standard deviation beyond the observed correlations.
margin = correlations["correlation"].std()
threshold_x = [
    correlations["correlation"].min() - margin,
    correlations["correlation"].max() + margin,
]

uncorrected_level = -np.log10(SIGNIFICANCE_LEVEL)
traces.append(
    go.Scatter(
        x=threshold_x,
        y=[uncorrected_level, uncorrected_level],
        name="No Correction",
        mode="lines",
    )
)

# Significance level divided by the number of plotted correlations.
corrected_level = -np.log10(SIGNIFICANCE_LEVEL / num_tests)
traces.append(
    go.Scatter(
        x=threshold_x,
        y=[corrected_level, corrected_level],
        name="With Bonferoni Correction",
        mode="lines",
    )
)

# Records for a results table, using the display names of the legacy mapping.
# NOTE(review): the legacy frame also had a "target_dataset_name" -> "Organ" column with
# no counterpart in `all_correlations`; the variable/category pairing with
# "Environmental Feature" / "X Dataset" is assumed from the legacy names - confirm.
data = (
    correlations.reset_index()
    .rename(
        columns={
            "variable": "Environmental Feature",
            "category": "X Dataset",
            "correlation": "Partial correlation",
            "sample_size": "Sample Size",
        }
    )
    .to_dict("records")
)

Unnamed: 0,env_dataset,organ_1,organ_2,corr,sample_size
0,Alcohol,*instances01,*instances01,1.000000,2
1,Alcohol,*instances01,*instances1.5x,1.000000,2
2,Alcohol,*instances01,*instances23,,1
3,Alcohol,*instances01,Abdomen,,1
4,Alcohol,*instances01,AbdomenLiver,1.000000,2
...,...,...,...,...,...
96916,All_Diseases,\*,MusculoskeletalKnees,-0.485714,6
96917,All_Diseases,\*,MusculoskeletalFullBody,0.190769,25
96918,All_Diseases,\*,MusculoskeletalScalars,0.686697,224
96919,All_Diseases,\*,PhysicalActivity,0.984152,235


In [17]:
# Every category name wrapped in double quotes and followed by a single space
# (so the result keeps a trailing space); the bare name displays it as the cell output.
string = "".join(f'"{d}" ' for d in ALL_CATEGORIES)
string

'"Alcohol" "AnthropometryBodySize" "AnthropometryImpedance" "ArterialStiffness" "BloodBiochemistry" "BloodCount" "BloodPressure" "BoneDensitometryOfHeel" "BrainGreyMatterVolumes" "BrainSubcorticalVolumes" "BraindMRIWeightedMeans" "Breathing" "CancerScreening" "CarotidUltrasound" "ChestPain" "Claudication" "CognitiveFluidIntelligence" "CognitiveMatrixPatternCompletion" "CognitiveNumericMemory" "CognitivePairedAssociativeLearning" "CognitivePairsMatching" "CognitiveProspectiveMemory" "CognitiveReactionTime" "CognitiveSymbolDigitSubstitution" "CognitiveTowerRearranging" "CognitiveTrailMaking" "Diet" "ECGAtRest" "EarlyLifeFactors" "Education" "ElectronicDevices" "Employment" "EyeAcuity" "EyeAutorefraction" "EyeIntraocularPressure" "Eyesight" "FamilyHistory" "GeneralHealth" "GeneralPain" "HandGripStrength" "Hearing" "HearingTest" "HeartPWA" "HeartSize" "Household" "Medication" "MentalHealth" "Mouth" "OtherSociodemographics" "PhysicalActivity" "SexualFactors" "Sleep" "Smoking" "SocialSupport