In [263]:
import pandas as pd
from dotmap import DotMap

from scipy import stats
import numpy as np

from plotly.subplots import make_subplots
import plotly.graph_objects as go

import plotly.express as px

import os

In [210]:
# Qualitative colour sequence taken from plotly express; the plotting cell
# indexes into it (palette[0]) to colour the traces.
palette = px.colors.qualitative.Plotly

In [12]:
# Folder holding the three input workbooks. The default is the original shared
# network location; set the PESA_DATA_DIR environment variable to point the
# notebook at another copy of the data without editing this cell.
DATA_DIR = os.environ.get(
    "PESA_DATA_DIR",
    r"S:\U_Proteomica\UNIDAD\software\MacrosRafa\data\Metabolomics\PESA_Integromics\Alessia\1_DifferentialCorrelation\data",
)

# One workbook per PESA visit, plus a single AWHS workbook (read later with two
# different sheet names).
pev1_path = os.path.join(DATA_DIR, "PESA_V1.xlsx")
pev2_path = os.path.join(DATA_DIR, "PESA_V2.xlsx")
awhs_path = os.path.join(DATA_DIR, "AWHS_2.xlsx")

In [175]:
# Load the two PESA workbooks; note the sheet names differ in capitalisation
# between the two files ('transpose2' vs 'Transpose2').
pev1, pev2 = (
    pd.read_excel(workbook, sheet_name=sheet)
    for workbook, sheet in ((pev1_path, 'transpose2'), (pev2_path, 'Transpose2'))
)

# Load both AWHS sheets (MS1 and MS2) from the same workbook.
awh1, awh2 = (
    pd.read_excel(awhs_path, sheet_name=sheet)
    for sheet in ('Transpose_MS1', 'Transpose_MS2')
)

In [176]:
# Add a numeric 'Caso/control' column to both PESA frames:
# 0 when Group == 'C', 1 for every other value (missing values included,
# because a missing value never compares equal to 'C').
for pesa_df in (pev1, pev2):
    pesa_df['Caso/control'] = pesa_df.Group.ne('C').astype('int64')

In [177]:
# Lipid species -> the data columns that measure it in each cohort.
#
# Every value is a list of exactly four tuples, positionally matched to the
# cohorts in the order (PESA V1, PESA V2, AWHS MS1, AWHS MS2). Each tuple holds
# the column names of that lipid in the corresponding data frame; a lipid may
# map to one or several columns per cohort.
#
# (An earlier flat-list definition of `lipids` was overwritten by this dict
# before it was ever used, so it has been dropped.)
lipids = {
    'PE 38:4': [
        ('PE 38:4_CN', 'PE 38:4-PC 35:4_CP'),
        ('PE 38:4_CN', 'PE 38:4-PC 35:4_CP'),
        ('PE [38:4]_N', 'PE [38:4]_P'),
        ('PE [18:0 / 20:4]_N', 'PE [38:4]_P')
    ],
    'LPE 20:4': [
        ('LPE 20:4_CN', 'LPE 20:4_CP', 'LPE 20:4_HP'),
        ('LPE 20:4_CN', 'LPE 20:4_CP', 'LPE 20:4_HP'),
        ('LPE [20:4]_N','LPE [20:4]_P'),
        ('LPE [20:4]_N','LPE [20:4]_P')
    ],
    'PE 36:4': [
        # NOTE: the PESA column names really contain a space before the suffix.
        ('PE 36:4 _CN','PE 36:4 _CP'),
        ('PE 36:4 _CN','PE 36:4 _CP'),
        ('PE [36:4]_N',),
        ('PE [16:0 / 20:4]_N',)
    ],
    'LPC 18:2': [
        ('LPC 18:2_HP','LPC 18:2_HN'),
        ('LPC 18:2_HP',),
        ('LPC [18:2]_N', 'LPC [18:2]_P'),
        ('LPC [18:2]_N', 'LPC [18:2]_P')
    ],
    'PC 36:4': [
        ('PC 36:4_CP',),
        ('PC 36:4_CP',),
        ('PC [36:4]_P', 'PC [36:4]_N'),
        ('PC [36:4]_P', 'PC [16:0 / 20:4]_N', 'PC [18:3 / 18:1]_N', 'PC [18:2 / 18:2]_N')
    ],
    'PC 38:4': [
        ('PC 38:4_HP',),
        ('PC 38:4_HP',),
        ('PC [38:4]_P', 'PC [38:4]_N'),
        ('PC [38:4]_P', 'PC [18:0 / 20:4]_N')
    ]

}

In [178]:
# Columns
cpt = 'Plaque_thickness'
ccs = 'Calcium_Score'
ccd = 'Caso/control'

In [202]:
def _pearson_summary(x, y):
    """Return ``{'r': ..., 'pv': ...}`` for the Pearson correlation of x and y.

    Both inputs are converted to float arrays first (a stored output of this
    notebook shows the plaque-thickness column with dtype ``object``).
    With fewer than two observations the correlation is undefined, so NaN is
    returned for both values instead of letting ``pearsonr`` raise and abort
    the whole run.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if len(x) < 2:
        return {'r': np.nan, 'pv': np.nan}
    fit = stats.pearsonr(x, y)  # computed once; both fields come from the same fit
    return {'r': fit.statistic, 'pv': fit.pvalue}


# Nested results, indexed as
#   res[target column][lipid][cohort name][lipid column][subgroup] -> {'r', 'pv'}
# where subgroup is 'all', 'C' (Caso/control == 0) or 'D' (Caso/control == 1).
res = {}

# Order matters: position n here selects lipids[lipid][n].
cohorts = [(pev1, 'PESA_V1'), (pev2, 'PESA_V2'), (awh1, 'AWHS_MS1'), (awh2, 'AWHS_MS2')]

for target in [cpt, ccs]:
    res[target] = {}

    for lipid in lipids:
        res[target][lipid] = {}

        for slot, (cohort_df, cohort_name) in enumerate(cohorts):
            features = lipids[lipid][slot]

            # Keep only the columns needed and drop rows with any missing value,
            # so every feature of this lipid is correlated on the same rows.
            df = cohort_df.loc[:, [ccd, target, *features]].dropna()

            subsets = {
                'all': df,
                'C': df.loc[df[ccd] == 0],
                'D': df.loc[df[ccd] == 1],
            }

            res[target][lipid][cohort_name] = {
                feature: {
                    subgroup: _pearson_summary(rows[target], rows[feature])
                    for subgroup, rows in subsets.items()
                }
                for feature in features
            }



In [273]:
# Target column whose differential correlations are exported (calcium score).
c = ccs

file = f'Plots/{c}.html'
# Make sure the output folder exists; open() would otherwise fail on a fresh checkout.
os.makedirs(os.path.dirname(file), exist_ok=True)

groups = ['PESA_V1', 'PESA_V2', 'AWHS_MS1', 'AWHS_MS2']

# One figure per lipid, all written as HTML fragments into the same file.
# Opening once in 'w' mode replaces any file left by a previous run, and the
# explicit encoding keeps the output independent of the platform default.
with open(file, 'w', encoding='utf-8') as f:
    for lipid in lipids:
        fig = make_subplots(rows=1, cols=len(groups), subplot_titles=groups)

        # Iterate over `groups` itself so each cohort always lands under its own
        # subplot title, regardless of the key order inside `res`.
        for col, cohort in enumerate(groups, start=1):
            for feature, by_subgroup in res[c][lipid][cohort].items():
                # Two-point line: correlation in controls (x=0) vs cases (x=1);
                # the feature name is printed next to the second point.
                fig.add_trace(go.Scatter(
                    x=[0,1], y=[by_subgroup['C']['r'], by_subgroup['D']['r']],
                    text=['', feature], textposition='middle left',
                    mode='lines+markers+text', name=feature, marker_color=palette[0], line_width=1, showlegend=False
                ),row=1, col=col)

        fig.update_xaxes(tickvals=[0,1], ticktext=['C','D'])
        fig.update_layout(title=f'Differential Correlation {c} | {lipid}')
        # fig.show()
        f.write(fig.to_html(full_html=False, include_plotlyjs='cdn', default_height='50%', default_width='90%'))

In [236]:
# Display the full nested results dict, indexed as
# res[target column][lipid][cohort][lipid column][subgroup] -> {'r': ..., 'pv': ...}.
res

{'Plaque_thickness': {'PE 38:4': {'PESA_V1': {'PE 38:4_CN': {'all': {'r': 0.14842862743464977,
      'pv': 0.0035975064010808654},
     'C': {'r': -0.07182987264238049, 'pv': 0.32213299508509124},
     'D': {'r': 0.007521242455464419, 'pv': 0.9177528369411079}},
    'PE 38:4-PC 35:4_CP': {'all': {'r': 0.13086256234836696,
      'pv': 0.010356499767000862},
     'C': {'r': -0.005882084876542681, 'pv': 0.9354633984243086},
     'D': {'r': 0.05136068775476728, 'pv': 0.48042120816107914}}},
   'PESA_V2': {'PE 38:4_CN': {'all': {'r': 0.15629172329239913,
      'pv': 0.0009778517693797993},
     'C': {'r': -0.023831278250500944, 'pv': 0.7239957077881511},
     'D': {'r': 0.11449959346484803, 'pv': 0.09022633994622842}},
    'PE 38:4-PC 35:4_CP': {'all': {'r': 0.1815316912329072,
      'pv': 0.00012423795964864985},
     'C': {'r': -0.05732665048993623, 'pv': 0.39531104602028233},
     'D': {'r': 0.14620578568336395, 'pv': 0.030166963907676723}}},
   'AWHS_MS1': {'PE [38:4]_N': {'all': {'r': 

In [86]:
# Ad-hoc look at the plaque-thickness column. `df` is not defined by a cell of
# its own: it is whatever frame the correlation loop assigned last.
# NOTE(review): once that loop has run to completion, the last `df` was built
# for the calcium-score target, so this lookup likely fails on a fresh
# Run All - confirm, or remove this cell.
df[cpt]

0          0
1          0
2          0
3          0
4          0
       ...  
439     17.4
440    18.75
441    15.73
442    19.12
443    13.35
Name: Plaque_thickness, Length: 444, dtype: object

In [66]:
# List the columns of the frame read from the AWHS 'Transpose_MS2' sheet.
awh2.columns
# pev2.columns  # alternative: list the columns of the PESA V2 frame instead

Index(['Patient', 'data', 'Unnamed: 2', 'SEQN', 'Batch', 'GlobalOrder',
       'Caso/control', 'Cohorte', 'Codigo_externo', 'Plaque thickness',
       'Calcio Score', 'N_TERRITORIES', 'Glucosa', 'LPA', 'Colesterol',
       'FUMADOR_RF', 'TABAQ', 'age', 'sexo', 'HDL', 'SystolicBloodPressure',
       'DiastolicBloodPressure', 'smoker', 'MED_DIABETES', 'MED_DISLIPEMIA',
       'MED_HIPERTENSION', 'MED_OTRAS', 'diabetes', 'DIABETES_RF',
       'HIPERTENSION_RF', 'DISLIPEMIA_RF', 'framingham', 'PESA Score',
       'PESA Score_Num', 'regicor', 'IGHA2', 'APOA', 'HPT', 'Polarity',
       'PE [18:0 / 20:4]_N', 'LPE [20:4]_N', 'PE [16:0 / 20:4]_N',
       'PE [38:4]_P', 'PC [15:0 / 20:4]_N', 'LPE [20:4]_P', 'PC [36:4]_P',
       'PC [16:0 / 20:4]_N', 'PC [18:3 / 18:1]_N', 'PC [18:2 / 18:2]_N',
       'PE [38:6]_P', 'PE [36:2]_P', 'PE [40:6]_P', 'PC [38:4]_P',
       'PC [18:0 / 20:4]_N', 'LPC [18:2]_N', 'LPC [18:2]_P'],
      dtype='object')