In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import seaborn as sns
import plotly.express as px
import statsmodels.formula.api as smf
import plotly.graph_objects as go
from scripts.python.routines.manifest import get_manifest
from scripts.python.routines.plot.save import save_figure
from scripts.python.routines.plot.layout import add_layout
from statsmodels.stats.multitest import multipletests
import plotly.io as pio
pio.kaleido.scope.mathjax = None
from plotly.offline import init_notebook_mode
init_notebook_mode(connected=False)
from scipy.stats import mannwhitneyu, median_test, kruskal, wilcoxon, friedmanchisquare
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.patheffects as path_effects
import random
import pathlib
from tqdm import tqdm
from src.utils.plot.bioinfokit import mhat, volcano
import gseapy as gp
import mygene
from sklearn.decomposition import PCA, IncrementalPCA, KernelPCA, TruncatedSVD
from sklearn.decomposition import MiniBatchDictionaryLearning, FastICA
from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection
from sklearn.manifold import MDS, Isomap, TSNE, LocallyLinearEmbedding
import upsetplot
from matplotlib_venn import venn2, venn2_circles
from itertools import chain
from sklearn.metrics import mean_absolute_error
from scripts.python.routines.plot.colorscales import get_continuous_color
import plotly
from scripts.python.routines.plot.p_value import add_p_value_annotation
from scripts.python.routines.sections import get_sections
from statannotations.Annotator import Annotator
import functools
import matplotlib.lines as mlines
import patchworklib as pw


def conjunction(conditions):
    """Combine boolean conditions element-wise with logical AND.

    Folds ``np.logical_and`` left to right over ``conditions``. A single
    condition is returned unchanged; an empty iterable raises ``TypeError``
    because the reduction has no initial value.
    """
    return functools.reduce(lambda acc, cond: np.logical_and(acc, cond), conditions)


def disjunction(conditions):
    """Combine boolean conditions element-wise with logical OR.

    Folds ``np.logical_or`` left to right over ``conditions``. A single
    condition is returned unchanged; an empty iterable raises ``TypeError``
    because the reduction has no initial value.
    """
    return functools.reduce(lambda acc, cond: np.logical_or(acc, cond), conditions)

# Init data

In [None]:
# Root folder of the dataset. NOTE(review): machine-specific absolute path — consider a configurable DATA_DIR.
path = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN"

# Column name in pheno.xlsx -> display name of each epigenetic age estimator.
age_types = {
    "DNAmAgeHannum_harm": "Hannum",
    "DNAmAge_harm": "Horvath",
    "DNAmPhenoAge_harm": "PhenoAge",
    "DNAmGrimAge_harm": "GrimAge",
    "mPACE": "DunedinPACE"
}
# These columns keep their own name as display name.
for x in ["PCHorvath1", "PCHorvath2", "PCHannum", "PCPhenoAge", "PCGrimAge"]:
    age_types[x] = x
ages = list(age_types.values())

df_epi = pd.read_excel(f"{path}/pheno.xlsx", index_col=0)
df_epi = df_epi.rename(columns={'TR_status': 'Transplantation'})
# Assign the replaced column back: `df[col].replace(..., inplace=True)` acts on a column
# selection, which warns in recent pandas and does not update `df` under Copy-on-Write.
df_epi['Transplantation'] = df_epi['Transplantation'].replace({'before': 'Before', '1y': 'After 1 Year'})
colors = {'Before': 'crimson', 'After 1 Year': 'dodgerblue'}
# Keep only the two time points of interest and the columns used below; `.copy()` makes the
# selection an independent frame so the derived "...Acc" columns can be added without warnings.
df_epi = df_epi.loc[df_epi['Transplantation'].isin(list(colors.keys())), list(age_types.keys()) + ['Age', 'Transplantation', 'Subject_ID']].copy()
df_epi = df_epi.rename(columns=age_types)
# Features to test: DunedinPACE is used as is; every other estimator is turned into
# an acceleration (estimate minus the 'Age' column).
ages_acc = ['DunedinPACE']
for x in ages:
    if x != 'DunedinPACE':
        df_epi[f"{x}Acc"] = df_epi[x] - df_epi['Age']
        ages_acc.append(f"{x}Acc")

df_imm = pd.read_excel(f"{path}/data/immuno/df_samples(all_1052_121222)_proc(raw)_imp(fast_knn)_replace(quarter).xlsx", index_col=0)
# Keep rows with a donor/recipient label and one of the three transplantation time points.
df_imm = df_imm.loc[(df_imm['Donor/Recipient'].notnull()) & (df_imm['Transplantation Time'].isin(['Before', 'After', 'After 1 Year'])), :]

# Output folders for the epigenetic and immunology results.
path_save = f"{path}/special/055_kidney_transplant"
for p in [f"{path_save}/epi", f"{path_save}/imm"]:
    pathlib.Path(p).mkdir(parents=True, exist_ok=True)

# Epi data

In [None]:
# Horizontal bar chart: number of rows of df_epi per 'Transplantation' category.
fig = plt.figure(figsize=(3, 0.5))
sns.set_theme(style='whitegrid', font_scale=1)
# `orient` is not passed: with only `y` given, countplot derives the orientation itself.
countplot = sns.countplot(
    data=df_epi,
    y='Transplantation',
    edgecolor='black',
    palette=colors,
    order=list(colors.keys())
)
# Label every bar container: depending on the seaborn version the bars may be split
# into one container per category, so labelling only containers[0] can miss bars.
for container in countplot.containers:
    countplot.bar_label(container)
countplot.set_xlabel("Count")
countplot.set_ylabel("")
countplot.set_title("")
plt.savefig(f"{path_save}/epi/countplot.png", bbox_inches='tight', dpi=400)
plt.savefig(f"{path_save}/epi/countplot.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:
# Paired Wilcoxon signed-rank test ('Before' vs 'After 1 Year') for every feature in
# ages_acc, followed by Benjamini-Hochberg correction across the features.
df_stat = pd.DataFrame(index=ages_acc)
for feat in ages_acc:
    # One row per subject, one column per time point. Only complete pairs can enter a
    # paired test, so subjects missing either time point are dropped (a NaN would
    # otherwise propagate into the p-value).
    df_pivot = df_epi.pivot(index='Subject_ID', columns='Transplantation', values=feat).dropna()
    res = wilcoxon(
        x=df_pivot.loc[:, 'Before'].values,
        y=df_pivot.loc[:, 'After 1 Year'].values,
        alternative='two-sided'
    )
    df_stat.at[feat, "pval"] = res.pvalue
# multipletests returns (reject, corrected p-values, ...); only the corrected p-values are kept.
_, df_stat.loc[ages_acc, "pval_fdr_bh"], _, _ = multipletests(df_stat.loc[ages_acc, "pval"], 0.05, method='fdr_bh')
df_stat.sort_values(["pval_fdr_bh"], ascending=[True], inplace=True)
df_stat.to_excel(f"{path_save}/epi/stat.xlsx", index_label='Features')

# Grid of point plots (one panel per feature, one hue level per subject) assembled
# with patchworklib: panels are joined into rows with "|" and rows are stacked with "/".
axs = {}
pw_rows = []
n_cols = 5
n_rows = int(np.ceil(len(ages_acc) / n_cols))
for r_id in range(n_rows):
    # Features that belong to this row of the grid.
    row_feats = ages_acc[r_id * n_cols:(r_id + 1) * n_cols]
    pw_cols = []
    for feat in row_feats:
        brick = pw.Brick(figsize=(2, 2))
        axs[feat] = brick
        sns.set_theme(style='whitegrid')
        pointplot = sns.pointplot(
            data=df_epi,
            x='Transplantation',
            y=feat,
            hue='Subject_ID',
            legend=False,
            ax=brick
        )
        brick.set_xlabel("")
        # NOTE(review): legend=False is passed to pointplot and a legend is then requested
        # on the axes — confirm which of the two is intended.
        brick.legend(loc='center left', bbox_to_anchor=(1.01, 0.5), ncol=1)
        brick.set_title(f"p-value: {df_stat.at[feat, 'pval_fdr_bh']:.2e}")
        pw_cols.append(brick)
    # Pad an incomplete row with blank panels so that every row has n_cols panels.
    for pad_id in range(n_cols - len(row_feats)):
        empty_fig = pw.Brick(figsize=(2, 2))
        empty_fig.axis('off')
        pw_cols.append(empty_fig)
    pw_rows.append(pw.stack(pw_cols, operator="|"))
pw_fig = pw.stack(pw_rows, operator="/")
for ext in ("pdf", "png"):
    pw_fig.savefig(f"{path_save}/epi/feats.{ext}")
pw.clear()

# Imm data

In [None]:
# Immunology samples split by the 'Donor/Recipient' column; each group is analysed separately.
dfs_imm = {
    label: df_imm.loc[df_imm['Donor/Recipient'] == role, :]
    for label, role in (('Donors', 'Donor'), ('Recipients', 'Recipient'))
}

# Sets of time points to compare, keyed by a short group code.
times = {
    '0-1-2': ['Before', 'After', 'After 1 Year'],
    '0-1': ['Before', 'After'],
    '0-2': ['Before', 'After 1 Year']
}

for patients, df_patients in dfs_imm.items():
    # Wide tables (one row per subject, one column per time point) restricted to
    # subjects that have a value at every time point of the group.
    dfs_times = {}
    for group, timepoints in times.items():
        in_group = df_patients['Transplantation Time'].isin(timepoints)
        wide = df_patients.loc[in_group].pivot(index='Subject ID', columns='Transplantation Time', values='SImAge acceleration')
        dfs_times[group] = wide[wide.notnull().all(axis=1)]

    for group, timepoints in times.items():
        wide = dfs_times[group]
        if len(timepoints) == 2:
            # Two time points: paired Wilcoxon signed-rank test.
            res = wilcoxon(
                x=wide.loc[:, timepoints[0]].values,
                y=wide.loc[:, timepoints[1]].values,
                alternative='two-sided'
            )
        else:
            # Three time points: Friedman test.
            res = friedmanchisquare(*(wide.loc[:, tp].values for tp in timepoints))

        # Back to long format for plotting; the subject id is kept as a column for `hue`.
        df_melt = wide.melt(
            var_name='Transplantation Time',
            value_name='SImAge acceleration',
            ignore_index=False
        )
        df_melt['Subject ID'] = df_melt.index

        fig = plt.figure(figsize=(6, 4))
        sns.set_theme(style='whitegrid')
        pointplot = sns.pointplot(
            data=df_melt,
            order=timepoints,
            x='Transplantation Time',
            y='SImAge acceleration',
            hue='Subject ID',
            legend=False,
        )
        pointplot.set_xlabel("")
        # NOTE(review): legend=False is passed above and a legend is then requested here —
        # confirm which of the two is intended.
        pointplot.legend(loc='center left', bbox_to_anchor=(1.01, 0.5), ncol=1)
        pointplot.set_title(f"p-value: {res.pvalue:.2e}")
        plt.savefig(f"{path_save}/imm/{patients}_{group}_SImAgeAcc.png", bbox_inches='tight', dpi=300)
        plt.savefig(f"{path_save}/imm/{patients}_{group}_SImAgeAcc.pdf", bbox_inches='tight')
        plt.close(fig)

# Update pheno.xlsx

In [None]:
# Reload the phenotype table together with the extra metrics table; both are indexed
# by their first column.
path = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN"
df_epi, df_metrics = (
    pd.read_excel(f"{path}/{rel_path}", index_col=0)
    for rel_path in ("pheno.xlsx", "data/many_metrics.xlsx")
)

In [None]:
# Copy 'Dialysis (months)' from the metrics table into the phenotype table for the
# rows of the metrics table where it is filled in.
has_dial = df_metrics['Dialysis (months)'].notnull()
ids_dial = df_metrics.index[has_dial].values
df_epi.loc[ids_dial, 'Dialysis (months)'] = df_metrics.loc[ids_dial, 'Dialysis (months)']
# The result goes to a separate file ("pheno111.xlsx"), not over pheno.xlsx.
df_epi.to_excel(f"{path}/pheno111.xlsx", index_label='index')

# Controls and ESRDs with dialysis: DNAm

In [None]:
path = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN"
df_epi = pd.read_excel(f"{path}/pheno.xlsx", index_col=0)
# Rows excluded from the longitudinal analysis (presumably superseded / duplicate samples — confirm).
df_epi = df_epi.drop(["I64_old", "I1_duplicate"])
# Number of samples per subject. The Series is named explicitly because the default
# name of a `value_counts()` result differs between pandas versions ("Subject_ID"
# before 2.0, "count" from 2.0 on), which would break the column lookup below.
df_long = df_epi['Subject_ID'].value_counts().rename('Subject_ID').to_frame()
# Subjects with more than one sample.
df_long = df_long[df_long['Subject_ID'] > 1]
ids_long = df_long.index.values

In [None]:
# Row labels of samples with a dialysis duration / with Status == "Control".
rows_dial = df_epi.index[df_epi['Dialysis (months)'].notnull()].values
rows_ctrl = df_epi.index[df_epi['Status'] == "Control"].values
# NOTE(review): ids_long holds 'Subject_ID' values while rows_dial / rows_ctrl are row
# labels of df_epi; the intersection only keeps subjects whose id also occurs as a row
# label — confirm this is intended.
ids_long_dial = list(set(ids_long).intersection(rows_dial))
ids_long_ctrl = list(set(ids_long).intersection(rows_ctrl))

# Independent copies of the rows of each longitudinal group.
experiments = {
    expt_name: df_epi.loc[df_epi['Subject_ID'].isin(subject_ids), :].copy()
    for expt_name, subject_ids in (('esrd_dial', ids_long_dial), ('controls', ids_long_ctrl))
}

In [None]:
# Colour per time-point label.
colors = {'T0': 'crimson', 'T1': 'dodgerblue', 'T2': 'lawngreen', 'T3': 'orange', 'T4': 'purple'}
# Column name in pheno.xlsx -> display name of each epigenetic age estimator.
age_types = {
    "DNAmAgeHannum_harm": "Hannum",
    "DNAmAge_harm": "Horvath",
    "DNAmPhenoAge_harm": "PhenoAge",
    "DNAmGrimAge_harm": "GrimAge",
    "mPACE": "DunedinPACE"
}
# These columns keep their own name as display name.
age_types.update({
    pc_clock: pc_clock
    for pc_clock in ["PCHorvath1", "PCHorvath2", "PCHannum", "PCPhenoAge", "PCGrimAge"]
})
ages = list(age_types.values())
# One output folder per experiment.
path_save = f"{path}/special/055_kidney_transplant"
for p in (f"{path_save}/esrd_dial", f"{path_save}/controls"):
    pathlib.Path(p).mkdir(parents=True, exist_ok=True)

In [None]:
# For each experiment: compare T0 vs T1 for every feature (paired Wilcoxon signed-rank
# test with Benjamini-Hochberg correction), save the statistics and a grid of point plots.
for expt, df_expt in experiments.items():

    # Work on derived frames instead of renaming/replacing in place, so the frames stored
    # in `experiments` stay untouched and the cell can be re-run safely.
    df_expt = df_expt.rename(columns={'Sample_Chronology': 'Time'})
    # Assign the replaced column back: `df[col].replace(..., inplace=True)` acts on a column
    # selection, which warns in recent pandas and does not update `df` under Copy-on-Write.
    df_expt['Time'] = df_expt['Time'].replace({0: 'T0', 1: 'T1', 2: 'T2', 3: 'T3'})
    # Only the first two time points and the columns used below; `.copy()` so that the
    # derived "...Acc" columns are added to an independent frame.
    df_expt = df_expt.loc[df_expt['Time'].isin(['T0', 'T1']), list(age_types.keys()) + ['Age', 'Time', 'Subject_ID']].copy()
    df_expt = df_expt.rename(columns=age_types)
    # DunedinPACE is used as is; every other estimator is turned into an acceleration
    # (estimate minus the 'Age' column).
    ages_acc = ['DunedinPACE']

    for x in ages:
        if x != 'DunedinPACE':
            df_expt[f"{x}Acc"] = df_expt[x] - df_expt['Age']
            ages_acc.append(f"{x}Acc")

    df_stat = pd.DataFrame(index=ages_acc)
    for feat in ages_acc:
        # One row per subject, one column per time point. A subject with several samples
        # may still lack T0 or T1; only complete pairs can enter the paired test, so
        # incomplete rows are dropped (a NaN would otherwise propagate into the p-value).
        df_pivot = df_expt.pivot(index='Subject_ID', columns='Time', values=feat).dropna()
        res = wilcoxon(
            x=df_pivot.loc[:, 'T0'].values,
            y=df_pivot.loc[:, 'T1'].values,
            alternative='two-sided'
        )
        df_stat.at[feat, "pval"] = res.pvalue
    # multipletests returns (reject, corrected p-values, ...); only the corrected p-values are kept.
    _, df_stat.loc[ages_acc, "pval_fdr_bh"], _, _ = multipletests(df_stat.loc[ages_acc, "pval"], 0.05, method='fdr_bh')
    df_stat.sort_values(["pval_fdr_bh"], ascending=[True], inplace=True)
    df_stat.to_excel(f"{path_save}/{expt}/stat.xlsx", index_label='Features')

    # Grid of point plots assembled with patchworklib: panels are joined into rows
    # with "|" and rows are stacked with "/".
    axs = {}
    pw_rows = []
    n_cols = 2
    n_rows = int(np.ceil(len(ages_acc) / n_cols))
    for r_id in range(n_rows):
        pw_cols = []
        for c_id in range(n_cols):
            rc_id = r_id * n_cols + c_id
            if rc_id < len(ages_acc):
                feat = ages_acc[rc_id]
                axs[feat] = pw.Brick(figsize=(2, 2))
                sns.set_theme(style='whitegrid')
                pointplot = sns.pointplot(
                    data=df_expt,
                    x='Time',
                    y=feat,
                    hue='Subject_ID',
                    legend=False,
                    ax=axs[feat]
                )
                axs[feat].set_xlabel("")
                # NOTE(review): legend=False is passed to pointplot and a legend is then
                # requested on the axes — confirm which of the two is intended.
                axs[feat].legend(loc='center left', bbox_to_anchor=(1.01, 0.5), ncol=4)
                axs[feat].set_title(f"p-value: {df_stat.at[feat, 'pval_fdr_bh']:.2e}")
                pw_cols.append(axs[feat])
            else:
                # Blank panel so that every row has n_cols panels.
                empty_fig = pw.Brick(figsize=(2, 2))
                empty_fig.axis('off')
                pw_cols.append(empty_fig)
        pw_rows.append(pw.stack(pw_cols, operator="|"))
    pw_fig = pw.stack(pw_rows, operator="/")
    pw_fig.savefig(f"{path_save}/{expt}/feats.pdf")
    pw_fig.savefig(f"{path_save}/{expt}/feats.png")
    pw.clear()

# Update immunodata

In [None]:
# Load the immunology table and the phenotype table and find the row labels they share.
path = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN"
df_epi = pd.read_excel(f"{path}/pheno.xlsx", index_col=0).drop(["I64_old", "I1_duplicate"])
df_imm = pd.read_excel(f"{path}/data/immuno/df_samples(all_1052_121222)_proc(raw)_imp(fast_knn)_replace(quarter).xlsx", index_col=0)
ids_cmn = df_imm.index.intersection(df_epi.index).values

In [None]:
# Copy phenotype columns into the immunology table for the shared rows
# (immunology column name, phenotype column name).
for col_imm, col_epi in (('Dialysis (months)', 'Dialysis (months)'), ('Subject ID', 'Subject_ID')):
    df_imm.loc[ids_cmn, col_imm] = df_epi.loc[ids_cmn, col_epi]
# Rows without a phenotype match keep -1 as 'Sample_Chronology'.
df_imm['Sample_Chronology'] = -1
df_imm.loc[ids_cmn, 'Sample_Chronology'] = df_epi.loc[ids_cmn, 'Sample_Chronology']

In [None]:
# Number of samples per subject. The Series is named explicitly because the default name
# of a `value_counts()` result differs between pandas versions ("Subject ID" before 2.0,
# "count" from 2.0 on); code that looks up the 'Subject ID' column relies on this name.
df_long = df_imm['Subject ID'].value_counts().rename('Subject ID').to_frame()

In [None]:
# Flag samples of subjects that have more than one sample. The counts are taken straight
# from df_imm so the result does not depend on how a `value_counts()` frame names its
# column (that name changed in pandas 2.0).
subject_counts = df_imm['Subject ID'].value_counts()
df_imm['Is longitudinal?'] = df_imm['Subject ID'].isin(subject_counts.index[subject_counts > 1].values)

In [None]:
# Write the augmented immunology table to a separate file ("11111111.xlsx") in the
# immuno data folder; the index is stored in a column labelled 'index'.
df_imm.to_excel(f"{path}/data/immuno/11111111.xlsx", index_label='index')

# Controls and ESRDs with dialysis: Immuno

In [None]:
df_imm = pd.read_excel(f"{path}/data/immuno/df_samples(all_1052_121222)_proc(raw)_imp(fast_knn)_replace(quarter).xlsx", index_col=0)
# Colour per time-point label.
colors = {'T0': 'crimson', 'T1': 'dodgerblue', 'T2': 'lawngreen', 'T3': 'orange', 'T4': 'purple'}

# Number of samples per subject. The Series is named explicitly because the default name
# of a `value_counts()` result differs between pandas versions ("Subject ID" before 2.0,
# "count" from 2.0 on), which would break the column lookup below.
df_long = df_imm['Subject ID'].value_counts().rename('Subject ID').to_frame()
# Subjects with more than one sample.
df_long = df_long[df_long['Subject ID'] > 1]
ids_long = df_long.index.values
# NOTE(review): ids_long holds 'Subject ID' values while the second operand of each
# intersection is a set of row labels of df_imm; only subjects whose id also occurs as a
# row label are kept — confirm this is intended.
ids_long_dial = list(set.intersection(set(ids_long), df_imm.index[df_imm['Dialysis (months)'].notnull()].values))
ids_long_ctrl = list(set.intersection(set(ids_long), df_imm.index[df_imm['Status'] == "Control"].values))

In [None]:
# Independent copies of the immunology rows of each longitudinal group.
experiments = {
    expt_name: df_imm.loc[df_imm['Subject ID'].isin(subject_ids), :].copy()
    for expt_name, subject_ids in (('esrd_dial', ids_long_dial), ('controls', ids_long_ctrl))
}

In [None]:
# For each experiment: compare 'SImAge acceleration' at T0 vs T1 with a paired Wilcoxon
# signed-rank test and save a point plot with one hue level per subject.
for expt, df_expt in experiments.items():

    # Work on derived frames instead of renaming/replacing in place, so the frames stored
    # in `experiments` stay untouched and the cell can be re-run safely.
    df_expt = df_expt.rename(columns={'Sample_Chronology': 'Time'})
    # Assign the replaced column back: `df[col].replace(..., inplace=True)` acts on a column
    # selection, which warns in recent pandas and does not update `df` under Copy-on-Write.
    df_expt['Time'] = df_expt['Time'].replace({0: 'T0', 1: 'T1', 2: 'T2', 3: 'T3'})
    df_expt = df_expt[df_expt['Time'].isin(['T0', 'T1'])]
    print(df_expt.shape)
    # One row per subject, one column per time point. A subject with several samples may
    # still lack T0 or T1; only complete pairs can enter the paired test, so incomplete
    # rows are dropped (a NaN would otherwise propagate into the p-value).
    # The plot below still shows every T0/T1 sample.
    df_pivot = df_expt.pivot(index='Subject ID', columns='Time', values='SImAge acceleration').dropna()

    res = wilcoxon(
        x=df_pivot.loc[:, 'T0'].values,
        y=df_pivot.loc[:, 'T1'].values,
        alternative='two-sided'
    )

    fig = plt.figure(figsize=(6, 4))
    sns.set_theme(style='whitegrid')
    pointplot = sns.pointplot(
        data=df_expt,
        order=['T0', 'T1'],
        x='Time',
        y='SImAge acceleration',
        hue='Subject ID',
        legend=False,
    )
    pointplot.set_xlabel("")
    # NOTE(review): legend=False is passed above and a legend is then requested here —
    # confirm which of the two is intended.
    pointplot.legend(loc='center left', bbox_to_anchor=(1.01, 0.5), ncol=2)
    pointplot.set_title(f"p-value: {res.pvalue:.2e}")
    pathlib.Path(f"{path_save}/imm/{expt}").mkdir(parents=True, exist_ok=True)
    plt.savefig(f"{path_save}/imm/{expt}/SImAgeAcc.png", bbox_inches='tight', dpi=300)
    plt.savefig(f"{path_save}/imm/{expt}/SImAgeAcc.pdf", bbox_inches='tight')
    plt.close(fig)