# Debugging autoreload

In [None]:
%load_ext autoreload
%autoreload 2

# Load packages

In [None]:
from pytorch_tabular.utils import load_covertype_dataset
from rich.pretty import pprint
from plotly.subplots import make_subplots
from pytorch_tabular import TabularModel
import plotly.express as px
import torch
import random
import plotly.graph_objects as go
from scipy import stats
import shap
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.impute import KNNImputer
from glob import glob
import ast
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import itertools
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from pytorch_tabular import model_sweep
from src.pt.model_sweep import model_sweep_custom
import warnings
from src.utils.configs import read_parse_config
from src.pt.hyper_opt import train_hyper_opt
from src.utils.hash import dict_hash
import pathlib
from tqdm import tqdm
import distinctipy
import matplotlib.patheffects as pe
import matplotlib.colors as mcolors
from statannotations.Annotator import Annotator
from scipy.stats import mannwhitneyu
from plottable import ColumnDefinition, Table
from scipy.stats import chi2_contingency
from plottable.plots import bar
from plottable.cmap import normed_cmap, centered_cmap
import optuna
from matplotlib.colors import LinearSegmentedColormap, ListedColormap
import matplotlib.cm
import matplotlib as mpl
from statsmodels.stats.multitest import multipletests
import re
import datetime
from collections import Counter
from matplotlib.ticker import MaxNLocator
from itertools import chain
from sklearn.metrics import mean_absolute_error
from scipy.stats import mannwhitneyu, variation, levene, zscore
import pyaging as pya
import matplotlib.lines as mlines
import statsmodels.formula.api as smf
from itertools import chain
from functools import reduce
import upsetplot
from src.plot.plotly_layout import add_layout
from pathlib import Path
import re
from openai import OpenAI
from matplotlib_venn import venn3, venn3_circles
import functools
from adjustText import adjust_text
import sys
from matplotlib.patches import Rectangle
from sklearn.decomposition import PCA
from sklearn.manifold import MDS, Isomap, TSNE
from sklearn.cluster import DBSCAN, HDBSCAN


def conjunction(conditions):
    """Combine boolean conditions element-wise with logical AND.

    Args:
        conditions: Iterable of array-likes (or scalars) accepted by
            ``np.logical_and``.

    Returns:
        The left fold of ``np.logical_and`` over ``conditions``. A single
        condition is returned unchanged. An empty iterable yields
        ``np.True_`` (the identity element of AND) instead of raising
        ``TypeError`` from ``functools.reduce``.
    """
    conditions = list(conditions)
    if not conditions:
        return np.True_
    return functools.reduce(np.logical_and, conditions)


def disjunction(conditions):
    """Combine boolean conditions element-wise with logical OR.

    Args:
        conditions: Iterable of array-likes (or scalars) accepted by
            ``np.logical_or``.

    Returns:
        The left fold of ``np.logical_or`` over ``conditions``. A single
        condition is returned unchanged. An empty iterable yields
        ``np.False_`` (the identity element of OR) instead of raising
        ``TypeError`` from ``functools.reduce``.
    """
    conditions = list(conditions)
    if not conditions:
        return np.False_
    return functools.reduce(np.logical_or, conditions)

# Figure 1

## Histograms

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

# Phenotype tables of the two cohorts and the colours used for their Status groups.
df_our = pd.read_excel(f"{path}/data/120_1/pheno_funnorm.xlsx", index_col=0)
colors_our = {
    'Control': 'chartreuse',
    'Case': 'red'
}

df_gse = pd.read_excel(f"{path}/data/GSE220622/gen/pheno_funnorm.xlsx", index_col=0)
colors_gse = {
    'Control': 'forestgreen',
    'Case': 'darkred'
}

datasets = {}
datasets['GSENEW'] = {
    'data': df_our,
    'colors': colors_our
}
datasets['GSE220622'] = {
    'data': df_gse,
    'colors': colors_gse
}

# One age histogram per dataset, split by Status; legend entries carry the group sizes.
for ds_name, ds_dict in datasets.items():
    ds_data = ds_dict['data']
    hue_counts = ds_data['Status'].value_counts()
    hue_colors = ds_dict['colors']
    # Map each raw label to "<label> (<count>)" and re-key the palette accordingly.
    hue_replace = {x: f"{x} ({y})" for x, y in hue_counts.items()}
    hue_colors = {f"{x} ({y})": hue_colors[x] for x, y in hue_counts.items()}
    hue_order = [hue_replace[x] for x in ['Case', 'Control']]
    # Assign the result back: `ds_data['Status'].replace(..., inplace=True)` acts on a
    # temporary column object and leaves the frame untouched under pandas Copy-on-Write.
    # The relabelling is intentionally written into the loaded frame itself.
    ds_data['Status'] = ds_data['Status'].replace(hue_replace)

    # 22 bins of width 5 covering ages 5..115.
    hist_bins = np.linspace(5, 115, 23)

    sns.set_theme(style='ticks')
    fig, ax = plt.subplots(figsize=(6, 3.5))
    histplot = sns.histplot(
        data=ds_data,
        bins=hist_bins,
        edgecolor='k',
        linewidth=1,
        x="Age",
        hue='Status',
        hue_order=hue_order,
        palette=hue_colors,
        ax=ax
    )
    histplot.set(xlim=(15, 80))
    # histplot.set_title(ds_name)
    plt.savefig(f"{path}/figures/1/age_hist_{ds_name}.png", bbox_inches='tight', dpi=200)
    plt.savefig(f"{path}/figures/1/age_hist_{ds_name}.pdf", bbox_inches='tight')
    plt.close(fig)

## Table for GSENEW

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

df_our = pd.read_excel(f"{path}/data/120_1/pheno_funnorm.xlsx", index_col=0)

# Diagnosis columns (ICD-10 codes) that define the target group.
special_diseases = [
    "Терапевт, I10",        # hypertension
    "Терапевт, I10.0",      # hypertension
    "Терапевт, I11",        # hypertension
    "Терапевт, I11.0",      # hypertension
    "Терапевт, I11.9",      # hypertension
    "Терапевт, I20",        # ischemic heart disease
    "Терапевт, I25.0",      # ischemic heart disease
    "Терапевт, I25",        # ischemic heart disease
    "Терапевт, I42",        # Cardiomyopathy
    "Терапевт, I42.0",      # Cardiomyopathy
    "Терапевт, I49",        # Other cardiac arrhythmias
    "Терапевт, E78",        # cholesterol (hyperlipidemia, hypercholesterolemia)

    "Терапевт, E78.5",
    "Терапевт, E78.9",

    "Терапевт, E66",        # obesity
    "Terапевт, E66.0".replace("Ter", "Тер"),      # obesity
]
# Keep only the columns that exist in the sheet. A list comprehension (rather than a
# set intersection) preserves the order declared above, so the printout is stable.
special_diseases = [col for col in special_diseases if col in df_our.columns]

print(special_diseases)

# Target is 1 when at least one of the available diagnoses is marked 'Yes'.
if special_diseases:
    df_our['Target Column'] = np.where(disjunction([df_our[m] == 'Yes' for m in special_diseases]), 1, 0)
else:
    # None of the diagnosis columns is present: nobody can be flagged.
    df_our['Target Column'] = 0

# Number of participants with a 'Yes' in any column of each code group.
# NOTE(review): 'E78' counts only the parent code (not E78.5 / E78.9), and I42 / I49
# are not counted at all, exactly as before — confirm this is intended.
count_groups = {
    'I10': ['Терапевт, I10', 'Терапевт, I10.0'],
    'I11': ['Терапевт, I11', 'Терапевт, I11.0', 'Терапевт, I11.9'],
    'I20': ['Терапевт, I20'],
    'I25': ['Терапевт, I25', 'Терапевт, I25.0'],
    'E66': ['Терапевт, E66', 'Терапевт, E66.0'],
    'E78': ['Терапевт, E78'],
}
# Columns missing from the sheet are skipped instead of raising KeyError; a group with
# no available column counts as 0.
count_special = {
    code: int((df_our[[c for c in cols if c in df_our.columns]] == 'Yes').any(axis=1).sum())
    for code, cols in count_groups.items()
}
print(count_special)

## Chronology of clocks

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

# Clock metadata (indexed by clock name) and the per-clock results table.
df_clocks_info = pd.read_excel(
    "E:/YandexDisk/Work/pydnameth/datasets/pyaging/clocks_meta_upd.xlsx",
    index_col='Clock Name',
)
df_clocks = pd.read_excel(f"{path}/data/120_1/ages_corrected.xlsx", index_col=0)

# Restrict the metadata to clocks present in both tables, order the rows by the
# "Chrolology" column (spelled as in the spreadsheet) and export the result.
df_clocks_info = (
    df_clocks_info
    .loc[df_clocks.index.intersection(df_clocks_info.index), :]
    .sort_values(by="Chrolology", ascending=True)
)
df_clocks_info.to_excel(f"{path}/figures/1/ages.xlsx")

# Figure 2

## Aging clocks

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

# df_pheno is loaded here for the distribution figure in the next cell;
# df_ages holds one row of test statistics per epigenetic clock.
df_pheno = pd.read_excel(f"{path}/data/120_1/ages_data_corrected.xlsx", index_col=0)
df_ages = pd.read_excel(f"{path}/data/120_1/ages_corrected.xlsx", index_col=0)
# Expose the index as a regular column so it can serve as id_vars in melt().
df_ages['Features'] = df_ages.index
# -log10 of the FDR-adjusted p-values of both tests; rows sorted by the Mann-Whitney value.
df_ages['Mann-Whitney'] = -np.log10(df_ages['mannwhitneyu_pval_fdr_bh'].values)
df_ages['ANCOVA'] = -np.log10(df_ages['ancova_Special_Status[T.Control]_pval_fdr_bh'].values)
df_ages.sort_values(["Mann-Whitney"], ascending=[False], inplace=True)

colors_tests = {
    'Mann-Whitney': 'fuchsia',
    'ANCOVA': 'cyan'
}

# Long format: one row per (clock, test) pair for the grouped horizontal bar plot.
df_fig = df_ages.copy()
df_fig = df_fig.melt(id_vars='Features', value_vars=['Mann-Whitney', 'ANCOVA'], var_name='Test', value_name=r"$-\log_{10}(\mathrm{p-value})$")
sns.set_theme(style='ticks')
# Figure height grows with the number of bars (0.15 inch per melted row).
fig, ax = plt.subplots(figsize=(3, df_fig.shape[0] * 0.15))
barplot = sns.barplot(
    data=df_fig,
    y='Features',
    x=r"$-\log_{10}(\mathrm{p-value})$",
    edgecolor='black',
    palette=colors_tests,
    hue='Test',
    hue_order=['Mann-Whitney', 'ANCOVA'],
    ax=ax,
)
# ax.xaxis.tick_top()
# ax.xaxis.set_label_position('top')
ax.set_ylabel('')
# Dotted red line marks the 0.05 significance threshold on the -log10 scale.
ax.axvline(-np.log10(0.05), color="red", linestyle="dotted", linewidth=2.0)
plt.savefig(f"{path}/figures/2/ages_barplot.png", bbox_inches='tight', dpi=200)
plt.savefig(f"{path}/figures/2/ages_barplot.pdf", bbox_inches='tight')
plt.close(fig)

# Clocks plotted in detail by the next cell.
# NOTE(review): both operands of `|` test the same Mann-Whitney column, so ANCOVA never
# contributes to this selection. The second operand was presumably meant to be the
# ANCOVA p-value column — confirm. Changing it alters how many clocks are selected,
# and the next cell allocates a fixed number of sub-figure rows.
ages_target = df_ages.loc[(df_ages['mannwhitneyu_pval_fdr_bh'] < 0.05) | (df_ages['mannwhitneyu_pval_fdr_bh'] < 0.05), 'Features']

In [None]:
colors_our = {
    'Control': 'chartreuse',
    'Case': 'red'
}

sns.set_theme(style='ticks')
fig = plt.figure(
    figsize=(5, 12),
    layout="constrained"
)
# Grid of sub-figures, one per selected clock.
# NOTE(review): nrows is fixed, so more than nrows * ncols selected clocks would
# index past the grid.
ncols = 1
nrows = 4
subfigs = fig.subfigures(
    ncols=ncols,
    nrows=nrows,
    wspace=0.05,
    hspace=0.05,
)
for epiage_id, epiage in enumerate(ages_target.index.values):
    # Row-major grid position. The divisor is the actual number of columns
    # (it was hard-coded to 2, which is wrong for any other 2-D layout).
    row_id, col_id = divmod(epiage_id, ncols)

    # With a single row or column the sub-figure array is indexed 1-D, otherwise 2-D.
    if ncols == 1 or nrows == 1:
        subfig = subfigs[epiage_id]
    else:
        subfig = subfigs[row_id, col_id]

    # Left panel '21': clock vs. chronological age; right panel '22': acceleration violins.
    axs = subfig.subplot_mosaic(
        [
            ['21', '22'],
        ],
        width_ratios=[3, 1.5],
        gridspec_kw={},
    )

    # Common square range for both axes, padded by 10 % on each side.
    xy_min = df_pheno[['Age', epiage]].min().min()
    xy_max = df_pheno[['Age', epiage]].max().max()
    xy_ptp = xy_max - xy_min
    # Dashed identity line (estimate == chronological age).
    bisect = sns.lineplot(
        x=[xy_min - 0.1 * xy_ptp, xy_max + 0.1 * xy_ptp],
        y=[xy_min - 0.1 * xy_ptp, xy_max + 0.1 * xy_ptp],
        linestyle='--',
        color='black',
        linewidth=1.0,
        ax=axs['21']
    )
    # Regression line fitted on controls only (no scatter drawn by regplot).
    regplot = sns.regplot(
        data=df_pheno.loc[df_pheno['Status'] == 'Control', :],
        x='Age',
        y=epiage,
        color=colors_our['Control'],
        scatter=False,
        truncate=False,
        ax=axs['21']
    )
    scatter = sns.scatterplot(
        data=df_pheno,
        x='Age',
        y=epiage,
        hue='Status',
        palette=colors_our,
        linewidth=0.5,
        alpha=0.75,
        edgecolor="k",
        s=35,
        hue_order=list(colors_our.keys()),
        legend=True,
        ax=axs['21'],
    )
    axs['21'].set_xlim(xy_min - 0.1 * xy_ptp, xy_max + 0.1 * xy_ptp)
    axs['21'].set_ylim(xy_min - 0.1 * xy_ptp, xy_max + 0.1 * xy_ptp)

    sns.violinplot(
        data=df_pheno,
        x='Status',
        y=f"{epiage} acceleration",
        hue='Status',
        palette=colors_our,
        density_norm='width',
        order=['Control', 'Case'],
        saturation=0.75,
        linewidth=1.0,
        ax=axs['22'],
        legend=False,
        cut=0,
    )
    axs['22'].set_ylabel(f"{epiage} acceleration")

fig.savefig(f"{path}/figures/2/ages_distribution.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/figures/2/ages_distribution.pdf", bbox_inches='tight')

plt.close(fig)

## Aging metrics

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

# Per-sample table; lower-case metric columns are renamed to their display names.
df_pheno = pd.read_excel(f"{path}/data/120_1/pheno_funnorm.xlsx", index_col=0)
df_pheno.rename(
    columns={
        'stemtoc': 'stemTOC',
        'epitoc1': 'epiTOC1',
        'dnamtl': 'DNAmTL',
        'dunedinpace': 'DunedinPACE',
        'pcdnamtl': 'PCDNAmTL',
        'zhangmortality': 'ZhangMortality',
    },
    inplace=True
)
# One row of test statistics per aging metric.
df_metrics = pd.read_excel(f"{path}/data/120_1/metrics.xlsx", index_col=0)
# Expose the index as a regular column so it can serve as id_vars in melt().
df_metrics['Features'] = df_metrics.index
# -log10 of the FDR-adjusted p-values of both tests; rows sorted by the Mann-Whitney value.
df_metrics['Mann-Whitney'] = -np.log10(df_metrics['mannwhitneyu_pval_fdr_bh'].values)
df_metrics['ANCOVA'] = -np.log10(df_metrics['ancova_Special_Status[T.Control]_pval_fdr_bh'].values)
df_metrics.sort_values(["Mann-Whitney"], ascending=[False], inplace=True)

colors_tests = {
    'Mann-Whitney': 'fuchsia',
    'ANCOVA': 'cyan'
}

# Long format: one row per (metric, test) pair for the grouped horizontal bar plot.
df_fig = df_metrics.copy()
df_fig = df_fig.melt(id_vars='Features', value_vars=['Mann-Whitney', 'ANCOVA'], var_name='Test', value_name=r"$-\log_{10}(\mathrm{p-value})$")
sns.set_theme(style='ticks')
# Figure height grows with the number of bars (0.15 inch per melted row).
fig, ax = plt.subplots(figsize=(3, df_fig.shape[0] * 0.15))
barplot = sns.barplot(
    data=df_fig,
    y='Features',
    x=r"$-\log_{10}(\mathrm{p-value})$",
    edgecolor='black',
    palette=colors_tests,
    hue='Test',
    hue_order=['Mann-Whitney', 'ANCOVA'],
    ax=ax,
)
# ax.xaxis.tick_top()
# ax.xaxis.set_label_position('top')
ax.set_ylabel('')
# Dotted red line marks the 0.05 significance threshold on the -log10 scale.
ax.axvline(-np.log10(0.05), color="red", linestyle="dotted", linewidth=2.0)
#sns.move_legend(ax, "upper center", bbox_to_anchor=(1, 1))
# Legend goes above the axes, in one frameless row.
sns.move_legend(ax, "lower center", bbox_to_anchor=(.4, 1), ncol=2, frameon=False)
plt.savefig(f"{path}/figures/2/metrics_barplot.png", bbox_inches='tight', dpi=200)
plt.savefig(f"{path}/figures/2/metrics_barplot.pdf", bbox_inches='tight')
plt.close(fig)

# Metrics plotted in detail by the next cell.
# NOTE(review): both operands of `|` test the same Mann-Whitney column, so ANCOVA never
# contributes to this selection. The second operand was presumably meant to be the
# ANCOVA p-value column — confirm. Changing it alters how many metrics are selected,
# and the next cell draws them on a fixed-size axes grid.
metrics_target = df_metrics.loc[(df_metrics['mannwhitneyu_pval_fdr_bh'] < 0.05) | (df_metrics['mannwhitneyu_pval_fdr_bh'] < 0.05), 'Features']

In [None]:
colors_our = {
    'Control': 'chartreuse',
    'Case': 'red'
}

# Fixed grid of violin plots, filled row by row with the selected metrics.
# NOTE(review): more than n_rows * n_cols selected metrics would index past the grid.
n_rows = 3
n_cols = 2
fig_width = 4
fig_height = 7.5

sns.set_theme(style='ticks')
fig, axs = plt.subplots(n_rows, n_cols, figsize=(fig_width, fig_height), gridspec_kw={'wspace':0.05, 'hspace': 0.05}, layout='constrained')
for epi_metric_id, epi_metric in enumerate(metrics_target):
    row_id, col_id = divmod(epi_metric_id, n_cols)

    # Same call convention as the other violin plots in this notebook:
    # `hue` is assigned explicitly (a palette without hue is deprecated) and
    # `density_norm` replaces the deprecated `scale` keyword.
    sns.violinplot(
        data=df_pheno,
        x='Status',
        y=epi_metric,
        hue='Status',
        palette=colors_our,
        density_norm='width',
        order=['Control', 'Case'],
        saturation=0.75,
        ax=axs[row_id, col_id],
        legend=False,
        cut=0,
    )
    axs[row_id, col_id].set_ylabel(epi_metric)
    # Scientific tick notation on the y axis, controlled by scilimits=(-1, 1).
    axs[row_id, col_id].ticklabel_format(style='scientific', scilimits=(-1, 1), axis='y', useOffset=True, useMathText=True)

fig.savefig(f"{path}/figures/2/metrics_distribution.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/figures/2/metrics_distribution.pdf", bbox_inches='tight')
plt.close(fig)

## EpiScores

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

# Per-EpiScore statistics; the " (EpiScores)" suffix is stripped from the names.
df_scores = pd.read_excel(f"{path}/data/120_1/scores.xlsx", index_col=0)
df_scores.index = df_scores.index.astype(str)
# regex=False: the parentheses must be matched literally. With regex matching they
# would form a group and the suffix would not be removed; the default differs
# between pandas versions, so it is spelled out.
df_scores.index = df_scores.index.str.replace(' (EpiScores)', '', regex=False)
df_pheno = pd.read_csv(f"{path}/data/120_1/episcores_Les_120.csv", index_col=0)
# NOTE(review): removes every 'X' in the sample IDs — presumably the prefix added by R
# to numeric names; confirm no ID contains another 'X'.
df_pheno.index = df_pheno.index.str.replace('X', '', regex=False)
df_pheno_add = pd.read_excel(f"{path}/data/120_1/pheno_funnorm.xlsx", index_col=0)
df_pheno_add.index = df_pheno_add.index.astype(str)
# Copy the Status label over from the phenotype sheet, matched by sample ID.
df_pheno.loc[df_pheno.index, 'Status'] = df_pheno_add.loc[df_pheno.index, 'Status']

# Axis columns: -log10 of the FDR-adjusted p-values of the two tests.
col_mw = r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$"
col_ancova = r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$"

df_scores['Features'] = df_scores.index
df_scores[col_mw] = -np.log10(df_scores['mannwhitneyu_pval_fdr_bh'].values)
df_scores[col_ancova] = -np.log10(df_scores['ancova_Special_Status[T.Control]_pval_fdr_bh'].values)
df_scores.sort_values([col_mw], ascending=[False], inplace=True)

# Non-significant: above 0.05 in both tests. Everything else is highlighted and labelled.
episcores_nonsign = df_scores.index[(df_scores['mannwhitneyu_pval_fdr_bh'] > 0.05) & (df_scores['ancova_Special_Status[T.Control]_pval_fdr_bh'] > 0.05)]
episcores_sign = df_scores.index.difference(episcores_nonsign)

sns.set_theme(style='ticks')
fig, ax = plt.subplots(figsize=(8, 5), gridspec_kw={}, layout='constrained')
# Background layer: small gray markers for the non-significant scores.
scatter = sns.scatterplot(
    data=df_scores.loc[episcores_nonsign, :],
    x=col_mw,
    y=col_ancova,
    linewidth=0.25,
    alpha=0.75,
    edgecolor="k",
    s=15,
    color='gray',
    ax=ax,
)
# Threshold lines at p = 0.05: a light-gray underlay with a dotted line in the colour
# used for each test (fuchsia = Mann-Whitney, cyan = ANCOVA).
ax.axvline(-np.log10(0.05), color='lightgray', linestyle="-", linewidth=2.5, zorder=0)
ax.axvline(-np.log10(0.05), color='fuchsia', linestyle="dotted", linewidth=2.0, zorder=1)
ax.axhline(-np.log10(0.05), color='lightgray', linestyle="-", linewidth=2.5, zorder=0)
ax.axhline(-np.log10(0.05), color='cyan', linestyle="dotted", linewidth=2.0, zorder=1)
# Foreground layer: larger red markers for scores significant in at least one test.
scatter = sns.scatterplot(
    data=df_scores.loc[episcores_sign, :],
    x=col_mw,
    y=col_ancova,
    linewidth=0.25,
    alpha=0.75,
    edgecolor="k",
    s=25,
    color='red',
    ax=ax,
)
# Label every highlighted point, then let adjust_text untangle overlapping labels.
texts = []
for i, row in df_scores.loc[episcores_sign, :].iterrows():
    texts.append(ax.text(row[col_mw], row[col_ancova], row['Features'], fontsize=9))
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'))
ax.set_xlim(-0.1, 6)
ax.set_ylim(-0.1, 2.5)
fig.savefig(f"{path}/figures/2/episcores.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/figures/2/episcores.pdf", bbox_inches='tight')
plt.close(fig)

# Figure 3

In [None]:
def check_for_nonnumeric(pd_series=None):
    """Report whether a series contains entries that do not parse as numbers.

    Args:
        pd_series: pandas Series to inspect.

    Returns:
        0 when every entry converts with ``pd.to_numeric``, otherwise 1.
        Missing values coerce to NaN and therefore also yield 1.
    """
    coerced = pd.to_numeric(pd_series, errors='coerce')
    has_unparsable = coerced.isna().sum() != 0
    return 1 if has_unparsable else 0


def _annotate_gene(x, y, label, gfont, gstyle):
    """Draw one gene label at (x, y) on the current axes in the requested style."""
    if gstyle == 1:
        plt.text(x, y, label, fontsize=gfont)
    elif gstyle == 2:
        plt.annotate(label, xy=(x, y),
                     xycoords='data', xytext=(5, -15), textcoords='offset points', size=6,
                     bbox=dict(boxstyle="round", alpha=0.1),
                     arrowprops=dict(arrowstyle="wedge,tail_width=0.5", alpha=0.1, relpos=(0, 0)))
    else:
        # Kept as in the original implementation: report and terminate.
        print("Error: invalid gstyle choice")
        sys.exit(1)


def gene_plot(d, geneid, lfc, lfc_thr, pv_thr, genenames, gfont, pv, gstyle):
    """Label selected genes on the current volcano plot.

    Each gene is placed at the coordinates of its first row in ``d``
    (``lfc`` column for x, ``'logpv_add_axy'`` column for y).

    Args:
        d: DataFrame holding the plotted points.
        geneid: Name of the column with gene identifiers.
        lfc: Name of the log fold change column.
        lfc_thr: ``(up, down)`` fold-change thresholds, used in "deg" mode.
        pv_thr: ``(up, down)`` p-value thresholds, used in "deg" mode.
        genenames: Selects what is labelled:
            ``None`` (or any unsupported value) labels nothing,
            ``"deg"`` labels every gene passing the up or down thresholds,
            a tuple labels the listed identifiers,
            a dict labels its keys using the mapped values as text.
        gfont: Font size for ``gstyle == 1`` labels.
        pv: Name of the p-value column.
        gstyle: 1 for plain text, 2 for boxed annotations with a wedge.

    Raises:
        SystemExit: if a label has to be drawn and ``gstyle`` is neither 1 nor 2.
    """
    if genenames is None:
        return

    if isinstance(genenames, str):
        if genenames != "deg":
            return
        # One row per gene (its first occurrence), in order of first appearance.
        # This replaces a full boolean scan of the frame for every gene, and no
        # longer fails on a missing gene identifier.
        first_rows = d.drop_duplicates(subset=geneid)
        is_up = (first_rows[lfc] >= lfc_thr[0]) & (first_rows[pv] < pv_thr[0])
        is_down = (first_rows[lfc] <= -lfc_thr[1]) & (first_rows[pv] < pv_thr[1])
        selected = first_rows.loc[is_up | is_down]
        labels = list(selected[geneid])
    elif isinstance(genenames, (tuple, dict)):
        first_rows = d.drop_duplicates(subset=geneid)
        wanted = [gene in genenames for gene in first_rows[geneid]]
        selected = first_rows.loc[wanted]
        if isinstance(genenames, dict):
            # A dict maps identifiers to the text shown on the plot.
            labels = [genenames[gene] for gene in selected[geneid]]
        else:
            labels = list(selected[geneid])
    else:
        return

    if selected.empty:
        return
    for label, x, y in zip(labels, selected[lfc], selected['logpv_add_axy']):
        _annotate_gene(x, y, label, gfont, gstyle)


def volcano(df="dataframe", lfc=None, pv=None, lfc_thr=(1, 1), pv_thr=(0.05, 0.05), color=("green", "grey", "red"),
            valpha=1, geneid=None, genenames=None, gfont=8, dim=(5, 5), ar=90, dotsize=1, markerdot="o",
            sign_line_v=False, sign_line_h=False, gstyle=1, axtickfontsize=9,
            axtickfontname="Arial", axlabelfontsize=9, axlabelfontname="Arial", axxlabel=None,
            axylabel=None, xlm=None, ylm=None, plotlegend=False, legendpos='best',
            figname='volcano', legendanchor=None,
            legendlabels=['Significant up', 'Not significant', 'Significant down'], theme=None, path='', ret=False, **kwargs):
    """Draw a volcano plot (fold change vs. -log10 p-value) with pyplot.

    Points are split into three classes: "up" (``lfc >= lfc_thr[0]`` and
    ``pv < pv_thr[0]``), "down" (``lfc <= -lfc_thr[1]`` and ``pv < pv_thr[1]``)
    and everything else. They are drawn with ``color[0]``, ``color[2]`` and
    ``color[1]`` respectively. A point satisfying both rules counts as "down".

    Args:
        df: DataFrame with the data; it is not modified.
        lfc, pv: Names of the fold-change and p-value columns.
        lfc_thr, pv_thr: ``(up, down)`` thresholds.
        color: Three distinct colours ``(up, not significant, down)``.
        valpha, dotsize, markerdot: Marker transparency, size and shape.
        geneid, genenames, gfont, gstyle: Forwarded to ``gene_plot`` for labelling.
        dim: Unused; kept for signature compatibility.
        ar: Tick label rotation.
        sign_line_v, sign_line_h: Draw dashed threshold lines.
        axtickfontsize, axtickfontname, axlabelfontsize, axlabelfontname: Fonts.
        axxlabel, axylabel: Axis label overrides.
        xlm: ``(left, right)`` x limits. ylm: ``(bottom, top, tick step)``.
        plotlegend, legendpos, legendanchor, legendlabels: Legend options;
            ``legendlabels`` follows the order of ``color``.
        figname, path: Output name and directory, used when ``ret`` is false.
        theme: ``'dark'`` switches to matplotlib's dark background style.
        ret: If true, return the current axes instead of saving the figure.
        **kwargs: ``ax`` — axes to draw on (made current before plotting).

    Returns:
        The current axes when ``ret`` is true, otherwise ``None`` after saving
        ``{path}/{figname}.png`` and ``.pdf`` and closing the figure.

    Raises:
        AssertionError: on non-numeric ``lfc``/``pv`` values, on colours that
            are not three distinct values, or on a legend label list whose
            length is not 3.
    """
    _x = r'$ \log_{2}(\mathrm{Fold Change})$'
    _y = r'$ -\log_{10}(\mathrm{p-value})$'
    ax = kwargs.get('ax')
    if ax is not None:
        plt.sca(ax)
    # check if dataframe contains any non-numeric character
    assert check_for_nonnumeric(df[lfc]) == 0, 'dataframe contains non-numeric values in lfc column'
    assert check_for_nonnumeric(df[pv]) == 0, 'dataframe contains non-numeric values in pv column'
    # Work on a copy without helper columns left over from a previous call, so the
    # caller's frame is never updated.
    df = df.drop(['color_add_axy', 'logpv_add_axy'], axis=1, errors='ignore')
    assert len(set(color)) == 3, 'unique color must be size of 3'

    # Class code per point = index into `color`: 0 up, 1 not significant, 2 down.
    # Built directly as an array, which replaces the chained
    # `df[col].fillna(..., inplace=True)` that leaves the frame unchanged under
    # pandas Copy-on-Write.
    is_up = ((df[lfc] >= lfc_thr[0]) & (df[pv] < pv_thr[0])).to_numpy()
    is_down = ((df[lfc] <= -lfc_thr[1]) & (df[pv] < pv_thr[1])).to_numpy()
    color_result_num = np.ones(len(df), dtype=int)
    color_result_num[is_up] = 0
    color_result_num[is_down] = 2  # assigned last so "down" wins when both rules match
    df['color_add_axy'] = [color[code] for code in color_result_num]
    df['logpv_add_axy'] = -np.log10(df[pv].to_numpy(dtype=float))

    if theme == 'dark':
        plt.style.use('dark_background')
    # vmin/vmax pin the code -> colour mapping. Without them the colormap is scaled to
    # the codes actually present, so a missing class shifts the remaining colours.
    points = plt.scatter(df[lfc], df['logpv_add_axy'], c=color_result_num, cmap=ListedColormap(color),
                         vmin=0, vmax=2, alpha=valpha, s=dotsize, marker=markerdot)
    if plotlegend:
        assert len(legendlabels) == 3, 'legendlabels must be size of 3'
        # One legend handle is produced per class present; pick the matching labels.
        present_codes = np.unique(color_result_num)
        plt.legend(handles=points.legend_elements()[0], labels=[legendlabels[code] for code in present_codes],
                   loc=legendpos, bbox_to_anchor=legendanchor)
    if sign_line_h:
        plt.axhline(y=-np.log10(pv_thr[0]), linestyle='--', color='black', linewidth=1)
    if sign_line_v:
        plt.axvline(x=lfc_thr[0], linestyle='--', color='black', linewidth=1)
        plt.axvline(x=-lfc_thr[1], linestyle='--', color='black', linewidth=1)
    gene_plot(df, geneid, lfc, lfc_thr, pv_thr, genenames, gfont, pv, gstyle)

    if axxlabel:
        _x = axxlabel
    if axylabel:
        _y = axylabel

    plt.xlabel(_x, fontsize=axlabelfontsize, fontname=axlabelfontname)
    plt.ylabel(_y, fontsize=axlabelfontsize, fontname=axlabelfontname)
    if xlm:
        plt.xlim(left=xlm[0], right=xlm[1])
    plt.xticks(fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)
    if ylm:
        plt.ylim(bottom=ylm[0], top=ylm[1])
        plt.yticks(np.arange(ylm[0], ylm[1], ylm[2]),  fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)
    else:
        plt.yticks(fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)

    if ret:
        return plt.gca()
    plt.savefig(f"{path}/{figname}.png", bbox_inches='tight', dpi=400)
    plt.savefig(f"{path}/{figname}.pdf", bbox_inches='tight', dpi=400)
    plt.clf()
    plt.close()

In [None]:
path = "E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam"

# Load pheno
df_pheno = pd.read_excel(f"{path}/data/120_1/pheno_funnorm.xlsx", index_col=0)
# Sample IDs are cast to str in both tables so the index-based merge below can match them.
df_pheno.index = df_pheno.index.astype(str)
colors = {
    'Control': 'chartreuse',
    'Case': 'red'
}
# Load betas
# NOTE(review): read_pickle executes arbitrary code from the file; only load trusted pickles.
df_betas = pd.read_pickle(f"{path}/data/120_1/betas_funnorm.pkl")
df_betas.index = df_betas.index.astype(str)

# Load DMPs
df_dmps = pd.read_csv(f"{path}/data/120_1/GSEA(ebayes)_group_orgn_limma.csv", index_col=0)
df_dmps.sort_values(["adj.P.Val"], ascending=[True], inplace=True)
# Plot-ready columns: -log10 of the adjusted p-value and a copy of logFC under its axis label.
df_dmps[r'$ -\log_{10}(\mathrm{p-value})$'] = -(np.log10(np.array(df_dmps["adj.P.Val"].values.astype(float))))
df_dmps[r'$ \log_{2}(\mathrm{Fold Change})$'] = df_dmps['logFC']
# Probes with adjusted p-value below 0.05.
dmps = df_dmps.index[df_dmps["adj.P.Val"] < 0.05].values

# Merge data
df_data = pd.merge(df_pheno, df_betas.loc[:, dmps], left_index=True, right_index=True)

# Dimensionality reduction
# NOTE(review): no random_state is passed to TSNE or MDS, so their embeddings (and the
# clusters derived from them) presumably differ between runs — confirm whether a fixed
# seed is wanted for reproducible figures.
dim_red_models = {
    't-SNE': TSNE(n_components=2),
    'PCA': PCA(n_components=2, whiten=False),
    'IsoMap': Isomap(n_components=2, n_neighbors=5),
    'MDS': MDS(n_components=2, metric=True),
    # 'GRP': GaussianRandomProjection(n_components=2, eps=0.5),
    # 'SRP': SparseRandomProjection(n_components=2, density='auto', eps=0.5, dense_output=False),
}
# For every method: store both embedding coordinates and the HDBSCAN labels computed on them.
feats_dim_red = []
for drm in dim_red_models:
    dim_red_res = dim_red_models[drm].fit_transform(df_data.loc[:, dmps].values)
    df_data.loc[:, f"{drm} 1"] = dim_red_res[:, 0]
    df_data.loc[:, f"{drm} 2"] = dim_red_res[:, 1]
    # Minimum cluster size is 5 % of the number of samples (truncated to an integer).
    df_data.loc[:, f"{drm} HDBSCAN"] = HDBSCAN(min_cluster_size=int(df_data.shape[0] * 0.05)).fit(df_data.loc[:, [f"{drm} 1", f"{drm} 2"]].values).labels_
    feats_dim_red += [ f"{drm} 1",  f"{drm} 2", f"{drm} HDBSCAN"]
# Export age, status and all embedding / cluster columns.
df_data.loc[:, ['Age', 'Status'] + feats_dim_red].to_excel(f"{path}/figures/3/dimred.xlsx")

In [None]:
fig, ax = plt.subplots(figsize=(7, 7), gridspec_kw={}, layout='constrained')
# scatter = sns.scatterplot(
#     data=df_dmps,
#     y=r'$ -\log_{10}(\mathrm{p-value})$',
#     x=r'$ \log_{2}(\mathrm{Fold Change})$',
#     alpha=0.5,
#     ax=ax,
#     rasterized=True,
# )
# Draw the volcano plot on `ax`. With ret=True the helper returns the axes and does not
# save anything itself, so the figure is written explicitly below.
# lfc_thr=(0.0, 0.0) means colouring depends only on the sign of logFC and the
# adjusted p-value threshold. No `genenames` is passed, so no points are labelled.
volc = volcano(
    df=df_dmps,
    lfc='logFC',
    pv='adj.P.Val',
    pv_thr=(0.05, 0.05),
    lfc_thr=(0.0, 0.0),
    path=f"{path}",
    geneid='print',
    axtickfontsize=16,
    axlabelfontsize=16,
    gfont=16,
    gstyle=2,
    sign_line_h=True,
    sign_line_v=False,
    ar=0,
    color=('orange', 'gray', 'mediumblue'),
    dim=(7, 7), 
    ret=True,
    ax=ax,
    dotsize=4,
)

# High-resolution raster output plus a vector PDF.
fig.savefig(f"{path}/figures/3/volcano.png", bbox_inches='tight', dpi=1200)
fig.savefig(f"{path}/figures/3/volcano.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:
# 2 x 2 grid: one scatter panel per dimensionality-reduction method.
n_rows = 2
n_cols = 2
fig_height = 8
fig_width = 8
sns.set_theme(style='ticks')
fig, axs = plt.subplots(n_rows, n_cols, figsize=(fig_width, fig_height), gridspec_kw={'wspace':0.05, 'hspace': 0.05}, sharey=False, sharex=False, layout='constrained')
for drm_id, drm in enumerate(dim_red_models.keys()):
    # Panels are filled row by row in the order the methods were declared.
    row_id, col_id = divmod(drm_id, n_cols)
    # Samples in the 2-D embedding of this method, coloured by Status.
    scatter = sns.scatterplot(
        data=df_data,
        x=f"{drm} 1",
        y=f"{drm} 2",
        # hue=f"{drm} HDBSCAN",
        hue='Status',
        palette=colors,
        linewidth=0.25,
        alpha=0.75,
        edgecolor="k",
        s=40,
        # color=colors[feats_set],
        ax=axs[row_id, col_id],
    )
    axs[row_id, col_id].set_title(drm)
    # axs[n_rows - 1, n_cols - 1].axis('off')
# fig.suptitle(title, fontsize='large')
fig.savefig(f"{path}/figures/3/dim_red.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/figures/3/dim_red.pdf", bbox_inches='tight')
plt.close(fig)

## Background colors for groups of GSEA terms

In [None]:
# Colours already used in the figures, converted from CSS4 names to RGB tuples so
# that the generated background colours stay away from them.
exclude_colors = list(
    map(
        mcolors.hex2color,
        (mcolors.CSS4_COLORS[name] for name in ('black', 'white', 'chartreuse', 'red')),
    )
)
# Eight pastel colours from a fixed seed, shown as a swatch for visual inspection.
bckg_colors = distinctipy.get_colors(
    8,
    exclude_colors,
    rng=1337,
    pastel_factor=0.5,
)
distinctipy.color_swatch(bckg_colors)

## Legacy for 63 samples

In [None]:
# Differential methylation results, most significant first.
df_dmps = pd.read_csv(f"{path}/GSEA(ebayes)_group_orgn_limma.csv", index_col=0)
df_dmps["CpG"] = df_dmps.index.values
df_dmps.sort_values(["adj.P.Val"], ascending=[True], inplace=True)
# Label = probe name without its "_<suffix>" part. Vectorised string split instead of
# a row-wise apply over the whole table.
df_dmps['print'] = df_dmps['CpG'].astype(str).str.split('_').str[0]
df_dmps['log_pval'] = -np.log10(df_dmps["adj.P.Val"])

pheno = pd.read_excel(f"{path}/pheno_funnorm.xlsx", index_col=0)
pheno.index = pheno.index.astype(str)
# NOTE(review): read_pickle executes arbitrary code from the file; only load trusted pickles.
betas = pd.read_pickle(f"{path}/betas_funnorm.pkl")
# Cast the sample IDs to str like the phenotype index above; otherwise the index-based
# merge below finds no common samples when the pickle stores non-string IDs.
betas.index = betas.index.astype(str)
feats_beta = ['cg26740917_BC21', 'cg10717487_BC21', 'cg16474115_BC21',
              'cg09331866_BC21', 'cg02881230_BC21', 'cg06596521_BC21',
              'cg00891068_TC21', 'cg20995564_TC21', 'cg01569067_BC21',
              'cg03667969_BC21', 'cg25576364_BC21', 'cg15387715_BC21']
betas = betas[feats_beta]

feats_pheno = ['Age', 'Status']
pheno = pheno[feats_pheno]

df_for_plot = pd.merge(pheno, betas, left_index=True, right_index=True)

pheno_associations = {
    'Status': {
        'groups': ['Control', 'Case'],
        'base': 'Control',
        'colors': {'Control': 'chartreuse', 'Case': 'red'}
    }
}

fig = plt.figure(
        figsize=(15, 10),
        layout="constrained"
    )
"""
axs = fig.subplot_mosaic(
    [
        ['1', '21', '31'],
        ['1', '22', '32'],
        ['1', '23', '33'],
        ['1', '24', '34'],
        ['1', '25', '35'],
        ['1', '26', '36'],
    ],
    height_ratios=[1, 1, 1, 1, 1, 1],
    width_ratios=[2, 1, 1],
)
"""
sns.set_theme(style='ticks')

# Panel '1' (left, full height) holds the volcano plot; '.' is a thin spacer column;
# the 3 x 4 block on the right holds one violin plot per CpG.
axs = fig.subplot_mosaic(
    [
        ['1', '.', '21', '31', '41'],
        ['1', '.', '22', '32', '42'],
        ['1', '.', '23', '33', '43'],
        ['1', '.', '24', '34', '44'],
    ],
    height_ratios=[1, 1, 1, 1],
    width_ratios=[4, 0.05, 1, 1, 1],
    gridspec_kw={
                "bottom": 0.14,
                "top": 0.95,
                # "left": 0.1,
                # "right": 0.5,
                "wspace": 0.1,
                "hspace": 0.07,
            },
)
# ret=True: the helper draws on axs['1'] and returns the axes without saving.
volc = volcano(
    df=df_dmps,
    lfc='logFC',
    pv='adj.P.Val',
    pv_thr=(0.05, 0.05),
    lfc_thr=(0.0, 0.0),
    path=f"{path}",
    geneid='print',
    axtickfontsize=18,
    axlabelfontsize=18,
    gfont=18,
    gstyle=2,
    sign_line_h=True,
    sign_line_v=False,
    ar=0,
    color=('gray', 'green', 'red'),
    dim=(4, 4),
    ret=True,
    ax=axs['1'],
    dotsize=5,
)
axs['1'].grid(True)
# Panel letter and a red rectangle, both placed in data coordinates of the volcano plot.
axs['1'].text(-0.33, 1.5, 'A', fontsize=30, fontfamily='arial')
rect = Rectangle((-0.11, 1.28), 0.11, 0.2, linewidth=2, edgecolor='r', facecolor='none')
axs['1'].add_patch(rect)

# `cpg_id` replaces the former loop variable `id`, which shadowed the builtin id()
# for the rest of the notebook session.
for cpg_id, cpg in enumerate(feats_beta):
    # Mosaic keys are "<column><row>": despite its name, row_id is the mosaic column
    # (2-4) and column_id the mosaic row (1-4), so the CpGs fill the block row by row.
    row_id = cpg_id % 3 + 2
    column_id = cpg_id // 3 + 1
    violin = sns.violinplot(
        data=df_for_plot,
        x='Status',
        y=cpg,
        hue='Status',
        palette=pheno_associations['Status']['colors'],
        density_norm='width',
        order=pheno_associations['Status']['groups'],
        saturation=0.75,
        linewidth=1.0,
        legend=False,
        ax=axs[f'{row_id}{column_id}'],
        cut=0,
    )
    axs[f'{row_id}{column_id}'].set(xlabel=None)
axs['21'].text(-1.8, 0.79, 'B', fontsize=30, fontfamily='arial')

plt.savefig(f"{path}/fig3.png", bbox_inches='tight', dpi=400)
plt.savefig(f"{path}/fig3.pdf", bbox_inches='tight', dpi=400)

# Figure 4

In [None]:
def check_for_nonnumeric(pd_series=None):
    """Return 1 if any entry of the series fails numeric conversion, else 0.

    Entries that ``pd.to_numeric`` cannot parse are coerced to NaN and counted;
    values that are already missing are counted as well.
    """
    n_unparsable = pd.to_numeric(pd_series, errors='coerce').isna().sum()
    if n_unparsable != 0:
        return 1
    return 0


def gene_plot(d, geneid, lfc, lfc_thr, pv_thr, genenames, gfont, pv, gstyle):
    """Label selected points on the current pyplot axes.

    Args:
        d: DataFrame with one row per point.  Must contain the columns named
            by ``geneid``, ``lfc`` and ``pv`` plus ``'logpv_add_axy'`` (the
            y-coordinate of every point).
        geneid: Name of the column holding the point identifiers.
        lfc: Name of the column used as x-coordinate.
        lfc_thr: ``(up, down)`` thresholds on ``lfc``; only used when
            ``genenames == "deg"``.
        pv_thr: ``(up, down)`` cut-offs on ``pv``; only used when
            ``genenames == "deg"``.
        genenames: Selects what gets labelled:
            ``None`` - nothing;
            ``"deg"`` - every identifier passing the up or down thresholds;
            tuple - identifiers contained in the tuple;
            dict - identifiers that are keys, labelled with the mapped value.
            Any other value labels nothing.
        gfont: Font size of the labels for ``gstyle == 1``.
        pv: Name of the p-value column.
        gstyle: ``1`` draws plain text, ``2`` draws a boxed annotation with an
            arrow (its text size is fixed to 6, ``gfont`` is not used).

    Raises:
        ValueError: If a label has to be drawn and ``gstyle`` is neither 1
            nor 2.  (Previously this printed a message and called
            ``sys.exit(1)``.)

    For identifiers that occur in several rows the first row is used.  Rows
    with a missing identifier are skipped.
    """
    if genenames is None:
        return
    if isinstance(genenames, str):
        if genenames != "deg":
            return
        mode = "deg"
    elif isinstance(genenames, tuple):
        mode = "tuple"
    elif isinstance(genenames, dict):
        mode = "dict"
    else:
        return

    # One pass over the first row of every identifier instead of a full
    # boolean scan of the frame per identifier.
    first_rows = d.drop_duplicates(subset=geneid, keep="first")
    first_rows = first_rows[first_rows[geneid].notna()]
    points = zip(first_rows[geneid], first_rows[lfc], first_rows['logpv_add_axy'], first_rows[pv])

    for gene, x, y, p_value in points:
        if mode == "deg":
            is_up = x >= lfc_thr[0] and p_value < pv_thr[0]
            is_down = x <= -lfc_thr[1] and p_value < pv_thr[1]
            if not (is_up or is_down):
                continue
            label = gene
        else:
            if gene not in genenames:
                continue
            label = genenames[gene] if mode == "dict" else gene

        if gstyle == 1:
            plt.text(x, y, label, fontsize=gfont)
        elif gstyle == 2:
            plt.annotate(label, xy=(x, y),
                         xycoords='data', xytext=(5, -15), textcoords='offset points', size=6,
                         bbox=dict(boxstyle="round", alpha=0.1),
                         arrowprops=dict(arrowstyle="wedge,tail_width=0.5", alpha=0.1, relpos=(0, 0)))
        else:
            raise ValueError(f"Error: invalid gstyle choice {gstyle!r}; expected 1 or 2")


def volcano(df="dataframe", lfc=None, pv=None, lfc_thr=(1, 1), pv_thr=(0.05, 0.05), color=("green", "grey", "red"),
            valpha=1, geneid=None, genenames=None, gfont=8, dim=(5, 5), ar=90, dotsize=1, markerdot="o",
            sign_line_v=False, sign_line_h=False, gstyle=1, axtickfontsize=9,
            axtickfontname="Arial", axlabelfontsize=9, axlabelfontname="Arial", axxlabel=None,
            axylabel=None, xlm=None, ylm=None, plotlegend=False, legendpos='best',
            figname='volcano', legendanchor=None,
            legendlabels=['Significant up', 'Not significant', 'Significant down'], theme=None, path='', ret=False, **kwargs):
    """Draw a volcano plot (``lfc`` on x, ``-log10(pv)`` on y) with pyplot.

    Args:
        df: DataFrame with the columns named by ``lfc`` and ``pv`` (and
            ``geneid`` when labels are requested).  The caller's frame is not
            modified; helper columns are added to a copy.
        lfc, pv: Column names of the effect size and the p-value.
        lfc_thr: ``(up, down)`` thresholds: a point is "up" when
            ``lfc >= up`` and "down" when ``lfc <= -down`` (given the p-value
            cut-off is met as well).  A point matching both is "down".
        pv_thr: ``(up, down)`` p-value cut-offs (strict ``<``).
        color: Three distinct colours for (up, not significant, down).
        valpha, dotsize, markerdot: Alpha, size and marker of the points.
        geneid, genenames, gfont, gstyle: Forwarded to ``gene_plot`` to label
            points.
        dim: Unused; the figure/axes are not created here.
        ar: Rotation of the tick labels.
        sign_line_v, sign_line_h: Draw dashed threshold lines.
        axtickfontsize, axtickfontname, axlabelfontsize, axlabelfontname:
            Tick and axis label fonts.
        axxlabel, axylabel: Override the default axis labels.
        xlm: ``(left, right)`` x-limits.
        ylm: ``(bottom, top, step)`` y-limits and tick step.
        plotlegend, legendpos, legendanchor, legendlabels: Legend settings;
            ``legendlabels`` must have three entries (up, not significant,
            down).
        figname, path: Output file stem and directory, used when ``ret`` is
            false.  An empty ``path`` means the current working directory.
        theme: ``'dark'`` switches pyplot to the ``dark_background`` style.
        ret: If true, return the current axes instead of saving.
        **kwargs: ``ax`` - axes to draw into (made current via ``plt.sca``).

    Returns:
        The current axes when ``ret`` is true, otherwise ``None`` (the plot is
        saved as PNG and PDF and the figure is closed).

    Raises:
        AssertionError: On non-numeric ``lfc``/``pv`` values, fewer than three
            distinct colours, or a wrong number of legend labels.
    """
    _x = r'$ \log_{2}(\mathrm{Fold Change})$'
    _y = r'$ -\log_{10}(\mathrm{p-value})$'
    ax = kwargs.get('ax')
    if ax is not None:
        plt.sca(ax)
    # check if dataframe contains any non-numeric character
    assert check_for_nonnumeric(df[lfc]) == 0, 'dataframe contains non-numeric values in lfc column'
    assert check_for_nonnumeric(df[pv]) == 0, 'dataframe contains non-numeric values in pv column'
    # drop() returns a new frame, so helper columns left over from an earlier
    # call are discarded and the caller's frame is never touched
    df = df.drop(['color_add_axy', 'logpv_add_axy'], axis=1, errors='ignore')
    assert len(set(color)) == 3, 'unique color must be size of 3'

    is_up = (df[lfc] >= lfc_thr[0]) & (df[pv] < pv_thr[0])
    is_down = (df[lfc] <= -lfc_thr[1]) & (df[pv] < pv_thr[1])
    # Start from "not significant" and overwrite; direct assignment avoids the
    # chained in-place fillna, which does not update the frame under pandas
    # Copy-on-Write.
    df['color_add_axy'] = color[1]
    df.loc[is_up, 'color_add_axy'] = color[0]  # upregulated
    df.loc[is_down, 'color_add_axy'] = color[2]  # downregulated
    df['logpv_add_axy'] = -(np.log10(np.array(df[pv].values.astype(float))))

    # class code per point: 0 = up, 1 = not significant, 2 = down
    assign_values = {col: i for i, col in enumerate(color)}
    color_result_num = [assign_values[i] for i in df['color_add_axy']]

    if theme == 'dark':
        plt.style.use('dark_background')
    # vmin/vmax pin the code -> colour mapping.  Without them the codes are
    # normalised by the min/max actually present, so the colours are wrong as
    # soon as one of the three classes has no points.
    points = plt.scatter(df[lfc], df['logpv_add_axy'], c=color_result_num, cmap=ListedColormap(color),
                         vmin=0, vmax=len(color) - 1, alpha=valpha, s=dotsize, marker=markerdot)
    if plotlegend:
        assert len(legendlabels) == 3, 'legendlabels must be size of 3'
        # legend_elements() yields one handle per class code present (sorted),
        # so pick the matching labels instead of always passing all three.
        present = sorted(set(color_result_num))
        plt.legend(handles=points.legend_elements()[0], labels=[legendlabels[i] for i in present],
                   loc=legendpos, bbox_to_anchor=legendanchor)
    if sign_line_h:
        plt.axhline(y=-np.log10(pv_thr[0]), linestyle='--', color='black', linewidth=1)
    if sign_line_v:
        plt.axvline(x=lfc_thr[0], linestyle='--', color='black', linewidth=1)
        plt.axvline(x=-lfc_thr[1], linestyle='--', color='black', linewidth=1)
    gene_plot(df, geneid, lfc, lfc_thr, pv_thr, genenames, gfont, pv, gstyle)

    if axxlabel:
        _x = axxlabel
    if axylabel:
        _y = axylabel

    plt.xlabel(_x, fontsize=axlabelfontsize, fontname=axlabelfontname)
    plt.ylabel(_y, fontsize=axlabelfontsize, fontname=axlabelfontname)
    if xlm:
        plt.xlim(left=xlm[0], right=xlm[1])
    plt.xticks(fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)
    if ylm:
        plt.ylim(bottom=ylm[0], top=ylm[1])
        plt.yticks(np.arange(ylm[0], ylm[1], ylm[2]), fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)
    else:
        plt.yticks(fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)

    if ret:
        return plt.gca()

    # Path('') / name -> name in the working directory (the plain f-string
    # produced '/name', i.e. the filesystem root, for the default path).
    out_dir = Path(path)
    plt.savefig(out_dir / f"{figname}.png", bbox_inches='tight', dpi=400)
    plt.savefig(out_dir / f"{figname}.pdf", bbox_inches='tight', dpi=400)
    plt.clf()
    plt.close()

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

# Age estimators and derived metrics: keep the feature name as a column and
# put both FDR-corrected p-values on the -log10 scale used by the bar plots.
df_ages = pd.read_excel(f"{path}/data/GSE220622/gen/ages_corrected.xlsx", index_col=0)
df_metrics = pd.read_excel(f"{path}/data/GSE220622/gen/metrics.xlsx", index_col=0)
for df_tests in (df_ages, df_metrics):
    df_tests['Features'] = df_tests.index
    df_tests['Mann-Whitney'] = -np.log10(df_tests['mannwhitneyu_pval_fdr_bh'].values)
    df_tests['ANCOVA'] = -np.log10(df_tests['ancova_Status[T.Control]_pval_fdr_bh'].values)

# EpiScores: same p-values, stored under the axis labels of the scatter plot.
df_scores = pd.read_excel(f"{path}/data/GSE220622/gen/scores.xlsx", index_col=0)
df_scores.index = df_scores.index.astype(str)
# regex=False: the suffix is a literal string.  Interpreted as a regular
# expression the parentheses would form a group and the suffix would not match.
df_scores.index = df_scores.index.str.replace(' (EpiScores)', '', regex=False)
df_scores['Features'] = df_scores.index
df_scores[r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$"] = -np.log10(df_scores['mannwhitneyu_pval_fdr_bh'].values)
df_scores[r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$"] = -np.log10(df_scores['ancova_Status[T.Control]_pval_fdr_bh'].values)
# "non-significant" = above 0.05 in both tests; everything else is "significant".
episcores_nonsign = df_scores.index[(df_scores['mannwhitneyu_pval_fdr_bh'] > 0.05) & (df_scores['ancova_Status[T.Control]_pval_fdr_bh'] > 0.05)]
episcores_sign = df_scores.index.difference(episcores_nonsign)

# limma results, sorted by adjusted p-value.
df_dmps = pd.read_csv(f"{path}/data/GSE220622/gen/GSEA(ebayes)_group_orgn_limma.csv", index_col=0)
df_dmps["CpG"] = df_dmps.index.values
df_dmps.sort_values(["adj.P.Val"], ascending=[True], inplace=True)
# Label = part of the row name before the first underscore (vectorised
# instead of a row-wise apply).
df_dmps['print'] = df_dmps['CpG'].str.split('_', n=1).str[0]
df_dmps['log_pval'] = -np.log10(df_dmps["adj.P.Val"])

# Bar colours per statistical test.
colors_tests = {
    'Mann-Whitney': 'fuchsia',
    'ANCOVA': 'cyan'
}


In [None]:
# Stand-alone bar plot for the age estimators: one pair of bars
# (Mann-Whitney / ANCOVA) per feature, on the -log10(p-value) scale.
sns.set_theme(style='ticks')
df_fig = pd.melt(
    df_ages,
    id_vars='Features',
    value_vars=['Mann-Whitney', 'ANCOVA'],
    var_name='Test',
    value_name=r"$-\log_{10}(\mathrm{p-value})$",
)
# Figure height grows with the number of bars.
n_bars = df_fig.shape[0]
fig, ax = plt.subplots(figsize=(3, n_bars * 0.15))
barplot = sns.barplot(
    ax=ax,
    data=df_fig,
    x=r"$-\log_{10}(\mathrm{p-value})$",
    y='Features',
    hue='Test',
    hue_order=['Mann-Whitney', 'ANCOVA'],
    palette=colors_tests,
    edgecolor='black',
)
ax.set_ylabel('')
# Dotted red line at the 0.05 significance level.
ax.axvline(-np.log10(0.05), color="red", linestyle="dotted", linewidth=2.0)
plt.savefig(f"{path}/figures/4/ages_barplot.png", bbox_inches='tight', dpi=200)
plt.savefig(f"{path}/figures/4/ages_barplot.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:
# Stand-alone bar plot for the derived metrics: one pair of bars
# (Mann-Whitney / ANCOVA) per feature, on the -log10(p-value) scale.
sns.set_theme(style='ticks')
df_fig = pd.melt(
    df_metrics,
    id_vars='Features',
    value_vars=['Mann-Whitney', 'ANCOVA'],
    var_name='Test',
    value_name=r"$-\log_{10}(\mathrm{p-value})$",
)
# Figure height grows with the number of bars.
n_bars = df_fig.shape[0]
fig, ax = plt.subplots(figsize=(3, n_bars * 0.15))
barplot = sns.barplot(
    ax=ax,
    data=df_fig,
    x=r"$-\log_{10}(\mathrm{p-value})$",
    y='Features',
    hue='Test',
    hue_order=['Mann-Whitney', 'ANCOVA'],
    palette=colors_tests,
    edgecolor='black',
)
ax.set_ylabel('')
# Dotted red line at the 0.05 significance level.
ax.axvline(-np.log10(0.05), color="red", linestyle="dotted", linewidth=2.0)
# Legend goes above the axes, on a single frameless row.
sns.move_legend(ax, "lower center", bbox_to_anchor=(.4, 1), ncol=2, frameon=False)
plt.savefig(f"{path}/figures/4/metrics_barplot.png", bbox_inches='tight', dpi=200)
plt.savefig(f"{path}/figures/4/metrics_barplot.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:
# Stand-alone EpiScores scatter: Mann-Whitney vs ANCOVA p-values (-log10),
# non-significant scores in gray, significant ones in red with name labels.
sns.set_theme(style='ticks')
fig, ax = plt.subplots(figsize=(8, 5), gridspec_kw={}, layout='constrained')

df_nonsign = df_scores.loc[episcores_nonsign, :]
df_sign = df_scores.loc[episcores_sign, :]
threshold = -np.log10(0.05)
shared_style = dict(
    x=r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$",
    y=r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$",
    linewidth=0.25,
    alpha=0.75,
    edgecolor="k",
    ax=ax,
)

scatter = sns.scatterplot(data=df_nonsign, s=15, color='gray', **shared_style)
# Significance thresholds: a solid light-gray backing line with a dotted line
# in the colour of the corresponding test on top of it.
for draw_line, test_color in ((ax.axvline, 'fuchsia'), (ax.axhline, 'cyan')):
    draw_line(threshold, color='lightgray', linestyle="-", linewidth=2.5, zorder=0)
    draw_line(threshold, color=test_color, linestyle="dotted", linewidth=2.0, zorder=1)
scatter = sns.scatterplot(data=df_sign, s=25, color='red', **shared_style)

# Name every significant score, then let adjust_text move the labels apart.
texts = [
    ax.text(x_val, y_val, name, fontsize=9)
    for x_val, y_val, name in zip(
        df_sign[r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$"],
        df_sign[r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$"],
        df_sign['Features'],
    )
]
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'))
ax.set_xlim(-0.1, 12)
ax.set_ylim(-0.1, 11.5)
fig.savefig(f"{path}/figures/4/episcores.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/figures/4/episcores.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:
# Stand-alone volcano plot of the limma results, saved at high resolution.
fig, ax = plt.subplots(figsize=(8, 5), gridspec_kw={}, layout='constrained')

volcano_options = dict(
    # data and significance thresholds
    df=df_dmps,
    lfc='logFC',
    pv='adj.P.Val',
    pv_thr=(0.05, 0.05),
    lfc_thr=(0.0, 0.0),
    geneid='print',
    # appearance
    color=('orange', 'gray', 'mediumblue'),
    dotsize=4,
    dim=(8, 5),
    ar=0,
    axtickfontsize=14,
    axlabelfontsize=14,
    gfont=14,
    gstyle=2,
    sign_line_h=True,
    sign_line_v=False,
    # draw into the axes created above and hand them back instead of saving
    ax=ax,
    ret=True,
    path=f"{path}",
)
volc = volcano(**volcano_options)

fig.savefig(f"{path}/figures/4/volcano.png", bbox_inches='tight', dpi=1200)
fig.savefig(f"{path}/figures/4/volcano.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:


# Composite Figure 4: bar plots for ages (A) and metrics (B) on the left,
# EpiScores scatter (C) on the right, volcano plot (D) across the bottom.
fig = plt.figure(figsize=(15, 20), layout="constrained")
axs = fig.subplot_mosaic(
    [
        ['ages', 'scores'],
        ['metrics', 'scores'],
        ['volcano', 'volcano'],
    ],
    height_ratios=[4.25, 0.75, 5],
    width_ratios=[1, 4],
    gridspec_kw={},
)

sns.set_theme(style='whitegrid')

threshold = -np.log10(0.05)

# Panels A and B: identical bar plots for two tables.  Both panel letters are
# positioned in the data coordinates of the 'ages' axes.
bar_panels = (
    ('ages', df_ages, 'A', 1),
    ('metrics', df_metrics, 'B', 38),
)
for panel, df_panel, letter, letter_y in bar_panels:
    df_fig = pd.melt(
        df_panel,
        id_vars='Features',
        value_vars=['Mann-Whitney', 'ANCOVA'],
        var_name='Test',
        value_name=r"$-\log_{10}(\mathrm{p-value})$",
    )
    barplot = sns.barplot(
        ax=axs[panel],
        data=df_fig,
        x=r"$-\log_{10}(\mathrm{p-value})$",
        y='Features',
        hue='Test',
        hue_order=['Mann-Whitney', 'ANCOVA'],
        palette=colors_tests,
        edgecolor='black',
    )
    axs[panel].set_ylabel('')
    axs[panel].axvline(threshold, color="red", linestyle="dotted", linewidth=2.0)
    axs['ages'].text(-9.5, letter_y, letter, fontsize=30, fontfamily='arial')

# Panel C: EpiScores, non-significant in gray and significant in red.
ax_scores = axs['scores']
df_nonsign = df_scores.loc[episcores_nonsign, :]
df_sign = df_scores.loc[episcores_sign, :]
shared_style = dict(
    x=r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$",
    y=r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$",
    linewidth=0.25,
    alpha=0.75,
    edgecolor="k",
    ax=ax_scores,
)
scatter = sns.scatterplot(data=df_nonsign, s=15, color='gray', **shared_style)
# Significance thresholds: a solid light-gray backing line with a dotted line
# in the colour of the corresponding test on top of it.
for draw_line, test_color in ((ax_scores.axvline, 'fuchsia'), (ax_scores.axhline, 'cyan')):
    draw_line(threshold, color='lightgray', linestyle="-", linewidth=2.5, zorder=0)
    draw_line(threshold, color=test_color, linestyle="dotted", linewidth=2.0, zorder=1)
scatter = sns.scatterplot(data=df_sign, s=25, color='red', **shared_style)
# Name every significant score, then let adjust_text move the labels apart.
texts = [
    ax_scores.text(x_val, y_val, name, fontsize=9)
    for x_val, y_val, name in zip(
        df_sign[r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$"],
        df_sign[r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$"],
        df_sign['Features'],
    )
]
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'), ax=ax_scores)
ax_scores.set_xlim(0, 11.8)
ax_scores.set_ylim(0, 11)
ax_scores.text(-0.9, 10.61, 'C', fontsize=30, fontfamily='arial')

# Panel D: volcano plot drawn into the bottom axes (ret=True -> no saving
# inside volcano()).
ax_volcano = axs['volcano']
volc = volcano(
    df=df_dmps,
    lfc='logFC',
    pv='adj.P.Val',
    pv_thr=(0.05, 0.05),
    lfc_thr=(0.0, 0.0),
    geneid='print',
    color=('green', 'gray', 'red'),
    dotsize=15,
    dim=(4, 4),
    ar=0,
    axtickfontsize=18,
    axlabelfontsize=18,
    gfont=18,
    gstyle=2,
    sign_line_h=True,
    sign_line_v=False,
    ax=ax_volcano,
    ret=True,
    path=f"{path}",
)
ax_volcano.set_ylim(0, 10.5)
ax_volcano.text(-0.185, 10.1, 'D', fontsize=30, fontfamily='arial')

fig.savefig(f"{path}/figures/4/fig.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/figures/4/fig.pdf", bbox_inches='tight')
plt.close(fig)

## Legacy (all plots in one figure, very heavy volcano plot)

In [None]:
def check_for_nonnumeric(pd_series=None):
    """Return 1 if ``pd_series`` holds any value that is not numeric, else 0.

    Values are coerced with ``pd.to_numeric(..., errors='coerce')``; every
    entry that ends up as NaN (values that cannot be parsed as well as values
    that were already missing) counts as non-numeric.
    """
    coerced = pd.to_numeric(pd_series, errors='coerce')
    return 1 if coerced.isna().any() else 0


def gene_plot(d, geneid, lfc, lfc_thr, pv_thr, genenames, gfont, pv, gstyle):
    """Label selected points on the current pyplot axes.

    Args:
        d: DataFrame with one row per point.  Must contain the columns named
            by ``geneid``, ``lfc`` and ``pv`` plus ``'logpv_add_axy'`` (the
            y-coordinate of every point).
        geneid: Name of the column holding the point identifiers.
        lfc: Name of the column used as x-coordinate.
        lfc_thr: ``(up, down)`` thresholds on ``lfc``; only used when
            ``genenames == "deg"``.
        pv_thr: ``(up, down)`` cut-offs on ``pv``; only used when
            ``genenames == "deg"``.
        genenames: Selects what gets labelled:
            ``None`` - nothing;
            ``"deg"`` - every identifier passing the up or down thresholds;
            tuple - identifiers contained in the tuple;
            dict - identifiers that are keys, labelled with the mapped value.
            Any other value labels nothing.
        gfont: Font size of the labels for ``gstyle == 1``.
        pv: Name of the p-value column.
        gstyle: ``1`` draws plain text, ``2`` draws a boxed annotation with an
            arrow (its text size is fixed to 6, ``gfont`` is not used).

    Raises:
        ValueError: If a label has to be drawn and ``gstyle`` is neither 1
            nor 2.  (Previously this printed a message and called
            ``sys.exit(1)``.)

    For identifiers that occur in several rows the first row is used.  Rows
    with a missing identifier are skipped.
    """
    if genenames is None:
        return
    if isinstance(genenames, str):
        if genenames != "deg":
            return
        mode = "deg"
    elif isinstance(genenames, tuple):
        mode = "tuple"
    elif isinstance(genenames, dict):
        mode = "dict"
    else:
        return

    # One pass over the first row of every identifier instead of a full
    # boolean scan of the frame per identifier.
    first_rows = d.drop_duplicates(subset=geneid, keep="first")
    first_rows = first_rows[first_rows[geneid].notna()]
    points = zip(first_rows[geneid], first_rows[lfc], first_rows['logpv_add_axy'], first_rows[pv])

    for gene, x, y, p_value in points:
        if mode == "deg":
            is_up = x >= lfc_thr[0] and p_value < pv_thr[0]
            is_down = x <= -lfc_thr[1] and p_value < pv_thr[1]
            if not (is_up or is_down):
                continue
            label = gene
        else:
            if gene not in genenames:
                continue
            label = genenames[gene] if mode == "dict" else gene

        if gstyle == 1:
            plt.text(x, y, label, fontsize=gfont)
        elif gstyle == 2:
            plt.annotate(label, xy=(x, y),
                         xycoords='data', xytext=(5, -15), textcoords='offset points', size=6,
                         bbox=dict(boxstyle="round", alpha=0.1),
                         arrowprops=dict(arrowstyle="wedge,tail_width=0.5", alpha=0.1, relpos=(0, 0)))
        else:
            raise ValueError(f"Error: invalid gstyle choice {gstyle!r}; expected 1 or 2")


def volcano(df="dataframe", lfc=None, pv=None, lfc_thr=(1, 1), pv_thr=(0.05, 0.05), color=("green", "grey", "red"),
            valpha=1, geneid=None, genenames=None, gfont=8, dim=(5, 5), ar=90, dotsize=1, markerdot="o",
            sign_line_v=False, sign_line_h=False, gstyle=1, axtickfontsize=9,
            axtickfontname="Arial", axlabelfontsize=9, axlabelfontname="Arial", axxlabel=None,
            axylabel=None, xlm=None, ylm=None, plotlegend=False, legendpos='best',
            figname='volcano', legendanchor=None,
            legendlabels=['Significant up', 'Not significant', 'Significant down'], theme=None, path='', ret=False, **kwargs):
    """Draw a volcano plot (``lfc`` on x, ``-log10(pv)`` on y) with pyplot.

    Args:
        df: DataFrame with the columns named by ``lfc`` and ``pv`` (and
            ``geneid`` when labels are requested).  The caller's frame is not
            modified; helper columns are added to a copy.
        lfc, pv: Column names of the effect size and the p-value.
        lfc_thr: ``(up, down)`` thresholds: a point is "up" when
            ``lfc >= up`` and "down" when ``lfc <= -down`` (given the p-value
            cut-off is met as well).  A point matching both is "down".
        pv_thr: ``(up, down)`` p-value cut-offs (strict ``<``).
        color: Three distinct colours for (up, not significant, down).
        valpha, dotsize, markerdot: Alpha, size and marker of the points.
        geneid, genenames, gfont, gstyle: Forwarded to ``gene_plot`` to label
            points.
        dim: Unused; the figure/axes are not created here.
        ar: Rotation of the tick labels.
        sign_line_v, sign_line_h: Draw dashed threshold lines.
        axtickfontsize, axtickfontname, axlabelfontsize, axlabelfontname:
            Tick and axis label fonts.
        axxlabel, axylabel: Override the default axis labels.
        xlm: ``(left, right)`` x-limits.
        ylm: ``(bottom, top, step)`` y-limits and tick step.
        plotlegend, legendpos, legendanchor, legendlabels: Legend settings;
            ``legendlabels`` must have three entries (up, not significant,
            down).
        figname, path: Output file stem and directory, used when ``ret`` is
            false.  An empty ``path`` means the current working directory.
        theme: ``'dark'`` switches pyplot to the ``dark_background`` style.
        ret: If true, return the current axes instead of saving.
        **kwargs: ``ax`` - axes to draw into (made current via ``plt.sca``).

    Returns:
        The current axes when ``ret`` is true, otherwise ``None`` (the plot is
        saved as PNG and PDF and the figure is closed).

    Raises:
        AssertionError: On non-numeric ``lfc``/``pv`` values, fewer than three
            distinct colours, or a wrong number of legend labels.
    """
    _x = r'$ \log_{2}(\mathrm{Fold Change})$'
    _y = r'$ -\log_{10}(\mathrm{p-value})$'
    ax = kwargs.get('ax')
    if ax is not None:
        plt.sca(ax)
    # check if dataframe contains any non-numeric character
    assert check_for_nonnumeric(df[lfc]) == 0, 'dataframe contains non-numeric values in lfc column'
    assert check_for_nonnumeric(df[pv]) == 0, 'dataframe contains non-numeric values in pv column'
    # drop() returns a new frame, so helper columns left over from an earlier
    # call are discarded and the caller's frame is never touched
    df = df.drop(['color_add_axy', 'logpv_add_axy'], axis=1, errors='ignore')
    assert len(set(color)) == 3, 'unique color must be size of 3'

    is_up = (df[lfc] >= lfc_thr[0]) & (df[pv] < pv_thr[0])
    is_down = (df[lfc] <= -lfc_thr[1]) & (df[pv] < pv_thr[1])
    # Start from "not significant" and overwrite; direct assignment avoids the
    # chained in-place fillna, which does not update the frame under pandas
    # Copy-on-Write.
    df['color_add_axy'] = color[1]
    df.loc[is_up, 'color_add_axy'] = color[0]  # upregulated
    df.loc[is_down, 'color_add_axy'] = color[2]  # downregulated
    df['logpv_add_axy'] = -(np.log10(np.array(df[pv].values.astype(float))))

    # class code per point: 0 = up, 1 = not significant, 2 = down
    assign_values = {col: i for i, col in enumerate(color)}
    color_result_num = [assign_values[i] for i in df['color_add_axy']]

    if theme == 'dark':
        plt.style.use('dark_background')
    # vmin/vmax pin the code -> colour mapping.  Without them the codes are
    # normalised by the min/max actually present, so the colours are wrong as
    # soon as one of the three classes has no points.
    points = plt.scatter(df[lfc], df['logpv_add_axy'], c=color_result_num, cmap=ListedColormap(color),
                         vmin=0, vmax=len(color) - 1, alpha=valpha, s=dotsize, marker=markerdot)
    if plotlegend:
        assert len(legendlabels) == 3, 'legendlabels must be size of 3'
        # legend_elements() yields one handle per class code present (sorted),
        # so pick the matching labels instead of always passing all three.
        present = sorted(set(color_result_num))
        plt.legend(handles=points.legend_elements()[0], labels=[legendlabels[i] for i in present],
                   loc=legendpos, bbox_to_anchor=legendanchor)
    if sign_line_h:
        plt.axhline(y=-np.log10(pv_thr[0]), linestyle='--', color='black', linewidth=1)
    if sign_line_v:
        plt.axvline(x=lfc_thr[0], linestyle='--', color='black', linewidth=1)
        plt.axvline(x=-lfc_thr[1], linestyle='--', color='black', linewidth=1)
    gene_plot(df, geneid, lfc, lfc_thr, pv_thr, genenames, gfont, pv, gstyle)

    if axxlabel:
        _x = axxlabel
    if axylabel:
        _y = axylabel

    plt.xlabel(_x, fontsize=axlabelfontsize, fontname=axlabelfontname)
    plt.ylabel(_y, fontsize=axlabelfontsize, fontname=axlabelfontname)
    if xlm:
        plt.xlim(left=xlm[0], right=xlm[1])
    plt.xticks(fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)
    if ylm:
        plt.ylim(bottom=ylm[0], top=ylm[1])
        plt.yticks(np.arange(ylm[0], ylm[1], ylm[2]), fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)
    else:
        plt.yticks(fontsize=axtickfontsize, rotation=ar, fontname=axtickfontname)

    if ret:
        return plt.gca()

    # Path('') / name -> name in the working directory (the plain f-string
    # produced '/name', i.e. the filesystem root, for the default path).
    out_dir = Path(path)
    plt.savefig(out_dir / f"{figname}.png", bbox_inches='tight', dpi=400)
    plt.savefig(out_dir / f"{figname}.pdf", bbox_inches='tight', dpi=400)
    plt.clf()
    plt.close()

In [None]:
path = 'E:/YandexDisk/Work/pydnameth/draft/13_fmba_cvd_dnam'

# Age estimators and derived metrics: keep the feature name as a column and
# put both FDR-corrected p-values on the -log10 scale used by the bar plots.
df_ages = pd.read_excel(f"{path}/data/GSE220622/gen/ages_corrected.xlsx", index_col=0)
df_metrics = pd.read_excel(f"{path}/data/GSE220622/gen/metrics.xlsx", index_col=0)
for df_tests in (df_ages, df_metrics):
    df_tests['Features'] = df_tests.index
    df_tests['Mann-Whitney'] = -np.log10(df_tests['mannwhitneyu_pval_fdr_bh'].values)
    df_tests['ANCOVA'] = -np.log10(df_tests['ancova_Status[T.Control]_pval_fdr_bh'].values)

# EpiScores: same p-values, stored under the axis labels of the scatter plot.
df_scores = pd.read_excel(f"{path}/data/GSE220622/gen/scores.xlsx", index_col=0)
df_scores.index = df_scores.index.astype(str)
# regex=False: the suffix is a literal string.  Interpreted as a regular
# expression the parentheses would form a group and the suffix would not match.
df_scores.index = df_scores.index.str.replace(' (EpiScores)', '', regex=False)
df_scores['Features'] = df_scores.index
df_scores[r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$"] = -np.log10(df_scores['mannwhitneyu_pval_fdr_bh'].values)
df_scores[r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$"] = -np.log10(df_scores['ancova_Status[T.Control]_pval_fdr_bh'].values)
# "non-significant" = above 0.05 in both tests; everything else is "significant".
episcores_nonsign = df_scores.index[(df_scores['mannwhitneyu_pval_fdr_bh'] > 0.05) & (df_scores['ancova_Status[T.Control]_pval_fdr_bh'] > 0.05)]
episcores_sign = df_scores.index.difference(episcores_nonsign)

# limma results, sorted by adjusted p-value.
df_dmps = pd.read_csv(f"{path}/data/GSE220622/gen/GSEA(ebayes)_group_orgn_limma.csv", index_col=0)
df_dmps["CpG"] = df_dmps.index.values
df_dmps.sort_values(["adj.P.Val"], ascending=[True], inplace=True)
# Label = part of the row name before the first underscore (vectorised
# instead of a row-wise apply).
df_dmps['print'] = df_dmps['CpG'].str.split('_', n=1).str[0]
df_dmps['log_pval'] = -np.log10(df_dmps["adj.P.Val"])

# Bar colours per statistical test.
colors_tests = {
    'Mann-Whitney': 'fuchsia',
    'ANCOVA': 'cyan'
}


# Composite Figure 4: bar plots for ages (A) and metrics (B) on the left,
# EpiScores scatter (C) on the right, volcano plot (D) across the bottom.
fig = plt.figure(figsize=(15, 20), layout="constrained")
axs = fig.subplot_mosaic(
    [
        ['ages', 'scores'],
        ['metrics', 'scores'],
        ['volcano', 'volcano'],
    ],
    height_ratios=[4.25, 0.75, 5],
    width_ratios=[1, 4],
    gridspec_kw={},
)

sns.set_theme(style='whitegrid')

threshold = -np.log10(0.05)

# Panels A and B: identical bar plots for two tables.  Both panel letters are
# positioned in the data coordinates of the 'ages' axes.
bar_panels = (
    ('ages', df_ages, 'A', 1),
    ('metrics', df_metrics, 'B', 38),
)
for panel, df_panel, letter, letter_y in bar_panels:
    df_fig = pd.melt(
        df_panel,
        id_vars='Features',
        value_vars=['Mann-Whitney', 'ANCOVA'],
        var_name='Test',
        value_name=r"$-\log_{10}(\mathrm{p-value})$",
    )
    barplot = sns.barplot(
        ax=axs[panel],
        data=df_fig,
        x=r"$-\log_{10}(\mathrm{p-value})$",
        y='Features',
        hue='Test',
        hue_order=['Mann-Whitney', 'ANCOVA'],
        palette=colors_tests,
        edgecolor='black',
    )
    axs[panel].set_ylabel('')
    axs[panel].axvline(threshold, color="red", linestyle="dotted", linewidth=2.0)
    axs['ages'].text(-9.5, letter_y, letter, fontsize=30, fontfamily='arial')

# Panel C: EpiScores, non-significant in gray and significant in red.
ax_scores = axs['scores']
df_nonsign = df_scores.loc[episcores_nonsign, :]
df_sign = df_scores.loc[episcores_sign, :]
shared_style = dict(
    x=r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$",
    y=r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$",
    linewidth=0.25,
    alpha=0.75,
    edgecolor="k",
    ax=ax_scores,
)
scatter = sns.scatterplot(data=df_nonsign, s=15, color='gray', **shared_style)
# Significance thresholds: a solid light-gray backing line with a dotted line
# in the colour of the corresponding test on top of it.
for draw_line, test_color in ((ax_scores.axvline, 'fuchsia'), (ax_scores.axhline, 'cyan')):
    draw_line(threshold, color='lightgray', linestyle="-", linewidth=2.5, zorder=0)
    draw_line(threshold, color=test_color, linestyle="dotted", linewidth=2.0, zorder=1)
scatter = sns.scatterplot(data=df_sign, s=25, color='red', **shared_style)
# Name every significant score, then let adjust_text move the labels apart.
texts = [
    ax_scores.text(x_val, y_val, name, fontsize=9)
    for x_val, y_val, name in zip(
        df_sign[r"$-\log_{10}(\mathrm{Mann-Whitney\ p-value})$"],
        df_sign[r"$-\log_{10}(\mathrm{ANCOVA\ p-value})$"],
        df_sign['Features'],
    )
]
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'), ax=ax_scores)
ax_scores.set_xlim(0, 11.8)
ax_scores.set_ylim(0, 11)
ax_scores.text(-0.9, 10.61, 'C', fontsize=30, fontfamily='arial')

# Panel D: volcano plot drawn into the bottom axes (ret=True -> no saving
# inside volcano()).
ax_volcano = axs['volcano']
volc = volcano(
    df=df_dmps,
    lfc='logFC',
    pv='adj.P.Val',
    pv_thr=(0.05, 0.05),
    lfc_thr=(0.0, 0.0),
    geneid='print',
    color=('green', 'gray', 'red'),
    dotsize=15,
    dim=(4, 4),
    ar=0,
    axtickfontsize=18,
    axlabelfontsize=18,
    gfont=18,
    gstyle=2,
    sign_line_h=True,
    sign_line_v=False,
    ax=ax_volcano,
    ret=True,
    path=f"{path}",
)
ax_volcano.set_ylim(0, 10.5)
ax_volcano.text(-0.185, 10.1, 'D', fontsize=30, fontfamily='arial')

fig.savefig(f"{path}/figures/4/fig.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/figures/4/fig.pdf", bbox_inches='tight')
plt.close(fig)