# Debugging autoreload

In [None]:
%load_ext autoreload
%autoreload 2

# Load packages

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pathlib
from tqdm import tqdm
from scipy import stats
import matplotlib


def make_rgb_transparent(rgb, bg_rgb=(1,1,1), alpha=1.0):
    """
    Flatten a translucent color onto an opaque background.

    Arguments:
        rgb:        Iterable with the foreground color channels.
        bg_rgb:     Iterable with the background color channels
                    (white by default).
        alpha:      Weight of the foreground; 1 returns the foreground
                    channels, 0 returns the background channels.

    Returns:
        A list with one blended value per channel pair. Channels are paired
        with zip, so the result is as long as the shorter input.
    """
    blended = []
    for fg_channel, bg_channel in zip(rgb, bg_rgb):
        blended.append(alpha * fg_channel + (1 - alpha) * bg_channel)
    return blended


def mix_colors_rgba(color_a, color_b, mode="mix", t=None, gamma=2.2):
    """
    Mix two colors color_a and color_b.

    Arguments:
        color_a:    Real-valued 4-tuple (r, g, b, alpha). Foreground color
                    in "blend" mode.
        color_b:    Real-valued 4-tuple (r, g, b, alpha). Background color
                    in "blend" mode.
        mode:       "mix":   Interpolate between two colors.
                    "blend": Blend two translucent colors.
        t:          Mixing weight, clamped to [0, 1].
                    In "mix" mode t=0 yields color_a and t=1 yields color_b
                    (default 0.5). In "blend" mode the opacity of color_a is
                    scaled by (1-t) before blending (default 0).
        gamma:      Parameter to control the gamma correction. Channels are
                    raised to this power before being combined and to
                    1/gamma afterwards. None or 1 disables the correction.

    Returns:
        rgba:       A 4-tuple with the result color, every component
                    clipped to [0, 1].

    To reproduce Markus Jarderot's solution:
            mix_colors_rgba(a, b, mode="blend", t=0, gamma=1.)
    To reproduce Fordi's solution:
            mix_colors_rgba(a, b, mode="mix", t=t, gamma=2.)
    To compute the RGB color of a translucent color on white background:
            mix_colors_rgba(a, [1,1,1,1], mode="blend", t=0, gamma=None)
    """
    assert mode in ("mix", "blend"), 'mode must be "mix" or "blend"'
    assert gamma is None or gamma > 0, "gamma must be None or positive"
    if t is None:
        t = 0.5 if mode == "mix" else 0.
    t = max(0, min(t, 1))
    color_a = np.asarray(color_a)
    color_b = np.asarray(color_b)

    if mode == "mix":
        # Plain interpolation: alpha is always combined linearly,
        # regardless of gamma.
        weight_b = t
        a = (1-t)*color_a[-1] + t*color_b[-1]
    else:
        alpha_a = color_a[-1]*(1-t)
        a = 1 - (1-alpha_a) * (1-color_b[-1])
        # When both layers are fully transparent the resulting alpha is 0
        # and the background share would be 0/0 (NaN channels). The color is
        # invisible in that case, so simply keep color_a's channels.
        weight_b = color_b[-1]*(1-alpha_a)/a if a != 0 else 0.

    if gamma in (1., None):
        r, g, b, _ = (1-weight_b)*color_a + weight_b*color_b
    else:
        r, g, b, _ = np.power(
            (1-weight_b)*color_a**gamma + weight_b*color_b**gamma,
            1/gamma)

    return tuple(np.clip([r,g,b,a], 0, 1))


# Load original data

In [None]:
# Dataset root and the folder that receives every figure/table of this notebook.
path = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN"
path_save = f"{path}/special/063_dnam_statistics"
pathlib.Path(path_save).mkdir(parents=True, exist_ok=True)

# Phenotype table; the first spreadsheet column is used as the index.
df = pd.read_excel(f"{path}/pheno.xlsx", index_col=0)

# df_imm = pd.read_excel(f"{path}/data/immuno/data.xlsx", index_col=0)
# ids_intxn = df.index.intersection(df_imm.index)
# df.loc[ids_intxn, 'PMC10699032 ID'] = df_imm.loc[ids_intxn, 'PMC10699032 ID']
# df.to_excel(f"{path}/pheno111.xlsx")

# Display name of each group -> value it has in the 'Status' column.
status_codes = {
    'Healthy Controls': 'Control',
    'ESRD (No Transplantation)': 'ESRD',
    'ESRD (With Transplantation)': 'TR',
}
# Display name of each group -> index labels of the rows in that group.
ids_status = {
    label: df.index[df['Status'] == code]
    for label, code in status_codes.items()
}

# Plot colors, keyed by the display name / raw category value.
colors_status = dict([
    ('Healthy Controls', 'forestgreen'),
    ('ESRD (No Transplantation)', 'crimson'),
    ('ESRD (With Transplantation)', 'darkorchid'),
])

colors_sex = dict(F='violet', M='deepskyblue')

colors_region = {
    'Central': 'gold',
    'Yakutia': 'lightslategray',
}
# The mixed region gets the (default-settings) mix of the two region colors.
colors_region['Yakutia/Central'] = mix_colors_rgba(
    matplotlib.colors.ColorConverter.to_rgba(colors_region['Central']),
    matplotlib.colors.ColorConverter.to_rgba(colors_region['Yakutia'])
)

# One row per status group; only 'Count' and 'Color' are filled in here,
# 'Name' stays empty.
df_status = pd.DataFrame(index=list(ids_status), columns=['Count', 'Color', 'Name'])
for group, group_ids in ids_status.items():
    df_status.at[group, 'Count'] = len(group_ids)
    df_status.at[group, 'Color'] = colors_status[group]

# Status plots

In [None]:
# Bar chart of the number of samples ('Count' on x) per status group (on y),
# saved as PNG and PDF.
sns.set_theme(style='whitegrid', font_scale=1.5)
fig, ax = plt.subplots(figsize=(3, 4))
barplot = sns.barplot(
    x='Count',
    y=df_status.index,
    data=df_status,
    palette=colors_status,
    edgecolor='black',
    ax=ax
)
ax.set_ylabel('')
# Annotate every bar with its value formatted as an integer.
for bar_container in barplot.containers:
    barplot.bar_label(bar_container, fmt="%d")
# Raster output at 200 dpi, vector output with default settings.
fig.savefig(f"{path_save}/status_barplot.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path_save}/status_barplot.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:
# Age histograms stacked by sex, one figure per status group.
# Bin edges every 10 years from 0 to 120.
hist_bins = np.linspace(0, 120, 13)
# The theme does not depend on the group, so set it once before the loop.
sns.set_theme(style='whitegrid')
for group in ids_status:
    df_fig = df.loc[ids_status[group], ['Age', 'Sex']].copy()
    # Legend labels carry the number of samples of each sex in this group.
    rename_dict = {
        sex: f"{sex} ({(df_fig['Sex'] == sex).sum()})"
        for sex in ('F', 'M')
    }
    # Assign the result back instead of calling replace(..., inplace=True) on
    # the column selection: that chained in-place update is deprecated and has
    # no effect under pandas Copy-on-Write, which would leave the hue values
    # out of sync with the palette keys below.
    df_fig['Sex'] = df_fig['Sex'].replace(rename_dict)
    palette = {label: colors_sex[sex] for sex, label in rename_dict.items()}
    fig, ax = plt.subplots(figsize=(6, 4))
    histplot = sns.histplot(
        data=df_fig,
        hue_order=list(palette)[::-1],
        bins=hist_bins,
        x="Age",
        hue="Sex",
        edgecolor='black',
        palette=palette,
        multiple="stack",
        ax=ax
    )
    # Tint the axes background with the group's status color at 10% opacity
    # over white.
    ax.set_facecolor(make_rgb_transparent(matplotlib.colors.ColorConverter.to_rgb(colors_status[group]), [1, 1, 1], 0.1))
    plt.savefig(f"{path_save}/{group}_histplot_sex.png", bbox_inches='tight', dpi=400)
    plt.savefig(f"{path_save}/{group}_histplot_sex.pdf", bbox_inches='tight')
    plt.close(fig)

# Controls

In [None]:
# Age histogram of the healthy controls, stacked by region.
df_fig = df.loc[ids_status['Healthy Controls'], ['Age', 'Region']].copy()
# Human-readable region names for the legend.
region_titles = {
    'Central': "Central Russia",
    'Yakutia': "Yakutia (Sakha)",
    'Yakutia/Central': "Central origin, lives in Yakutia",
}
# Legend labels carry the number of controls from each region.
rename_dict = {
    region: f"{title} ({(df_fig['Region'] == region).sum()})"
    for region, title in region_titles.items()
}
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: that chained in-place update is deprecated and has no
# effect under pandas Copy-on-Write, which would leave the hue values out of
# sync with the palette keys below.
df_fig['Region'] = df_fig['Region'].replace(rename_dict)
palette = {label: colors_region[region] for region, label in rename_dict.items()}
sns.set_theme(style='whitegrid')
fig, ax = plt.subplots(figsize=(6, 4))
histplot = sns.histplot(
    data=df_fig,
    hue_order=list(palette)[::-1],
    bins=hist_bins,
    x="Age",
    hue="Region",
    edgecolor='black',
    palette=palette,
    multiple="stack",
    ax=ax
)
plt.savefig(f"{path_save}/Controls_histplot_region.png", bbox_inches='tight', dpi=400)
plt.savefig(f"{path_save}/Controls_histplot_region.pdf", bbox_inches='tight')
plt.close(fig)

In [None]:
# Region x sex contingency table of the healthy controls, written to Excel.
df_fig = df.loc[ids_status['Healthy Controls'], ['Sex', 'Region']].copy()
# Raw region value -> human-readable row label of the output table.
rename_dict = {
    'Central': "Central Russia",
    'Yakutia': "Yakutia (Sakha)",
    'Yakutia/Central': "Central origin, lives in Yakutia",
}
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: that chained in-place update is deprecated and has no
# effect under pandas Copy-on-Write, in which case every count below would
# silently come out as 0 because the renamed labels never match.
df_fig['Region'] = df_fig['Region'].replace(rename_dict)
df_ctrl_region_sex = pd.DataFrame(index=list(rename_dict.values()), columns=['F', 'M'])

# Count the controls for every (region, sex) combination.
for region in df_ctrl_region_sex.index:
    for sex in df_ctrl_region_sex.columns:
        in_cell = (df_fig['Region'] == region) & (df_fig['Sex'] == sex)
        df_ctrl_region_sex.at[region, sex] = df_fig[in_cell].shape[0]
df_ctrl_region_sex.to_excel(f"{path_save}/ctrl_region_sex.xlsx")