# Debugging autoreload

In [None]:
%load_ext autoreload
%autoreload 2

# Load packages

In [None]:
from tqdm import tqdm
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.io as pio
pio.kaleido.scope.mathjax = None
from plotly.offline import init_notebook_mode
from matplotlib import patheffects as pe
import matplotlib
init_notebook_mode(connected=False)
import matplotlib.pyplot as plt
import pathlib
import torch
import torchmetrics

# Load data

In [None]:
# --- Constants ---------------------------------------------------------------

# Root folder that holds the input tables and receives the generated figures.
path = "D:/YandexDisk/Work/pydnameth/draft/10_MetaEPIClock/MetaEpiAge"

# Columns of `df` holding the age estimates that are compared against "Age".
ages = [
    'Hannum',
    'PC-Hannum',
    'Horvath',
    'PC-Horvath',
    'SkinBloodAge',
    'PC-SkinBloodAge',
    'PhenoAge',
    'PC-PhenoAge',
    'GrimAge',
    'PC-GrimAge',
    'GrimAge2',
]
# Kept apart from `ages`; it is not part of the age-estimate list.
pace = 'DunedinPACE'

# Fixed plotting colors, keyed by tissue name and by sex code.
colors_tissues = {
    'Blood': 'red',
    'Buccal': 'skyblue',
    'Brain': 'plum',
    'Colon': 'pink',
    'Epidermis': 'burlywood',
    'Saliva': 'lavender',
    'Lung': 'gold',
    'Muscle': 'darkorange',
    'Breast': 'limegreen',
    'Liver': 'brown',
}
colors_sex = {
    'F': 'red',
    'M': 'blue'
}
# Matplotlib's XKCD palette; its keys carry an "xkcd:" prefix.
colors_xkcd = matplotlib.colors.XKCD_COLORS

# --- Input tables (first column is used as the index) ------------------------

df_tissues_glob = pd.read_excel(f"{path}/count_tissues.xlsx", index_col=0)
df_states_glob = pd.read_excel(f"{path}/count_states.xlsx", index_col=0)
df_groups_glob = pd.read_excel(f"{path}/count_groups.xlsx", index_col=0)
df_gses_glob = pd.read_excel(f"{path}/count_gse.xlsx", index_col=0)
df = pd.read_excel(f"{path}/table.xlsx", index_col=0)
df_colors = pd.read_excel(f"{path}/colors.xlsx", index_col=0)

# --- Feature -> color lookup -------------------------------------------------

# Resolve the XKCD color name stored in the 'xkcd' column of colors.xlsx
# to the corresponding entry of the XKCD palette, one feature at a time.
colors_global = {}
for feat in df_colors.index.values:
    xkcd_name = df_colors.at[feat, 'xkcd']
    colors_global[feat] = colors_xkcd[f"xkcd:{xkcd_name}"]

# All tissues plots

## Metrics clustermaps

In [None]:
# Output folder for the metric tables and figures.
path_save = f"{path}/figures/metrics"
pathlib.Path(path_save).mkdir(parents=True, exist_ok=True)

tissues = df_tissues_glob.index.values

# Display name -> zero-argument factory producing a fresh torchmetrics metric.
# A new instance is built for every (tissue, clock) pair so that no state is
# carried over between evaluations. The key order defines the sheet order of
# the exported workbook and the order in which figures are produced.
metric_factories = {
    'Concordance Corr Coef': torchmetrics.regression.ConcordanceCorrCoef,
    'Explained Variance': torchmetrics.regression.ExplainedVariance,
    'Kendall Rank Corr Coef': torchmetrics.regression.KendallRankCorrCoef,
    'Mean Absolute Error': torchmetrics.regression.MeanAbsoluteError,
    'Mean Squared Error': torchmetrics.regression.MeanSquaredError,
    'Minkowski Distance': lambda: torchmetrics.regression.MinkowskiDistance(p=3),
    'Pearson Corr Coef': torchmetrics.regression.PearsonCorrCoef,
    'Relative Squared Error': torchmetrics.regression.RelativeSquaredError,
    'R2 Score': torchmetrics.regression.R2Score,
    'Spearman Corr Coef': torchmetrics.regression.SpearmanCorrCoef,
}

# One (tissue x clock) table per metric, pre-filled with zeros.
dfs_metrics = {
    m_name: pd.DataFrame(
        index=tissues,
        columns=ages,
        data=np.zeros(shape=(len(tissues), len(ages))),
    )
    for m_name in metric_factories
}

for tissue in tissues:
    # Select the tissue's rows once instead of re-filtering for every clock.
    df_tissue = df.loc[df['Tissue'] == tissue]
    # Ground truth: the "Age" column of the samples of this tissue.
    y_real = torch.from_numpy(np.float32(df_tissue["Age"].values))

    for age_type in tqdm(ages):
        # Prediction: the age estimate produced by the current clock.
        y_pred = torch.from_numpy(np.float32(df_tissue[age_type].values))

        for m_name, make_metric in metric_factories.items():
            metric = make_metric()
            # torchmetrics metrics are called as metric(preds, target).
            # The order matters for the asymmetric metrics (R2 Score,
            # Explained Variance, Relative Squared Error), which normalise
            # by the spread of `target`.
            dfs_metrics[m_name].at[tissue, age_type] = metric(y_pred, y_real).item()

In [None]:

# Write all metric tables into one workbook, one sheet per metric.
# The sheet is named after the metric and the index column is labelled "Tissue".
metrics_workbook = f"{path_save}/metrics.xlsx"
with pd.ExcelWriter(metrics_workbook, engine='xlsxwriter') as writer:
    for m_name, df_m in dfs_metrics.items():
        df_m.to_excel(
            writer,
            sheet_name=m_name,
            index_label="Tissue",
        )

In [None]:
# Colormap name used for each metric's heatmap. The 'R2 Score' entry is not
# used by the loop below, which builds a dedicated colormap for that metric.
colormaps_metrics = {
    'Concordance Corr Coef': 'hot',
    'Explained Variance': 'hot',
    'Kendall Rank Corr Coef': 'hot',
    'Mean Absolute Error': 'plasma',
    'Mean Squared Error': 'hot',
    'Minkowski Distance': 'hot',
    'Pearson Corr Coef': 'hot',
    'Relative Squared Error': 'hot',
    'R2 Score': 'hot',
    'Spearman Corr Coef': 'hot',
}

# The theme does not change between figures, so it is set once up front.
sns.set_theme(style='whitegrid')

for m_name, df_m in dfs_metrics.items():
    # Only the color mapping differs between metrics; everything else in the
    # clustermap call is shared.
    if m_name == "R2 Score":
        # Start the color scale at 0 and draw every cell below it
        # (negative R2) in black.
        cmap = plt.get_cmap("autumn").copy()
        cmap.set_under('black')
        color_kws = dict(cmap=cmap, vmin=0.0)
    else:
        color_kws = dict(cmap=colormaps_metrics[m_name])

    clustermap = sns.clustermap(
        df_m,
        annot=True,
        col_cluster=True,
        row_cluster=True,
        fmt=".2f",
        linewidth=0.1,
        linecolor='black',
        tree_kws=dict(linewidths=1.5),
        figsize=(10, 7),
        cbar_kws={'orientation': 'horizontal'},
        **color_kws,
    )
    ax_heatmap = clustermap.ax_heatmap
    ax_cbar = clustermap.ax_cbar

    # Axis titles are dropped; the tick labels are enlarged instead.
    ax_heatmap.set_xlabel('')
    ax_heatmap.set_ylabel('')
    ax_heatmap.set_xticklabels(ax_heatmap.get_xmajorticklabels(), fontsize=18)
    # The thin black outline keeps light tissue colors readable.
    ax_heatmap.set_yticklabels(
        ax_heatmap.get_ymajorticklabels(),
        fontsize=18,
        path_effects=[pe.withStroke(linewidth=0.75, foreground="black")],
    )
    # Color each row label by tissue. Every row label must be a key of
    # `colors_tissues`, otherwise this raises KeyError.
    for tick_label in ax_heatmap.get_yticklabels():
        tick_label.set_color(colors_tissues[tick_label.get_text()])

    # Place the horizontal colorbar just above the column dendrogram,
    # spanning its width, and use the metric name as its title.
    dendrogram_pos = clustermap.ax_col_dendrogram.get_position()
    ax_cbar.set_position(
        [dendrogram_pos.x0, dendrogram_pos.y1 + 0.05, dendrogram_pos.width, 0.03]
    )
    ax_cbar.set_title(m_name)
    for spine in ax_cbar.spines.values():
        spine.set(visible=True, lw=1, edgecolor="black")

    # Save through the clustermap's own figure rather than the implicit
    # "current figure", then close it to release memory before the next one.
    fig = clustermap.fig
    fig.savefig(f"{path_save}/{m_name}.png", bbox_inches='tight', dpi=200)
    fig.savefig(f"{path_save}/{m_name}.pdf", bbox_inches='tight')
    plt.close(fig)