In [None]:
# Enable the autoreload extension (mode 2) so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
from omegaconf import OmegaConf
from glob import glob
import numpy as np
import os
import seaborn as sns
import patchworklib as pw

In [None]:
# Base directory; the two paths below are derived from it.
path = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN"
# Working directory of this analysis: limits are read from it and all
# result tables / figures are written into it.
path_save = f"{path}/special/057_multiplex_vs_elisa"
# Directory that holds the input table data.xlsx.
path_data = f"{path}/data/immuno/models/SImAge"

# Get data and limits, count samples

In [None]:

# Load the full table and keep only the rows whose Status is "Control".
df = pd.read_excel(f"{path_data}/data.xlsx", index_col=0)
is_control = df['Status'] == "Control"
df_ctrl = df.loc[is_control, :]

# Per-feature limits: one row per feature with 'low' and 'high' columns.
df_lims = pd.read_excel(f"{path_save}/lims.xlsx", index_col=0)

# For every feature, count the control samples lying inside [low, high]
# (both bounds inclusive).
df_passed = pd.DataFrame(index=df_lims.index, columns=['Passed'])
for feat in df_lims.index:
    low = df_lims.at[feat, 'low']
    high = df_lims.at[feat, 'high']
    within_limits = df_ctrl[feat].between(low, high)
    df_passed.at[feat, 'Passed'] = int(within_limits.sum())

df_passed.to_excel(f"{path_save}/passed.xlsx", index_label="Feature")

# Collect ML results

In [None]:
# Collect the metrics and Hydra overrides of every run of `model` into one
# summary table (one row per metrics file) and save it as summary.xlsx.
model = 'widedeep_ft_transformer_trn_val_tst'

path_runs = f"{path_save}/multiplex/models/{model}/multiruns"

files = glob(f"{path_runs}/*/*/metrics_all_best_*.xlsx")
if not files:
    # Fail with an explicit message instead of an IndexError on files[0].
    raise FileNotFoundError(f"No metrics files found under {path_runs}")

# Names of the overridden parameters, taken from the first run.
head, tail = os.path.split(files[0])
cfg = OmegaConf.load(f"{head}/.hydra/overrides.yaml")
# Split on the first '=' only: an override value may itself contain '='.
params = [param_pair.split('=', 1)[0] for param_pair in cfg]

# Columns of each metrics file that are copied into the summary, in output order.
parts = [
    "val",
    "trn",
    "tst_ctrl",
    "tst_esrd",
    "trn_val",
    "val_tst_ctrl",
    "trn_val_tst_ctrl",
]

# Build one record per file and create the frame once, instead of growing it
# cell by cell (which is slow and fragments the DataFrame).
records = []
for file in files:

    head, tail = os.path.split(file)
    record = {}

    # Metrics: one "<metric>_<part>" entry per metric row and part column.
    df_metrics = pd.read_excel(file, index_col="metric")
    for metric in df_metrics.index.values:
        for part in parts:
            record[f"{metric}_{part}"] = df_metrics.at[metric, part]

    # Params: values are kept as the raw strings from overrides.yaml.
    cfg = OmegaConf.load(f"{head}/.hydra/overrides.yaml")
    for param_pair in cfg:
        param, val = param_pair.split('=', 1)
        record[param] = val

    records.append(record)

df_res = pd.DataFrame(records, index=files)

# Flag runs whose training MAE is larger than their validation MAE.
df_res["train_more_val"] = df_res["mean_absolute_error_trn"] > df_res["mean_absolute_error_val"]
# Placeholder column, initialised to False for every run.
df_res["selected"] = False

# Columns shown first in the summary; every other column follows in its
# original order.
first_columns = [
    'selected',
    'train_more_val',
    'mean_absolute_error_trn',
    'mean_absolute_error_val',
    'mean_absolute_error_tst_ctrl',
    'mean_absolute_error_val_tst_ctrl',
    'mean_absolute_error_trn_val_tst_ctrl',
    'pearson_corr_coef_trn',
    'pearson_corr_coef_val',
    'pearson_corr_coef_tst_ctrl',
    'pearson_corr_coef_val_tst_ctrl',
    'pearson_corr_coef_trn_val_tst_ctrl',
    'mean_absolute_error_cv_mean_trn',
    'mean_absolute_error_cv_std_trn',
    'mean_absolute_error_cv_mean_val',
    'mean_absolute_error_cv_std_val',
    'pearson_corr_coef_cv_mean_trn',
    'pearson_corr_coef_cv_std_trn',
    'pearson_corr_coef_cv_mean_val',
    'pearson_corr_coef_cv_std_val',
]
df_res = df_res[first_columns + [col for col in df_res.columns if col not in first_columns]]
df_res.to_excel(f"{path_runs}/summary.xlsx", index=True, index_label="file")

# Plot distribution of features

In [None]:
# Draw one violin plot per feature listed in feats_con.xlsx, using control
# samples only, arrange the panels in a grid of n_cols columns, and record
# each feature's 5th and 95th percentiles.
df = pd.read_excel(f"{path_data}/data.xlsx", index_col=0)
df_ctrl = df.loc[df['Status'] == "Control", :]

feats = pd.read_excel(f"{path}/data/immuno/feats_con.xlsx", index_col=0).index.values

df_feats_perc = pd.DataFrame(index=feats, columns=['0.05', '0.95'])

n_cols = 6
n_rows = int(np.ceil(len(feats) / n_cols))

# The theme does not depend on the feature: set it once, before any brick is
# created, so that every panel (including the first) is styled the same way.
sns.set_theme(style='whitegrid')

axs = {}
pw_rows = []
for r_id in range(n_rows):
    pw_cols = []
    for c_id in range(n_cols):
        rc_id = r_id * n_cols + c_id
        if rc_id < len(feats):
            feat = feats[rc_id]
            # Values must be fetched before they are used; the original cell
            # referenced data_fig before assigning it (NameError on a fresh kernel).
            data_fig = df_ctrl[feat].values
            perc = np.percentile(data_fig, [5, 95])
            df_feats_perc.at[feat, '0.05'] = perc[0]
            df_feats_perc.at[feat, '0.95'] = perc[1]

            axs[feat] = pw.Brick(figsize=(2, 2))
            sns.violinplot(
                data=data_fig,
                edgecolor='k',
                cut=0,
                saturation=0.75,
                ax=axs[feat]
            )
            axs[feat].set(xticklabels=[])
            axs[feat].set_ylabel(feat)
            axs[feat].set_xlabel("")
            pw_cols.append(axs[feat])
        else:
            # Pad the last row with invisible bricks so every row has n_cols panels.
            empty_fig = pw.Brick(figsize=(2.75, 2))
            empty_fig.axis('off')
            pw_cols.append(empty_fig)
    pw_rows.append(pw.stack(pw_cols, operator="|"))
pw_fig = pw.stack(pw_rows, operator="/")
pw_fig.savefig(f"{path_save}/multiplex_feats.pdf")
pw_fig.savefig(f"{path_save}/multiplex_feats.png", bbox_inches='tight', dpi=200)
pw.clear()
df_feats_perc.to_excel(f"{path_save}/multiplex_feats.xlsx", index_label='Features')