# Debugging autoreload

In [None]:
# IPython magics: load the autoreload extension and switch it to mode 2, so
# that imported modules are reloaded automatically before code is executed.
%load_ext autoreload
%autoreload 2

# Load packages

In [1]:
from pytorch_tabular.utils import load_covertype_dataset
from rich.pretty import pprint
from sklearn.model_selection import BaseCrossValidator, ParameterGrid, ParameterSampler
import torch
import pickle
import shutil
import shap
from sklearn.model_selection import RepeatedStratifiedKFold
from glob import glob
import ast
import matplotlib.pyplot as plt
import seaborn as sns
import copy
from sklearn.model_selection import train_test_split
import numpy as np
from pytorch_tabular.utils import make_mixed_dataset, print_metrics
from pytorch_tabular import available_models
from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig, GANDALFConfig, TabNetModelConfig, FTTransformerConfig, DANetConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig
from pytorch_tabular.tabular_model_tuner import TabularModelTuner
from torchmetrics.functional.regression import mean_absolute_error, pearson_corrcoef
from pytorch_tabular import MODEL_SWEEP_PRESETS
import pandas as pd
from pytorch_tabular import model_sweep
from src.pt.model_sweep import model_sweep_custom
import warnings
from src.utils.configs import read_parse_config
from src.utils.hash import dict_hash
from src.pt.hyper_opt import train_hyper_opt
import pathlib
from tqdm import tqdm
import distinctipy
import matplotlib.patheffects as pe
import matplotlib.colors as mcolors
from statannotations.Annotator import Annotator
from scipy.stats import mannwhitneyu
from regression_bias_corrector import LinearBiasCorrector
import optuna
from sklearn.preprocessing import LabelEncoder
from plottable import ColumnDefinition, Table
from plottable.plots import bar
from plottable.cmap import normed_cmap, centered_cmap
import matplotlib.lines as mlines
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import scipy.stats
from functools import reduce
from sklearn.impute import KNNImputer


def make_rgb_transparent(rgb, bg_rgb, alpha):
    """Blend a colour with a background colour, channel by channel.

    Args:
        rgb: Foreground colour channels.
        bg_rgb: Background colour channels.
        alpha: Weight of the foreground; the background gets ``1 - alpha``.

    Returns:
        A list with one blended value per channel pair. As with ``zip``,
        the shorter of the two inputs determines the length.
    """
    blended = []
    for fg_channel, bg_channel in zip(rgb, bg_rgb):
        blended.append(alpha * fg_channel + (1 - alpha) * bg_channel)
    return blended

# Load data and models for subsets of features

In [None]:
# Root folder with the project data (absolute path on the author's machine).
path = "E:/YandexDisk/Work/bbd/atlas"

# Name of the target column; predictions are read from '<target>_prediction'.
feat_trgt = 'Возраст'

# Full table, read from the versioned workbook with the first column as index.
data_suffix = '_v3'
data = pd.read_excel(
    f"{path}/data{data_suffix}.xlsx",
    index_col=0,
)

# Compact description of every feature-subset component:
# (key, display name, subset folder, DANet run folder, colour, bkg_count).
_component_specs = (
    ("InBody", "Биоимпеданс (InBody)", "subset_InBody-mRMR_no-sex", "2/432", "dodgerblue", 50),
    ("CompleteBloodCount", "Общий анализ крови", "subset_CBC_no-sex", "2/67", "crimson", 300),
    ("BloodBiochemical", "Биохимия крови", "subset_BloodBiochemical_no-sex", "1/630", "cyan", 150),
    ("LipidProfile", "Липидный профиль", "subset_LipidProfile_no-sex", "1/798", "gold", 100),
    ("CoagulationTest", "Коагулограмма", "subset_CoagulationTest_no-sex", "1/868", "olive", 100),
    ("HormoneProfile", "Гормональный профиль", "subset_HormoneProfile_no-sex", "1/425", "chocolate", 50),
    ("ProstateSpecificAntigenTest", "Простатический специфический антиген", "subset_ProstateSpecificAntigenTest_no-sex", "1/529", "lawngreen", 50),
    ("RheumatologyScreening", "Ревматологический тест", "subset_RheumatologyScreening_no-sex", "1/48", "gray", 50),
    ("BloodPressure", "Кровяное Давление", "subset_BloodPressure_no-sex", "1/75", "orchid", 300),
)

# Registry keyed by component id. Each entry stores the display name, the
# subset folder, the folder of the saved model, the colour and the
# 'bkg_count' value, in that key order.
components = {
    key: {
        'name': title,
        'path': f"{path}/{subset}",
        'path_model': f"{path}/{subset}/models/DANet/{run}",
        'color': color,
        'bkg_count': bkg_count,
    }
    for key, title, subset, run, color, bkg_count in _component_specs
}

# Load tables, trained model and bias corrector for every component, and
# collect the names of all feature / prediction columns in `feats_all`.
feats_all = []
for comp in components:
    comp_dict = components[comp]
    comp_dict['data'] = pd.read_excel(f"{comp_dict['path']}/data.xlsx", index_col=0)
    comp_dict['feats'] = pd.read_excel(f"{comp_dict['path']}/feats.xlsx", index_col=0)
    comp_dict['results'] = pd.read_excel(f"{comp_dict['path_model']}/df.xlsx", index_col=0)
    comp_dict['metrics'] = pd.read_excel(f"{comp_dict['path_model']}/metrics.xlsx", index_col=0)
    comp_dict['shap'] = pd.read_excel(f"{comp_dict['path_model']}/explanation.xlsx", index_col=0)
    comp_dict['model'] = TabularModel.load_model(f"{comp_dict['path_model']}")

    # Fit the bias corrector on the rows marked as 'Train' in the results.
    comp_dict['corrector'] = LinearBiasCorrector()
    comp_results = comp_dict['results']
    is_train = comp_results['Group'] == 'Train'
    comp_dict['corrector'].fit(comp_results.loc[is_train, feat_trgt].values, comp_results.loc[is_train, 'Prediction'].values)

    # Copy the stored result columns into the component's data table.
    res_cols = ['Group', 'Prediction', 'Error', 'Prediction Unbiased', 'Error Unbiased']
    comp_dict['data'].loc[comp_dict['data'].index, res_cols] = comp_results.loc[comp_dict['data'].index, res_cols]
    comp_dict['data_shap'] = comp_dict['data'].copy()

    feats = comp_dict['feats'].index.values
    feats = feats[feats != feat_trgt]
    feats_all += list(feats)
    feats_all += [f"Предсказание {comp_dict['name']}", f"Возрастная Аккселерация {comp_dict['name']}"]

    # The model, corrector and feature names are bound as keyword-only
    # defaults: a plain closure would look up the globals `comp` / `feats`
    # at call time, so every stored function would silently use the last
    # component processed (or whatever `feats` is rebound to later).
    def predict_func(X, *, _model=comp_dict['model'], _corrector=comp_dict['corrector'], _feats=feats):
        """Return bias-corrected predictions for the 2-D feature array `X`."""
        X_df = pd.DataFrame(data=X, columns=_feats)
        y = _model.predict(X_df)[f'{feat_trgt}_prediction'].values
        y = _corrector.predict(y)
        return y

    comp_dict['predict_func'] = predict_func

In [None]:
# Report the number of rows in each component's data table.
for comp in components:
    n_samples = len(components[comp]['data'])
    print(f"{comp}: {n_samples}")

# Prediction for samples

In [None]:
# A component is evaluated for a sample only when the share of its missing
# features is strictly below this threshold.
nan_part = 0.2

# Result table, filled row by row with the samples that could be evaluated.
samples = pd.DataFrame(columns=feats_all)

for sample_id in (pbar := tqdm(data.index.values)):
    pbar.set_description(f"Sample {sample_id}")
    
    for comp in components:
        # Feature names of the component, with and without the target column.
        feats_w_trgt = components[comp]['feats'].index.values
        feats = feats_w_trgt[feats_w_trgt != feat_trgt]
        n_feats = len(feats)
        n_nans = data.loc[sample_id, feats].isna().sum()
        if n_nans / n_feats < nan_part:
            data_sample = data.loc[[sample_id], feats_w_trgt]
            # NOTE(review): whenever nan_part <= 1 this condition is always
            # true inside the outer `if`, so the imputer is fitted even for
            # samples without missing values -- confirm whether
            # `n_nans != 0` was intended.
            if n_nans != n_feats:
                # Stack the sample on top of the component's own data and
                # impute with KNN; row 0 of the result is the sample itself.
                # NOTE(review): the imputer is given all `feats_w_trgt`
                # columns -- presumably this includes the target; verify.
                data_bkcg = components[comp]['data'].loc[:, feats_w_trgt]
                data_imp = pd.concat([data_sample, data_bkcg], axis=0, ignore_index=True)
                imputer = KNNImputer(n_neighbors=5)
                data_sample.loc[sample_id, feats_w_trgt] = imputer.fit_transform(data_imp.loc[:, feats_w_trgt].values)[0, :]
            # Model prediction followed by the component's bias correction.
            pred = components[comp]['model'].predict(data_sample)[f'{feat_trgt}_prediction'].values
            pred = components[comp]['corrector'].predict(pred)
            # Store the prediction and its difference from the target value.
            data_sample.at[sample_id, f"Предсказание {components[comp]['name']}"] = pred
            data_sample.at[sample_id, f"Возрастная Аккселерация {components[comp]['name']}"] = pred - data_sample.at[sample_id, feat_trgt]
            samples.loc[sample_id, data_sample.columns] = data_sample.loc[sample_id, data_sample.columns]

# Persist the collected table.
samples.to_excel(f"{path}/AtlasAge/data.xlsx")

## Load calculated data

In [11]:
# Read the table from the AtlasAge workbook; the first column is the index.
samples = pd.read_excel(f"{path}/AtlasAge/data.xlsx", index_col=0)

# Local explainability

In [None]:
# Index label of the sample to explain.
trgt_id = 144

# Reload tables, trained model and bias corrector for every component.
for comp in components:
    comp_dict = components[comp]
    # NOTE(review): this also rebinds the global `data` to the component's
    # table on every iteration -- confirm that this is intended.
    data = comp_dict['data'] = pd.read_excel(f"{comp_dict['path']}/data.xlsx", index_col=0)
    comp_dict['feats'] = pd.read_excel(f"{comp_dict['path']}/feats.xlsx", index_col=0)
    comp_dict['results'] = pd.read_excel(f"{comp_dict['path_model']}/df.xlsx", index_col=0)
    comp_dict['metrics'] = pd.read_excel(f"{comp_dict['path_model']}/metrics.xlsx", index_col=0)
    comp_dict['shap'] = pd.read_excel(f"{comp_dict['path_model']}/explanation.xlsx", index_col=0)
    comp_dict['model'] = TabularModel.load_model(f"{comp_dict['path_model']}")

    # Fit the bias corrector on the rows marked as 'Train' in the results.
    comp_dict['corrector'] = LinearBiasCorrector()
    comp_results = comp_dict['results']
    is_train = comp_results['Group'] == 'Train'
    comp_dict['corrector'].fit(comp_results.loc[is_train, feat_trgt].values, comp_results.loc[is_train, 'Prediction'].values)

    # Copy the stored result columns into the component's data table.
    res_cols = ['Group', 'Prediction', 'Error', 'Prediction Unbiased', 'Error Unbiased']
    comp_dict['data'].loc[comp_dict['data'].index, res_cols] = comp_results.loc[comp_dict['data'].index, res_cols]
    comp_dict['data_shap'] = comp_dict['data'].copy()

    feats = comp_dict['feats'].index.values
    feats = feats[feats != feat_trgt]
    feats_all += list(feats)
    feats_all += [f"Предсказание {comp_dict['name']}", f"Возрастная Аккселерация {comp_dict['name']}"]

    # The model, corrector and feature names are bound as keyword-only
    # defaults: a plain closure would look up the globals `comp` / `feats`
    # at call time, so every stored function would silently use the last
    # component processed (or whatever `feats` is rebound to later).
    def predict_func(X, *, _model=comp_dict['model'], _corrector=comp_dict['corrector'], _feats=feats):
        """Return bias-corrected predictions for the 2-D feature array `X`."""
        X_df = pd.DataFrame(data=X, columns=_feats)
        y = _model.predict(X_df)[f'{feat_trgt}_prediction'].values
        y = _corrector.predict(y)
        return y

    comp_dict['predict_func'] = predict_func



# Local explanation of sample `trgt_id`, computed separately per dataset.
data_all = []
feats_all = []
local_exlp = {}

datasets_trgt = ['inbody_mrmr', 'lab']

for ds in datasets_trgt:
    ds_feats = datasets[ds]['feats']
    feats = ds_feats.index.values
    feats = feats[feats != 'Возраст']
    feats_cnt = ds_feats.index[ds_feats['Type'] == 'continuous'].to_list()
    # `feats_cnt` is a plain list, so the target is filtered out element by
    # element; a mask-style `feats_cnt[feats_cnt != ...]` would index the
    # list with a single bool and return one element instead.
    feats_cnt = [feat for feat in feats_cnt if feat != 'Возраст']
    feats_cat = ds_feats.index[ds_feats['Type'] != 'continuous'].to_list()

    ds_data = datasets[ds]['data']
    ds_results = datasets[ds]['results']
    ds_metrics = datasets[ds]['metrics']
    ds_shap = datasets[ds]['shap']
    ds_model = datasets[ds]['model']
    ds_corrector = datasets[ds]['corrector']
    ds_color = datasets[ds]['color']
    ds_data_shap = datasets[ds]['data_shap']
    ds_predict_func = datasets[ds]['predict_func']

    # Age acceleration = unbiased prediction minus the target (age) value.
    trgt_age = ds_data_shap.at[trgt_id, feat_trgt]
    trgt_pred = ds_data_shap.at[trgt_id, 'Prediction Unbiased']
    trgt_aa = trgt_pred - trgt_age

    # Background set: rows whose unbiased prediction is closest to the
    # target sample's age.
    n_closest = datasets[ds]['bkg_count']
    data_closest = ds_data_shap.iloc[(ds_data_shap['Prediction Unbiased'] - trgt_age).abs().argsort()[:n_closest]]

    explainer = shap.SamplingExplainer(ds_predict_func, data_closest.loc[:, feats].values)
    shap_values = explainer.shap_values(ds_data_shap.loc[[trgt_id], feats].values)[0]
    # Rescale by (prediction - age) / (prediction - expected value),
    # presumably so the contributions add up to the age acceleration.
    # NOTE(review): divides by zero if the prediction equals the expected value.
    shap_values = shap_values * (trgt_pred - trgt_age) / (trgt_pred - explainer.expected_value)
    # Weight by the dataset's test correlation, split across the datasets.
    shap_values *= ds_metrics.at['Test', 'pearson_corrcoef_unbiased'] / len(datasets_trgt)

    df_shap = pd.DataFrame(index=feats, data=shap_values, columns=[trgt_id])
    df_shap.sort_values(by=trgt_id, key=abs, inplace=True)

    # For every non-categorical feature: percentile of the target sample's
    # value within the background set ('Меньше') and its complement ('Больше').
    df_less_more = pd.DataFrame(index=df_shap.index, columns=['Меньше', 'Больше'])
    df_cat_part = {}
    for f in df_less_more.index:
        if ds_feats.at[f, 'Type'] != 'categorical':
            df_less_more.at[f, 'Меньше'] = round(scipy.stats.percentileofscore(data_closest.loc[:, f].values, ds_data_shap.at[trgt_id, f]))
            df_less_more.at[f, 'Больше'] = 100.0 - df_less_more.at[f, 'Меньше']
        else:
            df_less_more.at[f, 'Меньше'] = np.nan
            df_less_more.at[f, 'Больше'] = np.nan

            # NOTE(review): the distribution is always taken from the 'Пол'
            # column, whatever categorical feature `f` is -- confirm that
            # 'Пол' is the only categorical feature.
            f_value_counts = data_closest.loc[:, 'Пол'].value_counts()
            f_value_counts_rename = {x: datasets[ds]['cat_encoders']['Пол'].inverse_transform([x])[0] for x in f_value_counts.index.astype(int).values}
            f_value_counts.rename(index=f_value_counts_rename, inplace=True)
            # Convert counts to rounded percentages.
            f_value_counts = np.rint(f_value_counts / f_value_counts.sum() * 100)

            df_cat_part[f] = {
                'distribution': f_value_counts.astype(int)
            }
            if f == 'Пол':
                df_cat_part[f]['palette'] = {'жен': 'crimson', 'муж': 'dodgerblue'}

    local_exlp[ds] = {
        'df_shap': df_shap,
        'df_less_more': df_less_more,
        'df_cat_part': df_cat_part,
        'age_acceleration': (trgt_pred - trgt_age) * ds_metrics.at['Test', 'pearson_corrcoef_unbiased'] / len(datasets_trgt),
    }

    data_all.append(ds_data.loc[[trgt_id], :])
    feats_all.append(ds_feats.loc[feats, :])

# Merge the per-dataset rows of the target sample into one row, and the
# per-dataset feature tables into one table without duplicated features.
data_all = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, suffixes=('', '_y')), data_all)
feats_all = pd.concat(feats_all)
feats_all = feats_all[~feats_all.index.duplicated(keep='first')]

# Feature shared by the datasets: its contributions are summed into one row.
feat_cmn = 'Пол'

df_shap_cmn = pd.DataFrame(index=[feat_cmn], columns=[trgt_id], data=np.zeros(1))
dfs_shap = [df_shap_cmn]
# NOTE(review): this frame has a `trgt_id` column while the per-dataset
# frames use 'Меньше' / 'Больше'; after concatenation the shared row is NaN
# in both of those columns.
df_less_more_cmn = pd.DataFrame(index=[feat_cmn], columns=[trgt_id], data=np.nan)
dfs_less_more_cmn = [df_less_more_cmn]
df_cat_part_cmn = {
    'distribution': pd.Series(index=['жен', 'муж'], data=[0, 0]),
    'palette': {'жен': 'crimson', 'муж': 'dodgerblue'}
}
for ds in datasets_trgt:
    print(local_exlp[ds]['age_acceleration'])
    df_shap_cmn.at[feat_cmn, trgt_id] += local_exlp[ds]['df_shap'].at[feat_cmn, trgt_id]
    # Average the distribution over the datasets, reading each dataset's own
    # stored distribution (not the variable left over from the last loop
    # iteration). fill_value=0 keeps a category that is absent from one
    # dataset from turning into NaN.
    ds_cmn_share = local_exlp[ds]['df_cat_part'][feat_cmn]['distribution']
    df_cat_part_cmn['distribution'] = df_cat_part_cmn['distribution'].add(ds_cmn_share / len(datasets_trgt), fill_value=0)
    dfs_shap.append(local_exlp[ds]['df_shap'].drop([feat_cmn]))
    dfs_less_more_cmn.append(local_exlp[ds]['df_less_more'].drop([feat_cmn]))

# One table for all features, ordered by absolute contribution.
df_shap_union = pd.concat(dfs_shap)
df_less_more_union = pd.concat(dfs_less_more_cmn)
df_shap_union.sort_values(by=trgt_id, key=abs, inplace=True)
df_shap_union['cumsum'] = df_shap_union[trgt_id].cumsum()
df_less_more_union = df_less_more_union.loc[df_shap_union.index, :]
trgt_aa = df_shap_union[trgt_id].sum()
trgt_age = data_all.at[trgt_id, feat_trgt]

# Per-dataset age accelerations shown as separate bars in the figure.
aa_1 = local_exlp['lab']['age_acceleration']
aa_2 = local_exlp['inbody_mrmr']['age_acceleration']

# Two-panel figure: waterfall of contributions (left) and distribution of
# the feature values (right); both panels share the y axis.
fig = make_subplots(rows=1, cols=2, shared_yaxes=True, shared_xaxes=False, column_widths=[2.5, 1], horizontal_spacing=0.15, subplot_titles=['', "Распределение признаков у людей<br>в данном возрастном диапазоне"])
# Summary bars: chronological age below the feature rows, the two
# per-dataset age accelerations and the resulting total above them.
fig.add_trace(
    go.Waterfall(
        hovertext=["Хронологический возраст", "Возрастная акселерация (Анализ Крови)", "Возрастная акселерация (Биоимпеданс)", "Биологический возраст"],
        orientation="h",
        measure=['absolute', 'relative', 'relative', 'absolute'],
        y=[-1.5, df_shap_union.shape[0] + 0.5, df_shap_union.shape[0] + 1.5, df_shap_union.shape[0] + 2.5],
        x=[trgt_age, aa_1, aa_2, trgt_age+trgt_aa],
        base=0,
        text=[f"{trgt_age:0.2f}", f"+{aa_1:0.2f}" if aa_1 > 0 else f"{aa_1:0.2f}", f"+{aa_2:0.2f}" if aa_2 > 0 else f"{aa_2:0.2f}", f"{trgt_age+trgt_aa:0.2f}"],
        textposition = "auto",
        decreasing = {"marker":{"color": "deepskyblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "crimson", "line": {"color": "black", "width": 1}}},
        totals= {"marker":{"color": "dimgray", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "dot"},
        },
    ),
    row=1,
    col=1,
)
# Per-feature contributions, accumulated starting from the chronological age.
fig.add_trace(
    go.Waterfall(
        hovertext=df_shap_union.index.values,
        orientation="h",
        # One entry per plotted row of the union table (not per feature of
        # the dataset processed last).
        measure=["relative"] * df_shap_union.shape[0],
        y=list(range(df_shap_union.shape[0])),
        x=df_shap_union[trgt_id].values,
        base=trgt_age,
        text=[f"+{x:0.2f}" if x > 0 else f"{x:0.2f}" for x in df_shap_union[trgt_id].values],
        textposition = "auto",
        decreasing = {"marker":{"color": "lightblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "lightcoral", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "solid"},
        },
    ),
    row=1,
    col=1,
)
fig.update_traces(row=1, col=1, showlegend=False)
# Tick labels: "<feature> = <value of the target sample>" for feature rows.
fig.update_yaxes(
    row=1,
    col=1,
    automargin=True,
    tickmode="array",
    tickvals=[-1.5] + list(range(df_shap_union.shape[0])) + [df_shap_union.shape[0] + 0.5, df_shap_union.shape[0] + 1.5, df_shap_union.shape[0] + 2.5],
    ticktext=["Хронологический возраст"] + [f"{x} = {data_all.at[trgt_id, x]:0.2f}" if feats_all.at[x, 'Type'] != 'categorical' else f"{x} = {data_all.at[trgt_id, x]}" for x in df_shap_union.index] + ["Возрастная акселерация (Анализ Крови)", "Возрастная акселерация (Биоимпеданс)", "Биологический возраст"],
    tickfont=dict(size=18),
)
fig.update_xaxes(
    row=1,
    col=1,
    automargin=True,
    # `title.font` replaces the deprecated `titlefont` attribute.
    title=dict(text='Возраст', font=dict(size=25)),
    range=[
        trgt_age + df_shap_union['cumsum'].min() * 1.2 - 2,
        trgt_age + df_shap_union['cumsum'].max() * 1.2 + 2
    ],
)

# Right panel: for every feature row, stacked bars with the 'Меньше' and
# 'Больше' values, added in this order.
for side_name, side_color in (('Меньше', 'steelblue'), ('Больше', 'violet')):
    side_share = df_less_more_union.loc[df_shap_union.index.values, side_name]
    fig.add_trace(
        go.Bar(
            hovertext=df_shap_union.index.values,
            orientation="h",
            name=side_name,
            x=side_share,
            y=list(range(df_shap_union.shape[0])),
            marker=dict(color=side_color, line=dict(color="black", width=1)),
            text=side_share,
            textposition='auto',
        ),
        row=1,
        col=2,
    )

# Shared categorical feature: one stacked segment per category, drawn on the
# row that the feature occupies in the union table.
cmn_distribution = df_cat_part_cmn['distribution']
for f_val in cmn_distribution.index:
    cmn_segment = go.Bar(
        hovertext=[feat_cmn],
        orientation="h",
        name=f_val,
        x=[cmn_distribution[f_val]],
        y=[df_shap_union.index.get_loc(feat_cmn)],
        marker=dict(color=df_cat_part_cmn['palette'][f_val], line=dict(color="black", width=1)),
        text=[f_val],
        textposition='auto',
        showlegend=False,
    )
    fig.add_trace(cmn_segment, row=1, col=2)

# Right panel: hide grid, axis lines, zero lines and tick labels.
fig.update_xaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
fig.update_yaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
# Bar traces are stacked.
fig.update_layout(barmode="stack")
# Horizontal legend.
fig.update_layout(
    legend=dict(
        title=dict(side="top"),
        orientation="h",
        yanchor="bottom",
        y=0.98,
        xanchor="center",
        x=0.87
    ),
)
fig.update_layout(
    # `title.font` replaces the deprecated `titlefont` attribute.
    title=dict(text=f"Возрастная акселерация для {trgt_id}", font=dict(size=25)),
    template="none",
    width=1600,
    height=1300,
    margin=go.layout.Margin(l=120, r=100, b=50, t=50, pad=0),
)
# Display the figure, then export it as PDF and double-scale PNG together
# with the table of contributions.
fig.show()
shap_local_stem = f"{path}/complex_model/shap_local/{trgt_id}"
fig.write_image(f"{shap_local_stem}.pdf", format="pdf")
fig.write_image(f"{shap_local_stem}.png", scale=2)
df_shap_union.to_excel(f"{shap_local_stem}.xlsx")

# Local explainability checking

## Inbody + Blood model

In [None]:
# Dataset and sample to explain.
ds = 'inbody_mrmr_lab'

trgt_id = 18698

ds_feats = datasets[ds]['feats']
feats = ds_feats.index.values
feats = feats[feats != 'Возраст']
feats_cnt = ds_feats.index[ds_feats['Type'] == 'continuous'].to_list()
# `feats_cnt` is a plain list, so the target is filtered out element by
# element; a mask-style `feats_cnt[feats_cnt != ...]` would index the list
# with a single bool and return one element instead.
feats_cnt = [feat for feat in feats_cnt if feat != 'Возраст']
feats_cat = ds_feats.index[ds_feats['Type'] != 'continuous'].to_list()

ds_data = datasets[ds]['data']
ds_results = datasets[ds]['results']
ds_metrics = datasets[ds]['metrics']
ds_shap = datasets[ds]['shap']
ds_model = datasets[ds]['model']
ds_corrector = datasets[ds]['corrector']
ds_color = datasets[ds]['color']
ds_data_shap = datasets[ds]['data_shap']
ds_predict_func = datasets[ds]['predict_func']

# Age acceleration = unbiased prediction minus the target (age) value.
trgt_age = ds_data_shap.at[trgt_id, feat_trgt]
trgt_pred = ds_data_shap.at[trgt_id, 'Prediction Unbiased']
trgt_aa = trgt_pred - trgt_age
print(trgt_age)
print(trgt_pred)

# Background set: rows whose unbiased prediction is closest to the target
# sample's age.
n_closest = datasets[ds]['bkg_count']
data_closest = ds_data_shap.iloc[(ds_data_shap['Prediction Unbiased'] - trgt_age).abs().argsort()[:n_closest]]

explainer = shap.SamplingExplainer(ds_predict_func, data_closest.loc[:, feats].values)
print(explainer.expected_value)
shap_values = explainer.shap_values(ds_data_shap.loc[[trgt_id], feats].values)[0]
# Rescale by (prediction - age) / (prediction - expected value), presumably
# so the contributions add up to the age acceleration.
# NOTE(review): divides by zero if the prediction equals the expected value.
shap_values = shap_values * (trgt_pred - trgt_age) / (trgt_pred - explainer.expected_value)

df_shap = pd.DataFrame(index=feats, data=shap_values, columns=[trgt_id])
df_shap.sort_values(by=trgt_id, key=abs, inplace=True)
df_shap['cumsum'] = df_shap[trgt_id].cumsum()

# The frame is created with the columns that are actually filled and
# plotted ('Меньше' / 'Больше').
df_less_more = pd.DataFrame(index=df_shap.index, columns=['Меньше', 'Больше'])
df_cat_part = {}
for f_id, f in enumerate(df_less_more.index):
    if ds_feats.at[f, 'Type'] != 'categorical':
        # Percentile of the target sample's value within the background set.
        df_less_more.at[f, 'Меньше'] = round(scipy.stats.percentileofscore(data_closest.loc[:, f].values, ds_data_shap.at[trgt_id, f]))
        df_less_more.at[f, 'Больше'] = 100.0 - df_less_more.at[f, 'Меньше']
    else:
        df_less_more.at[f, 'Меньше'] = np.nan
        df_less_more.at[f, 'Больше'] = np.nan

        # NOTE(review): the distribution is always taken from the 'Пол'
        # column, whatever categorical feature `f` is -- confirm that 'Пол'
        # is the only categorical feature.
        f_value_counts = data_closest.loc[:, 'Пол'].value_counts()
        f_value_counts_rename = {x: datasets[ds]['cat_encoders']['Пол'].inverse_transform([x])[0] for x in f_value_counts.index.astype(int).values}
        f_value_counts.rename(index=f_value_counts_rename, inplace=True)
        # Convert counts to rounded percentages.
        f_value_counts = np.rint(f_value_counts / f_value_counts.sum() * 100)

        # Keyed by the row position of the feature in the sorted table.
        df_cat_part[f_id] = {
            'name': f,
            'distribution': f_value_counts.astype(int)
        }
        if f == 'Пол':
            df_cat_part[f_id]['palette'] = {'жен': 'crimson', 'муж': 'dodgerblue'}

# Two-panel figure: waterfall of contributions (left) and distribution of
# the feature values (right); both panels share the y axis.
fig = make_subplots(rows=1, cols=2, shared_yaxes=True, shared_xaxes=False, column_widths=[2.5, 1], horizontal_spacing=0.05, subplot_titles=['', "Распределение признаков у людей<br>в данном возрастном диапазоне"])
# Summary bars: chronological age below the feature rows, the age
# acceleration and the resulting total above them.
fig.add_trace(
    go.Waterfall(
        hovertext=["Хронологический возраст", "Возрастная акселерация", "Биологический возраст"],
        orientation="h",
        measure=['absolute', 'relative', 'absolute'],
        y=[-1.5, df_shap.shape[0] + 0.5, df_shap.shape[0] + 1.5],
        x=[trgt_age, trgt_aa, trgt_age+trgt_aa],
        base=0,
        text=[f"{trgt_age:0.2f}", f"+{trgt_aa:0.2f}" if trgt_aa > 0 else f"{trgt_aa:0.2f}", f"{trgt_age+trgt_aa:0.2f}"],
        textposition = "auto",
        decreasing = {"marker":{"color": "deepskyblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "crimson", "line": {"color": "black", "width": 1}}},
        totals= {"marker":{"color": "dimgray", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "dot"},
        },
    ),
    row=1,
    col=1,
)
# Per-feature contributions, accumulated starting from the chronological age.
fig.add_trace(
    go.Waterfall(
        hovertext=df_shap.index.values,
        orientation="h",
        # One entry per plotted row.
        measure=["relative"] * df_shap.shape[0],
        y=list(range(df_shap.shape[0])),
        x=df_shap[trgt_id].values,
        base=trgt_age,
        text=[f"+{x:0.2f}" if x > 0 else f"{x:0.2f}" for x in df_shap[trgt_id].values],
        textposition = "auto",
        decreasing = {"marker":{"color": "lightblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "lightcoral", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "solid"},
        },
    ),
    row=1,
    col=1,
)
fig.update_traces(row=1, col=1, showlegend=False)
# Tick labels: "<feature> = <value of the target sample>" for feature rows.
fig.update_yaxes(
    row=1,
    col=1,
    automargin=True,
    tickmode="array",
    tickvals=[-1.5] + list(range(df_shap.shape[0])) + [df_shap.shape[0] + 0.5, df_shap.shape[0] + 1.5],
    ticktext=["Хронологический возраст"] + [f"{x} = {ds_data.at[trgt_id, x]:0.2f}" if ds_feats.at[x, 'Type'] != 'categorical' else f"{x} = {ds_data.at[trgt_id, x]}" for x in df_shap.index] + ["Возрастная акселерация", "Биологический возраст"],
    tickfont=dict(size=18),
)
fig.update_xaxes(
    row=1,
    col=1,
    automargin=True,
    # `title.font` replaces the deprecated `titlefont` attribute.
    title=dict(text='Возраст', font=dict(size=25)),
    range=[
        trgt_age + df_shap['cumsum'].min() * 1.2 - 2,
        trgt_age + df_shap['cumsum'].max() * 1.2 + 2
    ],
)

# Right panel: for every feature row, stacked bars with the 'Меньше' and
# 'Больше' values, added in this order.
for side_name, side_color in (('Меньше', 'steelblue'), ('Больше', 'violet')):
    side_share = df_less_more.loc[df_shap.index.values, side_name]
    fig.add_trace(
        go.Bar(
            hovertext=df_shap.index.values,
            orientation="h",
            name=side_name,
            x=side_share,
            y=list(range(df_shap.shape[0])),
            marker=dict(color=side_color, line=dict(color="black", width=1)),
            text=side_share,
            textposition='auto',
        ),
        row=1,
        col=2,
    )

# Categorical features: one stacked segment per category, drawn on the row
# position under which the feature was stored.
for f_cat_id, f_cat_dict in df_cat_part.items():
    cat_distribution = f_cat_dict['distribution']
    for f_val in cat_distribution.index:
        cat_segment = go.Bar(
            hovertext=[f_cat_dict['name']],
            orientation="h",
            name=f_val,
            x=[cat_distribution[f_val]],
            y=[f_cat_id],
            marker=dict(color=f_cat_dict['palette'][f_val], line=dict(color="black", width=1)),
            text=[f_val],
            textposition='auto',
            showlegend=False,
        )
        fig.add_trace(cat_segment, row=1, col=2)

# Right panel: hide grid, axis lines, zero lines and tick labels.
fig.update_xaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
fig.update_yaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
# Bar traces are stacked.
fig.update_layout(barmode="stack")
# Horizontal legend.
fig.update_layout(
    legend=dict(
        title=dict(side="top"),
        orientation="h",
        yanchor="bottom",
        y=0.98,
        xanchor="center",
        x=0.87
    ),
)
fig.update_layout(
    # `title.font` replaces the deprecated `titlefont` attribute.
    title=dict(text=f"Возрастная акселерация для {trgt_id}", font=dict(size=25)),
    template="none",
    width=1600,
    height=1300,
    margin=go.layout.Margin(l=120, r=80, b=50, t=50, pad=0),
)
# Display the figure, then export it as PDF and double-scale PNG together
# with the table of contributions into the dataset's 'shap_local' folder.
fig.show()
shap_local_stem = f"{datasets[ds]['path']}/shap_local/{trgt_id}"
fig.write_image(f"{shap_local_stem}.pdf", format="pdf")
fig.write_image(f"{shap_local_stem}.png", scale=2)
df_shap.to_excel(f"{shap_local_stem}.xlsx")

## Inbody mRMR

In [None]:
# Dataset and sample to explain.
ds = 'inbody_mrmr'

trgt_id = 19389 # 1159

ds_feats = datasets[ds]['feats']
feats = ds_feats.index.values
feats = feats[feats != 'Возраст']
feats_cnt = ds_feats.index[ds_feats['Type'] == 'continuous'].to_list()
# `feats_cnt` is a plain list, so the target is filtered out element by
# element; a mask-style `feats_cnt[feats_cnt != ...]` would index the list
# with a single bool and return one element instead.
feats_cnt = [feat for feat in feats_cnt if feat != 'Возраст']
feats_cat = ds_feats.index[ds_feats['Type'] != 'continuous'].to_list()

ds_data = datasets[ds]['data']
ds_results = datasets[ds]['results']
ds_metrics = datasets[ds]['metrics']
ds_shap = datasets[ds]['shap']
ds_model = datasets[ds]['model']
ds_corrector = datasets[ds]['corrector']
ds_color = datasets[ds]['color']
ds_data_shap = datasets[ds]['data_shap']
ds_predict_func = datasets[ds]['predict_func']

# Age acceleration = unbiased prediction minus the target (age) value.
trgt_age = ds_data_shap.at[trgt_id, feat_trgt]
trgt_pred = ds_data_shap.at[trgt_id, 'Prediction Unbiased']
trgt_aa = trgt_pred - trgt_age
print(trgt_age)
print(trgt_pred)

# Background set: rows whose unbiased prediction is closest to the target
# sample's age.
n_closest = datasets[ds]['bkg_count']
data_closest = ds_data_shap.iloc[(ds_data_shap['Prediction Unbiased'] - trgt_age).abs().argsort()[:n_closest]]

explainer = shap.SamplingExplainer(ds_predict_func, data_closest.loc[:, feats].values)
print(explainer.expected_value)
shap_values = explainer.shap_values(ds_data_shap.loc[[trgt_id], feats].values)[0]
# Rescale by (prediction - age) / (prediction - expected value), presumably
# so the contributions add up to the age acceleration.
# NOTE(review): divides by zero if the prediction equals the expected value.
shap_values = shap_values * (trgt_pred - trgt_age) / (trgt_pred - explainer.expected_value)

df_shap = pd.DataFrame(index=feats, data=shap_values, columns=[trgt_id])
df_shap.sort_values(by=trgt_id, key=abs, inplace=True)
df_shap['cumsum'] = df_shap[trgt_id].cumsum()

# The frame is created with the columns that are actually filled and
# plotted ('Меньше' / 'Больше').
df_less_more = pd.DataFrame(index=df_shap.index, columns=['Меньше', 'Больше'])
df_cat_part = {}
for f_id, f in enumerate(df_less_more.index):
    if ds_feats.at[f, 'Type'] != 'categorical':
        # Percentile of the target sample's value within the background set.
        df_less_more.at[f, 'Меньше'] = round(scipy.stats.percentileofscore(data_closest.loc[:, f].values, ds_data_shap.at[trgt_id, f]))
        df_less_more.at[f, 'Больше'] = 100.0 - df_less_more.at[f, 'Меньше']
    else:
        df_less_more.at[f, 'Меньше'] = np.nan
        df_less_more.at[f, 'Больше'] = np.nan

        # NOTE(review): the distribution is always taken from the 'Пол'
        # column, whatever categorical feature `f` is -- confirm that 'Пол'
        # is the only categorical feature.
        f_value_counts = data_closest.loc[:, 'Пол'].value_counts()
        f_value_counts_rename = {x: datasets[ds]['cat_encoders']['Пол'].inverse_transform([x])[0] for x in f_value_counts.index.astype(int).values}
        f_value_counts.rename(index=f_value_counts_rename, inplace=True)
        # Convert counts to rounded percentages.
        f_value_counts = np.rint(f_value_counts / f_value_counts.sum() * 100)

        # Keyed by the row position of the feature in the sorted table.
        df_cat_part[f_id] = {
            'name': f,
            'distribution': f_value_counts.astype(int)
        }
        if f == 'Пол':
            df_cat_part[f_id]['palette'] = {'жен': 'crimson', 'муж': 'dodgerblue'}

# Two-panel figure: waterfall of contributions (left) and distribution of
# the feature values (right); both panels share the y axis.
fig = make_subplots(rows=1, cols=2, shared_yaxes=True, shared_xaxes=False, column_widths=[2.5, 1], horizontal_spacing=0.05, subplot_titles=['', "Распределение признаков у людей<br>в данном возрастном диапазоне"])
# Summary bars: chronological age below the feature rows, the age
# acceleration and the resulting total above them.
fig.add_trace(
    go.Waterfall(
        hovertext=["Хронологический возраст", "Возрастная акселерация", "Биологический возраст"],
        orientation="h",
        measure=['absolute', 'relative', 'absolute'],
        y=[-1.5, df_shap.shape[0] + 0.5, df_shap.shape[0] + 1.5],
        x=[trgt_age, trgt_aa, trgt_age+trgt_aa],
        base=0,
        text=[f"{trgt_age:0.2f}", f"+{trgt_aa:0.2f}" if trgt_aa > 0 else f"{trgt_aa:0.2f}", f"{trgt_age+trgt_aa:0.2f}"],
        textposition = "auto",
        decreasing = {"marker":{"color": "deepskyblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "crimson", "line": {"color": "black", "width": 1}}},
        totals= {"marker":{"color": "dimgray", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "dot"},
        },
    ),
    row=1,
    col=1,
)
# Per-feature contributions, accumulated starting from the chronological age.
fig.add_trace(
    go.Waterfall(
        hovertext=df_shap.index.values,
        orientation="h",
        # One entry per plotted row.
        measure=["relative"] * df_shap.shape[0],
        y=list(range(df_shap.shape[0])),
        x=df_shap[trgt_id].values,
        base=trgt_age,
        text=[f"+{x:0.2f}" if x > 0 else f"{x:0.2f}" for x in df_shap[trgt_id].values],
        textposition = "auto",
        decreasing = {"marker":{"color": "lightblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "lightcoral", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "solid"},
        },
    ),
    row=1,
    col=1,
)
fig.update_traces(row=1, col=1, showlegend=False)
# Tick labels: "<feature> = <value of the target sample>" for feature rows.
fig.update_yaxes(
    row=1,
    col=1,
    automargin=True,
    tickmode="array",
    tickvals=[-1.5] + list(range(df_shap.shape[0])) + [df_shap.shape[0] + 0.5, df_shap.shape[0] + 1.5],
    ticktext=["Хронологический возраст"] + [f"{x} = {ds_data.at[trgt_id, x]:0.2f}" if ds_feats.at[x, 'Type'] != 'categorical' else f"{x} = {ds_data.at[trgt_id, x]}" for x in df_shap.index] + ["Возрастная акселерация", "Биологический возраст"],
    tickfont=dict(size=18),
)
fig.update_xaxes(
    row=1,
    col=1,
    automargin=True,
    # `title.font` replaces the deprecated `titlefont` attribute.
    title=dict(text='Возраст', font=dict(size=25)),
    range=[
        trgt_age + df_shap['cumsum'].min() * 1.2 - 2,
        trgt_age + df_shap['cumsum'].max() * 1.2 + 2
    ],
)

fig.add_trace(
    go.Bar(
        hovertext=df_shap.index.values,
        orientation="h",
        name='Меньше',
        x=df_less_more.loc[df_shap.index.values, 'Меньше'],
        y=list(range(df_shap.shape[0])),
        marker=dict(color='steelblue', line=dict(color="black", width=1)),
        text=df_less_more.loc[df_shap.index.values, 'Меньше'],
        textposition='auto'
    ),
    row=1,
    col=2,
)
fig.add_trace(
    go.Bar(
        hovertext=df_shap.index.values,
        orientation="h",
        name='Больше',
        x=df_less_more.loc[df_shap.index.values, 'Больше'],
        y=list(range(df_shap.shape[0])),
        marker=dict(color='violet', line=dict(color="black", width=1)),
        text=df_less_more.loc[df_shap.index.values, 'Больше'],
        textposition='auto',
    ),
    row=1,
    col=2
)

for f_cat_id, f_cat_dict in df_cat_part.items():
    for f_val in f_cat_dict['distribution'].index:
        fig.add_trace(
            go.Bar(
                hovertext=[f_cat_dict['name']],
                orientation="h",
                name=f_val,
                x=[f_cat_dict['distribution'][f_val]],
                y=[f_cat_id],
                marker=dict(color=f_cat_dict['palette'][f_val], line=dict(color="black", width=1)),
                text=[f_val],
                textposition='auto',
                showlegend=False
            ),
            row=1,
            col=2
        )

fig.update_xaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
fig.update_yaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
fig.update_layout(barmode="stack")
fig.update_layout(
    legend=dict(
        title=dict(side="top"),
        orientation="h",
        yanchor="bottom",
        y=0.98,
        xanchor="center",
        x=0.86
    ),
)
fig.update_layout(
    title=f"Возрастная акселерация для {trgt_id}",
    titlefont=dict(size=25),
    template="none",
    width=1300,
    height=1100,
    margin=go.layout.Margin(l=120, r=80, b=50, t=50, pad=0),
)
fig.show()
fig.write_image(f"{datasets[ds]['path']}/shap_local/{trgt_id}.pdf", format="pdf")
fig.write_image(f"{datasets[ds]['path']}/shap_local/{trgt_id}.png", scale=2)
df_shap.to_excel(f"{datasets[ds]['path']}/shap_local/{trgt_id}.xlsx")

## Model combinations: Blood + Inbody

In [None]:
# Per-dataset local SHAP explanation of one sample, prepared for the combined
# (multi-model) figure. For every dataset in `datasets_trgt` the loop stores in
# `local_exlp[ds]`: the scaled SHAP values, the 'Меньше'/'Больше' percentages of
# the background sample, the categorical distributions and the dataset's share
# of the age acceleration.

# `scipy.stats.percentileofscore` is called by its qualified name below;
# importing here keeps the cell independent of other cells' imports.
import scipy.stats

# Sample whose prediction is explained.
trgt_id = 18698

data_all = []    # single-row frames with the target sample, one per dataset
feats_all = []   # feature-description tables (without 'Возраст'), one per dataset
local_exlp = {}  # explanation artefacts keyed by dataset name

datasets_trgt = ['inbody_mrmr', 'lab']

for ds in datasets_trgt:
    ds_feats = datasets[ds]['feats']
    feats = ds_feats.index.values
    feats = feats[feats != 'Возраст']
    # Filter element-wise: comparing a plain list with a string yields a single
    # bool, which would then be used as an integer index into the list.
    feats_cnt = [x for x in ds_feats.index[ds_feats['Type'] == 'continuous'] if x != 'Возраст']
    feats_cat = ds_feats.index[ds_feats['Type'] != 'continuous'].to_list()

    ds_data = datasets[ds]['data']
    ds_results = datasets[ds]['results']
    ds_metrics = datasets[ds]['metrics']
    ds_shap = datasets[ds]['shap']
    ds_model = datasets[ds]['model']
    ds_corrector = datasets[ds]['corrector']
    ds_color = datasets[ds]['color']
    ds_data_shap = datasets[ds]['data_shap']
    ds_predict_func = datasets[ds]['predict_func']

    trgt_age = ds_data_shap.at[trgt_id, feat_trgt]
    trgt_pred = ds_data_shap.at[trgt_id, 'Prediction Unbiased']
    trgt_aa = trgt_pred - trgt_age

    # Background sample: the `n_closest` rows whose unbiased prediction is
    # nearest to the target's value of `feat_trgt`.
    n_closest = datasets[ds]['bkg_count']
    data_closest = ds_data_shap.iloc[(ds_data_shap['Prediction Unbiased'] - trgt_age).abs().argsort()[:n_closest]]

    explainer = shap.SamplingExplainer(ds_predict_func, data_closest.loc[:, feats].values)
    shap_values = explainer.shap_values(ds_data_shap.loc[[trgt_id], feats].values)[0]
    # Rescale so the contributions are expressed relative to `trgt_age` instead
    # of the explainer's expected value (assumes the SHAP values sum to
    # prediction - expected_value - TODO confirm for SamplingExplainer).
    # NOTE(review): divides by zero when trgt_pred == explainer.expected_value.
    shap_values = shap_values * (trgt_pred - trgt_age) / (trgt_pred - explainer.expected_value)
    # Weight this dataset's contributions by its test-set metric and split
    # evenly between the combined datasets.
    shap_values *= ds_metrics.at['Test', 'pearson_corrcoef_unbiased'] / len(datasets_trgt)

    df_shap = pd.DataFrame(index=feats, data=shap_values, columns=[trgt_id])
    df_shap.sort_values(by=trgt_id, key=abs, inplace=True)

    df_less_more = pd.DataFrame(index=df_shap.index, columns=['Меньше', 'Больше'])
    df_cat_part = {}
    for f in df_less_more.index:
        if ds_feats.at[f, 'Type'] != 'categorical':
            # Rounded percentile of the target's value within the background sample.
            df_less_more.at[f, 'Меньше'] = round(scipy.stats.percentileofscore(data_closest.loc[:, f].values, ds_data_shap.at[trgt_id, f]))
            df_less_more.at[f, 'Больше'] = 100.0 - df_less_more.at[f, 'Меньше']
        else:
            df_less_more.at[f, 'Меньше'] = np.nan
            df_less_more.at[f, 'Больше'] = np.nan

            # NOTE(review): the distribution is always computed for 'Пол', even
            # when `f` is a different categorical feature - confirm 'Пол' is the
            # only categorical feature in these datasets.
            f_value_counts = data_closest.loc[:, 'Пол'].value_counts()
            f_value_counts_rename = {x: datasets[ds]['cat_encoders']['Пол'].inverse_transform([x])[0] for x in f_value_counts.index.astype(int).values}
            f_value_counts.rename(index=f_value_counts_rename, inplace=True)
            # Convert counts to whole percentages.
            f_value_counts = np.rint(f_value_counts / f_value_counts.sum() * 100)

            df_cat_part[f] = {
                'distribution': f_value_counts.astype(int)
            }
            if f == 'Пол':
                df_cat_part[f]['palette'] = {'жен': 'crimson', 'муж': 'dodgerblue'}

    local_exlp[ds] = {
        'df_shap': df_shap,
        'df_less_more': df_less_more,
        'df_cat_part': df_cat_part,
        'age_acceleration': (trgt_pred - trgt_age) * ds_metrics.at['Test', 'pearson_corrcoef_unbiased'] / len(datasets_trgt),
    }

    data_all.append(ds_data.loc[[trgt_id], :])
    feats_all.append(ds_feats.loc[feats, :])

# Merge the per-dataset explanations from `local_exlp` into single frames for
# the combined figure. The feature shared by the datasets (`feat_cmn`) is
# collapsed into one row: its SHAP values are summed and its category
# distribution is averaged over the datasets.

# `reduce` is used below; importing here keeps the cell independent of other
# cells' imports.
from functools import reduce

# One row for the target sample with the columns of all datasets; columns that
# repeat in a later dataset get the '_y' suffix.
data_all = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True, suffixes=('', '_y')), data_all)
feats_all = pd.concat(feats_all)
feats_all = feats_all[~feats_all.index.duplicated(keep='first')]

feat_cmn = 'Пол'

df_shap_cmn = pd.DataFrame(index=[feat_cmn], columns=[trgt_id], data=np.zeros(1))
dfs_shap = [df_shap_cmn]
# Placeholder row for the shared feature; it uses the same columns as the
# per-dataset `df_less_more` frames so the concatenation adds no extra column.
df_less_more_cmn = pd.DataFrame(index=[feat_cmn], columns=['Меньше', 'Больше'], data=np.nan)
dfs_less_more_cmn = [df_less_more_cmn]
df_cat_part_cmn = {
    'distribution': pd.Series(index=['жен', 'муж'], data=[0, 0]),
    'palette': {'жен': 'crimson', 'муж': 'dodgerblue'}
}
for ds in datasets_trgt:
    print(local_exlp[ds]['age_acceleration'])
    df_shap_cmn.at[feat_cmn, trgt_id] += local_exlp[ds]['df_shap'].at[feat_cmn, trgt_id]
    # Take the distribution stored for *this* dataset (not the variable left
    # over from the previous cell's loop). `fill_value=0` keeps a category that
    # is absent from one dataset's background sample from turning into NaN.
    df_cat_part_cmn['distribution'] = df_cat_part_cmn['distribution'].add(
        local_exlp[ds]['df_cat_part'][feat_cmn]['distribution'] / len(datasets_trgt),
        fill_value=0,
    )
    dfs_shap.append(local_exlp[ds]['df_shap'].drop([feat_cmn]))
    dfs_less_more_cmn.append(local_exlp[ds]['df_less_more'].drop([feat_cmn]))

df_shap_union = pd.concat(dfs_shap)
df_less_more_union = pd.concat(dfs_less_more_cmn)
# Order rows by absolute contribution and keep the running total, which the
# figure uses for its x-axis range.
df_shap_union.sort_values(by=trgt_id, key=abs, inplace=True)
df_shap_union['cumsum'] = df_shap_union[trgt_id].cumsum()
df_less_more_union = df_less_more_union.loc[df_shap_union.index, :]
trgt_aa = df_shap_union[trgt_id].sum()
trgt_age = data_all.at[trgt_id, feat_trgt]

# Per-dataset shares of the age acceleration, in the order they are plotted.
aa_1 = local_exlp['lab']['age_acceleration']
aa_2 = local_exlp['inbody_mrmr']['age_acceleration']

# Combined-model local SHAP figure for the sample `trgt_id`.
#   Column 1: summary waterfall (chronological age, the two per-dataset age
#             accelerations `aa_1` / `aa_2`, biological age) and the per-feature
#             waterfall over the rows of `df_shap_union`.
#   Column 2: stacked 'Меньше'/'Больше' bars from `df_less_more_union` and the
#             category bars of the shared feature `feat_cmn`.
# The figure is shown and exported as PDF/PNG; `df_shap_union` is exported as XLSX.

# Number of rows drawn in the per-feature traces. It must come from the merged
# frame: `feats` only holds the feature list of the last processed dataset.
n_shap_feats = df_shap_union.shape[0]

fig = make_subplots(
    rows=1,
    cols=2,
    shared_yaxes=True,
    shared_xaxes=False,
    column_widths=[2.5, 1],
    horizontal_spacing=0.15,
    subplot_titles=['', "Распределение признаков у людей<br>в данном возрастном диапазоне"],
)

# Summary waterfall: placed below (-1.5) and above (n + 0.5 .. n + 2.5) the feature rows.
fig.add_trace(
    go.Waterfall(
        hovertext=["Хронологический возраст", "Возрастная акселерация (Анализ Крови)", "Возрастная акселерация (Биоимпеданс)", "Биологический возраст"],
        orientation="h",
        measure=['absolute', 'relative', 'relative', 'absolute'],
        y=[-1.5, n_shap_feats + 0.5, n_shap_feats + 1.5, n_shap_feats + 2.5],
        x=[trgt_age, aa_1, aa_2, trgt_age+trgt_aa],
        base=0,
        text=[f"{trgt_age:0.2f}", f"+{aa_1:0.2f}" if aa_1 > 0 else f"{aa_1:0.2f}", f"+{aa_2:0.2f}" if aa_2 > 0 else f"{aa_2:0.2f}", f"{trgt_age+trgt_aa:0.2f}"],
        textposition = "auto",
        decreasing = {"marker":{"color": "deepskyblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "crimson", "line": {"color": "black", "width": 1}}},
        totals= {"marker":{"color": "dimgray", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "dot"},
        },
    ),
    row=1,
    col=1,
)

# Per-feature waterfall: one relative step per row of `df_shap_union`.
fig.add_trace(
    go.Waterfall(
        hovertext=df_shap_union.index.values,
        orientation="h",
        measure=["relative"] * n_shap_feats,
        y=list(range(n_shap_feats)),
        x=df_shap_union[trgt_id].values,
        base=trgt_age,
        text=[f"+{x:0.2f}" if x > 0 else f"{x:0.2f}" for x in df_shap_union[trgt_id].values],
        textposition = "auto",
        decreasing = {"marker":{"color": "lightblue", "line": {"color": "black", "width": 1}}},
        increasing = {"marker":{"color": "lightcoral", "line": {"color": "black", "width": 1}}},
        connector={
            "mode": "between",
            "line": {"width": 1, "color": "black", "dash": "solid"},
        },
    ),
    row=1,
    col=1,
)
fig.update_traces(row=1, col=1, showlegend=False)

# Tick labels show "<feature> = <value of the target sample>"; non-categorical
# values are formatted with two decimals, categorical ones are printed as stored.
fig.update_yaxes(
    row=1,
    col=1,
    automargin=True,
    tickmode="array",
    tickvals=[-1.5] + list(range(n_shap_feats)) + [n_shap_feats + 0.5, n_shap_feats + 1.5, n_shap_feats + 2.5],
    ticktext=["Хронологический возраст"] + [f"{x} = {data_all.at[trgt_id, x]:0.2f}" if feats_all.at[x, 'Type'] != 'categorical' else f"{x} = {data_all.at[trgt_id, x]}" for x in df_shap_union.index] + ["Возрастная акселерация (Анализ Крови)", "Возрастная акселерация (Биоимпеданс)", "Биологический возраст"],
    tickfont=dict(size=18),
)
fig.update_xaxes(
    row=1,
    col=1,
    automargin=True,
    # `title=dict(text=..., font=...)` replaces the deprecated `titlefont=` shortcut.
    title=dict(text='Возраст', font=dict(size=25)),
    range=[
        trgt_age + df_shap_union['cumsum'].min() * 1.2 - 2,
        trgt_age + df_shap_union['cumsum'].max() * 1.2 + 2
    ],
)

# Right-hand panel: 'Меньше' / 'Больше' bars, stacked via barmode="stack" below.
fig.add_trace(
    go.Bar(
        hovertext=df_shap_union.index.values,
        orientation="h",
        name='Меньше',
        x=df_less_more_union.loc[df_shap_union.index.values, 'Меньше'],
        y=list(range(n_shap_feats)),
        marker=dict(color='steelblue', line=dict(color="black", width=1)),
        text=df_less_more_union.loc[df_shap_union.index.values, 'Меньше'],
        textposition='auto'
    ),
    row=1,
    col=2,
)
fig.add_trace(
    go.Bar(
        hovertext=df_shap_union.index.values,
        orientation="h",
        name='Больше',
        x=df_less_more_union.loc[df_shap_union.index.values, 'Больше'],
        y=list(range(n_shap_feats)),
        marker=dict(color='violet', line=dict(color="black", width=1)),
        text=df_less_more_union.loc[df_shap_union.index.values, 'Больше'],
        textposition='auto',
    ),
    row=1,
    col=2
)

# Shared categorical feature: one bar segment per category, drawn on the row
# that `feat_cmn` occupies in the sorted `df_shap_union`.
for f_val in df_cat_part_cmn['distribution'].index:
    fig.add_trace(
        go.Bar(
            hovertext=[feat_cmn],
            orientation="h",
            name=f_val,
            x=[df_cat_part_cmn['distribution'][f_val]],
            y=[df_shap_union.index.get_loc(feat_cmn)],
            marker=dict(color=df_cat_part_cmn['palette'][f_val], line=dict(color="black", width=1)),
            text=[f_val],
            textposition='auto',
            showlegend=False
        ),
        row=1,
        col=2
    )

# The right-hand panel is purely illustrative: hide its axes decorations.
fig.update_xaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
fig.update_yaxes(
    row=1,
    col=2,
    automargin=True,
    showgrid=False,
    showline=False,
    zeroline=False,
    showticklabels=False,
)
fig.update_layout(barmode="stack")
fig.update_layout(
    legend=dict(
        title=dict(side="top"),
        orientation="h",
        yanchor="bottom",
        y=0.98,
        xanchor="center",
        x=0.87
    ),
)
fig.update_layout(
    title=dict(text=f"Возрастная акселерация для {trgt_id}", font=dict(size=25)),
    template="none",
    width=1600,
    height=1300,
    margin=go.layout.Margin(l=120, r=100, b=50, t=50, pad=0),
)
fig.show()

# Make sure the export directory exists before writing into it.
shap_local_dir = f"{path}/complex_model/shap_local"
pathlib.Path(shap_local_dir).mkdir(parents=True, exist_ok=True)
fig.write_image(f"{shap_local_dir}/{trgt_id}.pdf", format="pdf")
fig.write_image(f"{shap_local_dir}/{trgt_id}.png", scale=2)
df_shap_union.to_excel(f"{shap_local_dir}/{trgt_id}.xlsx")