# Debugging autoreload

In [None]:
# IPython magics: enable the autoreload extension in mode 2, so that edited
# project modules are re-imported before each cell is executed.
%load_ext autoreload
%autoreload 2

# Load packages

In [None]:

from pytorch_tabular.utils import load_covertype_dataset
from rich.pretty import pprint
from plotly.subplots import make_subplots
from pytorch_tabular import TabularModel
import torch
import plotly.graph_objects as go
from scipy import stats
import shap
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.impute import KNNImputer
from glob import glob
import ast
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import itertools
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from pytorch_tabular import model_sweep
from src.pt.model_sweep import model_sweep_custom
import warnings
import pathlib
from tqdm import tqdm
import distinctipy
import matplotlib.patheffects as pe
import matplotlib.colors as mcolors
from statannotations.Annotator import Annotator
from scipy.stats import mannwhitneyu
from plottable import ColumnDefinition, Table
from plottable.plots import bar
from plottable.cmap import normed_cmap, centered_cmap
import optuna
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.cm
import matplotlib as mpl
from statsmodels.stats.multitest import multipletests
import re
import datetime
from collections import Counter
from matplotlib.ticker import MaxNLocator
from itertools import chain
from sklearn.metrics import mean_absolute_error
import pyaging as pya
import matplotlib.lines as mlines
import statsmodels.formula.api as smf
from itertools import chain
from sklearn.preprocessing import LabelEncoder 
import upsetplot
from pathlib import Path
from openai import OpenAI
from docx import Document
from docx.shared import Mm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import RGBColor
from docx.oxml.ns import qn


def make_rgb_transparent(rgb, bg_rgb, alpha):
    """Blend a foreground colour over a background colour.

    Every channel is computed as ``alpha * fg + (1 - alpha) * bg``.  Channels
    are paired with ``zip``, so the result is as long as the shorter input.

    Parameters
    ----------
    rgb : iterable of numbers
        Foreground colour channels.
    bg_rgb : iterable of numbers
        Background colour channels.
    alpha : number
        Weight of the foreground colour.

    Returns
    -------
    list
        Blended channel values.
    """
    blended = []
    for fg, bg in zip(rgb, bg_rgb):
        blended.append(alpha * fg + (1 - alpha) * bg)
    return blended


def form_bar(base):
    """Return a formatter that renders a fraction as ``"<count>/<base>"``.

    The returned callable multiplies its argument by ``base``, rounds the
    product with the built-in ``round`` and converts it to ``int``.
    """
    def formatter(x):
        count = int(round(x * base))
        return f'{count}/{base}'

    return formatter


def get_sections(sets):
    """
    Split a collection of sets into the mutually exclusive regions of their
    Venn diagram.

    For N input sets there are (2**N)-1 regions: one for every non-empty
    choice of "included" sets.  A region holds the elements that belong to
    all included sets and to none of the remaining ones, so a region may be
    empty.

    Parameters
    ----------
    sets : sequence of set-like
        The input collections.  Each item may be any iterable of hashable
        elements (``set``, ``frozenset``, ``list`` ...).

    Returns
    -------
    sections : dict of {str: set}
        Maps a binary tag to its region.  Character ``i`` of the tag is
        ``'1'`` when ``sets[i]`` is included in the region and ``'0'`` when
        it is excluded.  Keys are inserted from the highest bit mask down to
        the lowest, starting with the all-included region (``'11...1'``).
        An empty ``sets`` yields an empty dict.
    """
    n_sets = len(sets)
    sections = {}
    # Bit ``i`` of ``bits`` tells whether ``sets[i]`` is included in the region.
    for bits in range(2 ** n_sets - 1, 0, -1):
        membership = [bool(bits >> position & 1) for position in range(n_sets)]
        included = [s for s, is_in in zip(sets, membership) if is_in]
        excluded = [s for s, is_in in zip(sets, membership) if not is_in]
        # ``bits`` is never 0, so ``included`` always has at least one item.
        region = set(included[0]).intersection(*included[1:])
        region.difference_update(*excluded)
        tag = ''.join('1' if is_in else '0' for is_in in membership)
        sections[tag] = region
    return sections

def markdown_to_docx(markdown_text, doc):
    """Append a simple Markdown text to a python-docx document.

    The text is processed line by line.  Supported constructs:

    * ATX headings (``#``, ``##`` ...), optionally indented; heading levels
      deeper than 6 are clamped to 6;
    * bullet items starting with ``-``, ``*`` or ``+``; every two leading
      whitespace characters add 10 pt to the base left indent of 25 pt;
    * inline ``**bold**`` / ``__bold__`` and ``*italic*`` / ``_italic_``
      (delegated to ``add_formatted_text``);
    * blank lines and horizontal rules (``---``, ``***``, ``___``), which
      are skipped.

    Any other line becomes a plain paragraph.

    Parameters
    ----------
    markdown_text : str
        Markdown source.
    doc : docx.document.Document
        Target document; it is modified in place.

    Returns
    -------
    docx.document.Document
        The same ``doc`` object, for call chaining.
    """
    # Regular expressions for the supported Markdown elements
    header_re = re.compile(r'^(#+)\s+(.*)')
    list_re = re.compile(r'^(\s*)[-*+] (.*)')
    rule_re = re.compile(r'^(-{3,}|\*{3,}|_{3,})$')
    bold_re = re.compile(r'(\*\*|__)(.*?)\1')
    italic_re = re.compile(r'(\*|_)(.*?)\1')

    for line in markdown_text.split('\n'):
        stripped_line = line.strip()

        # Skip blank lines and horizontal rules
        if not stripped_line or rule_re.match(stripped_line):
            continue

        # Headings: matched on the stripped line so indented ones count too
        header_match = header_re.match(stripped_line)
        if header_match:
            level = len(header_match.group(1))
            text = header_match.group(2).strip()
            doc.add_heading(text, level=min(level, 6))
            continue

        # Bullet items: matched on the raw line, whose leading whitespace
        # encodes the nesting depth
        list_match = list_re.match(line)
        if list_match:
            indent = len(list_match.group(1))
            # Styles are looked up by name; the bullet style is named
            # 'List Bullet' ('ListBullet' is its style id, and lookup by id
            # is deprecated in python-docx).
            p = doc.add_paragraph(style='List Bullet')
            p.paragraph_format.left_indent = Pt(25 + 10 * (indent // 2))
            add_formatted_text(p, list_match.group(2).strip(), bold_re, italic_re)
            continue

        # Everything else is a plain paragraph
        p = doc.add_paragraph()
        add_formatted_text(p, stripped_line, bold_re, italic_re)

    return doc


def add_formatted_text(paragraph, text, bold_re, italic_re):
    """Append ``text`` to ``paragraph`` as runs with bold/italic formatting.

    The text is first split on ``bold_re`` matches; the pieces between bold
    spans are then split on ``italic_re`` matches.  The content of a bold
    span is not scanned for italics.  For both patterns, group 2 of a match
    is the text that gets formatted.

    Parameters
    ----------
    paragraph : object with ``add_run(text)``
        Receives one run per fragment, in reading order.
    text : str
        Source text with inline markers.
    bold_re, italic_re : compiled regular expressions
        Patterns locating bold and italic spans.
    """
    def tokenize(source, pattern, tag):
        # Yield (kind, chunk) pairs: text outside matches is 'normal',
        # group 2 of each match is tagged with ``tag``.
        cursor = 0
        for hit in pattern.finditer(source):
            if cursor < hit.start():
                yield 'normal', source[cursor:hit.start()]
            yield tag, hit.group(2)
            cursor = hit.end()
        if cursor < len(source):
            yield 'normal', source[cursor:]

    for outer_kind, outer_chunk in tokenize(text, bold_re, 'bold'):
        if outer_kind == 'bold':
            pieces = [('bold', outer_chunk)]
        else:
            pieces = tokenize(outer_chunk, italic_re, 'italic')

        for kind, chunk in pieces:
            run = paragraph.add_run(chunk)
            if kind == 'bold':
                run.bold = True
            elif kind == 'italic':
                run.italic = True


def delete_paragraph(paragraph):
    """Remove ``paragraph`` from its document and invalidate the proxy.

    Parameters
    ----------
    paragraph : docx.text.paragraph.Paragraph
        Paragraph to delete.  Its XML element is detached from the parent
        element; afterwards the object must not be used any more.
    """
    element = paragraph._element
    element.getparent().remove(element)
    # Clear the references held by the proxy object itself (not by the XML
    # element), so a stale ``paragraph`` cannot silently keep editing the
    # detached element.
    paragraph._p = paragraph._element = None


# Process DNAm

## Betas to pkl

In [None]:
# Re-save the beta-value matrix as a pickle indexed by the phenotype 'ID'.
path = "E:/YandexDisk/Work/bbd/unn/dnam/001_longitudinal_examples"

pheno = pd.read_csv(f"{path}/controls_from_central(169).csv", index_col=0)
pheno.index = pheno.index.astype(str)

# After transposing, rows are labelled with the phenotype index values; they
# are put into the phenotype order before being relabelled with 'ID'.
betas = pd.read_csv(f"{path}/betas_funnorm.csv", index_col=0).T
betas = betas.loc[pheno.index.values, :].set_index(pheno['ID'])
betas.to_pickle(f"{path}/betas_funnorm.pkl")

## Calculate epigenetic ages

### Load DNAm data

In [None]:
# Build the input table for the clocks: phenotype columns joined with betas.
path = "E:/YandexDisk/Work/bbd/unn/dnam/001_longitudinal_examples"

pheno = pd.read_csv(f"{path}/controls_from_central(169).csv", index_col='ID')
pheno.index = pheno.index.astype(str)
betas = pd.read_pickle(f"{path}/betas_funnorm.pkl")

feats_for_ages = ['Age', 'Sex', 'Tissue']

# Inner join on the sample index
df_for_ages = pheno[feats_for_ages].merge(betas, left_index=True, right_index=True)

# Binary sex indicator: 1 where 'Sex' equals 'F', 0 otherwise
df_for_ages['Female'] = df_for_ages['Sex'].eq('F').astype(int)
df_for_ages = pya.pp.epicv2_probe_aggregation(df_for_ages, verbose=True)

### Calculate pyaging

In [None]:
path_clocks = "E:/YandexDisk/Work/pydnameth/datasets/pyaging"

# pyaging model identifiers to evaluate; the order defines the column order
# of the clock estimates in `results`.
clocks = [
    "altumage", "dunedinpace", "han", "knight",
    "leecontrol", "leerefinedrobust", "leerobust", "dnamfitage",
    "dnamphenoage", "dnamtl", "encen100", "encen40",
    "grimage", "grimage2", "hannum", "horvath2013",
    "hrsinchphenoage", "lin", "pcdnamtl", "pcgrimage",
    "pchannum", "pchorvath2013", "pcphenoage", "pcskinandblood",
    "pedbe", "replitali", "skinandblood", "stemtoc",
    "stoch", "stocp", "stocz", "yingadaptage",
    "yingcausage", "yingdamage", "zhangblup", "zhangen",
    "zhangmortality", "epitoc1", "retroelementagev1", "retroelementagev2",
    "intrinclock", "abec", "cabec", "eabec",
    "pipekelasticnet", "pipekfilteredh", "pipekretrainedh", "dnamic",
]

adata = pya.pp.df_to_adata(
    df_for_ages,
    metadata_cols=['Sex', 'Tissue'],
    imputer_strategy='knn',
    verbose=True,
)
pya.pred.predict_age(adata=adata, dir=path_clocks, clock_names=clocks, verbose=True)

# Attach the clock estimates (stored in adata.obs) to the phenotype table
results = pheno.merge(adata.obs[clocks], left_index=True, right_index=True)

# Rename the columns from pyaging model IDs to the clock names of the meta table
pyaging_meta = pd.read_excel(f"{path_clocks}/clocks_meta_upd.xlsx", index_col='Clock Name')
pyaging_meta['Clock Name'] = pyaging_meta.index
model_id_to_name = dict(zip(pyaging_meta['Model ID'].values, pyaging_meta['Clock Name'].values))
results = results.rename(columns=model_id_to_name)

results.to_excel(f"{path}/pheno.xlsx")

### Calculate EpImAge

In [None]:
path_epim = "E:/Git/EpImAge"

# Marker names (the models' targets) and the CpG sites the models take as input
imms_epim = pd.read_excel(f"{path_epim}/models/InflammatoryMarkers/InflammatoryMarkers.xlsx", index_col='feature').index.values
imms_epim_log = [f"{f}_log" for f in imms_epim]
cpgs_epim = pd.read_excel(f"{path_epim}/data/CpGs.xlsx", index_col=0).index.to_list()
cpgs_epim_missed = list(set(cpgs_epim) - set(df_for_ages.columns.values))
cpgs_epim_present = list(set.intersection(set(cpgs_epim), set(df_for_ages.columns.values)))

# IDs of the samples with Status == 'Control'; only referenced by the
# optional background restriction that is commented out below.
unn_samples = pd.read_excel(f"{path_epim}/data/cytokines-regression/data.xlsx", index_col=0)
unn_samples = unn_samples.index[unn_samples['Status'] == 'Control'].values

# Model input: age plus every required CpG; CpGs absent from the data are
# added as empty columns so that they can be imputed.
df_for_epim = df_for_ages.loc[:, ['Age'] + cpgs_epim_present]
df_for_epim.loc[:, cpgs_epim_missed] = None

models_imms = {}
for imm in (pbar := tqdm(imms_epim)):
    pbar.set_description(f"Loading model for {imm}")
    models_imms[imm] = TabularModel.load_model(f"{path_epim}/models/InflammatoryMarkers/{imm}")

model_age = TabularModel.load_model(f"{path_epim}/models/EpInflammAge")

bkgrd_imp = pd.read_pickle(f"{path_epim}/models/background-imputation.pkl")
# bkgrd_imp = bkgrd_imp.loc[bkgrd_imp.index.intersection(set(unn_samples)), :]

imp_method = 'KNN'
n_nans = df_for_epim.isna().sum().sum()
if n_nans > 0:
    # Fail with a clear message instead of a NameError on an unbound imputer
    if imp_method == "KNN":
        imputer = KNNImputer(n_neighbors=5)
    else:
        raise ValueError(f"Unsupported imputation method: {imp_method!r}")
    # Suffix the background sample IDs so they cannot clash with the samples
    # being imputed (verify_integrity raises on duplicated index values).
    bkgrd_imp.set_index(bkgrd_imp.index.astype(str) + f'_imputation_{imp_method}', inplace=True)
    data_epim_all = pd.concat([df_for_epim, bkgrd_imp], axis=0, verify_integrity=True)
    data_epim_all.loc[:, cpgs_epim] = imputer.fit_transform(data_epim_all.loc[:, cpgs_epim].values)
    # Copy the imputed values back for the samples of interest only
    df_for_epim.loc[df_for_epim.index, cpgs_epim] = data_epim_all.loc[df_for_epim.index, cpgs_epim]

# Per-marker predictions first; they are the inputs of the EpInflammAge model
for imm in imms_epim:
    df_for_epim[f"{imm}_log"] = models_imms[imm].predict(df_for_epim)
df_for_epim['EpInflammAge'] = model_age.predict(df_for_epim.loc[:, imms_epim_log])

for f in ['EpInflammAge'] + imms_epim_log:
    results.loc[results.index, f] = df_for_epim.loc[results.index, f]

results.to_excel(f"{path}/pheno.xlsx")

# Plot epigenetic ages

In [None]:

path = "E:/YandexDisk/Work/bbd/unn/dnam/001_longitudinal_examples"
path_clocks = "E:/YandexDisk/Work/pydnameth/datasets/pyaging"

# Phenotype table with the clock estimates; the 'I1_duplicate' row is
# removed before anything is computed from it.
df = pd.read_excel(f"{path}/pheno.xlsx", index_col=0)
df = df.drop(index='I1_duplicate')

pyaging_meta = pd.read_excel(f"{path_clocks}/clocks_meta_upd.xlsx", index_col='Clock Name')
pyaging_meta['Clock Name'] = pyaging_meta.index

# Clocks left out of the analysis
excluded_clocks = [
    'Knight', 'LeeControl', 'LeeRefinedRobust', 'LeeRobust', 'PedBE',
    'RepliTali', 'ENCen100', 'CpGPTGrimAge3', 'CpGPTPCGrimAge3',
    'GrimAge2ADM', 'GrimAge2B2M', 'GrimAge2CystatinC', 'GrimAge2GDF15',
    'GrimAge2Leptin', 'GrimAge2LogA1C', 'GrimAge2LogCRP', 'GrimAge2PackYrs',
    'GrimAge2PAI1', 'GrimAge2TIMP1',
    'DNAmFitAgeGaitF', 'DNAmFitAgeGaitM', 'DNAmFitAgeGripF',
    'DNAmFitAgeGripM', 'DNAmFitAgeVO2Max', 'DNAmIC',
]
pyaging_meta = pyaging_meta.drop(index=excluded_clocks)

# Remaining clocks of type 'Age', plus EpInflammAge
is_age_clock = pyaging_meta['Type'] == 'Age'
epi_ages = pyaging_meta.loc[is_age_clock].index.to_list() + ['EpInflammAge']

for epiage in epi_ages:
    # Linear trend of the clock on chronological age, fitted on all samples
    linreg = smf.ols(formula=f"{epiage} ~ Age", data=df).fit()
    df[f"{epiage}_linear_pred"] = linreg.predict(df)
    # Residual from the trend and plain difference from chronological age
    df[f"{epiage} acceleration (Linreg)"] = df[epiage] - df[f"{epiage}_linear_pred"]
    df[f"{epiage} acceleration (True)"] = df[epiage] - df["Age"]
    # Chronological age shifted by the trend residual
    df[f"{epiage} corrected"] = df["Age"] + df[f"{epiage} acceleration (Linreg)"]

# Two subjects are highlighted; everybody else falls into 'Other'
df['Group'] = 'Other'
for highlighted in ('I1', 'I8'):
    df.loc[df['Subject_ID'] == highlighted, 'Group'] = highlighted

colors_groups = {
    'Other': 'gray',
    'I1': 'dodgerblue',
    'I8': 'crimson',
}


## Scatters

In [None]:
# Larger markers for the two highlighted subjects
df['MarkerSize'] = 10
df.loc[df['Subject_ID'].isin(['I1', 'I8']), 'MarkerSize'] = 40

nrows = 5
ncols = 7

sns.set_theme(style='ticks')
fig = plt.figure(figsize=(30, 20), layout="constrained")
subfigs = fig.subfigures(nrows=nrows, ncols=ncols)

# One panel per clock, filled row by row
for epiage_id, epiage in enumerate(epi_ages):
    row_id, col_id = divmod(epiage_id, ncols)
    ax = subfigs[row_id, col_id].subplot_mosaic([['11']])['11']

    # Common range for both axes, padded by 10% on each side
    xy_min = df[['Age', epiage]].min().min()
    xy_max = df[['Age', epiage]].max().max()
    xy_ptp = xy_max - xy_min
    lim_lo = xy_min - 0.1 * xy_ptp
    lim_hi = xy_max + 0.1 * xy_ptp

    # Identity line (clock estimate == chronological age)
    sns.lineplot(
        x=[lim_lo, lim_hi],
        y=[lim_lo, lim_hi],
        linestyle='--',
        color='black',
        linewidth=1.0,
        ax=ax,
    )
    # Regression line of the clock on age, drawn without points
    sns.regplot(
        data=df,
        x='Age',
        y=epiage,
        color='black',
        line_kws={'linewidth': 1},
        scatter=False,
        truncate=False,
        ax=ax,
    )
    sns.scatterplot(
        data=df,
        x='Age',
        y=epiage,
        hue='Group',
        palette=colors_groups,
        linewidth=0.5,
        alpha=0.75,
        edgecolor="k",
        size='MarkerSize',
        hue_order=list(colors_groups.keys()),
        ax=ax,
    )

    # Rebuild the legend from the first four entries only, dropping the
    # entries that follow them (the scatter is also mapped by 'MarkerSize').
    handles, labels = ax.get_legend_handles_labels()
    if len(handles) > 1:
        ax.legend(handles=handles[0:4], labels=labels[0:4])

    ax.set_xlim(lim_lo, lim_hi)
    ax.set_ylim(lim_lo, lim_hi)

fig.savefig(f"{path}/ages_distribution.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/ages_distribution.pdf", bbox_inches='tight')
plt.close(fig)

## Barplots

In [None]:
subject = 'I8'
subject_ids = df[df['Subject_ID'] == subject].sort_values(by=['Age'], ascending=[True]).index.values

nrows = 1
ncols = len(subject_ids) * 2

sns.set_theme(style='ticks')
fig = plt.figure(figsize=(2 * ncols * 2, 10), layout="constrained")
subfigs = fig.subfigures(nrows=nrows, ncols=ncols)

# The two acceleration definitions; the 'True' panels fill the first half of
# the sub-figures, the 'Linreg' panels the second half.
accel_kinds = ('True', 'Linreg')
accel_columns = [
    f"{prefix} ({kind})"
    for kind in accel_kinds
    for prefix in ('Age acceleration', 'Acceleration type')
]

for plot_id, subject_id in enumerate(subject_ids):
    df_subject_id = pd.DataFrame(index=epi_ages, columns=accel_columns)
    df_subject_id['EpiAges'] = epi_ages

    # Acceleration of every clock for this sample, plus its sign
    for kind in accel_kinds:
        acc_col = f"Age acceleration ({kind})"
        type_col = f"Acceleration type ({kind})"
        source_cols = [f"{epiage} acceleration ({kind})" for epiage in epi_ages]
        df_subject_id.loc[epi_ages, acc_col] = df.loc[subject_id, source_cols].values
        df_subject_id.loc[df_subject_id[acc_col] > 0, type_col] = '+'
        df_subject_id.loc[df_subject_id[acc_col] <= 0, type_col] = '-'

    for kind_id, kind in enumerate(accel_kinds):
        acc_col = f"Age acceleration ({kind})"
        type_col = f"Acceleration type ({kind})"

        # Largest absolute acceleration on top; symmetric x range around zero
        df_subject_id.sort_values(by=acc_col, key=abs, ascending=False, inplace=True)
        max_x = df_subject_id[acc_col].abs().max()

        panel = subfigs[kind_id * len(subject_ids) + plot_id]
        axs = panel.subplot_mosaic([['11']])
        sns.barplot(
            data=df_subject_id,
            y='EpiAges',
            x=acc_col,
            edgecolor='black',
            palette={'+': 'crimson', '-': 'dodgerblue'},
            hue=type_col,
            ax=axs['11'],
            legend=False,
        )
        axs['11'].set_xlim([-max_x * 1.2, max_x * 1.2])
        axs['11'].set_ylabel('')
        axs['11'].set_title(f"{subject_id}: {df.at[subject_id, 'Age']:0.2f}")

fig.savefig(f"{path}/{subject}.png", bbox_inches='tight', dpi=200)
fig.savefig(f"{path}/{subject}.pdf", bbox_inches='tight')
plt.close(fig)

## Reports

In [None]:
subject = 'I1'
subject_ids = df[df['Subject_ID'] == subject].sort_values(by=['Age'], ascending=[True]).index.values

nrows = 1
ncols = len(subject_ids) * 2

# Mean absolute acceleration of every clock over all samples in `df`; used
# below as the scale against which a single sample's acceleration is judged.
df_epi_ages_mae = pd.DataFrame(index=epi_ages, columns=['True', 'Linreg'])
for epiage in epi_ages:
    df_epi_ages_mae.at[epiage, 'True'] = np.mean(np.abs(df[f"{epiage} acceleration (True)"].values))
    df_epi_ages_mae.at[epiage, 'Linreg'] = np.mean(np.abs(df[f"{epiage} acceleration (Linreg)"].values))

colors_clocks = {
    '+': 'crimson',
    '-': 'dodgerblue',
    '0': 'gainsboro'
}

# Wording added to the LLM prompt for each acceleration class
significance_labels = {
    '0': 'Нет статистической значимости',
    '+': 'Значительное ускорение',
    '-': 'Значительное замедление',
}

# A4 document with fixed margins
doc = Document()
doc.styles['Title'].font.size = Pt(16)
for section in doc.sections:
    section.page_width = Mm(210)
    section.page_height = Mm(297)
    section.top_margin = Mm(20)
    section.bottom_margin = Mm(20)
    section.left_margin = Mm(30)
    section.right_margin = Mm(15)

# The prompt template and the API client do not depend on the sample, so
# they are created once.
# NOTE(review): prompt.txt is read (and <sample>_prompt.txt written) with the
# platform default encoding, as before — confirm the template's encoding.
base_prompt = pathlib.Path(f"{path}/reports/prompt.txt").read_text()
# NOTE(review): the API key is blank in the source; supply it before running.
client = OpenAI(api_key="", base_url="https://api.deepseek.com")

for subject_id in subject_ids:
    print(subject_id)

    df_subject_id = pd.DataFrame(index=epi_ages, columns=['Acc/MAE (True)', 'Age acceleration (True)', 'Acceleration type (True)', 'Acc/MAE (Linreg)', 'Age acceleration (Linreg)', 'Acceleration type (Linreg)'])
    df_subject_id['EpiAges'] = epi_ages
    for kind in ('True', 'Linreg'):
        df_subject_id.loc[epi_ages, f'Age acceleration ({kind})'] = df.loc[subject_id, [f"{epiage} acceleration ({kind})" for epiage in epi_ages]].values
        for epiage in epi_ages:
            # Acceleration in units of the clock's mean absolute acceleration;
            # beyond +/-1 it is classed as acceleration / deceleration.
            ratio = df.at[subject_id, f"{epiage} acceleration ({kind})"] / df_epi_ages_mae.at[epiage, kind]
            df_subject_id.at[epiage, f'Acc/MAE ({kind})'] = ratio
            if ratio > 1.0:
                df_subject_id.at[epiage, f'Acceleration type ({kind})'] = '+'
            elif ratio < -1.0:
                df_subject_id.at[epiage, f'Acceleration type ({kind})'] = '-'
            else:
                df_subject_id.at[epiage, f'Acceleration type ({kind})'] = '0'

    # --- Bar plot of the regression-based acceleration, largest |value| first
    df_subject_id.sort_values(by='Age acceleration (Linreg)', key=abs, ascending=False, inplace=True)
    max_x = df_subject_id['Age acceleration (Linreg)'].abs().max()

    sns.set_theme(style='ticks')
    fig, ax = plt.subplots(figsize=(8, 8))
    barplot = sns.barplot(
        data=df_subject_id,
        y='EpiAges',
        x='Age acceleration (Linreg)',
        edgecolor='black',
        hue='Acceleration type (Linreg)',
        palette=colors_clocks,
        ax=ax,
    )
    ax.set_xlim([-max_x * 1.2, max_x * 1.2])
    ax.set_ylabel('')
    ax.set_title(f"Хронологический возраст = {df.at[subject_id, 'Age']:0.2f}", fontsize=16)
    plt.setp(ax.get_legend().get_texts(), fontsize=10)  # legend entries
    plt.setp(ax.get_legend().get_title(), fontsize=12)  # legend title
    plt.savefig(f"{path}/reports/{subject_id}.png", bbox_inches='tight', dpi=200)
    plt.savefig(f"{path}/reports/{subject_id}.pdf", bbox_inches='tight')
    plt.close(fig)

    # --- Prompt: template + age + one line per clock (corrected age, class)
    sample_prompt = base_prompt
    sample_prompt += '\n\n' + f"Age {df.at[subject_id, 'Age']:0.2f}"
    for epiage in epi_ages:
        sample_signicance = df_subject_id.at[epiage, 'Acceleration type (Linreg)']
        sample_signicance = significance_labels.get(sample_signicance, sample_signicance)
        sample_prompt += '\n' + f"{epiage} {df.at[subject_id, f'{epiage} corrected']:0.2f}" + f" ({sample_signicance})"
    pathlib.Path(f"{path}/reports/{subject_id}_prompt.txt").write_text(sample_prompt)

    # --- LLM request; token usage and the answer are stored next to the plots
    response = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "user", "content": sample_prompt},
        ],
        max_tokens=8192
    )
    df_llm_stat = pd.DataFrame(
        {
            'completion_tokens': response.usage.completion_tokens,
            'prompt_tokens': response.usage.prompt_tokens,
            'total_tokens': response.usage.total_tokens,
            'prompt_cache_hit_tokens': response.usage.prompt_cache_hit_tokens,
            'prompt_cache_miss_tokens': response.usage.prompt_cache_miss_tokens
        }.items(), columns=['Feature', 'Value']
    )
    df_llm_stat.to_excel(f"{path}/reports/{subject_id}_llm_stat.xlsx", index=False)
    Path(f"{path}/reports/{subject_id}_llm_content.txt").write_text(response.choices[0].message.content, encoding="utf-8")

    # --- Report section: heading, age/sex table, plot, LLM text
    doc.add_heading(f"Анализ эпигенетического возрастного ускорения для {subject_id}", level=0)

    table = doc.add_table(rows=1, cols=2)

    # Left cell: age. The cell's initial paragraph is replaced by a styled one.
    delete_paragraph(table.cell(0, 0).paragraphs[0])
    paragraph = table.cell(0, 0).add_paragraph(style='Normal')
    paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = paragraph.add_run('Возраст: ')
    run.font.color.rgb = RGBColor(0, 0, 0)
    run.font.size = Pt(14)
    run = paragraph.add_run(f"{df.at[subject_id, 'Age']:0.2f}")
    run.font.color.rgb = RGBColor(0, 0, 0)
    run.font.size = Pt(14)
    run.font.bold = True

    # Right cell: sex ('M' is rendered as male, anything else as female)
    delete_paragraph(table.cell(0, 1).paragraphs[0])
    paragraph = table.cell(0, 1).add_paragraph(style='Normal')
    paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
    run = paragraph.add_run('Пол: ')
    run.font.color.rgb = RGBColor(0, 0, 0)
    run.font.size = Pt(14)
    sex_str = df.at[subject_id, 'Sex']
    if sex_str == 'M':
        run = paragraph.add_run('Мужской')
    else:
        run = paragraph.add_run('Женский')
    run.font.color.rgb = RGBColor(0, 0, 0)
    run.font.size = Pt(14)
    run.font.bold = True

    paragraph = doc.add_paragraph()
    paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
    paragraph.add_run().add_picture(f"{path}/reports/{subject_id}.png", width=Mm(160))

    # The answer was written as UTF-8 above, so it must be read back as UTF-8
    # (the platform default encoding may differ).
    llm_answer = Path(f"{path}/reports/{subject_id}_llm_content.txt").read_text(encoding="utf-8")
    doc = markdown_to_docx(llm_answer, doc)
    doc.add_page_break()

doc.save(f"{path}/reports/{subject}.docx")
    