In [1]:
import os.path

import matplotlib.pyplot as plt
from matplotlib import ticker

plt.style.use('seaborn-v0_8-paper')
import seaborn as sns

import pickle

import pandas as pd
import numpy as np

from tqdm.auto import tqdm

from warnings import simplefilter
# Suppress every FutureWarning and UserWarning for the rest of the session.
# NOTE(review): this is a global filter, so warnings raised by any library are
# hidden as well, not only the ones that motivated the filter.
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=UserWarning)

# Tell NumPy to ignore all floating-point error conditions instead of warning.
np.seterr(all='ignore')

from utilities import (SamplingDistribution, evaluate_custom_oversampling, evaluate_custom_oversampling_biochem,
                       serialize_xgboost_classifier, prepare_biospectra_inputs, make_biospectra_predictions)

Constants.

In [2]:
# Every path below is resolved relative to ROOT.
ROOT = '.'
# Figures and analysis tables are written to (and partly read back from) here.
OUT_FOLDER = ROOT + '/results'
# Input workbook plus the cached pickle/feather intermediates live here.
DATA_FOLDER = ROOT + '/data'
# Excel workbook providing every sheet read by this notebook.
FILE = DATA_FOLDER + '/Supp. File 1.xlsx'
# Label column used as the modelling endpoint.
ENDPOINT = 'binaryDILI'
# Forwarded to the oversampling helpers as their job-count argument.
# NOTE(review): -1 presumably means "use all cores" — confirm in utilities.
N_JOBS = -1

Define the distributions from which the features of new negative molecules are sampled.

In [3]:
# Five sampling distributions whose first argument runs from 1 to 5; both
# boolean flags are always True.
# NOTE(review): the first argument is presumably the number of standard
# deviations (results are later filtered on `num_stds` 1-5) — confirm against
# utilities.SamplingDistribution.
distribs = [SamplingDistribution(width, True, True) for width in range(1, 6)]

Set up the oversampling parameters.

In [4]:
# Endpoint column (same value as assigned in the constants cell)
ENDPOINT = 'binaryDILI'

# --- Oversampling parameters ---
# Number of sampling rounds (default=5)
N_OVERSAMPLING = 5
# Minimum number of oversampled molecules to include (default=16)
N_FROM = 0
# Maximum number of oversampled molecules to include (default=251)
N_TO = 251
# Incremental step of molecules to include in the oversampling (default=10, extended=1)
N_STEP = 10
# Number of model fitting with different random seeds (default=3, extended=15)
N_REPEATS = 3

Load the different data sets.

In [5]:
# Labels of the tested ('original') and complementary ('additional') molecules,
# stacked row-wise; the `source` column records which sheet each row came from.
labels = pd.concat(
    [pd.read_excel(FILE, sheet_name=sheet_name,
                   usecols=['InChIKey', 'binaryDILI', 'vDILIConcern'])
       .assign(source=source)
     for sheet_name, source in (('Original Dataset Labels', 'original'),
                                ('Additional Set MolDescs', 'additional'))],
    axis=0)

In [6]:
# Names of the features whose `neg_interval_in_pos_interval` flag is False in
# the previously exported interval table; these are removed from the
# biological descriptors further down.
non_normal_features = (
    pd.read_excel(f'{OUT_FOLDER}/DILI_kept_features_in_interval_pos_neg.xlsx')
      .loc[lambda frame: frame['neg_interval_in_pos_interval'].eq(False), 'feature']
)

In [7]:
# Biological descriptors of the tested molecules: keep only rows marked as
# "Included", then drop the bookkeeping columns and the features listed in
# `non_normal_features`.
data_bio = pd.read_excel(FILE, sheet_name='Original Dataset BioDescs')
data_bio = (data_bio[data_bio['Inclusion'] == 'Included']
            .drop(columns=['Name', 'SMILES', 'Inclusion'])
            .drop(columns=non_normal_features))


def _bio_columns(readout):
    """Return the `data_bio` column labels of the form `..._cmax<N>_tp<N>_<readout>...`.

    The pattern is built from a raw f-string so the regex digit class is not
    interpreted as a (deprecated) invalid string escape sequence.
    """
    return data_bio.columns[data_bio.columns.str.contains(rf'^.*_cmax\d+_tp\d+_{readout}')]


# Column selections, one per readout family
GFP_level_filter = _bio_columns('Gfp')
PI_fraction_filter = _bio_columns('Pi')
AnV_fraction_filter = _bio_columns('Annexin')
integrated_intensity_filter = _bio_columns('CytoplasmNucleiIntegratedIntensity')
mean_intensity_filter = _bio_columns('CytoplasmNucleiMeanIntensity')
DMSO_filter = _bio_columns('Dmso')
normcounts_filter = _bio_columns('Norm')
parentobj_filter = _bio_columns('ImageCountParentObj')

# Attach the labels to the descriptors and discard rows with any missing value
full_set_bio = labels.merge(data_bio, on='InChIKey').drop(['InChIKey', 'source'], axis=1).dropna()
full_set_bio_Y = full_set_bio[[ENDPOINT]].values.ravel()
full_set_bio_X = full_set_bio.drop(['vDILIConcern', 'binaryDILI'], axis=1)


def _labelled_subset(*column_sets):
    """Return `binaryDILI` followed by the given column selections of `full_set_bio`."""
    return pd.concat([full_set_bio.binaryDILI,
                      *(full_set_bio[columns] for columns in column_sets)],
                     axis=1)


# Feature subsets evaluated by the oversampling analysis. Each frame carries
# the `binaryDILI` label column next to its features.
# NOTE(review): the label column is hard-coded to `binaryDILI` here even though
# ENDPOINT is configurable — keep the two in sync if ENDPOINT ever changes.
bio_datasets = {## All biological features
                'All': full_set_bio.drop(['vDILIConcern', 'CMAX_uM'], axis=1),
                # GFP features
                'GFP': _labelled_subset(GFP_level_filter),
                # PI features
                'PI': _labelled_subset(PI_fraction_filter),
                # AnV features
                'AnV': _labelled_subset(AnV_fraction_filter),
                ## Integrated intensity features
                'IntegratedIntensity': _labelled_subset(integrated_intensity_filter),
                ## Mean intensity features
                'MeanIntensity': _labelled_subset(mean_intensity_filter),
                ## DMSO features
                'DMSO': _labelled_subset(DMSO_filter),
                ## Normalized counts features
                'NormCounts': _labelled_subset(normcounts_filter),
                ## Parent objects features
                'NumObjects': _labelled_subset(parentobj_filter),
                # PI & AnV
                'Anv-PI': _labelled_subset(PI_fraction_filter, AnV_fraction_filter),
                # GFP & PI & AnV
                'GFP-Anv-PI': _labelled_subset(GFP_level_filter, PI_fraction_filter, AnV_fraction_filter),
                }

In [8]:
# Molecular descriptors of the tested molecules
data_chem = pd.read_excel(FILE, sheet_name='Original Dataset MolDescs')

# Expand the ECFP_6 string into one integer column per character, named
# ECFP_6_1, ECFP_6_2, ... in character order.
ECFP6 = (data_chem['ECFP_6']
         .str.extractall('(.)')[0]
         .unstack()
         .astype(int)
         .rename_axis(index=None, columns=None)
         .rename(columns=lambda position: f'ECFP_6_{position+1}'))

# Replace the SMILES and fingerprint string columns with the expanded columns
data_chem = pd.concat([data_chem.drop(columns=['SMILES', 'ECFP_6']), ECFP6], axis=1)

In [9]:
# Molecular descriptors of the complementary DILIRank non-tested molecules;
# only the identifier, the two label columns and the listed descriptors are read.
comp_data = pd.read_excel(
    FILE,
    sheet_name='Additional Set MolDescs',
    usecols=['InChIKey', 'binaryDILI', 'vDILIConcern', 'ALogP',
             'Molecular_Weight', 'Molecular_Solubility', 'H_Count', 'C_Count',
             'N_Count', 'O_Count', 'F_Count', 'S_Count', 'Cl_Count',
             'Num_H_Acceptors_Lipinski', 'Num_H_Donors_Lipinski', 'JY', 'Wiener',
             'CHI_V_3_P', 'CHI_V_3_C', 'ES_Sum_sCH3', 'ES_Sum_ssCH2', 'ES_Sum_dsCH',
             'ES_Sum_aaCH', 'ES_Sum_sssCH', 'ES_Sum_dssC', 'ES_Sum_aasC',
             'ES_Sum_aaaC', 'ES_Sum_ssssC', 'ES_Sum_sNH2', 'ES_Sum_ssNH',
             'ES_Sum_aaN', 'ES_Sum_sssN', 'ES_Sum_ddsN', 'ES_Sum_sOH', 'ES_Sum_dO',
             'ES_Sum_ssO', 'ES_Sum_ssS', 'Kappa_3_AM', 'PHI', 'ECFP_6'],
)

# Expand the ECFP_6 string into one integer column per character, named
# ECFP_6_1, ECFP_6_2, ... in character order.
ECFP6 = (comp_data['ECFP_6']
         .str.extractall('(.)')[0]
         .unstack()
         .astype(int)
         .rename_axis(index=None, columns=None)
         .rename(columns=lambda position: f'ECFP_6_{position+1}'))

# Replace the fingerprint string column with the expanded columns
comp_data = pd.concat([comp_data.drop(columns=['ECFP_6']), ECFP6], axis=1)

In [10]:
# Per-feature summary statistics of the tested molecules whose endpoint is 0,
# computed on the feature columns only (labels and CMAX_uM removed).
statistics = (full_set_bio[full_set_bio[ENDPOINT] == 0]
              .drop(columns=['vDILIConcern', 'binaryDILI', 'CMAX_uM'])
              .describe())
# Extra row: spread between the largest and smallest observed value
statistics.loc['range'] = statistics.loc['max'] - statistics.loc['min']
# Extra row: mean-minus-median offset, scaled by the range
statistics.loc['normal'] = (statistics.loc['mean'] - statistics.loc['50%']) / statistics.loc['range']

In [11]:
# Bioactivity spectra for both compound sets
data_biospectra = pd.read_excel(FILE, sheet_name='Bioactivity spectra')

# Molecular descriptors of the tested molecules, extended (via InChIKey) with
# the spectra of the included compounds of the original set.
data_chem_biospectra = data_chem.merge(
    data_biospectra[(data_biospectra['Set'] == 'Original set')
                    & (data_biospectra['Inclusion'] == 'Included')]
    .drop(columns=['Name', 'SMILES', 'Inclusion', 'Set']),
    on='InChIKey')

In [12]:
# Complementary-set descriptors extended with the spectra of the additional
# set. The two frames are glued side by side by row position, not by key.
# NOTE(review): this assumes both sheets list the additional compounds in the
# same order — confirm, or merge on InChIKey instead.
comp_data_biospectra = pd.concat(
    (comp_data,
     data_biospectra[data_biospectra['Set'] == 'Additional set']
     .drop(columns=['InChIKey', 'Name', 'SMILES', 'Inclusion', 'Set'])
     .reset_index(drop=True)),
    axis=1,
).drop(columns='InChIKey')

Save the datasets to pickle files.

In [None]:
# Cached objects, each stored as `{DATA_FOLDER}/<name>.pkl`.
cache_files = {name: f'{DATA_FOLDER}/{name}.pkl'
               for name in ('data_chem', 'data_chem_biospectra', 'statistics',
                            'comp_data', 'comp_data_biospectra', 'bio_datasets')}

if not all(os.path.exists(path) for path in cache_files.values()):
    # At least one cache file is missing: (re)write all of them from the
    # objects built above, so a partially populated cache cannot send the
    # notebook into the load branch and fail on the missing file.
    #
    # The complementary sets are cached without the InChIKey identifier. The
    # in-memory frames are updated as well so that later cells see the same
    # columns whether this cell saved or loaded. `errors='ignore'` is needed
    # because `comp_data_biospectra` had its InChIKey removed when it was built.
    comp_data = comp_data.drop(columns=['InChIKey'], errors='ignore')
    comp_data_biospectra = comp_data_biospectra.drop(columns=['InChIKey'], errors='ignore')
    to_cache = {'data_chem': data_chem,
                'data_chem_biospectra': data_chem_biospectra,
                'statistics': statistics,
                'comp_data': comp_data,
                'comp_data_biospectra': comp_data_biospectra,
                'bio_datasets': bio_datasets}
    for name, obj in to_cache.items():
        with open(cache_files[name], 'wb') as oh:
            pickle.dump(obj, oh)
else:
    # Every cache file exists: replace the in-memory objects with the cached ones.
    # SECURITY: pickle.load executes arbitrary code from the file; only load
    # caches that were written by this notebook.
    loaded = {}
    for name, path in cache_files.items():
        with open(path, 'rb') as fh:
            loaded[name] = pickle.load(fh)
    bio_datasets = loaded['bio_datasets']
    statistics = loaded['statistics']
    data_chem = loaded['data_chem']
    comp_data = loaded['comp_data']
    data_chem_biospectra = loaded['data_chem_biospectra']
    comp_data_biospectra = loaded['comp_data_biospectra']

Perform custom oversampling.

In [None]:
# The analysis is expensive, so it only runs when its result file is absent.
if not os.path.exists(f'{DATA_FOLDER}/DILI_complete_oversampling_analysis2024.feather'):

    def _tag_results(frame, dataset_name, descriptors):
        """Prepend constant `dataset` and `descriptors` columns to `frame`.

        The tag columns are sized from `frame` itself, so they always match the
        frame they describe.
        """
        n_rows = frame.shape[0]
        return pd.concat([pd.Series([dataset_name] * n_rows, name='dataset'),
                          pd.Series([descriptors] * n_rows, name='descriptors'),
                          frame.reset_index(drop=True)],
                         axis=1)

    # Perform oversampling
    result = []
    # Three evaluations (bio, biochem, biospectra) per dataset; the context
    # manager closes the progress bar even if an evaluation raises.
    with tqdm(total=len(bio_datasets) * 3, smoothing=0.0) as pbar:
        for dataset_name, dataset in bio_datasets.items():
            pbar.set_description(f'{dataset_name} bio', refresh=True)
            # Build models based only on biological descriptors
            cstm_os_bio = evaluate_custom_oversampling(N_OVERSAMPLING, N_FROM, N_TO, N_STEP, distribs,
                                                       statistics,
                                                       dataset,
                                                       ENDPOINT, N_REPEATS, 1234, N_JOBS, True)
            cstm_os_bio = _tag_results(cstm_os_bio, dataset_name, 'bio')
            _ = pbar.update()
            # Build models based on biological and molecular descriptors
            pbar.set_description(f'{dataset_name} biochem', refresh=True)
            cstm_os_biochem = evaluate_custom_oversampling_biochem(
                N_OVERSAMPLING, N_FROM, N_TO, N_STEP, distribs,
                statistics,
                dataset,
                # One compound is excluded by InChIKey; the identifier column itself is not a feature.
                data_chem[data_chem.InChIKey != 'AOJJSUZBOXZQNB-TZSSRYMLSA-N'].drop(columns=['InChIKey']),
                # `comp_data` still carries InChIKey when it was built in this
                # session rather than loaded from the cache; drop it if present.
                comp_data.drop(columns=['vDILIConcern']).drop(columns=['InChIKey'], errors='ignore'),
                ENDPOINT, N_REPEATS, 1234, N_JOBS, True)
            # Tag with the size of the biochem frame (previously the size of the
            # bio frame was used, which misaligns the tags if the sizes differ).
            cstm_os_biochem = _tag_results(cstm_os_biochem, dataset_name, 'biochem')
            _ = pbar.update()
            # Build models based on biological, molecular descriptors, and predicted bioactivity spectra
            pbar.set_description(f'{dataset_name} biospectra', refresh=True)
            cstm_os_biospectra = evaluate_custom_oversampling_biochem(
                N_OVERSAMPLING, N_FROM, N_TO, N_STEP, distribs,
                statistics,
                dataset,
                data_chem_biospectra[data_chem_biospectra.InChIKey != 'AOJJSUZBOXZQNB-TZSSRYMLSA-N'].drop(columns='InChIKey'),
                comp_data_biospectra.drop(columns=['vDILIConcern']),
                ENDPOINT, N_REPEATS, 1234, N_JOBS, True)
            cstm_os_biospectra = _tag_results(cstm_os_biospectra, dataset_name, 'biospectra')
            _ = pbar.update()
            # Accumulate in a temporary list
            result.append(pd.concat([cstm_os_bio, cstm_os_biochem, cstm_os_biospectra]).reset_index(drop=True))

    # Concatenate all results. reset_index() without drop=True also stores the
    # per-dataset row number as an `index` column; kept so the file layout
    # matches result files that were already written.
    all_results = pd.concat(result).reset_index()
    # Serialize models
    all_results['model'] = all_results.model.apply(serialize_xgboost_classifier)
    # Save results (including serialized models) to a feather file
    all_results.to_feather(f'{DATA_FOLDER}/DILI_complete_oversampling_analysis2024.feather')
    del result
    del all_results

Plot the custom oversampling results.

In [14]:
# Read back the oversampling results written by the oversampling step.
cstm_os_all = pd.read_feather(DATA_FOLDER + '/DILI_complete_oversampling_analysis2024.feather')

In [15]:
# Metrics drawn as the rows of every figure, top to bottom.
measures = ['MCC', 'Balanced Accuracy', 'Sensitivity', 'Specificity', 'AUC']

# One figure per (dataset, number of stds): rows are metrics, columns are the
# descriptor sets. Each panel shows the mean over runs as a line and the
# mean +/- one standard deviation as a shaded band.
# NOTE(review): seaborn's axes_style() only applies rc keys that are part of its
# style definition; 'font.size' does not appear to be one of them, so it is
# probably ignored here — confirm, and use plt.rc_context if 6 pt is intended.
with sns.axes_style('white', rc={'xtick.bottom': True, 'ytick.left': True, 'font.size': 6},):
    colors = sns.color_palette('colorblind')
    for dataset in tqdm(cstm_os_all.dataset.unique()):
        for std in range(1, 6):
            fig = plt.figure(figsize=(6, 10))
            axes = fig.subplots(5, 3, sharey=False, sharex=False)
            for i_column, descriptor in enumerate(['bio', 'biochem', 'biospectra']):
                # Mean and standard deviation of every metric per number of added samples
                mean_cstm_os_bio = (
                    cstm_os_all
                    .query(f'descriptors == "{descriptor}" and dataset == "{dataset}" and num_stds == {std}')
                    .drop(['0:1', 'F1', 'AUC 0'], axis=1)
                    .rename(columns={'AUC 1': 'AUC', 'Acc' : 'Accuracy',
                                     'BAcc' : 'Balanced Accuracy', 'Sen' : 'Sensitivity',
                                     'Spe' : 'Specificity'})
                    .groupby(['samples added'], as_index=False)
                    .agg({metric: ['mean', 'std']
                          for metric in ('MCC', 'Accuracy', 'Balanced Accuracy', 'Sensitivity',
                                         'Specificity', 'NPV', 'PPV', 'AUC')})
                )
                # Nothing stored for this combination: leave the column of panels empty
                if mean_cstm_os_bio.empty:
                    continue

                # Flatten the (metric, statistic) column pairs into "metric statistic" names
                mean_cstm_os_bio.columns = [f'{metric} {stat}' if len(stat) else metric
                                            for metric, stat in mean_cstm_os_bio.columns]
                mean_cstm_os_bio = mean_cstm_os_bio.melt(id_vars=['samples added'])

                # Split "metric statistic" back into two columns. The space inside
                # "Balanced Accuracy" is masked with an underscore while splitting
                # and restored afterwards.
                mean_cstm_os_bio = pd.concat(
                    (mean_cstm_os_bio.iloc[:, 0],
                     pd.DataFrame(mean_cstm_os_bio.variable
                                  .str.replace('Balanced Accuracy', 'Balanced_Accuracy')
                                  .str.split(' ')
                                  .tolist(),
                                  columns=['variable_0', 'variable_1']),
                     mean_cstm_os_bio.iloc[:, 2:]),
                    axis=1).replace('Balanced_Accuracy', 'Balanced Accuracy')

                # One row per (samples added, metric) with `mean` and `std` columns
                plot_cstm_os_bio = mean_cstm_os_bio.pivot_table(index=['samples added', 'variable_0'],
                                                                columns='variable_1')
                plot_cstm_os_bio.columns = plot_cstm_os_bio.columns.droplevel().rename(None)
                plot_cstm_os_bio = plot_cstm_os_bio.reset_index().rename(columns={'variable_0': 'measure'})
                # Lower and upper edge of the shaded band
                plot_cstm_os_bio['min'] = plot_cstm_os_bio['mean'] - plot_cstm_os_bio['std']
                plot_cstm_os_bio['max'] = plot_cstm_os_bio['mean'] + plot_cstm_os_bio['std']

                for ax, (i_measure, measure), color in zip(axes[:, i_column], enumerate(measures), colors):
                    # Rows of the current metric only
                    subset = plot_cstm_os_bio.loc[plot_cstm_os_bio['measure'] == measure, :]
                    sns.lineplot(x='samples added', y='mean', color=color,
                                 data=subset, ax=ax, linewidth=0.5)
                    ax.fill_between(x=subset['samples added'],
                                    y1=subset['min'],
                                    y2=subset['max'],
                                    color=color,
                                    edgecolor='none',
                                    alpha=0.3)
                    ax.xaxis.set_major_locator(ticker.MultipleLocator(base=100))
                    ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=25))
                    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.5))
                    ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.1))
                    ax.yaxis.set_major_formatter(ticker.ScalarFormatter())

                    # x label only under the bottom-middle panel, y label only in the
                    # left column, title only on the top row.
                    is_bottom_middle = i_measure == len(measures) - 1 and i_column == 1
                    xlabel = 'Additional negative compounds' if is_bottom_middle else ''
                    ylabel = measure if i_column == 0 else ''
                    title = descriptor if i_measure == 0 else ''
                    ax.set(xlabel=xlabel, ylabel=ylabel, title=title)

                    # Only the left column keeps its y tick labels
                    if i_column > 0:
                        ax.set_yticklabels([])
                    ax.set_ylim(0, 1.09)
                    ax.set_xlim(0, 250)
                    ax.tick_params(axis='y', which='minor', length=3)
                    ax.tick_params(axis='x', which='minor', length=3)
                    for ytick in ax.get_yticklines():
                        ytick.set_color('grey')
            fig.suptitle(f"{dataset} {std} std")
            plt.subplots_adjust(wspace=0.05)
            fig.savefig(fr'{OUT_FOLDER}/{dataset}_{std}_std.svg')
            # Release the figure so memory does not grow across the loop
            plt.close(fig)

  0%|          | 0/11 [00:00<?, ?it/s]

Determine the minimum number of negative samples that must be added to reach a given specificity.

In [17]:
# For every dataset / number of stds / descriptor set, record the baseline
# performance and, for each specificity threshold, the smallest number of added
# negative samples whose mean specificity exceeds it, together with the mean
# performance at that point. Skipped when the result file already exists.
if not os.path.exists(f'{OUT_FOLDER}/Analysis_DILI_num_samples_to_high_specificity.feather'):
    metric_names = ['MCC', 'BAcc', 'Sen', 'Spe']
    spe_thresholds = (0.70, 0.80, 0.90, 0.95)
    # Dataset names as stored in the results, keyed case-insensitively. The
    # requested names below spell 'AnV-PI' / 'GFP-AnV-PI' while the datasets are
    # built under the keys 'Anv-PI' / 'GFP-Anv-PI'; an exact-match query would
    # silently skip them.
    stored_datasets = {str(name).casefold(): name for name in cstm_os_all.dataset.unique()}

    ttperfs = []
    for requested_dataset in tqdm(['All', 'AnV', 'AnV-PI', 'GFP', 'GFP-AnV-PI', 'PI',
                                   'IntegratedIntensity', 'MeanIntensity', 'DMSO', 'NormCounts', 'NumObjects']):
        dataset = stored_datasets.get(requested_dataset.casefold())
        if dataset is None:
            print(f'No oversampling results stored for dataset "{requested_dataset}"; skipping.')
            continue
        for std in range(1, 6):
            for descriptor in ['bio', 'biochem', 'biospectra']:
                tmp = cstm_os_all.query(f'descriptors == "{descriptor}" and dataset == "{dataset}" and num_stds == {std}')
                # Mean of each metric per number of added samples
                tmp = tmp.groupby('samples added').agg({'MCC': 'mean', 'BAcc': 'mean', 'Sen': 'mean', 'Spe': 'mean'})
                # Combinations with fewer than 10 distinct sample counts are not analysed
                if tmp.empty or len(tmp) < 10:
                    continue
                # Baseline performance: the row with the fewest added samples
                baseline_perf = tmp[tmp.index == tmp.index.min()].reset_index(drop=True).T.squeeze()

                columns = [baseline_perf.rename('baseline').reset_index(drop=True)]
                for threshold in spe_thresholds:
                    # First row (lowest number of added samples) above the threshold
                    first_hit = tmp[tmp.Spe > threshold].iloc[:1]
                    if first_hit.empty:
                        # Threshold never reached: no sample count, no performance
                        num_samples = np.nan
                        perf = pd.Series([None] * len(metric_names), index=metric_names)
                    else:
                        num_samples = first_hit.index.item()
                        perf = first_hit.reset_index(drop=True).T.squeeze()
                    columns.append(pd.Series([num_samples] * len(metric_names),
                                             name=f'#samples Spe > {threshold:.2f}'))
                    columns.append(perf.rename(f'perf at Spe > {threshold:.2f}').reset_index(drop=True))

                # Combine: one row per metric, indexed by (dataset, num_stds, descriptor, metric)
                combined = (pd.concat(columns, axis=1)
                              .set_index(pd.MultiIndex.from_tuples([(dataset, std, descriptor, metric)
                                                                    for metric in baseline_perf.index],
                                                                   names=["dataset", "num_stds", "descriptor", "metric"]))
                            )
                ttperfs.append(combined)

    ttperfs = pd.concat(ttperfs)
    # NOTE(review): the frame has a MultiIndex; some pandas versions refuse to
    # write a non-default index to feather — confirm with the pandas version in use.
    ttperfs.to_feather(f'{OUT_FOLDER}/Analysis_DILI_num_samples_to_high_specificity.feather')

  0%|          | 0/11 [00:00<?, ?it/s]