In [1]:
import chemprop
import os
import numpy as np
import pandas as pd

def get_evidential_unc(evidential_dir: str,
                       save_path: str,
                       num_folds: int = 5,
                       add_ground_truth: bool = True) -> None:
    """Write per-fold evidential predictions and uncertainties to CSV files.

    For every fold ``i`` in ``range(num_folds)`` the checkpoint directory
    ``<evidential_dir>/fold_<i>/model_0`` is used to predict on
    ``<evidential_dir>/fold_<i>/test_full.csv``, with features read from
    ``test_features.csv`` in the same folder. Predictions are requested twice,
    once with the ``evidential_epistemic`` and once with the
    ``evidential_aleatoric`` uncertainty method. The result is saved as
    ``<save_path>/evidential_fold_<i>_pred.csv`` with the columns ``pred``,
    ``evidential_epistemic`` and ``evidential_aleatoric``.

    Args:
        evidential_dir: Directory holding the ``fold_<i>`` sub-directories.
        save_path: Output directory; it is created if it does not exist.
        num_folds: Number of folds to process, starting at fold 0.
        add_ground_truth: If true, the columns of ``test_full.csv`` are placed
            in front of the prediction columns in the output file.

    Raises:
        ValueError: If the last axis of the stacked predictions does not have
            length one (presumably a multi-target model -- the output table
            only has room for a single value per quantity).
    """
    if save_path:
        # An empty string would make os.makedirs raise; skip creation then.
        os.makedirs(save_path, exist_ok=True)

    df_names = ['pred', 'evidential_epistemic', 'evidential_aleatoric']

    for fold_i in range(num_folds):
        fold_dir = os.path.join(evidential_dir, f'fold_{fold_i}')
        cp_dir = os.path.join(fold_dir, 'model_0')
        pred_data_path = os.path.join(fold_dir, 'test_full.csv')
        pred_feature_path = os.path.join(fold_dir, 'test_features.csv')
        arguments = [
            '--test_path', pred_data_path,
            # Predictions are consumed in memory; discard chemprop's own file.
            '--preds_path', os.devnull,
            '--checkpoint_dir', cp_dir,
            '--uncertainty_method', 'evidential_epistemic',
            '--features_path', pred_feature_path,
        ]

        args = chemprop.args.PredictArgs().parse_args(arguments)
        preds_epi = chemprop.train.make_predictions(args=args, return_uncertainty=True)
        # Re-use the parsed arguments and only switch the uncertainty method.
        args.uncertainty_method = 'evidential_aleatoric'
        preds_ale = chemprop.train.make_predictions(args=args, return_uncertainty=True)

        # Each result is treated as a (predictions, uncertainties) pair; only
        # the uncertainty half of the second call is kept.
        # NOTE(review): axes are presumably (quantity, row, target) -- confirm
        # against the chemprop version in use.
        ale = np.array(preds_ale)[1:2, :, :]
        stacked = np.concatenate([np.array(preds_epi), ale], axis=0)
        # Squeeze only the last axis. A bare np.squeeze would also drop the
        # row axis for a one-row test set and break the DataFrame construction.
        pred_fold = np.squeeze(stacked, axis=2).T

        evi_df = pd.DataFrame(data=pred_fold, columns=df_names)
        if add_ground_truth:
            ground_truth = pd.read_csv(pred_data_path)
            evi_df = pd.concat([ground_truth, evi_df], axis=1)
        out_file = os.path.join(save_path, f'evidential_fold_{fold_i}_pred.csv')
        evi_df.to_csv(out_file, index=False)

In [2]:
%%capture

# Checkpoints are read from ../trained_models/bimol01; CSVs go to ./bimol01.
cp_path = os.path.join('../trained_models', 'bimol01')
save_path = 'bimol01'
# exist_ok=True replaces the separate existence check and avoids its race.
os.makedirs(save_path, exist_ok=True)
get_evidential_unc(evidential_dir=cp_path, save_path=save_path)