From db49b7d0f053de51c22a7859fbe22cbe3e0b8d82 Mon Sep 17 00:00:00 2001 From: GilesStrong Date: Wed, 13 Oct 2021 10:56:29 +0200 Subject: [PATCH] Fix bootstrap stats and add median --- CHANGES.md | 2 ++ lumin/utils/statistics.py | 31 +++++++++++++++++-------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 687ea61..deb89e4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -7,6 +7,7 @@ ## Additions - `plot_feat` now shows a bar plot for categorical data +- `bootstrap_stats` now also computes the median ## Removals @@ -19,6 +20,7 @@ - Fixes to do with the number of batches to expect when running few number of folds than the `FoldYielder` contains. - Correctly implements leave-one-out for the training folds - renamed `n_folds` to `n_repeats` to more accurately reflect its role. +- `bootstrap_stats` corrected computation of central 68% CI: was `np.percentile(np.abs(points), 68.2)` now `(np.percentile(points, 84.135)-np.percentile(points, 15.865))/2` ## Changes diff --git a/lumin/utils/statistics.py b/lumin/utils/statistics.py index bc28333..9fb8bac 100644 --- a/lumin/utils/statistics.py +++ b/lumin/utils/statistics.py @@ -30,14 +30,15 @@ def bootstrap_stats(args:Dict[str,Any], out_q:Optional[mp.Queue]=None) -> Union[ Result dictionary if `out_q` is `None` else `None`. 
''' - out_dict, mean, std, c68, boot = {}, [], [], [], [] + out_dict, mean, median, std, c68, boot = {}, [], [], [], [], [] name = '' if 'name' not in args else args['name'] weights = None if 'weights' not in args else args['weights'] - if 'n' not in args: args['n'] = 100 - if 'kde' not in args: args['kde'] = False - if 'mean' not in args: args['mean'] = False - if 'std' not in args: args['std'] = False - if 'c68' not in args: args['c68'] = False + if 'n' not in args: args['n'] = 100 + if 'kde' not in args: args['kde'] = False + if 'mean' not in args: args['mean'] = False + if 'median' not in args: args['median'] = False + if 'std' not in args: args['std'] = False + if 'c68' not in args: args['c68'] = False if args['kde'] and args['data'].dtype != 'float64': data = np.array(args['data'], dtype='float64') else: data = args['data'] len_d = len(data) @@ -49,14 +50,16 @@ def bootstrap_stats(args:Dict[str,Any], out_q:Optional[mp.Queue]=None) -> Union[ kde = KDEUnivariate(points) kde.fit() boot.append([kde.evaluate(x) for x in args['x']]) - if args['mean']: mean.append(np.mean(points)) - if args['std']: std.append(np.std(points, ddof=1)) - if args['c68']: c68.append(np.percentile(np.abs(points), 68.2)) - - if args['kde']: out_dict[f'{name}_kde'] = boot - if args['mean']: out_dict[f'{name}_mean'] = mean - if args['std']: out_dict[f'{name}_std'] = std - if args['c68']: out_dict[f'{name}_c68'] = c68 + if args['mean']: mean.append(np.mean(points)) + if args['median']: median.append(np.median(points)) + if args['std']: std.append(np.std(points, ddof=1)) + if args['c68']: c68.append((np.percentile(points, 84.135)-np.percentile(points, 15.865))/2) + + if args['kde']: out_dict[f'{name}_kde'] = boot + if args['mean']: out_dict[f'{name}_mean'] = mean + if args['median']: out_dict[f'{name}_median'] = median + if args['std']: out_dict[f'{name}_std'] = std + if args['c68']: out_dict[f'{name}_c68'] = c68 if out_q is not None: out_q.put(out_dict) else: return out_dict