Skip to content

Commit

Permalink
Off by one in find (#100)
Browse files Browse the repository at this point in the history
* fix off by one error

* fix off by one error

* add comments

---------

Co-authored-by: J.R. Angevaare <joran.angevaare@gmail.com>
  • Loading branch information
JoranAngevaare and J.R. Angevaare committed Jul 31, 2023
1 parent a2bc661 commit ab126ce
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 16 deletions.
30 changes: 16 additions & 14 deletions optim_esm_tools/analyze/find_matches.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,9 @@ def find_matches(
Returns:
list: of matches corresponding to the query
"""
log = get_logger()
if grid is not None:
get_logger().warning(
f'grid argument for find_matches is deprecated, use grid_label'
)
log.error(f'grid argument for find_matches is deprecated, use grid_label')
grid_label = grid
if max_versions is None:
max_versions = int(9e9)
Expand All @@ -88,21 +87,24 @@ def find_matches(
for candidate in variants:
folders = candidate.split(os.sep)
group = folders[-7]
member = folders[-5]
version = folders[-1]

if group not in seen:
seen[group] = defaultdict(list)
seen_members = seen[group]
if len(seen_members) < max_members or member in seen_members:
if required_file and required_file not in os.listdir(candidate):
get_logger().warning(f'{required_file} not in {candidate}')
continue
if len(seen_members.get(version, [])) == max_versions:
continue
if is_excluded(candidate):
continue
seen_members[version].append(candidate)

if (
len(seen_members) == max_versions
and len(seen_members.get(version, [])) == max_members
):
continue
if required_file and required_file not in os.listdir(candidate):
log.warning(f'{required_file} not in {candidate}')
continue
if is_excluded(candidate):
log.info(f'{candidate} is excluded')
continue
seen_members[version].append(candidate)

return [
folder
Expand Down Expand Up @@ -212,7 +214,7 @@ def associate_historical(
search['experiment_id'] = match_to
if search_kw:
search.update(search_kw)
print(search)

if query_updates is None:
query_updates = [
dict(),
Expand Down
22 changes: 21 additions & 1 deletion optim_esm_tools/analyze/time_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,23 @@ def __init__(self, data_set: xr.Dataset, calculation_kwargs=None) -> None:
def calculate_statistics(self) -> ty.Dict[str, ty.Optional[float]]:
"""
For a given dataset calculate the statistical properties of the dataset based on three tests:
1. The standard deviation w.r.t. the standard
1. The standard deviation w.r.t. the standard deviation of the piControl run
2. The p-value of the "dip test" [1]
3. The p-value of the Skewness test [2]
Citations:
[1]:
Hartigan, P. M. (1985). Computation of the Dip Statistic to Test for Unimodality.
Journal of the Royal Statistical Society. Series C (Applied Statistics), 34(3),
320-325.
Code from:
https://pypi.org/project/diptest/
[2]:
R. B. D'Agostino, A. J. Belanger and R. B. D'Agostino Jr., "A suggestion for using
powerful and informative tests of normality", American Statistician 44, pp.
316-321, 1990.
Code from:
https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skewtest.html
Returns:
ty.Dict[str, ty.Optional[float]]: the calculated statistics; a value is None when it could not be computed
Expand Down Expand Up @@ -83,6 +99,10 @@ def calculate_skewtest(ds, field=None):
import scipy

values = get_values_from_data_set(ds, field, add='')
if sum(~np.isnan(values)) < 8:
# scipy.stats.skewtest needs at least 8 valid (non-NaN) samples
oet.config.get_logger().error('Dataset too short for skewtest')
return None
return scipy.stats.skewtest(values, nan_policy='omit').pvalue


Expand Down
8 changes: 7 additions & 1 deletion optim_esm_tools/optim_esm_conf.ini
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,11 @@ excluded =
AWI AWI-CM-1-1-MR * * Oyear * gn v20181218

# This dataset contains overlapping time stamps
NCC NorESM2-MM historical r3i1p1f1 * * gn v20200702
NCC NorESM2-MM historical r3i1p1f1 * * gn v20200702

; Maybe we want to exclude this dataset? It appears to have 1s for sos over Greenland rather than NaNs.
; E3SM-Project E3SM-1-1 piControl r1i1p1f1 Oyear sos gr *


## Short datasets
; HAMMOZ-Consortium MPI-ESM-1-2-HAM ssp370 r3i1p1f1 tas * * v20191218
Expand All @@ -73,6 +77,8 @@ excluded =
; # Projection fails
; DKRZ MPI-ESM1-2-LR ssp119 r1i1p1f1 siconc * * v20210901



[log]
logging_level = WARNING

Expand Down

0 comments on commit ab126ce

Please sign in to comment.