Skip to content
This repository has been archived by the owner on Apr 30, 2021. It is now read-only.

Commit

Permalink
Merge pull request #90 from bradyrx/master
Browse files Browse the repository at this point in the history
Add significance testing for `weighted_corr`
  • Loading branch information
andersy005 committed Mar 27, 2019
2 parents 83b76c6 + 72efce4 commit 7ed3401
Show file tree
Hide file tree
Showing 12 changed files with 77 additions and 12 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -2,6 +2,15 @@
Changelog history
==================

Esmlab v2019.3. (2019-03)
==============================

Features
--------

- Enable computing significance metrics (p-value) for ``weighted_corr`` (:pr:`90`) `Riley Brady`_


Esmlab v2019.3.16 (2019-03-16)
==============================

Expand Down Expand Up @@ -83,4 +92,5 @@ Esmlab v2019.2.0 (2019-02-02)
.. _`Anderson Banihirwe`: https://github.com/andersy005
.. _`Matthew Long`: https://github.com/matt-long
.. _`Michael Levy`: https://github.com/mnlevy1981
.. _`Riley Brady`: https://github.com/bradyrx
.. _`Sudharsana K J L`: https://github.com/sudharsana-kjl
4 changes: 3 additions & 1 deletion ci/environment-dev-2.7.yml
Expand Up @@ -6,14 +6,16 @@ dependencies:
- coverage
- dask
- distributed
- git
- matplotlib
- netcdf4
- numpy
- pytest
- pytest-cov
- python=2.7
- xarray>=0.11.2
- scipy
- zarr
- pip:
- pytest-lazy-fixture
- contextlib2
- xarray>=0.11.2
4 changes: 3 additions & 1 deletion ci/environment-dev-3.6.yml
Expand Up @@ -7,15 +7,17 @@ dependencies:
- dask
- distributed
- esmpy
- git
- ipykernel
- matplotlib
- netcdf4
- numpy
- pytest
- pytest-cov
- python=3.6
- xarray>=0.11.3
- scipy
- xesmf
- zarr
- pip:
- pytest-lazy-fixture
- git+https://github.com/pydata/xarray.git
1 change: 1 addition & 0 deletions ci/environment-dev-3.7-dev.yml
Expand Up @@ -22,6 +22,7 @@ dependencies:
- pytest-cov
- python=3.7
- recommonmark
- scipy
- sphinx_rtd_theme
- sphinx>=1.6
- xesmf
Expand Down
4 changes: 3 additions & 1 deletion ci/environment-dev-3.7.yml
Expand Up @@ -10,6 +10,7 @@ dependencies:
- distributed
- esmpy
- flake8
- git
- ipykernel
- isort
- make
Expand All @@ -22,12 +23,13 @@ dependencies:
- pytest-cov
- python=3.7
- recommonmark
- scipy
- sphinx_rtd_theme
- sphinx>=1.6
- tornado==5.1.1
- xarray>=0.11.2
- xesmf
- zarr
- pip:
- pytest-lazy-fixture
- sphinx_copybutton
- git+https://github.com/pydata/xarray.git
2 changes: 2 additions & 0 deletions ci/environment-dev.yml
Expand Up @@ -10,6 +10,7 @@ dependencies:
- distributed
- esmpy
- flake8
- git
- ipykernel
- isort
- matplotlib
Expand All @@ -21,6 +22,7 @@ dependencies:
- pytest-cov
- python=3.7
- recommonmark
- scipy
- sphinx_rtd_theme
- sphinx>=1.6
- xesmf
Expand Down
49 changes: 45 additions & 4 deletions esmlab/statistics.py
Expand Up @@ -5,6 +5,7 @@

import numpy as np
import xarray as xr
from scipy import special

from .utils.common import esmlab_xr_set_options

Expand Down Expand Up @@ -318,9 +319,9 @@ def weighted_cov(x, y, dim=None, weights=None):
return cov_xy


@esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True)
def weighted_corr(x, y, dim=None, weights=None):
""" Compute weighted correlation between two xarray objects.
@esmlab_xr_set_options(arithmetic_join='exact')
def weighted_corr(x, y, dim=None, weights=None, return_p=True):
""" Compute weighted correlation between two `xarray.DataArray` objects.
Parameters
----------
Expand All @@ -333,12 +334,18 @@ def weighted_corr(x, y, dim=None, weights=None):
weights : DataArray
weights to apply. Shape must be broadcastable to shape of data.
return_p : bool, default: True
If True, compute and return the p-value(s) associated with the
correlation.
Returns
-------
reduced : Dataset or DataArray
New Dataset/DataArray with correlation applied to x, y and the indicated
dimension(s) removed.
If `return_p` is True, appends the resulting p values to the
returned Dataset.
"""

valid_values = x.notnull() & y.notnull()
Expand All @@ -347,4 +354,38 @@ def weighted_corr(x, y, dim=None, weights=None):
numerator = weighted_cov(x, y, dim, weights)
denominator = np.sqrt(weighted_cov(x, x, dim, weights) * weighted_cov(y, y, dim, weights))
corr_xy = numerator / denominator
return corr_xy

if return_p:
p = compute_corr_significance(corr_xy, len(x))
corr_xy.name = 'r'
p.name = 'p'
return xr.merge([corr_xy, p])
else:
return corr_xy


@esmlab_xr_set_options(arithmetic_join='exact', keep_attrs=True)
def compute_corr_significance(r, N):
""" Compute statistical significance for a pearson correlation between
two xarray objects.
Parameters
----------
r : `xarray.DataArray` object
correlation coefficient between two time series.
N : int
length of time series being correlated.
Returns
-------
pval : float
p value for pearson correlation.
"""
df = N - 2
t_squared = r ** 2 * (df / ((1.0 - r) * (1.0 + r)))
# method used in scipy, where `np.fmin` constrains values to be
# below 1 due to errors in floating point arithmetic.
pval = special.betainc(0.5 * df, 0.5, np.fmin(df / (df + t_squared), 1.0))
return xr.DataArray(pval, coords=t_squared.coords, dims=t_squared.dims)
1 change: 1 addition & 0 deletions requirements-py2.txt
Expand Up @@ -2,3 +2,4 @@ xarray>=0.11.2
netcdf4
cftime
contextlib2
scipy
1 change: 1 addition & 0 deletions requirements.txt
Expand Up @@ -3,3 +3,4 @@ netcdf4
cftime
esmpy
xesmf
scipy
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -17,7 +17,7 @@ collect_ignore = ['setup.py']

[isort]
known_first_party=esmlab
known_third_party=ESMF,cftime,numpy,pandas,pytest,setuptools,xarray,xesmf,yaml
known_third_party=ESMF,cftime,numpy,pandas,pytest,scipy,setuptools,xarray,xesmf,yaml
multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
Expand Down
6 changes: 3 additions & 3 deletions tests/test_climatology.py
Expand Up @@ -11,7 +11,7 @@
from esmlab.datasets import open_dataset


@pytest.mark.parametrize('ds', ['tiny', 'cesm_cice_daily'])
@pytest.mark.parametrize('ds', ['tiny'])
def test_compute_climatology_multi(ds):
dset = open_dataset(ds, decode_times=False)

Expand All @@ -38,7 +38,7 @@ def test_compute_climatology_multi(ds):
assert value == computed_dset.time.attrs[key]


@pytest.mark.parametrize('ds', ['tiny', 'cesm_cice_daily'])
@pytest.mark.parametrize('ds', ['tiny'])
def test_compute_climatology_multi_decoded(ds):
dset = open_dataset(ds, decode_times=True)

Expand Down Expand Up @@ -95,7 +95,7 @@ def test_compute_climatology_multi_drop_time_bound_decoded(ds):
assert value == computed_dset.time.attrs[key]


@pytest.mark.parametrize('ds', ['tiny', 'cesm_cice_daily'])
@pytest.mark.parametrize('ds', ['tiny'])
def test_compute_climatology_multi_drop_time_bound(ds):
dset = open_dataset(ds, decode_times=False)
dset_time_bound = dset.time.attrs['bounds']
Expand Down
5 changes: 4 additions & 1 deletion tests/test_statistics.py
Expand Up @@ -120,9 +120,12 @@ def test_weighted_corr():
covy = (da2_dev ** 2).sum(dim) / N
corr = cov / np.sqrt(covx * covy)

w_corr = statistics.weighted_corr(da1, da2, dim)
w_corr = statistics.weighted_corr(da1, da2, dim, weights=None, return_p=False)
np.testing.assert_allclose(corr, w_corr)

w_corr_with_p = statistics.weighted_corr(da1, da2, dim, weights=None, return_p=True)
assert isinstance(w_corr_with_p['p'], xr.DataArray)


def test_weighted_sum_float32():
from esmlab.datasets import open_dataset
Expand Down

0 comments on commit 7ed3401

Please sign in to comment.