Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

first attempt at adding basic bias correction code and fixing tests #2

Merged
merged 16 commits into from
Dec 24, 2020
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions dodola/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,19 @@ def dodola_cli():
@dodola_cli.command(help="Bias-correct GCM on observations")
@click.argument("x", required=True)
@click.argument("xtrain", required=True)
@click.argument("trainvariable", required=True)
@click.argument("ytrain", required=True)
@click.argument("out", required=True)
def biascorrect(x, xtrain, ytrain, out):
@click.argument("outvariable", required=True)
def biascorrect(x, xtrain, trainvariable, ytrain, out, outvariable):
"""Bias-correct GCM (x) to 'out' based on model (xtrain), obs (ytrain)"""
storage = GcsRepository()
services.bias_correct(x, xtrain, ytrain, out, storage)
services.bias_correct(
x,
xtrain,
ytrain,
out,
storage,
train_variable=trainvariable,
out_variable=outvariable,
)
42 changes: 36 additions & 6 deletions dodola/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,47 @@
Math stuff and business logic goes here. This is the "business logic".
"""

# import numpy as np
# import xarray as xr
# from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
import numpy as np
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove the numpy and xarray imports - they're not used.

import xarray as xr
from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature

# Break this down into a submodule(s) if needed.
# Assume data input here is generally clean and valid.


def climatenerdlogic(*args):
raise NotImplementedError
def bias_correct_bcsd(
gcm_training_ds, obs_training_ds, gcm_predict_ds, train_variable, out_variable
):

"""Bias correct input model data using BCSD method,
using either monthly or +/- 15 day time grouping.

Parameters
----------
gcm_training_ds : Dataset
training model data for building quantile map
obs_training_ds : Dataset
observation data for building quantile map
gcm_predict_ds : Dataset
future model data to be bias corrected
predicted : Dataset
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this "predicted" arg was renamed or removed. If so, I'd remove the two docstring lines for "predicted".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch - it should be ds_predicted instead of predicted, thanks!

bias corrected future model data
train_variable : str
variable name used in training data
out_variable : str
variable name used in downscaled output
"""

# note that time_grouper='daily_nasa-nex' is what runs the
# NASA-NEX version of daily BCSD
# TO-DO: switch to NASA-NEX version once tests pass
model = PointWiseDownscaler(BcsdTemperature(return_anoms=False))
model.fit(gcm_training_ds[train_variable], obs_training_ds[train_variable])
predicted = model.predict(gcm_predict_ds[train_variable]).load()
ds_predicted = predicted.to_dataset(name=out_variable)
return ds_predicted


def morenerdymathstuff(*args):
raise NotImplementedError
# TO-DO: implement additional bias correction functionality
return None
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're set to just remove morenerdymathstuff() altogether. 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed - done 👍

12 changes: 8 additions & 4 deletions dodola/services.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Used by the CLI or any UI to deliver services to our lovely users
"""

from dodola.core import climatenerdlogic
from dodola.core import bias_correct_bcsd
from dodola.repository import GcsRepository


def bias_correct(x, x_train, y_train, out, storage):
def bias_correct(x, x_train, train_variable, y_train, out, out_variable, storage):
"""Bias correct input model data with IO to storage

Parameters
Expand All @@ -15,11 +15,15 @@ def bias_correct(x, x_train, y_train, out, storage):
x_train : str
Storage URL to input biased data to use for training bias-correction
model.
train_variable : str
Variable name used in training and obs data.
y_train : str
Storage URL to input 'true' data or observations to use for training
bias-correction model.
out : str
Storage URL to write bias-corrected output to.
out_variable : str
Variable name used as output variable name.
storage : RepositoryABC-like
Storage abstraction for data IO.
"""
Expand All @@ -28,8 +32,8 @@ def bias_correct(x, x_train, y_train, out, storage):
gcm_predict_ds = storage.read(x)

# This is all made up demo. Just get the output dataset the user expects.
bias_corrected_ds = climatenerdlogic(
gcm_training_ds, obs_training_ds, gcm_predict_ds
bias_corrected_ds = bias_correct_bcsd(
gcm_training_ds, obs_training_ds, gcm_predict_ds, train_variable, out_variable
)

storage.write(out, bias_corrected_ds)
Expand Down
37 changes: 26 additions & 11 deletions dodola/tests/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import xarray as xr
import pandas as pd
from dodola.services import bias_correct
from dodola.repository import FakeRepository

Expand All @@ -13,14 +14,15 @@ def _datafactory(x, start_time="1950-01-01"):
if x.ndim != 1:
raise ValueError("'x' needs dim of one")

time = pd.date_range(start=start_time, freq="D", periods=len(x))
brews marked this conversation as resolved.
Show resolved Hide resolved

out = xr.Dataset(
{"fakevariable": (["lon", "lat", "time"], x[np.newaxis, np.newaxis, :])},
{"fakevariable": (["time", "lon", "lat"], x[:, np.newaxis, np.newaxis])},
coords={
"index": time,
"time": time,
"lon": (["lon"], [1.0]),
"lat": (["lat"], [1.0]),
"time": xr.cftime_range(
start=start_time, freq="D", periods=len(x), calendar="noleap"
),
},
)
return out
Expand All @@ -29,14 +31,15 @@ def _datafactory(x, start_time="1950-01-01"):
def test_bias_correct_basic_call():
"""Simple integration test of bias_correct service"""
# Setup input data.
n_years = 5
n_years = 10
n = n_years * 365 # need daily data...

# Our "biased model".
model_bias = 10
x_train = _datafactory(np.arange(0, n) + model_bias)
model_bias = 2
ts = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5
x_train = _datafactory((ts + model_bias))
# True "observations".
y_train = _datafactory(np.arange(0, n))
y_train = _datafactory(ts)
# Yes, we're testing and training on the same data...
x_test = x_train.copy(deep=True)

Expand All @@ -58,11 +61,23 @@ def test_bias_correct_basic_call():
bias_correct(
forecast_model_key,
x_train=training_model_key,
train_variable="fakevariable",
y_train=training_obs_key,
out=output_key,
out_variable="fakevariable",
storage=fakestorage,
)

# Our testing model forecast is identical to our training model data so model
# forecast should equal observations we tuned to.
xr.testing.assert_equal(fakestorage.storage[output_key], y_train)
# We can't just test for removal of bias here since quantile mapping
# and adding in trend are both components of bias correction,
# so testing head and tail values instead
head_vals = np.array([-0.08129293, -0.07613746, -0.0709855, -0.0658377, -0.0606947])
tail_vals = np.array([0.0520793, 0.06581804, 0.07096781, 0.07612168, 0.08127902])
np.testing.assert_almost_equal(
fakestorage.storage[output_key]["fakevariable"].squeeze(drop=True).values[:5],
head_vals,
)
np.testing.assert_almost_equal(
fakestorage.storage[output_key]["fakevariable"].squeeze(drop=True).values[-5:],
tail_vals,
)
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
click
cftime
numpy
scikit-downscale
git+https://github.com/dgergel/xsd@pointwisedownscaler_interimfix
xarray
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
include_package_data=True,
zip_safe=False,
python_requires=">=3.7",
install_requires=requirements,
# install_requires=requirements,
brews marked this conversation as resolved.
Show resolved Hide resolved
setup_requires=["setuptools_scm"],
entry_points="""
[console_scripts]
Expand Down