Skip to content

Commit

Permalink
Handle warnings, mainly from pandas.
Browse files Browse the repository at this point in the history
  • Loading branch information
ecomodeller committed Mar 26, 2024
1 parent f68e0e9 commit 890788a
Show file tree
Hide file tree
Showing 7 changed files with 67 additions and 46 deletions.
4 changes: 3 additions & 1 deletion modelskill/comparison/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,9 @@ def calc_metrics(group: pd.DataFrame) -> pd.Series:
df, by = _add_dt_to_df(df, by)

# sort=False to avoid re-ordering compared to original cc (also for performance)
res = df.groupby(by=by, observed=False, sort=False).apply(calc_metrics)
res = df.groupby(by=by, observed=False, sort=False, group_keys=True)[
["time", "obs_val", "mod_val"]
].apply(calc_metrics)

if n_min:
# nan for all cols but n
Expand Down
8 changes: 5 additions & 3 deletions tests/test_comparer.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,9 +644,9 @@ def test_skill_freq(pc):
assert len(sk.to_dataframe()) == 3

# aggregating to 12 hours (up-sampling) doesn't interpolate
sk2 = pc.skill(by="freq:12H")
sk2 = pc.skill(by="freq:12h")
assert len(sk2.to_dataframe()) == 9
assert np.isnan(sk2.to_dataframe()["rmse"][3])
assert np.isnan(sk2.to_dataframe().loc["2019-01-02 12:00:00", "rmse"])


def test_xy_in_skill_pt(pc):
Expand Down Expand Up @@ -824,7 +824,9 @@ def test_from_matched_track_data():
"mikeswcal5hm0": [1.22, 1.3],
},
)
assert isinstance(df.index, pd.RangeIndex) # Sometime we don't care about time only space
assert isinstance(
df.index, pd.RangeIndex
) # Sometimes we don't care about time, only space

cmp = ms.from_matched(
data=df, obs_item="c2", mod_items="mikeswcal5hm0", x_item="lon", y_item="lat"
Expand Down
30 changes: 16 additions & 14 deletions tests/test_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def o2_gaps():
obs = mikeio.read(fn, items=0).to_dataframe().rename(columns=dict(Hm0="obs")) + 1
dt = pd.Timedelta(180, unit="s")
obs.index = obs.index - dt
obs.index = obs.index.round("S")
obs.index = obs.index.round("s")
return ms.PointObservation(obs, item=0, x=3.2760, y=51.9990, name="EPL")


Expand All @@ -40,7 +40,7 @@ def o3():
def mr12_gaps():
fn = "tests/testdata/SW/ts_storm_4.dfs0"
df1 = mikeio.read(fn, items=0).to_dataframe()
df1 = df1.resample("2H").nearest()
df1 = df1.resample("2h").nearest()
df1 = df1.rename(columns={df1.columns[0]: "mr1"})
df2 = df1.copy().rename(columns=dict(mr1="mr2")) - 1

Expand Down Expand Up @@ -214,14 +214,16 @@ def test_small_multi_model_shifted_time_match():
# the observation has four timesteps, but only three of them are in the Simple model and three in the NotSimple model
# the number of overlapping points across all three datasets is two, but three if we look at the models individually

cmp1 = ms.match(obs=obs, mod=mod)
assert cmp1.n_points == 3
with pytest.warns(UserWarning):
cmp1 = ms.match(obs=obs, mod=mod)
cmp1 = ms.match(obs=obs, mod=mod)
assert cmp1.n_points == 3

cmp2 = ms.match(obs=obs, mod=mod2)
assert cmp2.n_points == 3
cmp2 = ms.match(obs=obs, mod=mod2)
assert cmp2.n_points == 3

mcmp = ms.match(obs=obs, mod=[mod, mod2])
assert mcmp.n_points == 2
mcmp = ms.match(obs=obs, mod=[mod, mod2])
assert mcmp.n_points == 2


def test_matched_data_single_model():
Expand Down Expand Up @@ -387,7 +389,7 @@ def test_wind_directions():
"obs": [359, 91, 181, 268],
"mod": [0, 90, 180, 270],
},
index=pd.date_range("2017-01-01", periods=4, freq="H"),
index=pd.date_range("2017-01-01", periods=4, freq="h"),
)

cc = ms.from_matched(
Expand Down Expand Up @@ -465,39 +467,39 @@ def test_mod_aux_items_overlapping_names():
def test_multiple_obs_not_allowed_with_non_spatial_modelresults():
o1 = ms.PointObservation(
pd.DataFrame(
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
),
name="o1",
x=1,
y=2,
)
o2 = ms.PointObservation(
pd.DataFrame(
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
),
name="o2",
x=2,
y=3,
)
m1 = ms.PointModelResult(
pd.DataFrame(
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
),
name="m1",
x=1,
y=2,
)
m2 = ms.PointModelResult(
pd.DataFrame(
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
),
name="m2",
x=2,
y=3,
)
m3 = ms.PointModelResult(
pd.DataFrame(
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
{"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
),
name="m3",
x=3,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_multimodelcompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,12 +328,12 @@ def test_mm_scatter(cc):

def cm_1(obs, model):
"""Custom metric #1"""
return np.mean(obs.ravel() / model.ravel())
return np.mean(obs / model)


def cm_2(obs, model):
"""Custom metric #2"""
return np.mean(obs.ravel() * 1.5 / model.ravel())
return np.mean(obs * 1.5 / model)


def cm_3(obs, model):
Expand Down
9 changes: 5 additions & 4 deletions tests/test_multivariable_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,12 @@ def test_mv_skill(cc_1model):
assert list(df.reset_index().observation) == cc_1model.obs_names
assert df.index.names[0] == "observation"
assert df.index.names[1] == "quantity"
assert pytest.approx(df.iloc[1].rmse) == 0.22792652
assert pytest.approx(df.iloc[1].rmse) == 0.22792652
idx = ("HKNA_wind", "Wind speed")
assert pytest.approx(df.loc[idx].rmse) == 1.5610323
assert pytest.approx(df.loc[idx].rmse) == 1.5610323
# spatial_interp nearest: 0.22359663 and 1.2761789


def test_mv_mm_skill(cc):
df = cc.skill().to_dataframe()
assert df.index.names[0] == "model"
Expand Down Expand Up @@ -145,12 +146,12 @@ def test_mv_mm_scatter(cc):

def cm_1(obs, model):
"""Custom metric #1"""
return np.mean(obs.ravel() / model.ravel())
return np.mean(obs / model)


def cm_2(obs, model):
"""Custom metric #2"""
return np.mean(obs.ravel() * 1.5 / model.ravel())
return np.mean(obs * 1.5 / model)


def test_custom_metric_skilltable_mv_mm_scatter(cc):
Expand Down
50 changes: 30 additions & 20 deletions tests/test_simple_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,38 @@ def fn_obs():

def test_compare(fn_obs, fn_mod):
df_mod = mikeio.open(fn_mod).read(items=0).to_dataframe()
c = ms.match(fn_obs, df_mod)
with pytest.warns(UserWarning):
c = ms.match(fn_obs, df_mod)
assert c.n_points == 67
assert c.time[0] == datetime(2017, 10, 27, 0, 0, 0)
assert c.time[-1] == datetime(2017, 10, 29, 18, 0, 0)


def test_compare_mod_item(fn_obs, fn_mod):
c = ms.match(fn_obs, fn_mod, mod_item=0)
with pytest.warns(UserWarning):
c = ms.match(fn_obs, fn_mod, mod_item=0)

# not very useful assert, but if you don't provide a model name, you'll get a default one
assert c.mod_names[0] == "ts_storm_4"


def test_compare_mod_item_2(fn_obs, fn_mod):
df_mod = mikeio.open(fn_mod).read(items=[0, 1, 2]).to_dataframe()
c = ms.match(fn_obs, df_mod, mod_item=0)
with pytest.warns(UserWarning):
c = ms.match(fn_obs, df_mod, mod_item=0)
assert c.n_points > 0


def test_compare_fn(fn_obs):
c = ms.match(fn_obs, fn_obs)
c = ms.match(fn_obs, fn_obs, gtype="point")
assert c.n_points == 95


def test_compare_df(fn_obs, fn_mod):
df_obs = mikeio.open(fn_obs).read().to_dataframe()
df_mod = mikeio.open(fn_mod).read(items=0).to_dataframe()
c = ms.match(df_obs, df_mod)
with pytest.warns(UserWarning):
c = ms.match(df_obs, df_mod)
assert c.n_points == 67
assert c.time[0] == datetime(2017, 10, 27, 0, 0, 0)
assert c.time[-1] == datetime(2017, 10, 29, 18, 0, 0)
Expand All @@ -60,34 +64,40 @@ def test_compare_fail(fn_obs, fn_mod):
df_mod = mikeio.open(fn_mod).read(items=[0, 1, 2]).to_dataframe()
with pytest.raises(ValueError):
# multiple items in model df -> ambiguous
ms.match(fn_obs, df_mod)
with pytest.warns(UserWarning):
ms.match(fn_obs, df_mod)

df_obs2, fn_mod2 = df_mod, fn_obs
with pytest.raises(ValueError):
# multiple items in obs df -> ambiguous
ms.match(df_obs2, fn_mod2)
with pytest.warns(UserWarning):
ms.match(df_obs2, fn_mod2)


def test_compare_obs_item(fn_mod):
c = ms.match(
"tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0
) # obs file has only 1 item, not necessary to specify obs_item
with pytest.warns(UserWarning):
c = ms.match(
"tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0
) # obs file has only 1 item, not necessary to specify obs_item
assert c.n_points == 67

with pytest.raises(IndexError):
ms.match(
"tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0, obs_item=1
) # file has only 1 item

c = ms.match(
"tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod, mod_item=0, obs_item=0
)
with pytest.warns(UserWarning):
ms.match(
"tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0, obs_item=1
) # file has only 1 item

with pytest.warns(UserWarning):
c = ms.match(
"tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod, mod_item=0, obs_item=0
)
assert c.n_points == 67

with pytest.raises(ValueError):
ms.match(
"tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod
) # Obs file has multiple items, but we did not specify one
with pytest.warns(UserWarning):
ms.match(
"tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod
) # Obs file has multiple items, but we did not specify one


def test_compare_obs_item_pointobs(fn_mod):
Expand Down
8 changes: 6 additions & 2 deletions tests/test_trackcompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,12 @@ def test_tiny_mod_unique(obs_tiny, mod_tiny_unique):

# Currently fails as check on x, y difference is missing!
def test_tiny_mod_xy_difference(obs_tiny_df, mod_tiny_unique):
obs_tiny_df.x.iloc[0] = 1.1 # difference in x larger than tolerance
obs_tiny_df.y.iloc[3] = 13.6 # difference in y larger than tolerance
obs_tiny_df.loc["2017-10-27 13:00:01", "x"] = (
1.1 # difference in x larger than tolerance
)
obs_tiny_df.loc["2017-10-27 13:00:03", "y"] = (
13.6 # difference in y larger than tolerance
)
with pytest.warns(UserWarning, match="Removed 2 duplicate timestamps"):
obs_tiny = ms.TrackObservation(
obs_tiny_df, item="alti", x_item="x", y_item="y", keep_duplicates="first"
Expand Down

0 comments on commit 890788a

Please sign in to comment.