Handle warnings, mainly from pandas.

DHI · Mar 26, 2024 · commit 890788a
1 parent: f68e0e9
Show file tree

Hide file tree

Showing 7 changed files with 67 additions and 46 deletions.
diff --git a/modelskill/comparison/_utils.py b/modelskill/comparison/_utils.py
@@ -65,7 +65,9 @@ def calc_metrics(group: pd.DataFrame) -> pd.Series:
         df, by = _add_dt_to_df(df, by)
 
     # sort=False to avoid re-ordering compared to original cc (also for performance)
-    res = df.groupby(by=by, observed=False, sort=False).apply(calc_metrics)
+    res = df.groupby(by=by, observed=False, sort=False, group_keys=True)[
+        ["time", "obs_val", "mod_val"]
+    ].apply(calc_metrics)
 
     if n_min:
         # nan for all cols but n

diff --git a/tests/test_comparer.py b/tests/test_comparer.py
@@ -644,9 +644,9 @@ def test_skill_freq(pc):
     assert len(sk.to_dataframe()) == 3
 
     # aggregate to 12 hours (up-sampling) doesn't interpolate
-    sk2 = pc.skill(by="freq:12H")
+    sk2 = pc.skill(by="freq:12h")
     assert len(sk2.to_dataframe()) == 9
-    assert np.isnan(sk2.to_dataframe()["rmse"][3])
+    assert np.isnan(sk2.to_dataframe().loc["2019-01-02 12:00:00", "rmse"])
 
 
 def test_xy_in_skill_pt(pc):
@@ -824,7 +824,9 @@ def test_from_matched_track_data():
             "mikeswcal5hm0": [1.22, 1.3],
         },
     )
-    assert isinstance(df.index, pd.RangeIndex) # Sometime we don't care about time only space
+    assert isinstance(
+        df.index, pd.RangeIndex
+    )  # Sometimes we don't care about time, only space
 
     cmp = ms.from_matched(
         data=df, obs_item="c2", mod_items="mikeswcal5hm0", x_item="lon", y_item="lat"

diff --git a/tests/test_match.py b/tests/test_match.py
@@ -26,7 +26,7 @@ def o2_gaps():
     obs = mikeio.read(fn, items=0).to_dataframe().rename(columns=dict(Hm0="obs")) + 1
     dt = pd.Timedelta(180, unit="s")
     obs.index = obs.index - dt
-    obs.index = obs.index.round("S")
+    obs.index = obs.index.round("s")
     return ms.PointObservation(obs, item=0, x=3.2760, y=51.9990, name="EPL")
 
 
@@ -40,7 +40,7 @@ def o3():
 def mr12_gaps():
     fn = "tests/testdata/SW/ts_storm_4.dfs0"
     df1 = mikeio.read(fn, items=0).to_dataframe()
-    df1 = df1.resample("2H").nearest()
+    df1 = df1.resample("2h").nearest()
     df1 = df1.rename(columns={df1.columns[0]: "mr1"})
     df2 = df1.copy().rename(columns=dict(mr1="mr2")) - 1
 
@@ -214,14 +214,16 @@ def test_small_multi_model_shifted_time_match():
     # observation has four timesteps, but only three of them are in the Simple model and three in the NotSimple model
     # the number of overlapping points for all three datasets are 2, but three if we look at the models individually
 
-    cmp1 = ms.match(obs=obs, mod=mod)
-    assert cmp1.n_points == 3
+    # a single pytest.warns context wraps all three match() calls, so it only verifies that at least one of them emits a UserWarning
+    with pytest.warns(UserWarning):
+        cmp1 = ms.match(obs=obs, mod=mod)
+        assert cmp1.n_points == 3
 
-    cmp2 = ms.match(obs=obs, mod=mod2)
-    assert cmp2.n_points == 3
+        cmp2 = ms.match(obs=obs, mod=mod2)
+        assert cmp2.n_points == 3
 
-    mcmp = ms.match(obs=obs, mod=[mod, mod2])
-    assert mcmp.n_points == 2
+        mcmp = ms.match(obs=obs, mod=[mod, mod2])
+        assert mcmp.n_points == 2
 
 
 def test_matched_data_single_model():
@@ -387,7 +389,7 @@ def test_wind_directions():
             "obs": [359, 91, 181, 268],
             "mod": [0, 90, 180, 270],
         },
-        index=pd.date_range("2017-01-01", periods=4, freq="H"),
+        index=pd.date_range("2017-01-01", periods=4, freq="h"),
     )
 
     cc = ms.from_matched(
@@ -465,39 +467,39 @@ def test_mod_aux_items_overlapping_names():
 def test_multiple_obs_not_allowed_with_non_spatial_modelresults():
     o1 = ms.PointObservation(
         pd.DataFrame(
-            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
+            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
         ),
         name="o1",
         x=1,
         y=2,
     )
     o2 = ms.PointObservation(
         pd.DataFrame(
-            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
+            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
         ),
         name="o2",
         x=2,
         y=3,
     )
     m1 = ms.PointModelResult(
         pd.DataFrame(
-            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
+            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
         ),
         name="m1",
         x=1,
         y=2,
     )
     m2 = ms.PointModelResult(
         pd.DataFrame(
-            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
+            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
         ),
         name="m2",
         x=2,
         y=3,
     )
     m3 = ms.PointModelResult(
         pd.DataFrame(
-            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="H", periods=2)
+            {"wl": [1.0, 2.0]}, index=pd.date_range("2000", freq="h", periods=2)
         ),
         name="m3",
         x=3,

diff --git a/tests/test_multimodelcompare.py b/tests/test_multimodelcompare.py
@@ -328,12 +328,12 @@ def test_mm_scatter(cc):
 
 def cm_1(obs, model):
     """Custom metric #1"""
-    return np.mean(obs.ravel() / model.ravel())
+    return np.mean(obs / model)
 
 
 def cm_2(obs, model):
     """Custom metric #2"""
-    return np.mean(obs.ravel() * 1.5 / model.ravel())
+    return np.mean(obs * 1.5 / model)
 
 
 def cm_3(obs, model):

diff --git a/tests/test_multivariable_compare.py b/tests/test_multivariable_compare.py
@@ -89,11 +89,12 @@ def test_mv_skill(cc_1model):
     assert list(df.reset_index().observation) == cc_1model.obs_names
     assert df.index.names[0] == "observation"
     assert df.index.names[1] == "quantity"
-    assert pytest.approx(df.iloc[1].rmse) == 0.22792652  
+    assert pytest.approx(df.iloc[1].rmse) == 0.22792652
     idx = ("HKNA_wind", "Wind speed")
-    assert pytest.approx(df.loc[idx].rmse) == 1.5610323  
+    assert pytest.approx(df.loc[idx].rmse) == 1.5610323
     # spatial_interp nearest: 0.22359663 and 1.2761789
 
+
 def test_mv_mm_skill(cc):
     df = cc.skill().to_dataframe()
     assert df.index.names[0] == "model"
@@ -145,12 +146,12 @@ def test_mv_mm_scatter(cc):
 
 def cm_1(obs, model):
     """Custom metric #1"""
-    return np.mean(obs.ravel() / model.ravel())
+    return np.mean(obs / model)
 
 
 def cm_2(obs, model):
     """Custom metric #2"""
-    return np.mean(obs.ravel() * 1.5 / model.ravel())
+    return np.mean(obs * 1.5 / model)
 
 
 def test_custom_metric_skilltable_mv_mm_scatter(cc):

diff --git a/tests/test_simple_compare.py b/tests/test_simple_compare.py
@@ -16,34 +16,38 @@ def fn_obs():
 
 def test_compare(fn_obs, fn_mod):
     df_mod = mikeio.open(fn_mod).read(items=0).to_dataframe()
-    c = ms.match(fn_obs, df_mod)
+    with pytest.warns(UserWarning):
+        c = ms.match(fn_obs, df_mod)
     assert c.n_points == 67
     assert c.time[0] == datetime(2017, 10, 27, 0, 0, 0)
     assert c.time[-1] == datetime(2017, 10, 29, 18, 0, 0)
 
 
 def test_compare_mod_item(fn_obs, fn_mod):
-    c = ms.match(fn_obs, fn_mod, mod_item=0)
+    with pytest.warns(UserWarning):
+        c = ms.match(fn_obs, fn_mod, mod_item=0)
 
     # not very useful assert, but if you don't provide a model name, you'll get a default one
     assert c.mod_names[0] == "ts_storm_4"
 
 
 def test_compare_mod_item_2(fn_obs, fn_mod):
     df_mod = mikeio.open(fn_mod).read(items=[0, 1, 2]).to_dataframe()
-    c = ms.match(fn_obs, df_mod, mod_item=0)
+    with pytest.warns(UserWarning):
+        c = ms.match(fn_obs, df_mod, mod_item=0)
     assert c.n_points > 0
 
 
 def test_compare_fn(fn_obs):
-    c = ms.match(fn_obs, fn_obs)
+    c = ms.match(fn_obs, fn_obs, gtype="point")
     assert c.n_points == 95
 
 
 def test_compare_df(fn_obs, fn_mod):
     df_obs = mikeio.open(fn_obs).read().to_dataframe()
     df_mod = mikeio.open(fn_mod).read(items=0).to_dataframe()
-    c = ms.match(df_obs, df_mod)
+    with pytest.warns(UserWarning):
+        c = ms.match(df_obs, df_mod)
     assert c.n_points == 67
     assert c.time[0] == datetime(2017, 10, 27, 0, 0, 0)
     assert c.time[-1] == datetime(2017, 10, 29, 18, 0, 0)
@@ -60,34 +64,40 @@ def test_compare_fail(fn_obs, fn_mod):
     df_mod = mikeio.open(fn_mod).read(items=[0, 1, 2]).to_dataframe()
     with pytest.raises(ValueError):
         # multiple items in model df -> ambigous
-        ms.match(fn_obs, df_mod)
+        with pytest.warns(UserWarning):
+            ms.match(fn_obs, df_mod)
 
     df_obs2, fn_mod2 = df_mod, fn_obs
     with pytest.raises(ValueError):
         # multiple items in obs df -> ambigous
-        ms.match(df_obs2, fn_mod2)
+        with pytest.warns(UserWarning):
+            ms.match(df_obs2, fn_mod2)
 
 
 def test_compare_obs_item(fn_mod):
-    c = ms.match(
-        "tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0
-    )  # obs file has only 1 item, not necessary to specify obs_item
+    with pytest.warns(UserWarning):
+        c = ms.match(
+            "tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0
+        )  # obs file has only 1 item, not necessary to specify obs_item
     assert c.n_points == 67
 
     with pytest.raises(IndexError):
-        ms.match(
-            "tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0, obs_item=1
-        )  # file has only 1 item
-
-    c = ms.match(
-        "tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod, mod_item=0, obs_item=0
-    )
+        with pytest.warns(UserWarning):
+            ms.match(
+                "tests/testdata/SW/eur_Hm0.dfs0", fn_mod, mod_item=0, obs_item=1
+            )  # file has only 1 item
+
+    with pytest.warns(UserWarning):
+        c = ms.match(
+            "tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod, mod_item=0, obs_item=0
+        )
     assert c.n_points == 67
 
     with pytest.raises(ValueError):
-        ms.match(
-            "tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod
-        )  # Obs file has multiple items, but we did not specify one
+        with pytest.warns(UserWarning):
+            ms.match(
+                "tests/testdata/SW/eur_Hm0_Quality.dfs0", fn_mod
+            )  # Obs file has multiple items, but we did not specify one
 
 
 def test_compare_obs_item_pointobs(fn_mod):

diff --git a/tests/test_trackcompare.py b/tests/test_trackcompare.py
@@ -178,8 +178,12 @@ def test_tiny_mod_unique(obs_tiny, mod_tiny_unique):
 
 # Currently fails as check on x, y difference is missing!
 def test_tiny_mod_xy_difference(obs_tiny_df, mod_tiny_unique):
-    obs_tiny_df.x.iloc[0] = 1.1  # difference in x larger than tolerance
-    obs_tiny_df.y.iloc[3] = 13.6  # difference in y larger than tolerance
+    obs_tiny_df.loc["2017-10-27 13:00:01", "x"] = (
+        1.1  # difference in x larger than tolerance
+    )
+    obs_tiny_df.loc["2017-10-27 13:00:03", "y"] = (
+        13.6  # difference in y larger than tolerance
+    )
     with pytest.warns(UserWarning, match="Removed 2 duplicate timestamps"):
         obs_tiny = ms.TrackObservation(
             obs_tiny_df, item="alti", x_item="x", y_item="y", keep_duplicates="first"