Skip to content

Commit

Permalink
Add init time to parquet files (#349)
Browse files (browse the repository at this point in the history)
I forgot to test for the presence of the initialization time in the
parquet output.
  • Loading branch information
esheehan-gsl committed Jun 15, 2023
1 parent 52ff32a commit 241574e
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/unified_graphics/etl/diag.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def prep_dataframe(ds: xr.Dataset) -> pd.DataFrame:
df = ds.to_dataframe()

df["loop"] = ds.loop
df["initialization_time"] = datetime.fromisoformat(ds.initialization_time)

# FIXME: Clean the string columns Station_ID, Provider_Name, Subprovider_Name

Expand Down
7 changes: 5 additions & 2 deletions tests/etl/test_save.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def parquet_file(model, data_path):

def dataset_to_table(dataset: xr.Dataset) -> pd.DataFrame:
    """Convert a dataset into the data frame the tests compare against.

    The frame produced by ``dataset.to_dataframe()`` gains two columns taken
    from attributes of ``dataset``: ``initialization_time``, parsed with
    ``datetime.fromisoformat``, and ``loop``. The ``loop`` column is cast to
    the ``category`` dtype on the returned frame.
    """
    table = dataset.to_dataframe()

    # Keep the assignment order: it determines the column order of the result.
    init_time = datetime.fromisoformat(dataset.initialization_time)
    table["initialization_time"] = init_time
    table["loop"] = dataset.loop

    column_dtypes = {"loop": "category"}
    return table.astype(column_dtypes)
Expand Down
Expand Up @@ -258,7 +259,9 @@ def dataset(self, model, diag_dataset, session, zarr_file):

@pytest.fixture(scope="class")
def dataframe(self, dataset):
return pd.concat(map(dataset_to_table, dataset))
return pd.concat(map(dataset_to_table, dataset)).sort_values(
"initialization_time"
)

@pytest.mark.parametrize(
"init_time,expected", (("2022-05-05T14:00", 0), ("2022-05-05T15:00", 1))
Expand All @@ -272,7 +275,7 @@ def test_parquet(self, dataframe, parquet_file):
result = pd.read_parquet(
parquet_file / "ps",
filters=(("loop", "=", "ges"),),
)
).sort_values("initialization_time")

pd.testing.assert_frame_equal(result, dataframe)

Expand Down

0 comments on commit 241574e

Please sign in to comment.