Don't write Parquet files during ETL
I think there's something wrong with writing Parquet from our Lambda
function. Until we get that sorted, I'm reverting this because it's
holding up the data pipeline.
esheehan-gsl committed Jun 20, 2023
1 parent 241574e commit c222ac5
Showing 2 changed files with 4 additions and 14 deletions.
14 changes: 0 additions & 14 deletions src/unified_graphics/etl/diag.py
@@ -317,20 +317,6 @@ def save(session: Session, path: Union[Path, str], *args: xr.Dataset):
logger.info(f"Saving dataset to Zarr at: {path}")
ds.to_zarr(path, group=group, mode="a", consolidated=False)

parquet_path = (
Path(path)
/ ".."
/ "_".join((model, background, system, domain, frequency))
/ ds.name
)
logger.info(f"Saving dataframe to Parquet at: {parquet_path}")
prep_dataframe(ds).to_parquet(
parquet_path,
engine="pyarrow",
index=True,
partition_cols=["loop"],
)

logger.info("Saving dataset to Database")
session.commit()

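For context, the block removed above wrote each variable out as a partitioned Parquet dataset with pandas and pyarrow. Below is a minimal, self-contained sketch of that write pattern, assuming pyarrow is installed; the data, model names, and output path are illustrative stand-ins, not the project's actual values.

# Sketch of the partitioned Parquet write pattern used by the removed block.
# All names and paths here are illustrative; only the to_parquet call mirrors
# the pattern above (pyarrow engine, one partition per "loop" value).
from pathlib import Path

import pandas as pd

df = pd.DataFrame(
    {
        "loop": ["ges", "ges", "anl", "anl"],
        "observation": [1.2, 0.8, 1.1, 0.7],
    }
)

out = Path("/tmp/diag") / "_".join(("MODEL", "BG", "SYSTEM", "DOMAIN", "FREQ")) / "ps"

# partition_cols writes one subdirectory per distinct "loop" value,
# e.g. .../ps/loop=ges/*.parquet and .../ps/loop=anl/*.parquet,
# which is the layout the tests below read back with pd.read_parquet.
df.to_parquet(str(out), engine="pyarrow", index=True, partition_cols=["loop"])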
4 changes: 4 additions & 0 deletions tests/etl/test_save.py
@@ -75,6 +75,7 @@ def test_zarr_created(self, model, dataset, zarr_file):

         xr.testing.assert_equal(result, dataset)

+    @pytest.mark.xfail
     def test_parquet_created(self, dataframe, parquet_file):
         result = pd.read_parquet(
             parquet_file / "ps",
@@ -150,6 +151,7 @@ def test_zarr(self, dataset, model, zarr_file, variable, expected):
         result = xr.open_zarr(zarr_file, group=group, consolidated=False)
         xr.testing.assert_equal(result, dataset[expected])

+    @pytest.mark.xfail
     @pytest.mark.parametrize("variable,expected", (("ps", 0), ("t", 1)))
     def test_parquet(self, dataframe, parquet_file, variable, expected):
         result = pd.read_parquet(
@@ -211,6 +213,7 @@ def test_zarr(self, dataset, model, zarr_file, loop, expected):
         result = xr.open_zarr(zarr_file, group=group, consolidated=False)
         xr.testing.assert_equal(result, dataset[expected])

+    @pytest.mark.xfail
     @pytest.mark.parametrize("loop,expected", (("ges", 0), ("anl", 1)))
     def test_parquet(self, dataframe, parquet_file, loop, expected):
         result = pd.read_parquet(
@@ -271,6 +274,7 @@ def test_zarr(self, dataset, model, zarr_file, init_time, expected):
         result = xr.open_zarr(zarr_file, group=group, consolidated=False)
         xr.testing.assert_equal(result, dataset[expected])

+    @pytest.mark.xfail
     def test_parquet(self, dataframe, parquet_file):
         result = pd.read_parquet(
             parquet_file / "ps",
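Rather than deleting the Parquet assertions, the commit marks them with pytest.mark.xfail so they keep running without blocking the suite while the Lambda issue is investigated. Here is a minimal standalone illustration of how the marker behaves; the test name and reason string are made up for the example, and the commit itself adds the bare marker with no arguments.

import pytest


# With a plain (non-strict) xfail marker, a failing test is reported as
# XFAIL and does not fail the run; if it unexpectedly passes it shows up
# as XPASS, a handy signal that the feature works again.
@pytest.mark.xfail(reason="Parquet output is temporarily disabled")
def test_parquet_roundtrip():
    assert False, "re-enable once the Lambda Parquet write is fixed"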
