Merge pull request #185 from NOAA-OWP/memory_analysis

Memory analysis

fernando-aristizabal committed Jun 14, 2024
2 parents 9d5bc26 + 462c3c7 commit 19f8c1d

Showing 12 changed files with 79 additions and 34 deletions.
2 changes: 1 addition & 1 deletion notebooks/Catalog Tutorial.ipynb
@@ -884,7 +884,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.10.14"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion notebooks/Continuous Comparison Tutorial.ipynb
@@ -710,7 +710,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.10.14"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion notebooks/Tutorial.ipynb
@@ -1158,7 +1158,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
"version": "3.10.14"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -12,7 +12,7 @@ authors = [
requires-python = ">=3.8"
keywords = ["geospatial", "evaluations"]
license = {text = "MIT"}
version = "0.2.6"
version = "0.2.7"
dynamic = ["readme", "dependencies"]

[project.optional-dependencies]
7 changes: 3 additions & 4 deletions requirements.txt
@@ -1,17 +1,16 @@
rioxarray>=0.13.4
dask==2023.5.0
xarray-spatial==0.3.5
dask>=2023.5.0,<2025
pandera==0.15.1
shapely==2.0.1
geocube>=0.3.3
pandas==2.0.2
odc-geo==0.4.1
pydantic==1.10.10
pydantic>=1.10.13
rio-cogeo==4.0.0
matplotlib==3.7.1
contextily==1.3.0
flox==0.7.2
xskillscore==0.0.24
pyogrio==0.7.2
pyogrio>=0.7.2,<=0.8.0
pystac-client==0.7.5
s3fs<=2023.12.1
7 changes: 6 additions & 1 deletion src/gval/catalogs/catalogs.py
@@ -6,6 +6,7 @@
# __all__ = ['*']
__author__ = "Fernando Aristizabal"

import gc
from typing import Iterable, Optional, Callable, Tuple
import os

@@ -172,7 +173,7 @@ def loadxr(map, open_kwargs):
else:
raise ValueError("compare_type must be str or Callable")

# write agreement map to file
# Write agreement map to file
if (agreement_map_field is not None) & isinstance(
agreement_map, (xr.DataArray, xr.Dataset)
):
@@ -181,6 +182,10 @@ def loadxr(map, open_kwargs):
row[agreement_map_field], **agreement_map_write_kwargs
)

# Unfortunately necessary until a fix is found in xarray/rioxarray io
del candidate_map, benchmark_map, agreement_map
gc.collect()

return metrics_df

# make kwargs for dask apply
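A minimal sketch (not part of the commit) of the cleanup pattern this hunk adds to the catalog comparison loop; the file paths, the `compare_row` name, and the subtraction used as a stand-in comparison are illustrative only:

```python
import gc

import rioxarray


def compare_row(candidate_path: str, benchmark_path: str, agreement_path: str) -> None:
    """Illustrative per-row comparison; the actual comparison logic is a placeholder."""
    candidate_map = rioxarray.open_rasterio(candidate_path, masked=True)
    benchmark_map = rioxarray.open_rasterio(benchmark_path, masked=True)

    # Stand-in for the real agreement computation.
    agreement_map = candidate_map - benchmark_map
    agreement_map.rio.to_raster(agreement_path)

    # Drop references and force a collection so rioxarray/xarray buffers and
    # file handles are released before the next catalog row is processed.
    del candidate_map, benchmark_map, agreement_map
    gc.collect()
```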
53 changes: 41 additions & 12 deletions src/gval/comparison/pairing_functions.py
@@ -17,10 +17,34 @@
from numbers import Number

import numpy as np
import numba as nb


@nb.vectorize(nopython=True)
from numba import vectorize, uint8, int32, int64, float32, float64, boolean


# Numba Type Definitions
one_param_function_types = [
uint8(uint8),
int32(int32),
int64(int64),
float32(float32),
float64(float64),
]
two_param_function_types = [
uint8(uint8, uint8),
int32(int32, int32),
int64(int64, int64),
float32(float32, float32),
float64(float64, float64),
]
not_natural_number_types = [
boolean(uint8, boolean),
boolean(int32, boolean),
int64(int64, boolean),
float32(float32, boolean),
float64(float64, boolean),
]


@vectorize(not_natural_number_types, nopython=True)
def _is_not_natural_number(
x: Number, raise_exception: bool
) -> bool: # pragma: no cover
@@ -49,7 +73,7 @@ def _is_not_natural_number(
return False # treated as natural for this use case

# checks for non-negative and whole number
elif (x < 0) | ((x - nb.int64(x)) != 0):
elif (x < 0) | ((x - int64(x)) != 0):
if raise_exception:
raise ValueError(
"Non natural number found (non-negative integers, excluding Inf) [0, 1, 2, 3, 4, ...)"
@@ -62,7 +86,7 @@
return False


@nb.vectorize(nopython=True)
@vectorize(two_param_function_types, nopython=True)
def cantor_pair(c: Number, b: Number) -> Number: # pragma: no cover
"""
Produces unique natural number for two non-negative natural numbers (0,1,2,...)
@@ -92,7 +116,7 @@ def cantor_pair(c: Number, b: Number) -> Number:  # pragma: no cover
return 0.5 * (c**2 + c + 2 * c * b + 3 * b + b**2)


@nb.vectorize(nopython=True)
@vectorize(two_param_function_types, nopython=True)
def szudzik_pair(c: Number, b: Number) -> Number: # pragma: no cover
"""
Produces unique natural number for two non-negative natural numbers (0,1,2,3,...).
@@ -122,7 +146,7 @@ def szudzik_pair(c: Number, b: Number) -> Number:  # pragma: no cover
return c**2 + c + b if c >= b else b**2 + c


@nb.vectorize(nopython=True)
@vectorize(one_param_function_types, nopython=True)
def _negative_value_transformation(x: Number) -> Number: # pragma: no cover
"""
Transforms negative values for use with pairing functions that only accept non-negative integers.
@@ -147,7 +171,7 @@ def _negative_value_transformation(x: Number) -> Number:  # pragma: no cover
return 2 * x if x >= 0 else -2 * x - 1


@nb.vectorize(nopython=True)
@vectorize(two_param_function_types, nopython=True)
def cantor_pair_signed(c: Number, b: Number) -> Number: # pragma: no cover
"""
Output unique natural number for each unique combination of whole numbers using Cantor signed method.
@@ -177,7 +201,7 @@ def cantor_pair_signed(c: Number, b: Number) -> Number:  # pragma: no cover
return cantor_pair(ct, bt)


@nb.vectorize(nopython=True)
# from typing import TypeVar
#
# T = TypeVar("T")


@vectorize(two_param_function_types, nopython=True)
def szudzik_pair_signed(c: Number, b: Number) -> Number: # pragma: no cover
"""
Output unique natural number for each unique combination of whole numbers using Szudzik signed method._summary_
@@ -386,10 +415,10 @@ def pairing_dict_fn(
"Value combination found not accounted for in pairing dictionary"
)

return nb.vectorize(nopython=True)(pairing_dict_fn)
return vectorize(two_param_function_types, nopython=True)(pairing_dict_fn)


@nb.vectorize(nopython=True)
@vectorize(two_param_function_types, nopython=True)
def difference(c: Number, b: Number) -> Number: # pragma: no cover
"""
Calculates the difference between candidate and benchmark.
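A hedged sketch (not from this repository) of what the explicit signature lists above buy: `@vectorize` with signatures compiles eagerly at import time and fixes the accepted input/output dtype combinations, so results keep a predictable dtype rather than whatever a lazily compiled ufunc infers at call time. The `add_pair` function is illustrative only:

```python
import numpy as np
from numba import vectorize, int32, int64, float32, float64

two_param_function_types = [
    int32(int32, int32),
    int64(int64, int64),
    float32(float32, float32),
    float64(float64, float64),
]


@vectorize(two_param_function_types, nopython=True)
def add_pair(c, b):
    # Illustrative stand-in for a pairing function.
    return c + b


a = np.arange(3, dtype=np.int32)
b = np.arange(3, dtype=np.int32)
print(add_pair(a, b).dtype)  # int32 -- the signature, not promotion, decides the dtype
```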
14 changes: 10 additions & 4 deletions src/gval/comparison/tabulation.py
@@ -91,16 +91,20 @@ def _crosstab_2d_DataArrays(
is_dsk = True

agreement_map.name = "group"
ag_dtype = agreement_map.dtype

if is_dsk:
agreement_counts = xarray_reduce(
agreement_map,
agreement_map,
engine="numba",
expected_groups=dask.array.unique(agreement_map.data),
func="count",
)
else:
agreement_counts = xarray_reduce(agreement_map, agreement_map, func="count")
agreement_counts = xarray_reduce(
agreement_map, agreement_map, engine="numba", func="count"
)

def not_nan(number):
return not np.isnan(number)
@@ -129,13 +133,15 @@ def not_nan(number):
for x in filter(not_nan, agreement_counts.coords["group"].values)
],
"agreement_values": list(
filter(not_nan, agreement_counts.coords["group"].values.astype(float))
filter(
not_nan, agreement_counts.coords["group"].values.astype(ag_dtype)
)
),
"counts": [
x
for x, y in zip(
agreement_counts.values.astype(float),
agreement_counts.coords["group"].values.astype(float),
agreement_counts.values.astype(ag_dtype),
agreement_counts.coords["group"].values.astype(ag_dtype),
)
if not np.isnan(y)
],
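A small sketch (synthetic data, not from the commit) of the flox call pattern used above: the agreement map is grouped by itself and its members counted, with `engine="numba"` selecting flox's numba-backed aggregation kernels; for dask-backed arrays the diff additionally passes precomputed `expected_groups`:

```python
import numpy as np
import xarray as xr
from flox.xarray import xarray_reduce

agreement_map = xr.DataArray(
    np.array([[0, 1, 1], [2, 2, 2]], dtype=np.uint8),
    dims=("y", "x"),
    name="group",
)

# Group the array by itself and count members of each unique value.
agreement_counts = xarray_reduce(
    agreement_map, agreement_map, engine="numba", func="count"
)
print(agreement_counts.coords["group"].values)  # unique agreement values
print(agreement_counts.values)                  # their counts
```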
12 changes: 6 additions & 6 deletions tests/cases_catalogs.py
@@ -79,15 +79,15 @@
pd.DataFrame(
{
"map_id_candidate": [
"s3://gval-test/candidate_continuous_0.tif",
"s3://gval-test/candidate_continuous_1.tif",
"s3://gval-test/candidate_continuous_1.tif",
f"{TEST_DATA_DIR}/candidate_continuous_0.tif",
f"{TEST_DATA_DIR}/candidate_continuous_1.tif",
f"{TEST_DATA_DIR}/candidate_continuous_1.tif",
],
"compare_id": ["compare1", "compare2", "compare2"],
"map_id_benchmark": [
"s3://gval-test/benchmark_continuous_0.tif",
"s3://gval-test/benchmark_continuous_1.tif",
"s3://gval-test/benchmark_continuous_1.tif",
f"{TEST_DATA_DIR}/benchmark_continuous_0.tif",
f"{TEST_DATA_DIR}/benchmark_continuous_1.tif",
f"{TEST_DATA_DIR}/benchmark_continuous_1.tif",
],
"value1_candidate": [1, 2, 2],
"value2_candidate": [5, 6, 6],
2 changes: 1 addition & 1 deletion tests/cases_compare.py
@@ -225,7 +225,7 @@ def case_make_pairing_dict(
pairing_dict_fn_inputs = [
(1, 2, {(1, 2): 3}, 3),
(9, 10, {(9, 10.0): 1}, 1),
(-1, 10, {(-1, 10): np.nan}, np.nan),
(-1.0, 10.0, {(-1.0, 10.0): np.nan}, np.nan),
]


2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -18,7 +18,7 @@
from gval.comparison.pairing_functions import PairingDict

# name of S3 for test data
TEST_DATA_S3_NAME = "gval-test"
TEST_DATA_S3_NAME = "gval"
TEST_DATA_DIR = f"s3://{TEST_DATA_S3_NAME}"


8 changes: 7 additions & 1 deletion tests/test_homogenize.py
@@ -10,6 +10,7 @@
import xarray as xr
import numpy as np
import geopandas as gpd
from geopandas.testing import assert_geodataframe_equal

from gval.homogenize.spatial_alignment import (
_matching_crs,
@@ -191,7 +192,12 @@ def test_vectorize_raster_success(raster_map, expected):
vector_df = _vectorize_data(raster_data=raster_map)

assert isinstance(vector_df, gpd.GeoDataFrame)
assert vector_df.equals(expected)
assert_geodataframe_equal(
vector_df.sort_values("geometry", ignore_index=True),
expected.sort_values("geometry", ignore_index=True),
check_index_type=False,
check_dtype=False,
)


@parametrize_with_cases(
Expand Down
