Skip to content

Commit

Permalink
add command to audit databases
Browse files Browse the repository at this point in the history
  • Loading branch information
jkeifer committed Jun 3, 2024
1 parent 811c0e4 commit c0e9486
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 1 deletion.
168 changes: 168 additions & 0 deletions snodas/management/commands/auditdatabases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
from collections.abc import Iterable
from dataclasses import asdict, dataclass
from datetime import date
from typing import Self

from django.conf import settings
from django.core.management.base import BaseCommand
from django.db import connection

from snodas import types
from snodas.snodas.db import get_raster_database
from snodas.snodas.fileinfo import Product, SNODASFileInfo


@dataclass
class RasterTracker:
swe: bool = False
depth: bool = False
runoff: bool = False
sublimation: bool = False
sublimation_blowing: bool = False
precip_solid: bool = False
precip_liquid: bool = False
average_temp: bool = False

@classmethod
def from_raster_db(
cls: type[Self],
rasters: Iterable[SNODASFileInfo],
) -> dict[date, Self]:
_rs: dict[date, list[SNODASFileInfo]] = {}
for raster in rasters:
try:
_rs[raster.datetime.date()].append(raster)
except KeyError:
_rs[raster.datetime.date()] = [raster]

return {
date_: cls(
**{r.product.value: True for r in rasters},
)
for date_, rasters in _rs.items()
}

@classmethod
def from_pg_rows(cls: type[Self], rows) -> dict[date, Self]:
return {
row[0]: cls(
swe=row[1],
depth=row[2],
runoff=row[3],
sublimation=row[4],
sublimation_blowing=row[5],
precip_solid=row[6],
precip_liquid=row[7],
average_temp=row[8],
)
for row in rows
}

def missing(self: Self) -> list[Product]:
return [Product(product) for product, has in asdict(self).items() if not has]


class Command(BaseCommand):
    """Audit the raster database against postgres and print the differences."""

    help = (
        'Audit the raster database against postgres, reporting AOIs and '
        'SNODAS rasters that are missing, duplicated, or incomplete.'
    )

    requires_system_checks = []  # type: ignore # noqa: RUF012
    can_import_settings = True

    def handle(self: Self, *_, **__) -> None:
        """Run the AOI and SNODAS audits, then print an overall verdict."""
        self.raster_db = get_raster_database(settings.SNODAS_RASTERDB)
        # Both audits always run so that every discrepancy gets reported,
        # even when the first one already found a difference.
        aoi_diff = self.diff_aois()
        snodas_diff = self.diff_snodas()

        if aoi_diff or snodas_diff:
            print('⚠️ DATABASES DIFFER ⚠️. See above for details.')  # noqa: T201
        else:
            print('✅ DATABASES MATCH ✅. Congratulations.')  # noqa: T201

    @staticmethod
    def _unique_aois(source: str, triplets) -> set:
        """Collect ``triplets`` into a set, printing each repeat for ``source``."""
        seen: set[types.StationTriplet] = set()
        for triplet in triplets:
            if triplet in seen:
                print(f"{source}: duplicate AOI '{triplet}'")  # noqa: T201
            else:
                seen.add(triplet)
        return seen

    def diff_aois(self: Self) -> bool:
        """Compare AOI station triplets between the raster db and postgres.

        Duplicates within a store are reported but do not count as a
        difference.

        Returns:
            ``True`` when either store has an AOI the other lacks.
        """
        raster_db_aois = [aoi.station_triplet for aoi in self.raster_db.aoi_rasters()]
        pg_aois = self.get_pg_aois()

        raster_db_aoi_set = self._unique_aois('raster db', raster_db_aois)
        pg_aoi_set = self._unique_aois('postgres', pg_aois)

        aoi_diff: bool = False
        for triplet in pg_aoi_set - raster_db_aoi_set:
            aoi_diff = True
            print(f"raster db: missing AOI '{triplet}'")  # noqa: T201

        for triplet in raster_db_aoi_set - pg_aoi_set:
            aoi_diff = True
            print(f"postgres: missing AOI '{triplet}'")  # noqa: T201

        if not aoi_diff:
            print('raster db and postgres have same AOI sets')  # noqa: T201

        return aoi_diff

    def diff_snodas(self: Self) -> bool:
        """Compare SNODAS rasters between the raster db and postgres.

        Products missing within a store are reported per date. The stores
        differ when a date exists in only one of them, or when a shared date
        has different products present in each.

        Returns:
            ``True`` when the two stores differ.
        """
        rdb_rasters = RasterTracker.from_raster_db(
            self.raster_db.snodas_rasters(),
        )
        pg_rasters = RasterTracker.from_pg_rows(self.get_pg_rasters())

        for date_, raster in rdb_rasters.items():
            for missing in raster.missing():
                print(f"raster db: date {date_} missing '{missing}'")  # noqa: T201

        for date_, raster in pg_rasters.items():
            for missing in raster.missing():
                print(f"postgres: date {date_} missing '{missing}'")  # noqa: T201

        rdb_dates = set(rdb_rasters)
        pg_dates = set(pg_rasters)

        # Sorted so the report is stable from run to run.
        raster_diff: bool = False
        for date_ in sorted(pg_dates - rdb_dates):
            raster_diff = True
            print(f"raster db: missing raster '{date_}'")  # noqa: T201

        for date_ in sorted(rdb_dates - pg_dates):
            raster_diff = True
            print(f"postgres: missing raster '{date_}'")  # noqa: T201

        # A date held by both stores still differs when the stores disagree
        # about which products exist for it.
        for date_ in sorted(rdb_dates & pg_dates):
            if rdb_rasters[date_] != pg_rasters[date_]:
                raster_diff = True
                print(  # noqa: T201
                    f"raster db and postgres: products differ for date '{date_}'",
                )

        return raster_diff

    @staticmethod
    def get_pg_aois() -> list[types.StationTriplet]:
        """Return the ``awdb_id`` of every pourpoint row with a polygon."""
        with connection.cursor() as cursor:
            cursor.execute(
                'select awdb_id from pourpoint.pourpoint where polygon is not null',
            )
            return [types.StationTriplet(row[0]) for row in cursor.fetchall()]

    @staticmethod
    def get_pg_rasters():
        """Return one row per ``snodas.raster`` record.

        Each row is the date followed by a not-null flag for every product
        column, in the order ``RasterTracker.from_pg_rows`` expects.
        """
        with connection.cursor() as cursor:
            cursor.execute(
                """
                select
                    date,
                    swe is not null,
                    depth is not null,
                    runoff is not null,
                    sublimation is not null,
                    sublimation_blowing is not null,
                    precip_solid is not null,
                    precip_liquid is not null,
                    average_temp is not null
                from snodas.raster
                """,
            )
            return cursor.fetchall()
9 changes: 8 additions & 1 deletion snodas/snodas/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from snodas.snodas import constants
from snodas.snodas.aoi import AOI
from snodas.snodas.coordinates import Pixel, Tile
from snodas.snodas.fileinfo import Product
from snodas.snodas.fileinfo import Product, SNODASFileInfo
from snodas.snodas.input_rasters import SNODASInputRasterSet
from snodas.snodas.raster import DEM, AOIRaster, AreaRaster

Expand Down Expand Up @@ -385,6 +385,13 @@ def import_snodas_rasters(
for raster in rasters:
raster.write_cog(output_dir=output_dir, force=force)

def aoi_rasters(self: Self) -> Iterator[AOIRaster]:
    """Lazily open and yield each ``*.tif`` file found in ``self._aoi_rasters``."""
    for tif_path in self._aoi_rasters.glob('*.tif'):
        yield AOIRaster.open(tif_path)

def snodas_rasters(self: Self) -> Iterator[SNODASFileInfo]:
    """Lazily yield a file info for each ``*.tif`` one level below ``self._cogs``.

    Every entry of ``self._cogs`` is globbed for ``*.tif`` files.
    NOTE(review): entries are presumably per-date directories; confirm
    against the code that writes the COGs.
    """
    for date_dir in self._cogs.iterdir():
        for tif_path in date_dir.glob('*.tif'):
            yield SNODASFileInfo(tif_path)


@cache
def get_raster_database(path: Path) -> RasterDatabase:
Expand Down
8 changes: 8 additions & 0 deletions snodas/snodas/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from osgeo import gdal

from snodas import types
from snodas.snodas.constants import SNODAS_ORIGIN_TILE, TILE_PREFIX
from snodas.snodas.coordinates import Pixel, Tile

Expand Down Expand Up @@ -65,12 +66,17 @@ def __init__(self: Self, fileinfo: SNODASFileInfo) -> None:

@dataclass
class AOIRaster:
path: Path
array: numpy.typing.NDArray[numpy.float32]
intersected_tiles: list[Tile]
origin: Pixel
min_elevation: float
max_elevation: float

@property
def station_triplet(self: Self) -> types.StationTriplet:
    """Station triplet built from the path stem with each ``_`` turned into ``:``."""
    stem = self.path.stem
    return types.StationTriplet(stem.replace('_', ':'))

@classmethod
def open(
cls: type[Self],
Expand Down Expand Up @@ -99,6 +105,7 @@ def open(
del ds

return cls(
path=path,
array=array,
intersected_tiles=intersected_tiles,
origin=origin,
Expand Down Expand Up @@ -146,6 +153,7 @@ def from_aoi_raster(
aoi_raster.load_raster_tiles_into_array(area_raster, area)
return cls(
area=area,
path=aoi_raster.path,
array=aoi_raster.array,
intersected_tiles=aoi_raster.intersected_tiles,
origin=aoi_raster.origin,
Expand Down

0 comments on commit c0e9486

Please sign in to comment.