In [1]:
from pathlib import Path
import json
import socket

import rasterio
import rioxarray
import geopandas as gpd

from tqdm import tqdm

from pyagnps import soil_data_market as sdm
from pyagnps.utils import log_to_file, get_current_time

from sqlalchemy import create_engine, text as sql_text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import SQLAlchemyError

In [2]:
# DATABASE SETUP
credentials = Path("../../inputs/db_credentials.json")
with open(credentials, "r") as f:
    credentials = json.load(f)

user = credentials["user"]
password = credentials["password"]
host = credentials["host"]
port = credentials["port"]
database = credentials["database"]

# create a SQLAlchemy engine object
engine = create_engine(f"postgresql://{user}:{password}@{host}:{port}/{database}")

nodename = socket.gethostname()

In [3]:
raster_natsgo_path = Path('D:/AIMS/Datasets/Soil/gNATSGO/gNATSGO-mukey.tif')
path_to_thucs = Path('../../inputs/thucs/tophuc_S_M_40000_closed_holes_with_container_thuc_merged_bbox_area_first_kept.gpkg')
path_to_sapolygon = Path('../../inputs/soil/gNATSGO/gNATSGO_SAPOLYGON.gpkg')

In [151]:
# thucs = gpd.read_file(
#     path_to_thucs
# )  # GeoDataFrame containing the thucs and their geometry
# thucs = thucs.sort_values(by=["bbox_area_sqkm"], ascending=True)

# runlist = thucs["tophucid"].to_list()
# runlist = ['2030']
thuc_id = '2030'

In [152]:
# Collect thuc cells geometry from database
now = get_current_time()
print(f"{now}: {nodename}: {thuc_id}: Querying cells from database...")

query = f"SELECT * FROM thuc_{thuc_id}_annagnps_cell_ids"

with engine.connect() as conn:
    cells = gpd.read_postgis(sql=sql_text(query), con=conn, geom_col="geom")

utm = cells.estimate_utm_crs()
cells = cells.to_crs(utm)


2023-06-15-14-30-27: Luc-NCCHE-XPS: 2030: Querying cells from database...


OperationalError: (psycopg2.OperationalError) SSL SYSCALL error: EOF detected

[SQL: SELECT * FROM thuc_2030_annagnps_cell_ids]
(Background on this error at: https://sqlalche.me/e/20/e3q8)

In [24]:
boundary = cells.copy(deep=True)

boundary['geom'] = boundary['geom'].buffer(0)
boundary = boundary.unary_union
boundary = gpd.GeoDataFrame(geometry=[boundary], crs=utm)

In [25]:
boundary.explore()

In [26]:
# Select SAPOLYON using boundary
sapolygon = gpd.read_file(path_to_sapolygon, rows=0)
crs_sapolygon = sapolygon.crs

In [27]:
sapolygon = gpd.read_file(path_to_sapolygon, bbox=boundary.to_crs(crs_sapolygon))

In [28]:
sapolygon.explore(column='SOURCE')

In [29]:
cells_overlay = cells.overlay(sapolygon.to_crs(utm), how='intersection')

In [30]:
# Get the cells that are not covered by SSURGO
cells_to_update = cells_overlay.loc[cells_overlay['SOURCE'] != 'SSURGO',['dn','geometry']]
cells_to_update = cells_to_update.drop_duplicates()
cells_to_update = cells_to_update.rename(columns={"geometry": "geom"})
cells_to_update = cells_to_update.set_geometry('geom')

In [31]:
cells_to_update.head(5)

Unnamed: 0,dn,geom
0,871,"POLYGON ((563528.350 3850598.629, 563528.350 3..."
1,902,"POLYGON ((563978.350 3850118.629, 563978.350 3..."
2,901,"POLYGON ((565028.350 3849818.629, 565028.350 3..."
3,881,"POLYGON ((563858.350 3849968.629, 563858.350 3..."
4,873,"POLYGON ((563498.350 3850418.629, 563498.350 3..."


Doing the plurality analysis using the raster

In [148]:
# raster = rioxarray.open_rasterio(raster_natsgo_path)
import importlib

importlib.reload(sdm);

In [149]:
cells_tmp = sdm.assign_attr_zonal_stats_raster_layer(cells_to_update, raster_natsgo_path, agg_method='majority')

In [150]:
cells_tmp.explore(column='mukey', categorical=True)

In [130]:
cells_tmp.to_file('test.gpkg', driver='GPKG')

In [56]:
from pyproj import CRS

utms = CRS.from_epsg(32612)

In [54]:
utms

<Projected CRS: EPSG:32612>
Name: WGS 84 / UTM zone 12N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 114°W and 108°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Alberta; Northwest Territories (NWT); Nunavut; Saskatchewan. Mexico. United States (USA).
- bounds: (-114.0, 0.0, -108.0, 84.0)
Coordinate Operation:
- name: UTM zone 12N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [57]:
UTM = cells_overlay.estimate_utm_crs()

In [52]:
UTM

<Projected CRS: EPSG:32612>
Name: WGS 84 / UTM zone 12N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: Between 114°W and 108°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Alberta; Northwest Territories (NWT); Nunavut; Saskatchewan. Mexico. United States (USA).
- bounds: (-114.0, 0.0, -108.0, 84.0)
Coordinate Operation:
- name: UTM zone 12N
- method: Transverse Mercator
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [58]:
UTM == utms

True