In [1]:
import json
import socket
import time
from pathlib import Path

import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine, text as sql_text
from sqlalchemy.engine import URL
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import sessionmaker
from tqdm import tqdm

from pyagnps import soil_data_market as sdm
from pyagnps.utils import log_to_file, get_current_time

In [2]:
# DATABASE SETUP
# Connection settings are read from a JSON file so that no secret is written
# in the notebook itself. The path and the parsed settings get separate names.
credentials_path = Path("../../inputs/db_credentials.json")
credentials = json.loads(credentials_path.read_text())

# Unpack the individual settings used to build the engine further down.
user, password = credentials["user"], credentials["password"]
host, port = credentials["host"], credentials["port"]
database = credentials["database"]

In [3]:
# Scratch directory that receives one sub-folder of SSURGO files per THUC.
ssurgo_files_dir = Path('../../tmp/')

# THUC delineations (GeoPackage), read from the repository's inputs folder.
# The same file was previously read from
# "D:/AIMS/Datasets/THUCS_TopAGNPS_Delineations/40k_SM/".
path_to_thucs = Path(
    '../../inputs/thucs/tophuc_S_M_40000_closed_holes_with_container_thuc_merged_bbox_area_first_kept.gpkg'
)

# gSSURGO CONUS file geodatabase; absolute, machine-specific location.
path_to_gssurgo_gdb = Path(
    "D:/AIMS/Datasets/Soil/gSSURGO/gSSURGO_CONUS_202210.gdb"
)

In [4]:
# Host name of the machine running the notebook; used as a prefix in the
# timestamped log messages printed by the cells below.
nodename = socket.gethostname()

In [5]:
# Make sure the scratch directory exists (missing parents are created and an
# already existing directory is not an error).
ssurgo_files_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# Create the SQLAlchemy engine used by every database cell below.
# The URL is assembled with URL.create instead of an f-string so that special
# characters in the user name or password (e.g. "@", "/", ":") are escaped
# correctly rather than corrupting the connection string.
db_url = URL.create(
    drivername="postgresql",
    username=user,
    password=password,
    host=host,
    port=port,
    database=database,
)
engine = create_engine(db_url)

In [7]:
# GeoDataFrame containing the thucs and their geometry, ordered from the
# largest to the smallest bounding-box area.
thucs = gpd.read_file(path_to_thucs).sort_values(
    by=["bbox_area_sqkm"], ascending=False
)

In [58]:
# THUC processed by the rest of the notebook. The id is interpolated into
# directory and database table names below; the ids in the trailing comment
# are alternatives kept by the author.
thuc_id = '1311' #'1804' '1962'

In [59]:
# Interactive map of the selected THUC only.
thucs.loc[thucs['tophucid'] == thuc_id].explore()

In [60]:
# Per-THUC working folder inside the scratch directory.
thucid_dir_name = f"thuc_{thuc_id}_ssurgo"
thuc_dir = ssurgo_files_dir.joinpath(thucid_dir_name)

if not thuc_dir.exists():
    # First time this THUC is handled: create its folder.
    thuc_dir.mkdir(parents=True)
else:
    # NOTE(review): only the message is emitted here; nothing in this cell
    # prevents the following cells from running for this THUC.
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: SKIPPING")

2023-06-06-13-39-18: Luc-NCCHE-XPS: 1311: SKIPPING


In [61]:
# Collect thuc cells geometry from database and project it to the estimated
# UTM zone. Defines `cells` (GeoDataFrame) and `utm` (CRS) for later cells.
try:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: Querying cells from database...")

    # The table name depends on the THUC id, so it is formatted into the
    # statement (identifiers cannot be passed as bound parameters).
    query = f"SELECT * FROM thuc_{thuc_id}_annagnps_cell_ids"

    with engine.connect() as conn:
        cells = gpd.read_postgis(sql=sql_text(query), con=conn, geom_col="geom")

    utm = cells.estimate_utm_crs()
    cells = cells.to_crs(utm)

except Exception as e:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: {e}")
    # Re-raise after logging: continuing would leave `cells`/`utm` undefined,
    # or silently pointing at data from a previous THUC, for the cells below.
    raise

2023-06-06-13-39-22: Luc-NCCHE-XPS: 1311: Querying cells from database...


In [13]:
# cells.to_file(f'../../tmp/thuc_{thuc_id}_ssurgo/cells_{thuc_id}.gpkg', driver='GPKG')

In [64]:
# Read the gSSURGO map-unit polygons that fall inside the bounding box of the
# cells, then project them to the same UTM CRS as the cells.
try:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: Getting gSSURGO data")

    geo_soil = gpd.read_file(path_to_gssurgo_gdb, driver='OpenFileGDB', layer='MUPOLYGON', bbox=cells)
    geo_soil = geo_soil.to_crs(utm)

except Exception as e:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: {e}")
    # Re-raise after logging: continuing would leave `geo_soil` undefined, or
    # holding polygons from a previous THUC, for the cells below.
    raise

if geo_soil.empty:
    # An empty result is not an error for the read itself, but every later
    # step (overlay, plurality, DB update) would have nothing to work with.
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: No gSSURGO polygons found within the cells bounding box")

2023-06-06-13-39-45: Luc-NCCHE-XPS: 1311: Getting gSSURGO data


In [57]:
# Sanity check: True means the gSSURGO read returned no polygons at all.
geo_soil.empty

True

In [66]:
# Interactive map of the soil polygons. An empty GeoDataFrame has no valid
# bounds and makes explore() fail ("Location values cannot contain NaNs"),
# so the map is only rendered when there is something to show.
geo_soil.explore() if not geo_soil.empty else None

In [53]:
# NOTE(review): this cell repeats the geo_soil.explore() cell just above. The
# ValueError recorded below presumably comes from a run where geo_soil was
# empty - confirm, then consider removing this duplicate.
geo_soil.explore()

ValueError: Location values cannot contain NaNs.

In [15]:
# geo_soil.to_file(f'../../tmp/thuc_{thuc_id}_ssurgo/gssurgo_{thuc_id}.gpkg', driver='GPKG')

In [39]:
# Cut the cells by the soil polygons, keeping only the overlapping parts.
# This rebinds `cells`: each row is now one (cell, soil polygon) piece.
cells = gpd.overlay(cells, geo_soil, how='intersection')

In [40]:
# Peek at the first three intersection pieces.
cells.head(3)

Unnamed: 0,fid,dn,AREASYMBOL,SPATIALVER,MUSYM,MUKEY,Shape_Length,Shape_Area,geometry
0,272,322,MT091,4.0,WzC,348881,3317.273474,323209.87,"POLYGON ((530988.138 5427457.019, 530966.261 5..."
1,279,302,MT091,4.0,WzC,348881,3317.273474,323209.87,"POLYGON ((531438.138 5427457.019, 531438.138 5..."
2,286,313,MT091,4.0,WzC,348881,3317.273474,323209.87,"POLYGON ((531138.138 5427457.019, 531108.138 5..."


In [41]:
# Merge the intersection pieces back into a single geometry per cell ('dn'),
# keeping 'dn' as a regular column rather than as the index.
cells = cells.dissolve(by='dn', as_index=False)

In [42]:
cells

Unnamed: 0,dn,geometry,fid,AREASYMBOL,SPATIALVER,MUSYM,MUKEY,Shape_Length,Shape_Area
0,302,"POLYGON ((531438.138 5427427.019, 531318.138 5...",279,MT091,4.0,WzC,348881,3317.273474,3.232099e+05
1,303,"POLYGON ((530718.138 5427457.019, 530688.138 5...",300,MT091,4.0,ZwE,348884,13197.221840,2.844126e+06
2,311,"POLYGON ((531648.138 5426377.019, 531648.138 5...",336,MT091,4.0,ZwE,348884,21274.200027,2.681258e+06
3,312,"POLYGON ((531708.138 5427468.671, 531708.138 5...",325,MT091,4.0,WzC,348881,3317.273474,3.232099e+05
4,313,"MULTIPOLYGON (((531198.138 5426947.019, 531198...",286,MT091,4.0,WzC,348881,3317.273474,3.232099e+05
...,...,...,...,...,...,...,...,...,...
192,1332,"POLYGON ((528108.138 5427307.019, 528078.138 5...",293,MT091,4.0,ZwE,348884,13197.221840,2.844126e+06
193,1341,"POLYGON ((527388.138 5427194.615, 527388.138 5...",308,MT091,4.0,ZwE,348884,13197.221840,2.844126e+06
194,1342,"MULTIPOLYGON (((527658.138 5427097.019, 527628...",301,MT091,4.0,ZwE,348884,13197.221840,2.844126e+06
195,1343,"POLYGON ((527349.086 5427187.019, 527328.138 5...",295,MT091,4.0,ZwE,348884,13197.221840,2.844126e+06


In [43]:
# Keep only the cell identifier and its geometry; the soil attributes carried
# over by the overlay are dropped here.
cells = cells[['dn', 'geometry']]

In [44]:
# Name the geometry column 'geom' again and register it as the active
# geometry of the GeoDataFrame.
cells = cells.rename(columns={'geometry': 'geom'}).set_geometry('geom')

In [45]:
# Check the resulting two-column layout on the first three rows.
cells.head(3)

Unnamed: 0,dn,geom
0,302,"POLYGON ((531438.138 5427427.019, 531318.138 5..."
1,303,"POLYGON ((530718.138 5427457.019, 530688.138 5..."
2,311,"POLYGON ((531648.138 5426377.019, 531648.138 5..."


In [18]:
# cells.to_file(f'../../tmp/thuc_{thuc_id}_ssurgo/cells_in_gssurgo_{thuc_id}.gpkg', driver='GPKG')

In [19]:
# First rows of the soil polygons; MUKEY is the attribute assigned to the
# cells in the plurality step below.
geo_soil.head()

Unnamed: 0,AREASYMBOL,SPATIALVER,MUSYM,MUKEY,Shape_Length,Shape_Area,geometry
0,NM029,5.0,Dc,56019,2392.877101,182292.33,"MULTIPOLYGON (((232490.640 3522468.483, 232496..."
1,NM029,5.0,Dc,56019,1498.951588,74702.515,"MULTIPOLYGON (((234142.514 3521774.397, 234084..."
2,NM029,5.0,NT,56057,15313.271566,1389715.125,"MULTIPOLYGON (((232631.687 3523184.682, 232666..."
3,NM029,5.0,NT,56057,15892.871205,3346036.38,"MULTIPOLYGON (((236152.123 3523544.621, 236134..."
4,NM029,5.0,SU,56066,19680.460042,4021947.11,"MULTIPOLYGON (((231128.600 3527649.307, 231147..."


Plurality analysis

In [20]:
# Attach a MUKEY to every cell ('dn' is the bin id) through the pyagnps
# plurality helper, then switch to the column names used for the DB update.
cells = sdm.assign_attr_plurality_vector_layer(
    cells, geo_soil, attr="MUKEY", bin_id="dn"
).rename(columns={"dn": "cell_id", "MUKEY": "soil_id"})

In [21]:
# Display the cells with their assigned soil_id.
cells

Unnamed: 0,cell_id,geom,soil_id
0,1011,"POLYGON ((233860.496 3519972.753, 233920.496 3...",56007
1,1021,"POLYGON ((234220.496 3520092.753, 234280.496 3...",56073
2,1023,"POLYGON ((234340.496 3519912.753, 234370.496 3...",56073
3,1031,"POLYGON ((234430.496 3519882.753, 234370.496 3...",56073
4,1033,"POLYGON ((235240.496 3519882.753, 235270.496 3...",56066
...,...,...,...
343,3001,"POLYGON ((240370.496 3519732.753, 240460.496 3...",56056
344,3041,"POLYGON ((240460.496 3519762.753, 240700.496 3...",56056
345,3121,"POLYGON ((240850.496 3519732.753, 240820.496 3...",56056
346,3122,"POLYGON ((240730.496 3519852.753, 240760.496 3...",56056


Populating Soil_ID in db

In [22]:
# One dict per cell with the keys "cell_id" and "soil_id"; these keys match
# the :cell_id / :soil_id bind parameters of the UPDATE statement below.
data_to_update = cells[["cell_id", "soil_id"]].to_dict(orient="records")

In [24]:
# Write the soil ids back to this THUC's cell data table in one transaction.

# create a session factory
Session = sessionmaker(bind=engine)

# The table name depends on the THUC id, so it is formatted into the statement;
# the values are bound parameters filled from each record of data_to_update.
query = f"UPDATE thuc_{thuc_id}_annagnps_cell_data_section SET soil_id = :soil_id WHERE cell_id = :cell_id"

if not data_to_update:
    # Executing the statement with an empty parameter list would not update
    # anything useful, so report it and leave the database untouched.
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: Nothing to update")
else:
    try:
        print('Populating...')
        # `with Session()` closes the session on exit; `session.begin()`
        # commits when the block succeeds and rolls back if it raises.
        with Session() as session, session.begin():
            session.execute(sql_text(query), data_to_update)

    except SQLAlchemyError as e:
        goodsofar = False
        now = get_current_time()
        print(f"{now}: {nodename}: {thuc_id}: Failed to update DB, rolling back...")
        # Show the underlying error instead of discarding it.
        print(f"{now}: {nodename}: {thuc_id}: {e}")

    else:
        # Only reported when the transaction was actually committed.
        print('Done!')

Populating...
Done!
