In [None]:
import json
import socket
import time
from pathlib import Path

import folium
import geopandas as gpd
import pandas as pd
from sqlalchemy import create_engine, text as sql_text
from sqlalchemy.engine import URL
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import sessionmaker
from tqdm import tqdm

from pyagnps import soil_data_market as sdm
from pyagnps.utils import log_to_file, get_current_time

In [None]:
# DATABASE SETUP
# Connection settings are read from a JSON file kept outside the notebook.
credentials_file = Path("../../inputs/db_credentials.json")
with credentials_file.open("r") as handle:
    credentials = json.load(handle)

# Pull out the individual fields that the engine URL is built from.
user, password, host, port, database = (
    credentials[key] for key in ("user", "password", "host", "port", "database")
)

In [None]:
# Input locations. The thucs GeoPackage is read from the repository-relative inputs
# folder; the commented-out path is an alternative absolute location for the same file.
# path_to_thucs = Path(
#     "D:/AIMS/Datasets/THUCS_TopAGNPS_Delineations/40k_SM/tophuc_S_M_40000_closed_holes_with_container_thuc_merged_bbox_area_first_kept.gpkg"
# )
path_to_thucs = Path(
    "../../inputs/thucs/tophuc_S_M_40000_closed_holes_with_container_thuc_merged_bbox_area_first_kept.gpkg"
)

# gSSURGO file geodatabase (absolute path on a local drive).
path_to_gssurgo_gdb = Path("D:/AIMS/Datasets/Soil/gSSURGO/gSSURGO_CONUS_202210.gdb")

# Scratch directory under which the per-thuc SSURGO folders are created.
ssurgo_files_dir = Path("../../tmp/")

In [None]:
# Host name of the machine running the notebook; it prefixes the log messages printed below.
nodename = socket.gethostname()

In [None]:
# Create the scratch directory (and any missing parents); no error if it already exists.
ssurgo_files_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Create the SQLAlchemy engine.
# The URL is assembled from its components with URL.create rather than by string
# formatting, so a user name or password containing characters such as "@", ":",
# "/" or "%" cannot corrupt the connection string.
db_url = URL.create(
    drivername="postgresql",
    username=user,
    password=password,
    host=host,
    # The credentials file may store the port as a string; an empty/missing value
    # falls back to the driver default.
    port=int(port) if port else None,
    database=database,
)
engine = create_engine(db_url)

In [None]:
# GeoDataFrame containing the thucs and their geometry, ordered from the largest
# to the smallest bounding-box area.
thucs = gpd.read_file(path_to_thucs).sort_values(
    by=["bbox_area_sqkm"], ascending=False
)

In [None]:
# Identifier of the thuc processed by the rest of the notebook.
# Other ids noted alongside it: '1804', '1962', '1311'.
thuc_id = '1957'

In [None]:
# Interactive map restricted to the rows whose tophucid matches the selected thuc.
selected_thuc = thucs["tophucid"] == thuc_id
thucs.loc[selected_thuc].explore()

In [None]:
# Make sure the per-thuc working directory exists under the scratch folder.
thucid_dir_name = f"thuc_{thuc_id}_ssurgo"
thuc_dir = ssurgo_files_dir / thucid_dir_name

if thuc_dir.exists():
    # Nothing is skipped when the directory is already there: the cells below still
    # run, so the message only reports that the existing folder is being reused.
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: {thuc_dir} already exists, reusing it")
else:
    thuc_dir.mkdir(parents=True)

In [None]:
# Collect thuc cells geometry from database and reproject it to a local UTM CRS.
try:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: Querying cells from database...")

    # The table name is derived from thuc_id, which is set in the notebook itself.
    query = f"SELECT * FROM thuc_{thuc_id}_annagnps_cell_ids"

    with engine.connect() as conn:
        cells = gpd.read_postgis(sql=sql_text(query), con=conn, geom_col="geom")

    # `utm` is reused further down to bring the soil polygons into the same CRS.
    utm = cells.estimate_utm_crs()
    cells = cells.to_crs(utm)

except Exception as e:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: {e}")
    # Every later cell needs `cells` and `utm`; stop here instead of letting the
    # notebook continue with missing or stale variables from a previous run.
    raise

In [None]:
# Interactive map of the cells returned by the database query (already reprojected to UTM).
cells.explore()

In [None]:
# cells.to_file(f'../../tmp/thuc_{thuc_id}_ssurgo/cells_{thuc_id}.gpkg', driver='GPKG')

In [None]:
# Read the gSSURGO map-unit polygons that fall within the bounding box of the cells
# and bring them into the same UTM CRS as the cells.
try:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: Getting gSSURGO data")

    geo_soil = gpd.read_file(path_to_gssurgo_gdb, driver='OpenFileGDB', layer='MUPOLYGON', bbox=cells)
    geo_soil = geo_soil.to_crs(utm)

except Exception as e:
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: {e}")
    # The overlay and plurality steps below need `geo_soil`; stop here instead of
    # continuing with a missing or stale GeoDataFrame from a previous run.
    raise

In [None]:
# Draw the soil polygons and the cells on one folium map. Both layers are
# reprojected to EPSG:4326 before being serialised to GeoJSON.
# (folium is imported in the import cell at the top of the notebook.)
m = folium.Map()

folium.GeoJson(geo_soil.to_crs('epsg:4326').to_json()).add_to(m)
folium.GeoJson(cells.to_crs('epsg:4326').to_json()).add_to(m)

In [None]:
# Display the folium map holding the soil and cell layers.
m

In [None]:
# Interactive map of the gSSURGO polygons read for this thuc.
geo_soil.explore()

In [None]:
# geo_soil.to_file(f'../../tmp/thuc_{thuc_id}_ssurgo/gssurgo_{thuc_id}.gpkg', driver='GPKG')

In [None]:
# Intersect the cells with the soil polygons (both layers are in the same UTM CRS).
# Note that `cells` is overwritten, so re-running this cell alone intersects again.
cells = gpd.overlay(cells, geo_soil, how="intersection")

In [None]:
# Peek at the first rows of the intersection result.
cells.head(3)

In [None]:
# Merge the intersection pieces back into one geometry per `dn` value and
# turn `dn` back into a regular column.
cells = (
    cells
    .dissolve(by="dn")
    .reset_index()
)

In [None]:
# Display the dissolved cells.
cells

In [None]:
# Keep only the cell identifier and its geometry; the soil attributes carried over
# by the overlay are dropped here.
cells = cells[["dn", "geometry"]]

In [None]:
# Rename the active geometry column from 'geometry' to 'geom' and keep it as the
# active geometry.
cells = cells.rename_geometry("geom")

In [None]:
# Peek at the first rows after the geometry column has been renamed.
cells.head(3)

In [None]:
# cells.to_file(f'../../tmp/thuc_{thuc_id}_ssurgo/cells_in_gssurgo_{thuc_id}.gpkg', driver='GPKG')

In [None]:
# Peek at the first rows of the soil polygons used in the plurality step.
geo_soil.head(5)

Plurality analysis

In [None]:
# Assign a MUKEY to every `dn` bin via the pyagnps plurality helper, then rename the
# columns to the names used by the database table (cell_id / soil_id).
cells = sdm.assign_attr_plurality_vector_layer(
    cells,
    geo_soil,
    attr="MUKEY",
    bin_id="dn",
).rename(columns={"dn": "cell_id", "MUKEY": "soil_id"})

In [None]:
# Display the cells with their assigned soil_id.
cells

Populating soil_id in the database

In [None]:
# One {"cell_id": ..., "soil_id": ...} dict per cell, used as bound parameters for the UPDATE.
data_to_update = cells[["cell_id", "soil_id"]].to_dict("records")

In [None]:
# Write the soil_id assigned to each cell back to this thuc's cell data table.

# create a session factory
Session = sessionmaker(bind=engine)

# The table name cannot be a bound parameter, so it is interpolated from thuc_id
# (set in this notebook); the row values are passed as bound parameters.
query = f"UPDATE thuc_{thuc_id}_annagnps_cell_data_section SET soil_id = :soil_id WHERE cell_id = :cell_id"

if not data_to_update:
    # With no rows there is nothing to bind to :soil_id / :cell_id, so the
    # statement is not sent at all.
    now = get_current_time()
    print(f"{now}: {nodename}: {thuc_id}: No soil_id values to write, DB left unchanged")
else:
    print('Populating...')
    try:
        # The session is closed when the block exits; session.begin() commits if the
        # block succeeds and rolls the transaction back if it raises.
        with Session() as session, session.begin():
            session.execute(sql_text(query), data_to_update)
    except SQLAlchemyError as e:
        now = get_current_time()
        # Include the error itself so the cause of the failure is not lost.
        print(f"{now}: {nodename}: {thuc_id}: Failed to update DB, transaction rolled back: {e}")
    else:
        # Only report success when the transaction was actually committed.
        print('Done!')