In [4]:
from pathlib import Path
import json
import socket

import pandas as pd
import geopandas as gpd

from tqdm import tqdm

from pyagnps import soil_data_market as sdm

from sqlalchemy import URL, create_engine, text as sql_text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.exc import SQLAlchemyError

In [3]:
# Location of the THUCs GeoPackage on the local machine.
# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs as-is on a machine that has the file at exactly this location.
path_to_thucs = Path(
    "C:/Users/Luc/projects/aims/THUCS/tophuc_S_M_40000_closed_holes_with_container_thuc_merged_bbox_area_first_kept.gpkg"
)

# Load the file into `thucs`; a later cell loops over it and reads its
# `tophucid` column to find the per-THUC database tables.
thucs = gpd.read_file(path_to_thucs)

In [2]:
# DATABASE SETUP
# Connection settings live in a JSON file next to the project inputs, so no
# secret is written in the notebook itself.
credentials_path = Path("../../inputs/db_credentials.json")
with credentials_path.open("r") as f:
    credentials = json.load(f)

# Unpack the individual settings needed to assemble the connection URL.
user, password, host, port, database = (
    credentials[key] for key in ("user", "password", "host", "port", "database")
)

# Describe the PostgreSQL connection as a URL object.
url_object = URL.create(
    "postgresql",
    username=user,
    password=password,
    host=host,
    port=port,
    database=database,
)

# create a SQLAlchemy engine object
engine = create_engine(url_object)

# Host name of the machine running this notebook.
nodename = socket.gethostname()

In [23]:
# THUC ids that are deliberately left untouched by this update.
# NOTE(review): the reason THUC 4463 is excluded is not recorded in this
# notebook -- confirm and document it here.
skipped_thuc_ids = {'4463'}

# Ids whose update failed, collected so they can be inspected or retried
# after the (long) loop instead of being searched for in the printed output.
failed_thuc_ids = []

# Only the id column is needed, so iterate over it directly rather than
# building a full row (geometry included) for every THUC with iterrows().
for thuc_id in tqdm(thucs["tophucid"], total=thucs.shape[0]):

    if thuc_id in skipped_thuc_ids:
        continue

    # Table names cannot be passed as bound parameters, so the id is
    # interpolated into the statement. The ids come from the local THUCs file.
    # The first statement rewrites mgmt_field_id (the name-based rules take
    # precedence over the cdl_value_2022 rules because CASE stops at the first
    # match); the second replaces NULL cdl_value_2022 with -1.
    query = f"""
                UPDATE thuc_{thuc_id}_annagnps_cell_data_section
                SET mgmt_field_id = CASE
                    WHEN mgmt_field_id = 'Open_Water' THEN 'Water'
                    WHEN mgmt_field_id = 'Background' THEN 'Nonag_Undefined'
                    WHEN mgmt_field_id = 'Perennial_Ice_Snow' THEN 'Water'
                    WHEN mgmt_field_id = 'Woody_Wetlands' THEN 'Wetlands'
                    WHEN cdl_value_2022 = -1 THEN 'Nonag_Undefined'
                    WHEN cdl_value_2022 IS NULL THEN 'Nonag_Undefined'
                    ELSE mgmt_field_id  -- Keep original value if no match
                END;

                UPDATE thuc_{thuc_id}_annagnps_cell_data_section
                SET cdl_value_2022 = -1 
                WHERE cdl_value_2022 IS NULL;
            """

    try:
        # engine.begin() commits when the block exits normally and rolls the
        # transaction back if it raises, so both statements apply together or
        # not at all. No manual rollback is needed (the previous handler called
        # rollback() on a connection that was already closed, or not yet bound
        # if connecting itself had failed).
        with engine.begin() as connection:
            connection.execute(sql_text(query))

    except SQLAlchemyError as e:
        # Best effort: record the failure and carry on with the next THUC.
        failed_thuc_ids.append(thuc_id)
        print(f"Error updating THUC {thuc_id}")
        print(e)

100%|██████████| 4800/4800 [1:05:03<00:00,  1.23it/s] 


Check that the update worked by querying one THUC's cell data table and listing its distinct `mgmt_field_id` values.

In [39]:
# Spot-check a single THUC table after the bulk update.
thuc_id = '1451'
query = f"SELECT * FROM thuc_{thuc_id}_annagnps_cell_data_section ORDER BY cell_id"

# Open the connection in a context manager so it is released as soon as the
# read finishes; passing engine.connect() inline left it open on every re-run.
with engine.connect() as connection:
    df = pd.read_sql_query(sql=sql_text(query), con=connection)

# Distinct values of the mgmt_field_id column after the update.
df['mgmt_field_id'].unique()

array(['Water', 'Developed_Open_Space', 'Mixed_Forest',
       'Grassland_Pasture'], dtype=object)

In [34]:
# Show the queried table; the rendered output is truncated to the first and
# last rows and columns.
df

Unnamed: 0,cell_id,soil_id,mgmt_field_id,reach_id,reach_location_code,cell_area,time_of_conc,avg_elevation,rcn_calib_id,avg_land_slope,...,constant_usle_c_fctr,constant_usle_p_fctr,all_oc_calib_fctr,all_n_calib_fctr,all_p_calib_fctr,sheet_and_rill_erosion_calib_fctr,gullies_erosion_calib_fctr,input_units_code,soil_id_annagnps_valid,cdl_value_2022
0,22,2440777,Nonag_Undefined,2,1,14.94,,0.07,,0.00332,...,,,,,,,,1,0,0
1,23,128022,Nonag_Undefined,2,1,0.99,,-0.00,,0.00001,...,,,,,,,,1,0,-1
2,31,2440777,Herbaceous_Wetlands,3,0,10.35,,0.64,,0.01479,...,,,,,,,,1,0,195
3,33,2440777,Nonag_Undefined,3,1,6.12,,-0.00,,0.00001,...,,,,,,,,1,0,-1
4,42,2440777,Herbaceous_Wetlands,4,1,49.05,,0.13,,0.00351,...,,,,,,,,1,0,195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1827,7441,128022,Nonag_Undefined,744,0,10.08,,0.73,,0.01250,...,,,,,,,,1,0,0
1828,7442,128022,Nonag_Undefined,744,1,0.99,,-0.00,,0.00001,...,,,,,,,,1,0,-1
1829,7451,128022,Nonag_Undefined,745,0,10.80,,0.20,,0.00603,...,,,,,,,,1,0,0
1830,7461,128022,Nonag_Undefined,746,0,10.08,,1.63,,0.02849,...,,,,,,,,1,0,0


In [35]:
# Distinct values of the mgmt_field_id column in the currently loaded table.
df.loc[:, 'mgmt_field_id'].unique()

array(['Nonag_Undefined', 'Herbaceous_Wetlands', 'Soybeans',
       'Mixed_Forest', 'Developed_Open_Space', 'Water', 'Corn',
       'Dbl_Crop_WinWht_Soybeans', 'Dbl_Crop_Barley_Soybeans',
       'Other_Hay_Non_Alfalfa', 'Grassland_Pasture', 'Winter_Wheat',
       'Dry_Beans', 'Greens', 'Alfalfa'], dtype=object)

In [26]:
# First rows (if any) whose mgmt_field_id is missing.
missing_mgmt_mask = df['mgmt_field_id'].isna()
df.loc[missing_mgmt_mask].head()

Unnamed: 0,cell_id,soil_id,mgmt_field_id,reach_id,reach_location_code,cell_area,time_of_conc,avg_elevation,rcn_calib_id,avg_land_slope,...,constant_usle_c_fctr,constant_usle_p_fctr,all_oc_calib_fctr,all_n_calib_fctr,all_p_calib_fctr,sheet_and_rill_erosion_calib_fctr,gullies_erosion_calib_fctr,input_units_code,soil_id_annagnps_valid,cdl_value_2022
