This notebook replicates the data flow that is triggered when a user clicks on the AIMS web interface.

Setting up database connection

In [2]:
import json
from pathlib import Path

import geopandas as gpd
import pandas as pd
from sqlalchemy import URL, bindparam, create_engine, text as sql_text
from sqlalchemy.orm import sessionmaker
from tqdm import tqdm

import pyagnps

# DATABASE SETUP
# The connection settings are read from a JSON file kept outside the notebook,
# so no secret is hard-coded here.
credentials = json.loads(Path("../../inputs/db_credentials.json").read_text())

# Unpack the five settings needed to build the connection URL.
user, password, host, port, database = (
    credentials[key] for key in ("user", "password", "host", "port", "database")
)

# Assemble the PostgreSQL connection URL from its individual components.
url_object = URL.create(
    drivername="postgresql",
    username=user,
    password=password,
    host=host,
    port=port,
    database=database,
)

# SQLAlchemy engine shared by every query in this notebook.
engine = create_engine(url_object)

In [3]:
# Folder that receives every file exported by this notebook.
# NOTE(review): this is a machine-specific absolute path; adjust it before
# running the notebook on another computer.
output_folder = Path('C:/Users/Luc/Desktop/GC_AIMS')
# parents=True so that the cell also succeeds when intermediate folders are missing.
output_folder.mkdir(parents=True, exist_ok=True)

User input: coordinates only

In [4]:
# Coordinates picked by the user; they are passed as (lon, lat) to the database
# functions queried in the following cells.
# NOTE(review): presumably decimal degrees (EPSG:4326) - confirm.
lon, lat = -89.91528, 34.23195

In [5]:
# Ask the database which THUC run id corresponds to the user coordinates.
# The coordinates are sent as bound parameters rather than being formatted into
# the SQL string, and the connection is closed as soon as the query returns.
with engine.connect() as conn:
    thuc = pd.read_sql_query(
        sql_text("SELECT thuc_near_run_id_tr(:lon, :lat)"),
        con=conn,
        params={"lon": lon, "lat": lat},
    )

# Fail with an explicit message instead of an obscure error further down.
# NOTE(review): assumes the database function yields NULL when nothing matches - confirm.
if thuc.empty or pd.isna(thuc.iloc[0, 0]):
    raise ValueError(f"No THUC run id found for lon={lon}, lat={lat}")

# The query returns a single value: first row, first column.
thuc_id = thuc.iloc[0, 0]

### Geometry, reaches and cell data sections

Query cells geometry

In [6]:
# Cell polygons for the user coordinates and the THUC found above.
# Values are bound as parameters instead of being interpolated into the SQL text.
cells_query = "SELECT geom, cell_id FROM thuc_cell_geo_tr(:lon, :lat, :thuc_id)"

# Use a context manager so the connection is released once the data is loaded.
with engine.connect() as conn:
    cells_geometry = gpd.read_postgis(
        sql=sql_text(cells_query),
        con=conn,
        geom_col='geom',
        # str() mirrors the quoted literal used previously for the THUC id.
        params={"lon": lon, "lat": lat, "thuc_id": str(thuc_id)},
    )

# Cell ids reused below to filter the cell data section.
cells_list = cells_geometry['cell_id'].to_list()

# Interactive map of the cells, colored by cell id.
cells_geometry.explore(column="cell_id")

Query reach geometry

In [7]:
# Reach geometries for the user coordinates and the THUC found above.
# Values are bound as parameters instead of being interpolated into the SQL text.
reaches_query = "SELECT geom, reach_id FROM thuc_reach_geo_tr(:lon, :lat, :thuc_id)"

# Use a context manager so the connection is released once the data is loaded.
with engine.connect() as conn:
    reaches_geometry = gpd.read_postgis(
        sql=sql_text(reaches_query),
        con=conn,
        geom_col='geom',
        # str() mirrors the quoted literal used previously for the THUC id.
        params={"lon": lon, "lat": lat, "thuc_id": str(thuc_id)},
    )

# Reach ids reused below to filter the reach data section.
reaches_list = reaches_geometry['reach_id'].to_list()

# Interactive map of the reaches, colored by reach id.
reaches_geometry.explore(column='reach_id')

Query Cell Data Section

In [8]:
# Cell data section restricted to the cells of the selected watershed.
# The table name embeds the THUC id (identifiers cannot be bound parameters);
# thuc_id comes from the database lookup above, not from free-form user input.
query = f"SELECT * FROM thuc_{thuc_id}_annagnps_cell_data_section WHERE cell_id IN :cell_ids"

# An expanding bind parameter renders one placeholder per id. Unlike formatting
# a Python tuple into the SQL string, it stays valid for one-element and empty lists.
cell_data_stmt = sql_text(query).bindparams(bindparam("cell_ids", expanding=True))

# Use a context manager so the connection is released once the data is loaded.
with engine.connect() as conn:
    df_cells = pd.read_sql_query(sql=cell_data_stmt, con=conn, params={"cell_ids": cells_list})

In [9]:
# Preview the first rows of the cell data section.
df_cells.head()

Unnamed: 0,cell_id,soil_id,mgmt_field_id,reach_id,reach_location_code,cell_area,time_of_conc,avg_elevation,rcn_calib_id,avg_land_slope,...,delivery_ratio,constant_usle_c_fctr,constant_usle_p_fctr,all_oc_calib_fctr,all_n_calib_fctr,all_p_calib_fctr,sheet_and_rill_erosion_calib_fctr,gullies_erosion_calib_fctr,input_units_code,soil_id_annagnps_valid
0,956601,568332,Grassland_Pasture,95660,0,10.89,,108.81,,0.02668,...,,,,,,,,,1,1
1,956581,568332,Grassland_Pasture,95658,0,10.35,,105.94,,0.0296,...,,,,,,,,,1,1
2,956611,568326,Grassland_Pasture,95661,0,10.26,,112.79,,0.03681,...,,,,,,,,,1,0
3,956691,568332,Grassland_Pasture,95669,0,9.99,,115.65,,0.04749,...,,,,,,,,,1,1
4,956602,568326,Grassland_Pasture,95660,1,13.68,,102.71,,0.04329,...,,,,,,,,,1,0


Query Reach Data Section

In [10]:
# Reach data section restricted to the reaches of the selected watershed.
# The table name embeds the THUC id (identifiers cannot be bound parameters);
# thuc_id comes from the database lookup above, not from free-form user input.
query = f"SELECT * FROM thuc_{thuc_id}_annagnps_reach_data_section WHERE reach_id IN :reach_ids"

# An expanding bind parameter renders one placeholder per id. Unlike formatting
# a Python tuple into the SQL string, it stays valid for one-element and empty lists.
reach_data_stmt = sql_text(query).bindparams(bindparam("reach_ids", expanding=True))

# Use a context manager so the connection is released once the data is loaded.
with engine.connect() as conn:
    df_reaches = pd.read_sql_query(sql=reach_data_stmt, con=conn, params={"reach_ids": reaches_list})

In [11]:
# Preview the first rows of the reach data section.
df_reaches.head()

Unnamed: 0,reach_id,receiving_reach,vegetation_code,elevation,slope,mannings_n,infiltration_rate,hydraulic_geom_id,length,top_width,...,sand_scour_code,small_agg_scour_code,large_agg_scour_code,valley_clay_scour_code,valley_silt_scour_code,valley_sand_scour_code,valley_small_agg_scour_code,valley_large_agg_scour_code,delivery_ratio,input_units_code
0,95625,95624,,67.82,0.00684,,,,307.28,,...,,,,,,,,,,1
1,95626,95625,,70.63,0.0029,,,,1141.25,,...,,,,,,,,,,1
2,95627,95626,,71.8,0.00277,,,,144.85,,...,,,,,,,,,,1
3,95628,95627,,73.15,0.00441,,,,499.71,,...,,,,,,,,,,1
4,95629,95627,,72.26,0.00159,,,,506.98,,...,,,,,,,,,,1


Make reach data section "valid" for AnnAGNPS i.e. add an "OUTLET" line

In [12]:
# Reach ids that have their own row vs. ids that other reaches drain into.
reaches = set(df_reaches['reach_id'])
receiving_reaches = set(df_reaches['receiving_reach'])

# The outlet is the receiving reach that has no row of its own. Check that it is
# unique instead of silently picking an arbitrary element of the set (or failing
# with an IndexError when there is none).
outlet_candidates = list(receiving_reaches - reaches)
if len(outlet_candidates) != 1:
    raise ValueError(
        f"Expected exactly one outlet reach, found {len(outlet_candidates)}: {outlet_candidates}"
    )
outlet_reach = outlet_candidates[0]
print(f"Outlet reach: {outlet_reach}")

# Build the outlet line from a reach draining into the outlet. Only the first
# such reach is used as a template so that a single OUTLET row is added even
# when several reaches drain into it.
outlet_row = df_reaches[df_reaches['receiving_reach'] == outlet_reach].head(1).copy()
outlet_row['reach_id'] = outlet_reach
outlet_row['receiving_reach'] = 'OUTLET'
outlet_row['length'] = 0

# Put the outlet line first, followed by the untouched reach rows.
df_reaches_valid = pd.concat([outlet_row, df_reaches], ignore_index=True)
df_reaches_valid.head()

Outlet reach: 95624


Unnamed: 0,reach_id,receiving_reach,vegetation_code,elevation,slope,mannings_n,infiltration_rate,hydraulic_geom_id,length,top_width,...,sand_scour_code,small_agg_scour_code,large_agg_scour_code,valley_clay_scour_code,valley_silt_scour_code,valley_sand_scour_code,valley_small_agg_scour_code,valley_large_agg_scour_code,delivery_ratio,input_units_code
0,95624,OUTLET,,67.82,0.00684,,,,0.0,,...,,,,,,,,,,1
1,95625,95624,,67.82,0.00684,,,,307.28,,...,,,,,,,,,,1
2,95626,95625,,70.63,0.0029,,,,1141.25,,...,,,,,,,,,,1
3,95627,95626,,71.8,0.00277,,,,144.85,,...,,,,,,,,,,1
4,95628,95627,,73.15,0.00441,,,,499.71,,...,,,,,,,,,,1


Merge geometries with data sections

In [28]:
# Display-only check of the rusle_ls_fctr column next to the cell ids.
df_cells[['cell_id','rusle_ls_fctr']]

Unnamed: 0,cell_id,rusle_ls_fctr
0,956601,0.511
1,956581,0.540
2,956611,0.725
3,956691,1.018
4,956602,0.788
...,...,...
183,956252,0.367
184,956262,0.787
185,956263,0.857
186,956253,0.699


In [13]:
# Attach the tabular data sections to their geometries (inner join on the id).
# NOTE: both names are reassigned, so running this cell a second time merges the
# data columns again; re-run the geometry query cells first.
cells_geometry = cells_geometry.merge(right=df_cells, how='inner', on='cell_id')
reaches_geometry = reaches_geometry.merge(right=df_reaches, how='inner', on='reach_id')

### Soil data

Query soil_data and soil_layers_data for the matching soil_id values, as well as the raw soil data

In [14]:
# Distinct soil ids used by the watershed cells. Many cells share the same soil,
# so duplicates are dropped to keep the SQL filters below short.
soil_ids_list = df_cells['soil_id'].drop_duplicates().to_list()

In [15]:
# Soil tables filtered on the soil ids found in the watershed cells.
query_soil = """SELECT * FROM usa_valid_soil_data WHERE "Soil_ID" IN :soil_ids"""
query_soil_layers = """SELECT * FROM usa_valid_soil_layers_data WHERE "Soil_ID" IN :soil_ids"""
query_raw = """SELECT * FROM raw_nrcs_soil_data WHERE "mukey" IN :soil_ids"""

# An expanding bind parameter renders one placeholder per id. Unlike formatting
# a Python tuple into the SQL string, it stays valid for one-element and empty lists.
soil_ids_param = {"soil_ids": soil_ids_list}

# One connection serves the three queries and is closed afterwards.
with engine.connect() as conn:
    df_soil_data = pd.read_sql_query(
        sql=sql_text(query_soil).bindparams(bindparam("soil_ids", expanding=True)),
        con=conn,
        params=soil_ids_param,
    )
    df_soil_layers_data = pd.read_sql_query(
        sql=sql_text(query_soil_layers).bindparams(bindparam("soil_ids", expanding=True)),
        con=conn,
        params=soil_ids_param,
    )
    df_raw = pd.read_sql_query(
        sql=sql_text(query_raw).bindparams(bindparam("soil_ids", expanding=True)),
        con=conn,
        params=soil_ids_param,
    )

In [16]:
# Preview the first rows of the soil data table.
df_soil_data.head()

Unnamed: 0,Soil_ID,Hydrologic_Soil_Group,K_Factor,Albedo,Time_to_Consolidation,Impervious_Depth,Specific_Gravity,Initial_Soil_Conditions_ID,Soil_Name,Soil_Texture,Number_of_Soil_Layers,Input_Units_Code
0,568308,C,0.0566,0.3,,540.0,,,Calloway,Silt loam,3,1
1,568309,B,0.0645,0.3,,,,,Collins,Silt loam,2,1
2,568310,B,0.0645,0.3,,,,,Collins,Silt loam,2,1
3,568313,B,0.0566,0.3,,,,,Falaya,Silt loam,2,1
4,568314,B,0.0566,0.3,,,,,Falaya,Silt loam,2,1


In [17]:
# Preview the first rows of the soil layers table.
df_soil_layers_data.head()

Unnamed: 0,Soil_ID,Layer_Number,Layer_Depth,Bulk_Density,Clay_Ratio,Silt_Ratio,Sand_Ratio,Rock_Ratio,Very_Fine_Sand_Ratio,CaCO3_Content,...,Base_Saturation,Unstable_Aggregate_Ratio,pH,Organic_Matter_Ratio,Organic_N_Ratio,Inorganic_N_Ratio,Organic_P_Ratio,Inorganic_P_Ratio,Soil_Structure_Code,Input_Units_Code
0,568308,1,640.0,1.55,0.2,0.686,0.114,,0.066,0.0,...,,,5.3,0.0125,,,,,,1
1,568308,2,1630.0,1.52,0.21,0.677,0.113,,0.066,0.0,...,,,5.3,0.0025,,,,,,1
2,568308,3,1730.0,1.57,0.24,0.667,0.093,,0.065,0.0,...,,,6.5,0.001,,,,,,1
3,568309,1,200.0,1.52,0.115,0.677,0.208,,0.134,0.0,...,,,5.0,0.0125,,,,,,1
4,568309,2,1570.0,1.52,0.115,0.677,0.208,,0.134,0.0,...,,,5.0,0.0015,,,,,,1


### Generate climate files

Get watershed centroid computed in terms of lat and lon

In [37]:
# Dissolve the cells and take the centroid only once, then read both
# coordinates from that single result.
# NOTE(review): geopandas warns about centroids computed in a geographic CRS;
# consider projecting first if more accuracy is needed.
watershed_centroid = cells_geometry.dissolve().centroid
lon0, lat0 = watershed_centroid.x, watershed_centroid.y


  lon0, lat0 = cells_geometry.dissolve().centroid.x, cells_geometry.dissolve().centroid.y

  lon0, lat0 = cells_geometry.dissolve().centroid.x, cells_geometry.dissolve().centroid.y


#### Using NLDAS-2

Reset cells secondary climate file id

In [85]:
# Clear any existing secondary climate file id before it is assigned below.
cells_geometry['secondary_climate_file_id'] = None

Identify NLDAS-2 grid

In [86]:
# GeoPackage holding the NLDAS-2 grid centroids.
# NOTE(review): machine-specific absolute path; adjust it before running elsewhere.
path_nldas_grid_centroids = Path("D:/AIMS/Datasets/Climate/NLDAS2/NLDAS2_GRID_CENTROIDS_epsg4326.gpkg")
nldas_centroids = gpd.read_file(path_nldas_grid_centroids)

In [87]:
# Tag every cell with the id of its nearest NLDAS-2 grid centroid, then remove
# the helper columns brought in by the spatial join.
cells_geometry = (
    cells_geometry
    .sjoin_nearest(nldas_centroids)
    .assign(secondary_climate_file_id=lambda joined: joined['nldas2_grid_ID'])
    .drop(columns=['nldas2_grid_ID', 'index_right'])
)




In [88]:
# Interactive map of the cells, colored by the climate id assigned above.
cells_geometry.explore(column='secondary_climate_file_id', categorical=True)

Generate climate data for each unique NLDAS-2 grid ID present in the watershed

In [104]:
# Keep only the NLDAS-2 grid centroids that were assigned to at least one cell,
# expressed in EPSG:4326.
grid_ids_in_watershed = cells_geometry['secondary_climate_file_id'].unique()
is_in_watershed = nldas_centroids['nldas2_grid_ID'].isin(grid_ids_in_watershed)
wsh_nldas2_grid_pts = nldas_centroids[is_in_watershed].to_crs('epsg:4326')

In [98]:
# Climate inputs keyed by NLDAS-2 grid id, filled one grid point at a time.
climate_data = {}

grid_points = tqdm(wsh_nldas2_grid_pts.iterfeatures(), total=len(wsh_nldas2_grid_pts))
for feature in grid_points:
    clim_id = feature['properties']['nldas2_grid_ID']
    grid_lon, grid_lat = feature['geometry']['coordinates']

    # Daily climate for this grid point over the requested period.
    clm = pyagnps.climate.ClimateAnnAGNPSCoords(
        coords=(grid_lon, grid_lat),
        start="1980-01-01",
        end="1981-07-31",
        date_mode="local",
    )
    daily_climate = clm.query_nldas2_generate_annagnps_climate_daily()

    # Station elevation: mean elevation of the cells assigned to this grid point.
    cells_on_grid = cells_geometry['secondary_climate_file_id'] == clim_id
    mean_elevation = cells_geometry.loc[cells_on_grid, 'avg_elevation'].mean()

    # Climate station metadata, later passed as keyword arguments to the
    # station file generator.
    station_metadata = {
        'output_filepath': output_folder / f'climate_station_{clim_id}.csv',
        'climate_station_name': f"NLDAS-2 Grid ID {clim_id}",
        'beginning_climate_date': clm.start.strftime("%m/%d/%Y"),
        'ending_climate_date': clm.end.strftime("%m/%d/%Y"),
        'latitude': grid_lat,
        'longitude': grid_lon,
        'elevation': f"{mean_elevation:0.2f}",
    }

    climate_data[clim_id] = {
        'data': daily_climate,  # DataFrame containing the climate data
        'climate_station': station_metadata,
    }

100%|██████████| 4/4 [00:17<00:00,  4.45s/it]


#### Using CMIP5 data

In [116]:
# Folder holding the CMIP5 data (path suggests MACAv2-METDATA, CNRM-CM5, r1i1p1).
# NOTE(review): machine-specific absolute path; adjust it before running elsewhere.
path_to_cmip5_historical_and_rcp45 = Path("D:/AIMS/Datasets/Climate/CMIP/CMIP5/MACAv2METDATA/CNRM-CM5/r1i1p1/")

# TODO(review): the object is created with placeholder coordinates and the path
# above is not used yet - this section looks unfinished.
clm_cmip5 = pyagnps.climate.ClimateAnnAGNPSCoords(coords=(None, None))

### Export everything to files

In [17]:
# Tabular outputs: file name -> DataFrame, written in this order as CSV.
csv_exports = {
    'gc_soil_data.csv': df_soil_data,
    'gc_soil_layers_data.csv': df_soil_layers_data,
    'raw_soil_data_gNATSGO.csv': df_raw,
    'gc_cell_data_section.csv': df_cells,
    'gc_reach_data_section.csv': df_reaches_valid,
}
for csv_name, table in csv_exports.items():
    table.to_csv(output_folder / csv_name, index=False)

# Spatial outputs: file name -> GeoDataFrame, written as GeoPackage.
gpkg_exports = {
    'cells_geometry.gpkg': cells_geometry,
    'reaches_geometry.gpkg': reaches_geometry,
}
for gpkg_name, layer in gpkg_exports.items():
    layer.to_file(output_folder / gpkg_name, driver='GPKG', index=False)

Export climate

(loop on ids of climate_data dictionary and write matching dataframes in a file with climate_daily)

In [None]:
# Write one daily climate file and one climate station file per climate id.
for clim_id, climate_entry in climate_data.items():
    # Look the entry up through the loop variable (not the literal 'clim_id'),
    # and use a printf-style float format as expected by to_csv.
    climate_entry['data'].to_csv(
        output_folder / f"climate_daily_{clim_id}.csv",
        index=False,
        float_format="%.2f",
    )
    # The station metadata is stored under each climate id, not at the top level.
    pyagnps.climate.generate_climate_station_file(**climate_entry['climate_station'])