In [1]:
from pathlib import Path
import json
import pandas as pd
from sqlalchemy import URL, create_engine, text as sql_text

In [2]:
# Database connection settings are read from a JSON file kept outside the notebook.
# The path gets its own name so that `credentials` only ever refers to the parsed dict.
credentials_path = Path('../../inputs/db_credentials.json')

# Decode explicitly as UTF-8 instead of relying on the platform's default encoding.
with open(credentials_path, 'r', encoding='utf-8') as f:
    credentials = json.load(f)

# A missing field raises KeyError here rather than later, when the engine is built.
user     = credentials['user']
password = credentials['password']
host     = credentials['host']
port     = credentials['port']
database = credentials['database']

In [5]:
# Root folder holding the soil parquet files (machine-specific absolute path).
# Alternative location kept for reference:
#   Path('C:/Users/Luc/projects/pyagnps/inputs/soil/ALL_US_v3_SSURGO_STATSGO2_RSS')
root_dir = Path('D:/AIMS/Datasets/Soil/DATABASE_POPULATION_TASKS/SDM_QUERY_AND_NITA_PROCESSING/ALL_US_v3_SSURGO_STATSGO2_RSS/')

# Parquet inputs, all resolved relative to root_dir.
path_to_parquet_soil_data = root_dir.joinpath('all_valid_soil_data.parquet')
path_to_parquet_soil_layers_data = root_dir.joinpath('all_valid_soil_layers_data.parquet')
path_to_raw_soil_complete = root_dir.joinpath(
    'raw_query_data',
    'all_raw_soil_data_no_rvindicator_condition_STATSGO2_SSURGO_RSS.parquet',
)

# Database table that each parquet file above is written to.
soil_data_db_name = 'usa_valid_soil_data'
soil_layers_data_db_name = 'usa_valid_soil_layers_data'
raw_soil_complete_db_name = 'raw_nrcs_soil_data'

In [6]:
# Read the soil parquet file into a DataFrame and preview its first five rows.
df_soil = pd.read_parquet(path=path_to_parquet_soil_data)
df_soil.head(n=5)

Unnamed: 0,Soil_ID,Hydrologic_Soil_Group,K_Factor,Albedo,Time_to_Consolidation,Impervious_Depth,Specific_Gravity,Initial_Soil_Conditions_ID,Soil_Name,Soil_Texture,Number_of_Soil_Layers,Input_Units_Code
0,50226,B,0.0487,0.3,,,,,Benka,Silt loam,3,1
1,50227,B,0.0487,0.3,,,,,Benka,Silt loam,3,1
2,50229,B,0.0487,0.3,,,,,Benka,Silt loam,3,1
3,50231,B,0.0487,0.3,,,,,Benka,Silt loam,3,1
4,50233,B,0.0487,0.23,,1270.0,,,Bodenburg,Silt loam,3,1


In [7]:
# Read the soil-layers parquet file into a DataFrame and preview its first five rows.
df_soil_layers = pd.read_parquet(path=path_to_parquet_soil_layers_data)
df_soil_layers.head(n=5)

Unnamed: 0,Soil_ID,Layer_Number,Layer_Depth,Bulk_Density,Clay_Ratio,Silt_Ratio,Sand_Ratio,Rock_Ratio,Very_Fine_Sand_Ratio,CaCO3_Content,...,Base_Saturation,Unstable_Aggregate_Ratio,pH,Organic_Matter_Ratio,Organic_N_Ratio,Inorganic_N_Ratio,Organic_P_Ratio,Inorganic_P_Ratio,Soil_Structure_Code,Input_Units_Code
0,50226,1,120.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.0,0.075,,,,,,1
1,50226,2,760.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.6,0.03,,,,,,1
2,50226,3,1520.0,1.43,0.02,0.06,0.92,0.1,0.05,0.0,...,,,6.1,0.005,,,,,,1
3,50227,1,120.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.0,0.075,,,,,,1
4,50227,2,760.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.6,0.03,,,,,,1


In [8]:
# Read the raw query-result parquet file into a DataFrame and preview its first five rows.
df_raw = pd.read_parquet(path=path_to_raw_soil_complete)
df_raw.head(n=5)

Unnamed: 0,saverest,areasymbol,areaname,musym,mukey,hydgrp,kwfact,albedodry_r,restrictiondepthr,partdensity,...,sandtotal_r,fragvol,sandvf_r,caco3_r,ksat_r,wthirdbar_r,wfifteenbar_r,om_r,ph1to1h2o_r,comppct_r
0,8/30/2022 7:52:33 PM,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,,0.3,>200,1.4,...,,0.0,,0.0,26.0,45.0,20.0,60.0,4.3,90
1,8/30/2022 7:52:33 PM,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.37,0.3,>200,2.65,...,33.0,2.0,18.0,0.0,9.17,35.0,10.0,7.5,5.0,90
2,8/30/2022 7:52:33 PM,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.43,0.3,>200,2.65,...,33.0,2.0,18.0,0.0,9.17,40.0,10.0,3.0,5.6,90
3,8/30/2022 7:52:33 PM,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.05,0.3,>200,2.65,...,92.0,10.0,5.0,0.0,28.23,10.0,2.0,0.5,6.1,90
4,8/30/2022 7:52:33 PM,AK600,"Matanuska-Susitna Valley Area, Alaska",102,50227,B,,0.3,>200,1.4,...,,0.0,,0.0,26.0,45.0,20.0,60.0,4.3,60


Create the database connection engine

In [9]:
# Assemble the PostgreSQL connection URL from the individual credential fields
# rather than formatting a connection string by hand.
url_object = URL.create(
    drivername="postgresql",
    username=user, password=password,
    host=host, port=port,
    database=database,
)

# SQLAlchemy engine shared by the database cells that follow.
engine = create_engine(url_object)

Populate `usa_valid_soil_data` table

In [10]:
# Write df_soil to the database table named by soil_data_db_name.
# engine.begin() opens a connection and a transaction together: the transaction is
# committed when the block exits normally and rolled back if an exception escapes,
# and the original exception propagates unchanged.
with engine.begin() as conn:
    # if_exists='replace' recreates the table from the DataFrame's columns;
    # index=False keeps the DataFrame index out of the table.
    df_soil.to_sql(name=soil_data_db_name, con=conn, index=False, if_exists='replace')

Populate `usa_valid_soil_layers_data` table

In [11]:
# Write df_soil_layers to the database table named by soil_layers_data_db_name.
# engine.begin() opens a connection and a transaction together: the transaction is
# committed when the block exits normally and rolled back if an exception escapes,
# and the original exception propagates unchanged.
with engine.begin() as conn:
    # if_exists='replace' recreates the table from the DataFrame's columns;
    # index=False keeps the DataFrame index out of the table.
    df_soil_layers.to_sql(name=soil_layers_data_db_name, con=conn, index=False, if_exists='replace')

Populate `raw_nrcs_soil_data` table

In [12]:
# Remove the `saverest` column before uploading.
# errors='ignore' keeps this cell re-runnable: df_raw is rebound to the result, so on a
# second run the column is already gone and a plain drop would raise KeyError.
df_raw = df_raw.drop(columns=['saverest'], errors='ignore')

In [13]:
# Write df_raw to the database table named by raw_soil_complete_db_name.
# engine.begin() opens a connection and a transaction together: the transaction is
# committed when the block exits normally and rolled back if an exception escapes,
# and the original exception propagates unchanged.
with engine.begin() as conn:
    # if_exists='replace' recreates the table from the DataFrame's columns;
    # index=False keeps the DataFrame index out of the table.
    df_raw.to_sql(name=raw_soil_complete_db_name, con=conn, index=False, if_exists='replace')

Check that it worked by reading back the first 100 rows of each table

In [14]:
# Read back at most 100 rows from the soil table.
# The table name comes from a constant set in this notebook, not from external input,
# so it is interpolated directly into the statement.
query = f"SELECT * FROM {soil_data_db_name} LIMIT 100"

# No try/except is needed: a failing query propagates its exception on its own, and the
# context manager closes the connection either way.
with engine.connect() as conn:
    df_soil_check = pd.read_sql_query(sql=sql_text(query), con=conn)

In [15]:
# Show the rows read back from the soil table (result of the LIMIT 100 query).
df_soil_check

Unnamed: 0,Soil_ID,Hydrologic_Soil_Group,K_Factor,Albedo,Time_to_Consolidation,Impervious_Depth,Specific_Gravity,Initial_Soil_Conditions_ID,Soil_Name,Soil_Texture,Number_of_Soil_Layers,Input_Units_Code
0,50226,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
1,50227,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
2,50229,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
3,50231,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
4,50233,B,0.0487,0.23,,1270.0,,,Bodenburg,Silt loam,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...
95,50326,,-999.0000,,,,,,,Muck,1,1
96,50327,B,0.0487,0.30,,,,,Tokositna,Silt loam,3,1
97,50328,B,0.0487,0.30,,,,,Tokositna,Silt loam,3,1
98,50329,B,0.0487,0.30,,,,,Tokositna,Silt loam,3,1


In [16]:
# Read back at most 100 rows from the soil-layers table.
# The table name comes from a constant set in this notebook, not from external input,
# so it is interpolated directly into the statement.
query = f"SELECT * FROM {soil_layers_data_db_name} LIMIT 100"

# No try/except is needed: a failing query propagates its exception on its own, and the
# context manager closes the connection either way.
with engine.connect() as conn:
    df_soil_layers_check = pd.read_sql_query(sql=sql_text(query), con=conn)

In [17]:
# Show the rows read back from the soil-layers table (result of the LIMIT 100 query).
df_soil_layers_check

Unnamed: 0,Soil_ID,Layer_Number,Layer_Depth,Bulk_Density,Clay_Ratio,Silt_Ratio,Sand_Ratio,Rock_Ratio,Very_Fine_Sand_Ratio,CaCO3_Content,...,Base_Saturation,Unstable_Aggregate_Ratio,pH,Organic_Matter_Ratio,Organic_N_Ratio,Inorganic_N_Ratio,Organic_P_Ratio,Inorganic_P_Ratio,Soil_Structure_Code,Input_Units_Code
0,50226,1,120.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.0,0.075,,,,,,1
1,50226,2,760.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.6,0.030,,,,,,1
2,50226,3,1520.0,1.43,0.02,0.06,0.92,0.10,0.05,0.0,...,,,6.1,0.005,,,,,,1
3,50227,1,120.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.0,0.075,,,,,,1
4,50227,2,760.0,0.88,0.05,0.62,0.33,0.02,0.18,0.0,...,,,5.6,0.030,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,50267,3,1520.0,1.20,0.02,0.44,0.54,0.10,0.30,0.0,...,,,5.6,0.010,,,,,,1
96,50268,1,130.0,0.80,0.03,0.64,0.33,0.08,0.18,0.0,...,,,5.0,0.060,,,,,,1
97,50268,2,310.0,0.80,0.03,0.64,0.33,0.08,0.18,0.0,...,,,5.0,0.020,,,,,,1
98,50268,3,1520.0,1.20,0.02,0.44,0.54,0.10,0.30,0.0,...,,,5.6,0.010,,,,,,1


In [18]:
# Read back at most 100 rows from the raw soil table.
# The table name comes from a constant set in this notebook, not from external input,
# so it is interpolated directly into the statement.
query = f"SELECT * FROM {raw_soil_complete_db_name} LIMIT 100"

# No try/except is needed: a failing query propagates its exception on its own, and the
# context manager closes the connection either way.
with engine.connect() as conn:
    df_raw_check = pd.read_sql_query(sql=sql_text(query), con=conn)

In [19]:
# Show the rows read back from the raw soil table (result of the LIMIT 100 query).
df_raw_check

Unnamed: 0,areasymbol,areaname,musym,mukey,hydgrp,kwfact,albedodry_r,restrictiondepthr,partdensity,compname,...,sandtotal_r,fragvol,sandvf_r,caco3_r,ksat_r,wthirdbar_r,wfifteenbar_r,om_r,ph1to1h2o_r,comppct_r
0,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,,0.30,>200,1.40,Benka,...,,0.0,,0.0,26.00,45.0,20.0,60.0,4.3,90
1,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.37,0.30,>200,2.65,Benka,...,33.0,2.0,18.0,0.0,9.17,35.0,10.0,7.5,5.0,90
2,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.43,0.30,>200,2.65,Benka,...,33.0,2.0,18.0,0.0,9.17,40.0,10.0,3.0,5.6,90
3,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.05,0.30,>200,2.65,Benka,...,92.0,10.0,5.0,0.0,28.23,10.0,2.0,0.5,6.1,90
4,AK600,"Matanuska-Susitna Valley Area, Alaska",102,50227,B,,0.30,>200,1.40,Benka,...,,0.0,,0.0,26.00,45.0,20.0,60.0,4.3,60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,AK600,"Matanuska-Susitna Valley Area, Alaska",127,50257,A,,0.23,>200,1.40,Delyndia,...,,0.0,,0.0,26.00,45.0,20.0,60.0,5.0,50
96,AK600,"Matanuska-Susitna Valley Area, Alaska",127,50257,A,0.37,0.23,>200,2.65,Delyndia,...,33.0,2.0,18.0,0.0,9.17,35.0,10.0,6.0,5.0,50
97,AK600,"Matanuska-Susitna Valley Area, Alaska",127,50257,A,0.43,0.23,>200,2.65,Delyndia,...,33.0,2.0,18.0,0.0,9.17,40.0,10.0,3.0,5.6,50
98,AK600,"Matanuska-Susitna Valley Area, Alaska",127,50257,A,0.05,0.23,>200,2.65,Delyndia,...,92.0,16.0,5.0,0.0,28.23,10.0,2.0,1.0,6.5,50
