In [2]:
import json
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine, text as sql_text
from sqlalchemy.engine import URL

In [3]:
# Read the database connection settings from a JSON file kept outside the
# notebook, so that no secret is hard-coded in a cell.
# The path gets its own name: `credentials` then only ever refers to the parsed
# dict, never to a Path.
credentials_path = Path('../../inputs/db_credentials.json')

# JSON is UTF-8 by specification; being explicit avoids the platform default
# encoding (e.g. cp1252 on Windows) mis-decoding non-ASCII characters.
with open(credentials_path, 'r', encoding='utf-8') as f:
    credentials = json.load(f)

# Unpack the individual connection parameters used to build the engine URL.
# A missing key raises KeyError here, i.e. before any connection is attempted.
user     = credentials['user']
password = credentials['password']
host     = credentials['host']
port     = credentials['port']
database = credentials['database']

In [14]:
# Names of the database tables that the two parquet files are written to.
soil_data_db_name = 'usa_valid_soil_data'
soil_layers_data_db_name = 'usa_valid_soil_layers_data'

# Parquet inputs: one file for the soil table, one for the soil layers table.
# NOTE(review): these are absolute paths on one specific Windows machine; the
# notebook will not run elsewhere until they are adjusted.
path_to_parquet_soil_data = Path(
    'C:/Users/Luc/projects/pyagnps/outputs/soil_data_market/soil/ALL_US/all_valid_soil_data.parquet'
)
path_to_parquet_soil_layers_data = Path(
    'C:/Users/Luc/projects/pyagnps/outputs/soil_data_market/soil/ALL_US/all_valid_soil_layers_data.parquet'
)

In [15]:
# Load the soil table from the parquet file configured in `path_to_parquet_soil_data`.
df_soil = pd.read_parquet(path_to_parquet_soil_data)

In [19]:
# Display the soil DataFrame loaded from parquet for visual inspection.
df_soil

Unnamed: 0,Soil_ID,Hydrologic_Soil_Group,K_Factor,Albedo,Time_to_Consolidation,Impervious_Depth,Specific_Gravity,Initial_Soil_Conditions_ID,Soil_Name,Soil_Texture,Number_of_Soil_Layers,Input_Units_Code
0,50226,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
1,50227,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
2,50229,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
3,50231,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
4,50233,B,0.0487,0.23,,1270.0,,,Bodenburg,Silt loam,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...
307572,3222362,C,0.0724,0.30,,,,,Saltwells,Silt loam,7,1
307573,3222363,C,0.0566,0.23,,,,,Saltwells,Loam,7,1
307574,3222364,C,0.0645,0.23,,,,,Saltwells,Loam,6,1
307575,3222366,D,0.0421,0.23,,180.0,,,Rangecreek,Paragravelly fine sandy loam,2,1


In [None]:
# Load the soil layers table from the parquet file configured in
# `path_to_parquet_soil_layers_data`.
df_soil_layers = pd.read_parquet(path_to_parquet_soil_layers_data)

In [20]:
# Display the soil layers DataFrame loaded from parquet for visual inspection.
df_soil_layers

Unnamed: 0,Soil_ID,Layer_Number,Layer_Depth,Bulk_Density,Clay_Ratio,Silt_Ratio,Sand_Ratio,Rock_Ratio,Very_Fine_Sand_Ratio,CaCO3_Content,...,Base_Saturation,Unstable_Aggregate_Ratio,pH,Organic_Matter_Ratio,Organic_N_Ratio,Inorganic_N_Ratio,Organic_P_Ratio,Inorganic_P_Ratio,Soil_Structure_Code,Input_Units_Code
0,50226,1,120.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.0,0.075,,,,,,1
1,50226,2,760.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.6,0.030,,,,,,1
2,50226,3,1520.0,1.43,0.02,0.06,0.92,0.10,0.050,0.00,...,,,6.1,0.005,,,,,,1
3,50227,1,120.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.0,0.075,,,,,,1
4,50227,2,760.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.6,0.030,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099738,3222364,6,2000.0,1.62,0.32,0.58,0.10,,0.068,0.20,...,,,8.6,0.001,,,,,,1
1099739,3222366,1,50.0,1.54,0.15,0.15,0.70,0.25,0.171,0.04,...,,,7.8,0.005,,,,,,1
1099740,3222366,2,180.0,1.54,0.23,0.37,0.40,0.50,0.118,0.10,...,,,7.8,0.002,,,,,,1
1099741,3222367,1,50.0,1.58,0.29,0.61,0.10,0.15,0.061,0.20,...,,,8.2,0.005,,,,,,1


Populate `usa_valid_soil_data` table

In [16]:
# Build the connection URL from its components rather than formatting a string:
# URL.create escapes special characters in the user name / password (e.g. '@',
# ':', '/'), which would otherwise corrupt a hand-written 'postgresql://...' URL.
# NOTE: this expects the raw (not percent-encoded) password in the credentials file.
db_url = URL.create(
    drivername='postgresql',
    username=user,
    password=password,
    host=host,
    port=port,
    database=database,
)
engine = create_engine(db_url)

# engine.begin() opens a connection with a transaction: it commits when the
# block exits normally and rolls back (re-raising the error) if an exception
# escapes, so no manual commit/rollback bookkeeping is needed.
with engine.begin() as conn:
    # Write df_soil to the table named by `soil_data_db_name`. An existing table
    # of that name is replaced (if_exists='replace'); the DataFrame index is not
    # written as a column (index=False).
    df_soil.to_sql(name=soil_data_db_name, con=conn, index=False, if_exists='replace')

Populate `usa_valid_soil_layers_data` table

In [27]:
# Build the connection URL from its components rather than formatting a string:
# URL.create escapes special characters in the user name / password (e.g. '@',
# ':', '/'), which would otherwise corrupt a hand-written 'postgresql://...' URL.
# NOTE: this expects the raw (not percent-encoded) password in the credentials file.
db_url = URL.create(
    drivername='postgresql',
    username=user,
    password=password,
    host=host,
    port=port,
    database=database,
)
engine = create_engine(db_url)

# engine.begin() opens a connection with a transaction: it commits when the
# block exits normally and rolls back (re-raising the error) if an exception
# escapes, so no manual commit/rollback bookkeeping is needed.
with engine.begin() as conn:
    # Write df_soil_layers to the table named by `soil_layers_data_db_name`. An
    # existing table of that name is replaced (if_exists='replace'); the
    # DataFrame index is not written as a column (index=False).
    df_soil_layers.to_sql(name=soil_layers_data_db_name, con=conn, index=False, if_exists='replace')

Check that the upload worked by reading both tables back from the database

In [21]:
# Read the whole soil table back from the database to verify the upload.
# The table name comes from a notebook constant, not from user input.
query = f"SELECT * FROM {soil_data_db_name}"

# No try/except here: catching an exception only to re-raise it adds nothing,
# and the `with` block already closes the connection on error.
with engine.connect() as conn:
    df_soil_check = pd.read_sql_query(sql=sql_text(query), con=conn)

# Make the check explicit instead of relying on eyeballing the output: the
# table read back must have the same number of rows and columns as the source.
assert df_soil_check.shape == df_soil.shape, (
    f"{soil_data_db_name}: read back shape {df_soil_check.shape}, "
    f"expected {df_soil.shape}"
)

In [22]:
# Display the soil table as read back from the database, for comparison with df_soil.
df_soil_check

Unnamed: 0,Soil_ID,Hydrologic_Soil_Group,K_Factor,Albedo,Time_to_Consolidation,Impervious_Depth,Specific_Gravity,Initial_Soil_Conditions_ID,Soil_Name,Soil_Texture,Number_of_Soil_Layers,Input_Units_Code
0,50226,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
1,50227,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
2,50229,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
3,50231,B,0.0487,0.30,,,,,Benka,Silt loam,3,1
4,50233,B,0.0487,0.23,,1270.0,,,Bodenburg,Silt loam,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...
307572,3222362,C,0.0724,0.30,,,,,Saltwells,Silt loam,7,1
307573,3222363,C,0.0566,0.23,,,,,Saltwells,Loam,7,1
307574,3222364,C,0.0645,0.23,,,,,Saltwells,Loam,6,1
307575,3222366,D,0.0421,0.23,,180.0,,,Rangecreek,Paragravelly fine sandy loam,2,1


In [28]:
# Read the whole soil layers table back from the database to verify the upload.
# The table name comes from a notebook constant, not from user input.
query = f"SELECT * FROM {soil_layers_data_db_name}"

# No try/except here: catching an exception only to re-raise it adds nothing,
# and the `with` block already closes the connection on error.
with engine.connect() as conn:
    df_soil_layers_check = pd.read_sql_query(sql=sql_text(query), con=conn)

# Make the check explicit instead of relying on eyeballing the output: the
# table read back must have the same number of rows and columns as the source.
assert df_soil_layers_check.shape == df_soil_layers.shape, (
    f"{soil_layers_data_db_name}: read back shape {df_soil_layers_check.shape}, "
    f"expected {df_soil_layers.shape}"
)

In [29]:
# Display the soil layers table as read back from the database, for comparison
# with df_soil_layers.
df_soil_layers_check

Unnamed: 0,Soil_ID,Layer_Number,Layer_Depth,Bulk_Density,Clay_Ratio,Silt_Ratio,Sand_Ratio,Rock_Ratio,Very_Fine_Sand_Ratio,CaCO3_Content,...,Base_Saturation,Unstable_Aggregate_Ratio,pH,Organic_Matter_Ratio,Organic_N_Ratio,Inorganic_N_Ratio,Organic_P_Ratio,Inorganic_P_Ratio,Soil_Structure_Code,Input_Units_Code
0,50226,1,120.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.0,0.075,,,,,,1
1,50226,2,760.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.6,0.030,,,,,,1
2,50226,3,1520.0,1.43,0.02,0.06,0.92,0.10,0.050,0.00,...,,,6.1,0.005,,,,,,1
3,50227,1,120.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.0,0.075,,,,,,1
4,50227,2,760.0,0.88,0.05,0.62,0.33,0.02,0.180,0.00,...,,,5.6,0.030,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1099738,3222364,6,2000.0,1.62,0.32,0.58,0.10,,0.068,0.20,...,,,8.6,0.001,,,,,,1
1099739,3222366,1,50.0,1.54,0.15,0.15,0.70,0.25,0.171,0.04,...,,,7.8,0.005,,,,,,1
1099740,3222366,2,180.0,1.54,0.23,0.37,0.40,0.50,0.118,0.10,...,,,7.8,0.002,,,,,,1
1099741,3222367,1,50.0,1.58,0.29,0.61,0.10,0.15,0.061,0.20,...,,,8.2,0.005,,,,,,1


Populate the original (raw NRCS) soil data table `raw_nrcs_soil_data`

In [9]:
# Read the raw soil CSV and discard its 'saverest' column in a single chained
# expression, so df_raw is always rebuilt from the file when the cell is re-run.
raw_csv_path = Path('../../inputs/soil/soil_data_US_with_mukey.csv')
df_raw = pd.read_csv(raw_csv_path).drop(columns=['saverest'])

In [11]:
# Build the connection URL from its components rather than formatting a string:
# URL.create escapes special characters in the user name / password (e.g. '@',
# ':', '/'), which would otherwise corrupt a hand-written 'postgresql://...' URL.
# NOTE: this expects the raw (not percent-encoded) password in the credentials file.
db_url = URL.create(
    drivername='postgresql',
    username=user,
    password=password,
    host=host,
    port=port,
    database=database,
)
engine = create_engine(db_url)

# engine.begin() opens a connection with a transaction: it commits when the
# block exits normally and rolls back (re-raising the error) if an exception
# escapes, so no manual commit/rollback bookkeeping is needed.
with engine.begin() as conn:
    # Write df_raw to the 'raw_nrcs_soil_data' table. An existing table of that
    # name is replaced (if_exists='replace'); the DataFrame index is not written
    # as a column (index=False).
    df_raw.to_sql(name='raw_nrcs_soil_data', con=conn, index=False, if_exists='replace')

In [12]:
# Read the whole raw table back from the database to verify the upload.
# Plain string: the query has no placeholders, so no f-string is needed.
query = "SELECT * FROM raw_nrcs_soil_data"

# No try/except here: catching an exception only to re-raise it adds nothing,
# and the `with` block already closes the connection on error.
with engine.connect() as conn:
    df_raw_check = pd.read_sql_query(sql=sql_text(query), con=conn)

# Make the check explicit instead of relying on eyeballing the output: the
# table read back must have the same number of rows and columns as the source.
assert df_raw_check.shape == df_raw.shape, (
    f"raw_nrcs_soil_data: read back shape {df_raw_check.shape}, "
    f"expected {df_raw.shape}"
)

In [13]:
# Display the raw table as read back from the database, for comparison with df_raw.
df_raw_check

Unnamed: 0,areasymbol,areaname,musym,mukey,hydgrp,kwfact,albedodry_r,restrictiondepthr,partdensity,compname,...,sandtotal_r,fragvol,sandvf_r,caco3_r,ksat_r,wthirdbar_r,wfifteenbar_r,om_r,ph1to1h2o_r,comppct_r
0,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,,0.30,>200,1.40,Benka,...,,0.0,,0.0,26.00,45.0,20.0,60.00,4.3,90
1,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.37,0.30,>200,2.65,Benka,...,33.0,2.0,18.0,0.0,9.17,35.0,10.0,7.50,5.0,90
2,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.43,0.30,>200,2.65,Benka,...,33.0,2.0,18.0,0.0,9.17,40.0,10.0,3.00,5.6,90
3,AK600,"Matanuska-Susitna Valley Area, Alaska",101,50226,B,0.05,0.30,>200,2.65,Benka,...,92.0,10.0,5.0,0.0,28.23,10.0,2.0,0.50,6.1,90
4,AK600,"Matanuska-Susitna Valley Area, Alaska",102,50227,B,,0.30,>200,1.40,Benka,...,,0.0,,0.0,26.00,45.0,20.0,60.00,4.3,60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1230561,WY737,"Sweetwater County Area, Wyoming",9325,2925093,A,0.32,0.23,165,2.65,Bodorumpe,...,80.0,2.0,25.4,3.0,100.00,13.8,4.6,0.17,8.6,50
1230562,WY737,"Sweetwater County Area, Wyoming",9325,2925093,A,,0.23,165,,Bodorumpe,...,,,,,0.20,,,,,50
1230563,WY737,"Sweetwater County Area, Wyoming",9A,2572297,B,0.28,0.23,>200,,Quealman,...,70.0,4.0,19.0,7.0,30.00,16.7,7.4,1.50,8.0,85
1230564,WY737,"Sweetwater County Area, Wyoming",9A,2572297,B,0.24,0.23,>200,,Quealman,...,70.0,4.0,10.3,7.0,30.00,15.2,6.0,0.25,8.0,85
