# Soil Survey Analysis
Uses the STATSGO2 dataset: https://www.nrcs.usda.gov/resources/data-and-reports/description-of-statsgo2-database

These data provide information about soil features on or near the surface of the Earth. Data were collected as part of the National Cooperative Soil Survey. These data are intended for geographic display and analysis at the state, regional, and national level. The data should be displayed and analyzed at scales appropriate for 1:250,000-scale data.

Time Period: 2000 - 10th January 2011

In [13]:
from pathlib import Path

import pandas as pd

In [12]:
# Folder holding the pipe-delimited tabular exports. The trailing slash is kept
# on purpose: later cells append a file name with plain string concatenation.
base_dir = "./wss_gsmsoil_AZ_2016_10_13/tabular/"

In [6]:
# Read the raw ccrpyd export as plain text lines so its layout can be inspected
# before any parsing. The path is built from base_dir so the data folder is
# named in one place only.
with open(base_dir + "ccrpyd.txt") as f:
    data = f.readlines()

In [10]:
# Look at the first record: drop surrounding whitespace (including the trailing
# newline), then break it into fields on the pipe delimiter.
first_record = data[0].strip()
first_record.split("|")

['"Alfalfa hay"',
 '"Tons"',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '"14178501"',
 '"30077445"']

## Soil pH (`chorizon` table)

In [15]:
# Base name of the export file to load (".txt" is appended when reading).
tablename = "chorizon"


def _with_lrh_suffixes(*stems):
    """Expand each stem into three names ending in ``_l``, ``_r`` and ``_h``.

    The names are returned in that suffix order, stem by stem.
    NOTE(review): the suffixes presumably stand for low / representative /
    high values - confirm against the dataset's metadata.
    """
    return [f"{stem}{suffix}" for stem in stems for suffix in ("_l", "_r", "_h")]


# Column names for the export, in file order. The order matters: the list is
# passed to ``pd.read_csv(..., names=...)``, which assigns names by position.
column_physical_names = [
    # Horizon name and designation parts.
    "hzname",
    "desgndisc",
    "desgnmaster",
    "desgnmasterprime",
    "desgnvert",
    *_with_lrh_suffixes(
        "hzdept", "hzdepb", "hzthk",
        "fraggt10", "frag3to10",
        "sieveno4", "sieveno10", "sieveno40", "sieveno200",
        "sandtotal", "sandvc", "sandco", "sandmed", "sandfine", "sandvf",
        "silttotal", "siltco", "siltfine",
        "claytotal", "claysizedcarb",
        "om",
        "dbtenthbar", "dbthirdbar", "dbfifteenbar", "dbovendry",
    ),
    "partdensity",
    *_with_lrh_suffixes(
        "ksat", "awc",
        "wtenthbar", "wthirdbar", "wfifteenbar", "wsatiated",
        "lep", "ll", "pi", "aashind",
    ),
    "kwfact",
    "kffact",
    *_with_lrh_suffixes(
        "caco3", "gypsum", "sar", "ec", "cec7", "ecec", "sumbases",
        "ph1to1h2o", "ph01mcacl2",
        "freeiron", "feoxalate", "extracid", "extral", "aloxalate",
        "pbray1", "poxalate", "ph2osoluble", "ptotal",
    ),
    "excavdifcl",
    "excavdifms",
    # Key columns come last in the file.
    "cokey",
    "chkey",
]

In [18]:
# Load the pipe-delimited export named by `tablename` from `base_dir`.
# Column names are supplied positionally through `names`, so the first line of
# the file is read as data rather than as a header.
chorizon_path = f"{base_dir}{tablename}.txt"
df_1 = pd.read_csv(chorizon_path, sep="|", names=column_physical_names)

In [20]:
# Preview the first five rows to sanity-check the column alignment.
df_1.head(n=5)

Unnamed: 0,hzname,desgndisc,desgnmaster,desgnmasterprime,desgnvert,hzdept_l,hzdept_r,hzdept_h,hzdepb_l,hzdepb_r,...,ph2osoluble_l,ph2osoluble_r,ph2osoluble_h,ptotal_l,ptotal_r,ptotal_h,excavdifcl,excavdifms,cokey,chkey
0,H1,,H,,1.0,,0,,,25,...,,,,,,,,,14178503,40777858
1,H2,,H,,2.0,,25,,,152,...,,,,,,,,,14178503,40777859
2,H1,,H,,1.0,,0,,,36,...,,,,,,,,,14178502,40777855
3,H2,,H,,2.0,,36,,,56,...,,,,,,,,,14178502,40777856
4,H3,,H,,3.0,,56,,,152,...,,,,,,,,,14178502,40777857


Soil pH measured with the CaCl2 method, interpreted following this guide: https://www.dpi.nsw.gov.au/__data/assets/pdf_file/0003/167187/soil-ph.pdf

In [31]:
# Summary statistics for the CaCl2 pH column. `describe()` is called explicitly
# so that re-running the cell reproduces the summary table shown below and
# mirrors the equivalent df_2 cell later in the notebook.
df_1.ph01mcacl2_l.describe()

count    0.0
mean     NaN
std      NaN
min      NaN
25%      NaN
50%      NaN
75%      NaN
max      NaN
Name: ph01mcacl2_l, dtype: float64

In [25]:
# Check which dtype pandas inferred for the CaCl2 pH column.
df_1.dtypes["ph01mcacl2_l"]

dtype('float64')

In [33]:
# Load the same table from a second download (AZ659), which lives outside
# base_dir. The folder is resolved from the current user's home directory
# instead of a hard-coded /Users/<name>/ path, so the cell is not tied to one
# machine account.
# NOTE(review): assumes the AZ659 archive was unpacked into ~/Downloads - adjust
# if the data is stored elsewhere.
az659_tabular_dir = Path.home() / "Downloads" / "AZ659" / "tabular"
df_2 = pd.read_csv(
    az659_tabular_dir / f"{tablename}.txt",
    delimiter="|",
    names=column_physical_names,
)

In [35]:
# Summary statistics for the CaCl2 pH column in the AZ659 data.
df_2["ph01mcacl2_l"].describe()

count    5.000000
mean     7.080000
std      0.303315
min      6.800000
25%      7.000000
50%      7.000000
75%      7.000000
max      7.600000
Name: ph01mcacl2_l, dtype: float64