# Intro

{Fill in with information about this notebook}

# Set Up notebook 

In [2]:
#Import modules
import numpy as np #Data manipulation
import pandas as pd #Point data manipulation and organization
import xarray as xr #Raster data manipulation and organization

import pathlib  #For filepaths, io, etc.
import os       #For several system-based commands
import datetime #For manipulation of time data, including file creation/modification times
import json     #For dictionary io, etc.

import matplotlib.pyplot as plt #For plotting and data vizualization
import geopandas as gpd         #For organization and manipulation of vector data in space (study area and some data points)
import rioxarray as rxr         #For orgnaization and manipulation of raster data
from scipy import interpolate
import shapely                  #For converting coordinates to point geometry
#Not sure if this cell is needed

In [3]:
#Scripts with functions made for this specific application
import w4h
import pathlib
import os
#Variables needed throughout, best to just assign now
todayDate, dateSuffix = w4h.getCurrentDate() 
repoDir = pathlib.Path(os.getcwd())

In [4]:
directoryDir = r'\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\BedrockWellData\Wells\RawWellData_OracleDatabase\TxtData\\'[:-1]
downholeDataPATH, headerDataPATH, xyzInPATH  = w4h.filesSetup(db_dir=directoryDir)
headerDataIN, downholeDataIN = w4h.readRawTxtData(downholefile=downholeDataPATH, headerfile=headerDataPATH) #Functions to read data into dataframes. Also excludes extraneous columns, and drops header data with no location information
xyzDataIN = w4h.readXYZData(xyzfile=xyzInPATH)
downholeData = w4h.defineDataTypes(downholeDataIN, dtypeFile='downholeDataTypes.txt') #Define datatypes of each column of the new dataframes
headerData = w4h.defineDataTypes(headerDataIN, dtypeFile='headerDataTypes.txt')#Define datatypes of each column of the new dataframes
xyzData = w4h.defineDataTypes(xyzDataIN, dtypeFile='xyzDataTypes.txt')
studyAreaPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\SampleData\ESL_StudyArea_5mi.shp"
studyAreaIN = w4h.read_study_area(studyAreaPath)
modelGridPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\SampleData\grid_625_raster.tif"
surfaceElevPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\SampleData\ILStateLidar_ClipExtentESL.tif"
bedrockElevPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\SampleData\ESLBedrock.tif"
modelGrid = w4h.read_grid(datapath=modelGridPath, grid_type='model', studyArea=studyAreaIN,  read_grid=True, clip2SA=True)#, gridcrs='EPSG:26715', studyAreacrs='EPSG:26715')
surfaceElevGridIN = w4h.read_grid(datapath=surfaceElevPath, grid_type='surface', studyArea=studyAreaIN, use_service=False, clip2SA=True)
bedrockElevGridIN = w4h.read_grid(datapath=bedrockElevPath, grid_type='bedrock', studyArea=studyAreaIN, use_service=False, clip2SA=True)
#Code here for adding in control points
headerData = w4h.addElevtoHeader(xyzData, headerData) #This probably needs to be updated
headerData = w4h.coords2Geometry(df=headerData, xCol='LONGITUDE', yCol='LATITUDE', crs='EPSG:4269')
headerData = w4h.clipHeader2StudyArea(studyarea=studyAreaIN, headerdata=headerData, headerCRS='EPSG:4269')
downholeData = w4h.removeNonlocatedData(downholeData, headerData)
headerData = w4h.removenotopo(df=headerData, printouts=True)
donwholeData = w4h.dropnodepth(downholeData, printouts=True) #Drop records with no depth information
donwholeData = w4h.dropbaddepth(downholeData, printouts=True)#Drop records with bad depth information (i.e., top depth > bottom depth) (Also calculates thickness of each record)
downholeData = w4h.dropnoformation(downholeData, printouts=True)
downholeData.reset_index(inplace=True,drop=True) #These may not be necessary
headerData.reset_index(inplace=True,drop=True) #These may not be necessary
downholeData = pd.merge(left = downholeData, right = headerData, on='API_NUMBER')
specTermsPATH, startTermsPATH = w4h.searchTermFilePaths(dictdir=str(repoDir)+'/resources/', specStartPattern='*SearchTerms-Specific*', startGlobPattern = '*SearchTerms-Start*')
specTerms = w4h.read_dictionary_terms(dict_file=specTermsPATH)
startTerms = w4h.read_dictionary_terms(dict_file=startTermsPATH)
oldDictPath = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\WellData\Dictionaries\DICTIONARY_Updated-06-2018.csv"
oldDict = w4h.read_dictionary_terms(dict_file=oldDictPath, cols={'DESCRIPTION':'FORMATION', 'LITHOLOGY':'INTERPRETATION'}, class_flag=1)
specTerms = pd.concat([specTerms, oldDict])
specTerms.drop_duplicates(subset='FORMATION', inplace=True)
specTerms.reset_index(inplace=True, drop=True)
downholeData = w4h.specificDefine(downholeData, specTerms, printouts=True)
classifedDF, searchDF = w4h.splitDefined(downholeData)
searchDF = w4h.startDefine(df=searchDF, starterms=startTerms, printouts=True)
downholeData = w4h.remergeData(classifieddf=classifedDF, searchdf=searchDF)
classifedDF, searchDF = w4h.splitDefined(downholeData)
searchDF = w4h.depthDefine(searchDF, thresh=550, printouts=True)
downholeData = w4h.remergeData(classifieddf=classifedDF, searchdf=searchDF)
downholeData = w4h.fillUnclassified(downholeData)
#dictDir = "\\\\isgs-sinkhole\\geophysics\\Balikian\\ISWS_HydroGeo\\WellDataAutoClassification\\SupportingDocs\\"
targetInterpDF = w4h.readLithologies()
downholeData = w4h.mergeLithologies(downholedata=downholeData, targinterps=targetInterpDF)
wellsDF = w4h.get_unique_wells(downholeData)
downholeData = w4h.sort_dataframe(df=downholeData, sort_cols=['API_NUMBER','TOP'], remove_nans=True)
inGrids = [bedrockElevGridIN, surfaceElevGridIN]
bedrockGrid, surfaceGrid = w4h.alignRasters(unalignedGrids=inGrids, modelgrid=modelGrid)
driftThickGrid, layerThickGrid = w4h.get_drift_thick(surface=surfaceGrid, bedrock=bedrockGrid, noLayers=9, plotData=False)
headerData = w4h.sample_raster_points(raster=bedrockGrid, ptDF=headerData, newColName='BEDROCK_ELEV_FT')
headerData = w4h.sample_raster_points(raster=surfaceGrid, ptDF=headerData, newColName='SURFACE_ELEV_FT')
headerData = w4h.sample_raster_points(raster=driftThickGrid, ptDF=headerData, newColName='BEDROCK_DEPTH_FT')
headerData = w4h.sample_raster_points(raster=layerThickGrid, ptDF=headerData, newColName='LAYER_THICK_FT')
headerData = w4h.get_layer_depths(well_metadata=headerData, no_layers=9)
downholeData_layerInfo = w4h.merge_tables(data_df=downholeData,  data_cols=None, header_cols=None, header_df=headerData,on='API_NUMBER', how='inner', auto_pick_cols=True)
#downholeData = downholeData_layerInfo.copy()
resdf = w4h.layer_target_thick(downholeData_layerInfo, layers=9, outfile_prefix='CoarseFine')
layers_data = w4h.layer_interp(points=resdf, layers=9, grid=modelGrid, method='lin')
out_dir = r"\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\ISWS_HydroGeo\WellDataAutoClassification\ProcessedData"
w4h.export_grids(layers_data, out_path=out_dir)
#downholeData.to_csv(str(repoDir)+'/out/downholeData_cleaned'+dateSuffix+'.csv',index_label='ID')
#headerData.to_csv(str(repoDir)+'/out/headerData_cleaned'+dateSuffix+'.csv',index_label='ID')

Most Recent version of this file is : ISGS_DOWNHOLE_DATA_2023-01-06.txt
Most Recent version of this file is : ISGS_HEADER_2023-01-06.txt
Most Recent version of this file is : xyzData.csv
Using the following files:

\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\BedrockWellData\Wells\RawWellData_OracleDatabase\TxtData\ISGS_DOWNHOLE_DATA_2023-01-06.txt
\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\BedrockWellData\Wells\RawWellData_OracleDatabase\TxtData\ISGS_HEADER_2023-01-06.txt
\\isgs-sinkhole.ad.uillinois.edu\geophysics\Balikian\BedrockWellData\Wells\RawWellData_OracleDatabase\TxtData\xyzData.csv
Downhole Data has 3054409 valid well records.
Header Data has 636855 unique wells with valid location information.


  df.iloc[:,i] = dfIN.iloc[:,i].astype(dtypes[dfIN.iloc[:,i].name])
  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)


2998078 records removed without location information.
56331 wells remain from 7188 located wells in study area.
Well records removed: 0
Number of rows before dropping those without surface elevation information: 8150
Number of rows after dropping those without surface elevation information: 8150
Number of rows before dropping those without record depth information: 56331
Number of rows after dropping those without record depth information: 55747
Number of well records without formation information deleted: 584
Number of rows before dropping those with obviously bad depth information: 56331
Number of rows after dropping those with obviously bad depth information: 55725
Well records deleted: 606
Number of rows before dropping those without FORMATION information: 56331
Number of rows after dropping those without FORMATION information: 56331
Well records deleted: 0
Most Recent version of this file is : SearchTerms-Specific_2022-11-16_essCols.csv
Most Recent version of this file is : Search

  d.iloc[:,i] = d.iloc[:,i].astype(dict_termDtypes[d.iloc[:,i].name])
  d.iloc[:,i] = d.iloc[:,i].astype(dict_termDtypes[d.iloc[:,i].name])
  d.iloc[:,i] = d.iloc[:,i].astype(dict_termDtypes[d.iloc[:,i].name])


0                                clay brown
1                     clay dark brown, wood
2                     clay gray sandy, soft
3                         clay gray, sticky
4                 sandrock gray hard, dirty
                        ...                
56326               brown silt (levee fill)
56327                             gray clay
56328    gray clay (with thin sand streaks)
56329                       gray, fine sand
56330         gray, mottled with brown clay
Name: FORMATION, Length: 56331, dtype: object
0                                clay brown
1                     clay dark brown, wood
2                     clay gray sandy, soft
3                         clay gray, sticky
4                 sandrock gray hard, dirty
                        ...                
56326               brown silt (levee fill)
56327                             gray clay
56328    gray clay (with thin sand streaks)
56329                       gray, fine sand
56330         gray, mottled wi

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['CLASS_FLAG'].where(~df['FORMATION'].str.startswith(s,na=False),4,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['INTERPRETATION'].where(~df['FORMATION'].str.startswith(s,na=False),starterms.loc[i,'INTERPRETATION'],inplace=True)


Records classified with start search term: 3586
Records classified with start search term: 14.49% of remaining data
Records classified as bedrock that were deeper than 550': 359
This represents 1.7% of the unclassified data in this dataframe.
Number of unique wells in downholeData: 7188
BEDROCK_ELEV_FT sampling should be done by 17:39
SURFACE_ELEV_FT sampling should be done by 17:39
BEDROCK_DEPTH_FT sampling should be done by 17:39
LAYER_THICK_FT sampling should be done by 17:39
API_NUMBER
LATITUDE
REMOVING LATITUDE
LONGITUDE
REMOVING LONGITUDE
BEDROCK_ELEV_FT
SURFACE_ELEV_FT
BEDROCK_DEPTH_FT
LAYER_THICK_FT
ELEV_FT
REMOVING ELEV_FT
LONGITUDE_PROJ
LATITUDE_PROJ
DEPTH_FT_LAYER1
DEPTH_FT_LAYER2
DEPTH_FT_LAYER3
DEPTH_FT_LAYER4
DEPTH_FT_LAYER5
DEPTH_FT_LAYER6
DEPTH_FT_LAYER7
DEPTH_FT_LAYER8
DEPTH_FT_LAYER9
ELEV_FT_LAYER1
ELEV_FT_LAYER2
ELEV_FT_LAYER3
ELEV_FT_LAYER4
ELEV_FT_LAYER5
ELEV_FT_LAYER6
ELEV_FT_LAYER7
ELEV_FT_LAYER8
ELEV_FT_LAYER9
geometry
REMOVING geometry
Index(['API_NUMBER', 'TAB