In [1]:
import sys
# Add the parent directory to the module search path before the next cell runs;
# presumably that is where the project-local modules imported there (utils,
# utils_preprocess, ...) live — TODO confirm.
sys.path.append('..')

In [2]:
import numpy as np
import pandas as pd
import netCDF4 as nc
import matplotlib.pyplot as plt
import utils
import utils_preprocess
import utils_spatial_interpolation
import utils_spatial_analysis

from utils_satellite_imputation import satellite_imputation
from utils_iterative_refinement import iterative_refinement

2023-12-17 02:35:22.914576: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-17 02:35:22.949601: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-17 02:35:22.950254: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Step 0: Load shapefile

In [None]:
# Read the aquifer shapefile once. `aquifer_shape` is reused in Step 2, where it is
# passed as `shape` to utils.pull_relevant_data for both the PDSI and GLDAS datasets.
path_shape = (
    '/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/aquifer_shapes/Beryl_Enterprise.shp'
)
aquifer_shape = utils.load_shapefile(path=path_shape)

# Step 1: Preprocess PDSI and GLDAS data into tabular format

In [None]:
# Process the PDSI netCDF files to obtain the tabular data pickle file.
#
# The directories are given as WSL mount paths (/mnt/c/...) instead of native
# Windows paths (C:\...): this notebook runs on a Linux kernel, where a "C:\..."
# string is not a valid directory, and Step 2 reads the tabular output from
# /mnt/c/Users/saulg/Desktop/Remote_Data/pdsi_tabular, so the target directory
# here has to resolve to that same folder.
pdsi_source_directory = "/mnt/c/Users/saulg/Desktop/Remote_Data/pdsi"
pdsi_target_directory = "/mnt/c/Users/saulg/Desktop/Remote_Data/pdsi_tabular"

utils_preprocess.process_pdsi_data(
    source_directory=pdsi_source_directory,
    target_directory=pdsi_target_directory,
    date_start='01/01/1850',
    date_end='12/31/2020',
)

In [None]:
# Process the GLDAS netCDF files to obtain the tabular data pickle file.
#
# The directories are given as WSL mount paths (/mnt/c/...) instead of native
# Windows paths (C:\...): this notebook runs on a Linux kernel, where a "C:\..."
# string is not a valid directory, and Step 2 reads the tabular output from
# /mnt/c/Users/saulg/Desktop/Remote_Data/gldas_tabular, so the target directory
# here has to resolve to that same folder.
gldas_source_directory = "/mnt/c/Users/saulg/Desktop/Remote_Data/GLDAS"
gldas_target_directory = "/mnt/c/Users/saulg/Desktop/Remote_Data/gldas_tabular"

utils_preprocess.process_gldas_data(
    source_directory=gldas_source_directory,
    target_directory=gldas_target_directory,
)

# Step 2: Transform PDSI, GLDAS, and Well Observations into format for ML

### Process PDSI

In [None]:
# Pull the PDSI data for `aquifer_shape` from the tabular directory and pickle it.
directory_pdsi = r"/mnt/c/Users/saulg/Desktop/Remote_Data/pdsi_tabular"

pdsi: dict = utils.pull_relevant_data(
    shape=aquifer_shape,
    dataset_name="PDSI",
    dataset_directory=directory_pdsi,
)

# Save into the groundwater_well_imputation tree: Step 3 loads pdsi_data.pickle from
# .../groundwater_well_imputation/.../artifacts/dataset_outputs, whereas this cell used
# to write to .../Well_Imputation/..., leaving writer and reader on different folders.
utils.save_pickle(
    data=pdsi,
    file_name="pdsi_data.pickle",
    directory="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs",
    protocol=3,
)

### Process GLDAS

In [None]:
# Pull the GLDAS data for `aquifer_shape` from the tabular directory and pickle it.
directory_gldas = r"/mnt/c/Users/saulg/Desktop/Remote_Data/gldas_tabular"

gldas: dict = utils.pull_relevant_data(
    shape=aquifer_shape,
    dataset_name="GLDAS",
    dataset_directory=directory_gldas,
)

# Save into the groundwater_well_imputation tree: Step 3 loads gldas_data.pickle from
# .../groundwater_well_imputation/.../artifacts/dataset_outputs, whereas this cell used
# to write to .../Well_Imputation/..., leaving writer and reader on different folders.
utils.save_pickle(
    data=gldas,
    file_name="gldas_data.pickle",
    directory="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs",
    protocol=3,
)

### Process Well Observations

In [None]:
# Read the well location and time-series CSV files and combine them into one dict
# via utils.transform_well_data, stored under the keys named by `timeseries_name`
# and `locations_name` (the plot cell below reads data["timeseries"]).
well_locations = pd.read_csv("/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/aquifer_data/EscalanteBerylLocation.csv")
well_timeseries = pd.read_csv("/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/aquifer_data/EscalanteBerylTimeseries.csv")

data: dict = utils.transform_well_data(
    well_timeseries=well_timeseries,
    well_locations=well_locations,
    timeseries_name="timeseries",
    locations_name="locations",
)

# Save into the groundwater_well_imputation tree (the same checkout the CSVs are read
# from): Step 3 loads beryl_enterprise_data.pickle from that tree's dataset_outputs
# folder, whereas this cell used to write to .../Well_Imputation/... instead.
utils.save_pickle(
    data=data,
    file_name="beryl_enterprise_data.pickle",
    directory="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs",
    protocol=3,
)


In [None]:
# Visual sanity check: draw the well time series with a dash-dot line style
# to see whether the values look reasonable before modelling.
fig, ax = plt.subplots()
ax.plot(data["timeseries"], '-.')
plt.show()

# Step 3: Develop initial imputation model

In [None]:
# Run the satellite-based imputation for the Beryl Enterprise aquifer from the PDSI,
# GLDAS and well-observation pickle files.
# NOTE(review): the output file name is spelled "enterpris" (missing the final "e").
# Step 4 reads that exact name, so the two must be renamed together if it is corrected.
satellite_imputation(
    aquifer_name="Beryl Enterprise",
    pdsi_pickle="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs/pdsi_data.pickle",
    gldas_pickle="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs/gldas_data.pickle",
    well_data_pickle="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs/beryl_enterprise_data.pickle",
    output_file="beryl_enterpris_imputation_satellite.pickle",
    timeseries_name="timeseries",
    locations_name="locations",
    validation_split=0.3,
    folds=5,
)

# Step 4: Develop iterative refinement model

In [3]:
# Refine the Step 3 result iteratively. `imputed_data_pickle` is the file name that
# satellite_imputation was given as `output_file` (including its "enterpris" spelling).
# In the run logged below, one pickle was saved per iteration, with "_1" and "_2"
# appended to the `output_file` stem.
iterative_refinement(
    aquifer_name="Beryl Enterprise",
    imputed_data_pickle="beryl_enterpris_imputation_satellite.pickle",
    output_file="beryl_enterprise_iterative.pickle",
    validation_split=0.3,
    folds=5,
    feature_threshold=0.60,
)

INFO:root:Starting iteration 1 of 2
INFO:utils:Pickle file 'beryl_enterpris_imputation_satellite.pickle' loaded successfully from '/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs'
  0%|          | 0/2 [00:00<?, ?it/s]INFO:root:Starting imputation for well: 373338113431502
INFO:root:Starting k-fold cross validation for well: 373338113431502
2023-12-17 02:35:34.303299: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:09:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-12-17 02:35:34.325741: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your



INFO:root:Finished k-fold cross validation for well: 373338113431502
INFO:root:Starting model training for well: 373338113431502




INFO:root:Finished model training for well: 373338113431502
 50%|█████     | 1/2 [00:17<00:17, 17.64s/it]INFO:root:Starting imputation for well: 373418113430601
INFO:root:Starting k-fold cross validation for well: 373418113430601




INFO:root:Finished k-fold cross validation for well: 373418113430601
INFO:root:Starting model training for well: 373418113430601




INFO:root:Finished model training for well: 373418113430601
INFO:utils:Pickle file 'beryl_enterprise_iterative_1.pickle' saved successfully to '/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs'
INFO:root:Finished imputation for Beryl Enterprise aquifer
INFO:root:Added the following data to the data dictionary: Data, Predictions, Locations, Metrics, Correlations
INFO:root:Saved data dictionary to beryl_enterprise_iterative_1.pickle
INFO:root:Starting iteration 2 of 2
INFO:utils:Pickle file 'beryl_enterpris_imputation_satellite.pickle' loaded successfully from '/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs'
  0%|          | 0/2 [00:00<?, ?it/s]INFO:root:Starting imputation for well: 373338113431502
INFO:root:Starting k-fold cross validation for well: 373338113431502




INFO:root:Finished k-fold cross validation for well: 373338113431502
INFO:root:Starting model training for well: 373338113431502




INFO:root:Finished model training for well: 373338113431502
 50%|█████     | 1/2 [00:18<00:18, 18.48s/it]INFO:root:Starting imputation for well: 373418113430601
INFO:root:Starting k-fold cross validation for well: 373418113430601




INFO:root:Finished k-fold cross validation for well: 373418113430601
INFO:root:Starting model training for well: 373418113430601




INFO:root:Finished model training for well: 373418113430601
INFO:utils:Pickle file 'beryl_enterprise_iterative_2.pickle' saved successfully to '/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs'
INFO:root:Finished imputation for Beryl Enterprise aquifer
INFO:root:Added the following data to the data dictionary: Data, Predictions, Locations, Metrics, Correlations
INFO:root:Saved data dictionary to beryl_enterprise_iterative_2.pickle


# Step 5: Analyze spatial characteristics of imputation model

In [None]:
# Krige the imputed well data onto a grid and write the result to a netCDF file.
#
# `data_pickle_path` points at the first-iteration output of Step 4. The Step 4 log
# shows that file being saved as "beryl_enterprise_iterative_1.pickle" in
# dataset_outputs; the previous name ("beryl_enterpris_imputation_iteration_1.pickle")
# is not written by any step of this notebook.
utils_spatial_interpolation.kriging_interpolation(
    data_pickle_path="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs/beryl_enterprise_iterative_1.pickle",
    shape_file_path='/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/aquifer_shapes/Beryl_Enterprise.shp',
    n_x_cells=100,
    influence_distance=0.125,
    monthly_time_step=1,
    netcdf_filename="beryl_enterprise_spatial_analysis_iteration_1.nc",
    directory="/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs",
)

# Step 6: Calculate Storage Change

In [None]:
# Compute and plot the storage-change curve from the netCDF raster written in Step 5.
spatial_analysis = utils_spatial_analysis.StorageChangeCalculator(
    units="English",
    storage_coefficient=0.2,
    anisotropic="x",
)

# Open the raster in a context manager so the file handle is closed even if the
# calculation raises; previously the dataset stayed open for the life of the kernel.
# NOTE(review): this assumes calulate_storage_curve returns fully materialised data
# that does not read from `raster` after the block exits — confirm.
# ("calulate" is the method's spelling in utils_spatial_analysis; it is not a typo here.)
with nc.Dataset(
    "/home/saul/workspace/groundwater_well_imputation/groundwater_imputation_api/src/imputation_api/artifacts/dataset_outputs/beryl_enterprise_spatial_analysis_iteration_1.nc",
    'r',
) as raster:
    storage_change = spatial_analysis.calulate_storage_curve(
        raster=raster,
        # Optional: restrict the calculation to dates within the original time range.
        # date_range_filter=("1948-01-01", "1978-01-01"),
    )

plt.plot(storage_change, '-.')
plt.show()