In [1]:
import os
import glob
import numpy as np
import pandas as pd

import geopandas as gpd
from shapely.geometry import Point

In [2]:
# Directory containing the coarse-fraction CSV files
directory_path = r'C:\Users\betebari\Documents\C2VSim_Texture\AEM supporting data\coarse-Fractions'

# Collect every CSV file in the directory. glob returns paths in arbitrary,
# OS-dependent order, so sort them to keep the concatenated row order (and the
# index assigned by ignore_index=True) reproducible between runs.
file_paths = sorted(glob.glob(os.path.join(directory_path, '*.csv')))

# Initialize an empty list to collect DataFrames
dataframes = []

for file_path in file_paths:
    try:
        # Read the CSV file with low_memory=False to suppress DtypeWarning
        df = pd.read_csv(file_path, encoding='latin1', low_memory=False)

        # Append the DataFrame to the list
        dataframes.append(df)
    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest
        print(f"An error occurred while reading {file_path}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the directory instead.
if not dataframes:
    raise ValueError(f"No CSV files could be read from {directory_path!r}")

# Concatenate all DataFrames in the list
combined_df = pd.concat(dataframes, ignore_index=True)

# Drop the columns that are not needed downstream; errors='ignore' tolerates
# files that lack some of them.
combined_df = combined_df.drop(
    columns=['GeophCTstd  ', 'GeophCFstd', 'MeanRes', 'GeophCTstd', 'Upper',
             'Lower', 'LINE_NO_pa', 'geometry', 'Interval'],
    errors='ignore',
)

# Keep only rows whose GeophCF is present and is not the -9999 value.
# .copy() makes the result an independent frame so the column assignment
# below cannot raise SettingWithCopyWarning.
combined_df = combined_df[combined_df['GeophCF'].notna() & (combined_df['GeophCF'] != -9999)].copy()

# Multiply the GeophCF column by 100
combined_df['GeophCF'] = combined_df['GeophCF'] * 100

# Show the cleaned, combined DataFrame
print(combined_df)

         ModIndex       UTMX       UTMY   Topo  IntvBegin  IntvEnd  GeophCT  \
0               1 -189077.59   85408.04  109.0       0.00     5.00     4.83   
1               1 -189077.59   85408.04  109.0       5.00    10.00     4.66   
2               1 -189077.59   85408.04  109.0      10.00    15.00     4.79   
3               1 -189077.59   85408.04  109.0      15.00    21.00     5.76   
4               1 -189077.59   85408.04  109.0      21.00    27.00     5.81   
...           ...        ...        ...    ...        ...      ...      ...   
6867902     37652    7576.27 -180182.39   64.5     234.24   268.54     0.03   
6867903     37653    7603.90 -180163.55   64.5     234.24   268.54     2.62   
6867904     37654    7631.19 -180144.38   64.5     234.24   268.54     6.20   
6867905     37655    7658.25 -180124.52   64.5     234.24   268.54     6.67   
6867906     37656    7685.33 -180104.06   64.5     234.24   268.54     8.32   

         GeophCF   LINE_NO  
0           96.6  4110

In [3]:
# Convert meters to feet (factor 3.28084), writing each result to a new column:
#   Topo -> GSE_ft, IntvBegin -> INTERVALSTART, IntvEnd -> INTERVALEND
for source_col, feet_col in (('Topo', 'GSE_ft'),
                             ('IntvBegin', 'INTERVALSTART'),
                             ('IntvEnd', 'INTERVALEND')):
    combined_df[feet_col] = combined_df[source_col] * 3.28084

In [4]:
# Remove the source columns now that their converted copies
# (GSE_ft, INTERVALSTART, INTERVALEND) exist.
combined_df = combined_df.drop(['Topo', 'IntvBegin', 'IntvEnd'], axis='columns')

In [5]:
# Rename column 'GeophCF' to 'AverageCoarseFraction'
combined_df = combined_df.rename(columns={'GeophCF': 'AverageCoarseFraction'})

# Build the 'WCRNUMBER' identifier as '<LINE_NO>-<ModIndex>'.
# When LINE_NO is stored as float, astype(str) renders whole numbers with a
# trailing '.0' (e.g. '411001.0-1'). Render whole-number values as integers so
# the identifier reads '411001-1'.
line_no = combined_df['LINE_NO']
line_label = line_no.astype(str)
if pd.api.types.is_float_dtype(line_no):
    # Only finite, whole-number values that a float represents exactly are
    # rewritten; everything else keeps its original string form.
    whole = np.isfinite(line_no) & (line_no == np.floor(line_no)) & (line_no.abs() < 2**53)
    line_label = line_label.mask(whole, line_no.where(whole, 0).astype('int64').astype(str))

# NOTE(review): rows with a missing LINE_NO still produce 'nan-<ModIndex>',
# which may collide between rows that share a ModIndex - confirm whether those
# rows need a different line identifier.
combined_df['WCRNUMBER'] = line_label + '-' + combined_df['ModIndex'].astype(str)

In [6]:
# Build one point per row from the UTMX / UTMY columns
point_geometry = gpd.points_from_xy(x=combined_df['UTMX'], y=combined_df['UTMY'])

# Promote the table to a GeoDataFrame, declaring the coordinates as EPSG:3310
# (the original CRS of the data)
combined_df = gpd.GeoDataFrame(combined_df, geometry=point_geometry, crs="EPSG:3310")

# Write the point locations to an ESRI shapefile
combined_df.to_file('AEM_CF.shp', driver='ESRI Shapefile')

  combined_df.to_file('AEM_CF.shp', driver='ESRI Shapefile')


In [7]:
# Transform the points into EPSG:26910
gdf_AEM_CF = combined_df.to_crs("EPSG:26910")

# Overwrite UTMX / UTMY with the coordinates of the reprojected points
reprojected_points = gdf_AEM_CF.geometry
gdf_AEM_CF['UTMX'] = reprojected_points.x
gdf_AEM_CF['UTMY'] = reprojected_points.y

# Show the reprojected GeoDataFrame
print(gdf_AEM_CF)

# Persist the reprojected GeoDataFrame as a new shapefile
gdf_AEM_CF.to_file("AEM_CF_26910.shp")

         ModIndex           UTMX          UTMY  GeophCT  \
0               1  571371.691816  4.291036e+06     4.83   
1               1  571371.691816  4.291036e+06     4.66   
2               1  571371.691816  4.291036e+06     4.79   
3               1  571371.691816  4.291036e+06     5.76   
4               1  571371.691816  4.291036e+06     5.81   
...           ...            ...           ...      ...   
6867902     37652  776651.190378  4.032220e+06     0.03   
6867903     37653  776678.278108  4.032240e+06     2.62   
6867904     37654  776705.015078  4.032260e+06     6.20   
6867905     37655  776731.500285  4.032281e+06     6.67   
6867906     37656  776757.986906  4.032302e+06     8.32   

         AverageCoarseFraction   LINE_NO     GSE_ft  INTERVALSTART  \
0                         96.6  411001.0  357.61156       0.000000   
1                         93.3  411001.0  357.61156      16.404200   
2                         95.7  411001.0  357.61156      32.808400   
3          

  gdf_AEM_CF.to_file("AEM_CF_26910.shp")


In [8]:
# Read the aquifer boundary that serves as the clip mask
aquifer_boundary = gpd.read_file("C:/Users/betebari/Documents/C2VSim_Texture/OSWCR/central_val_buf_5mil_utm10n.shp")

# Re-read the reprojected points from the shapefile written earlier
gdf_AEM_CF = gpd.read_file("AEM_CF_26910.shp")

# Clipping needs both layers in one CRS; reproject the points to the
# boundary's CRS when the two differ
if aquifer_boundary.crs != gdf_AEM_CF.crs:
    gdf_AEM_CF = gdf_AEM_CF.to_crs(aquifer_boundary.crs)

# Clip the points with the aquifer boundary
clipped_gdf = gpd.clip(gdf_AEM_CF, mask=aquifer_boundary)

# Write the clipped result to its own shapefile
clipped_gdf.to_file("AEM_CF_Clipped.shp")

print("Clipping complete. Clipped shapefile saved as 'AEM_CF_Clipped.shp'")

Clipping complete. Clipped shapefile saved as 'AEM_CF_Clipped.shp'


In [9]:
# Map the shortened column names found in the reloaded shapefile data
# ('AverageCoa', 'INTERVALST', 'INTERVALEN') back to their full names.
clipped_gdf = clipped_gdf.rename(columns={
    'AverageCoa': 'AverageCoarseFraction',
    'INTERVALST': 'INTERVALSTART',
    'INTERVALEN': 'INTERVALEND',
})

print(clipped_gdf)

         ModIndex           UTMX          UTMY  GeophCT  \
4289099      1548  870836.066828  3.870833e+06    18.72   
4030928      1548  870836.066828  3.870833e+06     6.99   
3498922      1548  870836.066828  3.870833e+06     0.01   
3605668      1548  870836.066828  3.870833e+06     0.01   
3978113      1548  870836.066828  3.870833e+06     4.66   
...           ...            ...           ...      ...   
1549349     78718  570624.467431  4.500909e+06     5.07   
1549348     78718  570624.467431  4.500909e+06     2.74   
1549347     78718  570624.467431  4.500909e+06     1.63   
1549346     78718  570624.467431  4.500909e+06     1.16   
1549356     78718  570624.467431  4.500909e+06    10.55   

         AverageCoarseFraction   LINE_NO       GSE_ft  INTERVALSTART  \
4289099                   80.4       NaN  3508.530296     506.299229   
4030928                   57.3       NaN  3508.530296     242.749352   
3498922                    0.1       NaN  3508.530296      16.404200   
360

In [None]:
# Lookup table: one row per coarse-fraction breakpoint (10 ... 100) with the
# property values that are interpolated between breakpoints.
coarse_fraction_table = pd.DataFrame({
    'Coarse Fraction Range': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'Average Hydraulic Conductivity (ft/day)': [
        0.136152327, 2.134365227, 13.42049058, 34.51476016, 87.73306091,
        111.1681744, 124.2637375, 92.77831111, 13.80333333, 13.80333333,
    ],
    'Avg Specific Yield (%)': [
        6.538461538, 7.71875, 7, 11.75, 11.83333333,
        18.625, 17, 20, 18, 15,
    ],
    'Avg Kv (ft/day)': [
        0.0020002, 0.0050005, 0.02750275, 0.050005, 0.050005,
        0.350035, 0.4252675, 0.5005, 0.5005, 0.5005,
    ],
})


def interpolate_values(row):
    """Interpolate the three table properties for one row.

    Parameters
    ----------
    row : mapping
        Must provide an 'AverageCoarseFraction' entry.

    Returns
    -------
    pandas.Series
        Linearly interpolated values indexed by
        'Average Hydraulic Conductivity (ft/day)', 'Avg Specific Yield (%)'
        and 'Avg Kv (ft/day)'. np.interp holds the first / last table value
        for inputs below 10 or above 100.
    """
    property_names = ['Average Hydraulic Conductivity (ft/day)',
                      'Avg Specific Yield (%)',
                      'Avg Kv (ft/day)']
    breakpoints = coarse_fraction_table['Coarse Fraction Range']
    fraction = row['AverageCoarseFraction']

    interpolated = [np.interp(fraction, breakpoints, coarse_fraction_table[name])
                    for name in property_names]
    return pd.Series(interpolated, index=property_names)

# Interpolate all three properties for every row in clipped_gdf.
# np.interp accepts a whole array of x values, so the column is interpolated
# in one vectorised call per property instead of a row-by-row apply(axis=1),
# which builds a pandas Series for each row. The results are the same as the
# row-wise version, including the first / last table value being held for
# inputs outside the 10-100 breakpoints.
property_columns = ['Average Hydraulic Conductivity (ft/day)', 'Avg Specific Yield (%)', 'Avg Kv (ft/day)']
coarse_fraction_values = clipped_gdf['AverageCoarseFraction'].to_numpy(dtype=float)
table_breakpoints = coarse_fraction_table['Coarse Fraction Range'].to_numpy(dtype=float)

for property_column in property_columns:
    clipped_gdf[property_column] = np.interp(
        coarse_fraction_values,
        table_breakpoints,
        coarse_fraction_table[property_column].to_numpy(dtype=float),
    )

# clipped_gdf now carries interpolated Hydraulic Conductivity, Specific Yield, and Kv
print(clipped_gdf[property_columns])


In [None]:
# Count the distinct non-missing 'WCRNUMBER' identifiers.
# (The variable name mentions ModIndex, but the count is taken over WCRNUMBER.)
unique_ModIndex_count = clipped_gdf['WCRNUMBER'].nunique(dropna=True)
print(f"Number of unique WCRNUMBER values: {unique_ModIndex_count}")
print(clipped_gdf)

In [None]:
# Remove the 'geometry' column before the table is exported
clipped_gdf = clipped_gdf.drop('geometry', axis='columns')

In [None]:
# Destination of the final table
output_file = 'C:/Users/betebari/Documents/C2VSim_Texture/OSWCR/7-updated_AEM_CF.csv'

# Write the table as CSV without the index column, then report the path
clipped_gdf.to_csv(path_or_buf=output_file, index=False)
print(f"combined CSV file saved as '{output_file}'")