<a href="https://colab.research.google.com/github/BYU-Hydroinformatics/gwbf-notebooks/blob/main/3_GLSB_filter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Distance Buffer

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

def filter_wells_by_distance(relationships_df, distance_buffer=500):
    """
    Step 1: Select the rows that lie closer to their reach than a cutoff.

    Parameters:
    -----------
    relationships_df : DataFrame
        Table with at least a 'Distance_to_Reach' column (the docstring of
        the original notes it also carries Well_ID, Reach_ID, etc.).
    distance_buffer : float
        Cutoff distance (meters). The comparison is strict, so a row whose
        distance equals the cutoff is NOT kept.

    Returns:
    --------
    DataFrame
        Independent copy of the rows with 'Distance_to_Reach' below the
        cutoff; the original index labels are preserved.
    """
    # Strict "less than" test; NaN distances compare False and are dropped.
    is_near = relationships_df['Distance_to_Reach'].lt(distance_buffer)
    nearby = relationships_df.loc[is_near].copy()

    # Counts are row counts of the respective frames.
    print("Distance Filter Summary:")
    print(f"Total wells: {len(relationships_df)}")
    print(f"Wells within {distance_buffer}m: {len(nearby)}")

    # Report the span of distances that survived the filter.
    kept_distances = nearby['Distance_to_Reach']
    nearest, farthest = kept_distances.min(), kept_distances.max()
    print(f"Distance range: {nearest:.1f}m to {farthest:.1f}m")

    return nearby



In [2]:
# Load the well-to-reach relationship table (path is relative to the working
# directory). The distance filter reads its 'Distance_to_Reach' column.
relationships_df = pd.read_csv('well_reach_relationships_final.csv')

In [3]:
# Step 1: keep only the rows closer than 500 m to their reach
wells_in_distance_buffer = filter_wells_by_distance(relationships_df, distance_buffer=500)

Distance Filter Summary:
Total wells: 8752
Wells within 500m: 4002
Distance range: 0.0m to 499.9m


In [4]:
# Preview the first rows of the distance-filtered table
wells_in_distance_buffer.head()

Unnamed: 0,Well_ID,Reach_ID,Reach_Elevation,Distance_to_Reach,Downstream_Gage
0,381033113480701,710579638.0,2018.0,15.997118,
1,381037113474001,710579638.0,2018.0,387.946514,
3,381236113485601,710258231.0,2033.5,95.71774,
4,382113113435401,710549872.0,1770.0,375.764745,
8,382445113501401,710272109.0,1959.5,1.905797,


# 2. Elevation Buffer

In [5]:
import pandas as pd

# Load the per-measurement table. A missing file is fatal for every cell that
# follows, so re-raise with a clear message instead of printing and leaving
# `final_measurements_delta` undefined (which would surface later as an
# unrelated NameError).
try:
    final_measurements_delta = pd.read_csv('final_measurements_delta.csv')
except FileNotFoundError as exc:
    raise FileNotFoundError(
        "Error: 'final_measurements_delta.csv' not found. Please check the filename and path."
    ) from exc

print(final_measurements_delta.head()) # Display the first few rows


           Well_ID        Date       WTE  Delta_WTE
0  381033113480701  2012-09-06  7092.990      0.000
1  381037113474001  2012-09-06  7175.950      0.000
2  381152113442801  1995-11-22  6200.000      0.000
3  381236113485601  2014-07-23  7151.000      0.000
4  382113113435401  2018-05-25  5397.996      2.046


In [6]:
# Keep only the measurements whose Well_ID appears among the wells that
# passed the distance filter.
filtered_measurements = final_measurements_delta.loc[
    final_measurements_delta['Well_ID'].isin(wells_in_distance_buffer['Well_ID'])
]

print(filtered_measurements.head())

           Well_ID        Date       WTE  Delta_WTE
0  381033113480701  2012-09-06  7092.990      0.000
1  381037113474001  2012-09-06  7175.950      0.000
3  381236113485601  2014-07-23  7151.000      0.000
4  382113113435401  2018-05-25  5397.996      2.046
5  382113113435401  2018-05-24  5397.995      2.045


In [8]:
def analyze_wte_measurements(wells_in_buffer, filtered_measurements, wte_buffer=10,
                             wte_to_meters=0.3048):
    """
    Step 2: Keep the measurements whose water table elevation (WTE) lies above
    the reach elevation minus a tolerance.

    A measurement row is retained when

        WTE * wte_to_meters > Reach_Elevation - wte_buffer

    Args:
        wells_in_buffer (pd.DataFrame): Well/reach table; joined to the
            measurements on 'Well_ID'.
        filtered_measurements (pd.DataFrame): Measurement table; joined to the
            wells on 'Well_ID'.
        wte_buffer (float): Elevation tolerance subtracted from
            'Reach_Elevation', expressed in the same unit as that column.
        wte_to_meters (float): Factor applied to 'WTE' before the comparison.
            The default 0.3048 is the feet-to-meters factor, i.e. it assumes
            WTE is recorded in feet and Reach_Elevation in meters (TODO:
            confirm against the data source). Pass 1.0 when both columns are
            already in the same unit.

    Returns:
        pd.DataFrame: Independent copy of the retained rows with the columns
            Reach_ID, Reach_Elevation, Distance_to_Reach, Downstream_Gage,
            Well_ID, Date, WTE, Delta_WTE (in that order). 'WTE' is returned
            unconverted. The index is that of the merged frame, not reset.

    Raises:
        KeyError: If any of the output columns is missing after the merge.
    """
    print("\nStarting WTE Analysis...")

    output_columns = ['Reach_ID', 'Reach_Elevation', 'Distance_to_Reach',
                      'Downstream_Gage', 'Well_ID', 'Date', 'WTE', 'Delta_WTE']

    # 1. Merge all measurements with wells_in_buffer to get reach information.
    #    Inner join: measurements without a matching well row are dropped.
    merged_data = filtered_measurements.merge(
        wells_in_buffer,
        on='Well_ID',
        how='inner'
    )

    # Fail early with a message that names the missing column(s).
    missing = [col for col in output_columns if col not in merged_data.columns]
    if missing:
        raise KeyError(f"Missing required column(s) after merge: {missing}")

    # 2. Apply the elevation threshold to every measurement. WTE is scaled
    #    only for the comparison; rows with NaN on either side compare False
    #    and are dropped.
    elevation_mask = (
        merged_data['WTE'] * wte_to_meters
        > (merged_data['Reach_Elevation'] - wte_buffer)
    )

    # 3. Select and reorder the output columns. Copy so callers can modify the
    #    result without pandas chained-assignment warnings.
    result = merged_data.loc[elevation_mask, output_columns].copy()

    print("\nWTE Analysis Summary:")
    print(f"Total wells in buffer: {len(wells_in_buffer)}")
    print(f"Number of measurements exceeding threshold: {len(result)}")
    print(f"Number of unique wells in filtered data: {result['Well_ID'].nunique()}")

    return result

In [9]:
# Step 2: apply the elevation filter, leaving wte_buffer at its default (10)
result_df = analyze_wte_measurements(wells_in_distance_buffer, filtered_measurements)


Starting WTE Analysis...

WTE Analysis Summary:
Total wells in buffer: 4002
Number of measurements exceeding threshold: 2696
Number of unique wells in filtered data: 13


In [10]:
# Preview the first rows of the elevation-filtered measurements
result_df.head()

Unnamed: 0,Reach_ID,Reach_Elevation,Distance_to_Reach,Downstream_Gage,Well_ID,Date,WTE,Delta_WTE
0,710579638.0,2018.0,15.997118,,381033113480701,2012-09-06,7092.99,0.0
1,710579638.0,2018.0,387.946514,,381037113474001,2012-09-06,7175.95,0.0
2,710258231.0,2033.5,95.71774,,381236113485601,2014-07-23,7151.0,0.0
94047,710537950.0,1691.5,312.526215,,384025114032601,2010-03-17,5520.87,0.0
98891,710549851.0,1668.0,111.400463,,384236114042701,1953-08-03,5518.0,0.0


In [11]:
# Save the filtered measurements to 'result.csv'; index=False leaves the
# DataFrame index out of the file.
result_df.to_csv('result.csv', index=False)