### Purpose

The purpose of this notebook is to quantify spatial relationships between oil and gas wells and saltwater injection wells (SWIW) across the study area.

### Load Libraries

In [1]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic
import geopandas as gpd
from shapely.geometry import Point
from tqdm import tqdm

### Load Data

In [2]:
well_info = pd.read_csv('../data/final/well_information_final.csv')
permits = pd.read_csv('../data/final/permit_plug_list_final.csv')

### Create GeoDataFrames for each dataset

In [3]:
well_info = well_info.dropna(subset=['WELL LATITUDE', 'WELL LONGITUDE'])
permits = permits.dropna(subset=['WH_LAT', 'WH_LONG'])

well_gdf = gpd.GeoDataFrame(
    well_info,
    geometry=gpd.points_from_xy(
        well_info['WELL LONGITUDE'], well_info['WELL LATITUDE']
    ),
    crs='EPSG:4326'
)

permit_gdf = gpd.GeoDataFrame(
    permits,
    geometry=gpd.points_from_xy(
        permits['WH_LONG'], permits['WH_LAT']
    ),
    crs='EPSG:4326'
)


### Identify (SWIW)

In [4]:
saltwater_gdf = permit_gdf[
    permit_gdf['PROP_WELL_CLASS'].str.contains('SWIW', case=False, na=False)
]

print(f"Total wells: {len(well_gdf)}")
print(f"Saltwater wells: {len(saltwater_gdf)}")

Total wells: 73607
Saltwater wells: 303


### Compute pairwise distances between wells

In [9]:
def nearest_distance(source_gdf, target_gdf, src_col, target_col):
    target_sindex = target_gdf.sindex
    nearest_list = []

    for idx, src_row in tqdm(source_gdf.iterrows(), total=len(source_gdf), desc=f'Finding nearest {target_col}'):
        nearest_idx = list(target_sindex.nearest(src_row.geometry, 1))
        nearest_geom = target_gdf.iloc[nearest_idx[0]].geometry

        distance_km = src_row.geometry.distance(nearest_geom) * 111

        nearest_list.append({
            src_col: src_row[src_col],
            f'Nearest_{target_col}_km': distance_km
        })

    return pd.DataFrame(nearest_list)

### Compute Nearest Wells and Export

In [11]:
nearest_swiw_df = nearest_distance(well_gdf, saltwater_gdf, 'API WELL NUMBER', 'SWIW')
nearest_swiw_df.to_csv('../data/distance/well_to_nearest_saltwater.csv', index=False)

nearest_well_df = nearest_distance(well_gdf, well_gdf, 'API WELL NUMBER', 'Well')
nearest_well_df.to_csv('../data/distance/well_to_nearest_well.csv', index=False)

Finding nearest SWIW: 100%|█████████████| 73607/73607 [01:11<00:00, 1027.96it/s]
Finding nearest Well: 100%|█████████████| 73607/73607 [01:11<00:00, 1033.17it/s]
