In [None]:
# Import required libraries
import geopandas as gpd
import pandas as pd
from shapely.ops import nearest_points
import networkx as nx
from shapely.geometry import Point, MultiPoint, LineString, MultiLineString, GeometryCollection
import requests
import zipfile
import os
import io
from io import StringIO
from io import BytesIO
from matplotlib import pyplot as plt
from shapely.ops import snap, nearest_points

In [None]:
# User defined variables
# State FIPS codes for reference:
# IA = 19; ID = 16; IL = 17; MN = 27; MO = 29; MT = 30; OR = 41;  WA = 53; WI = 55
state_name = 'Montana'    # full state name, used to filter the boat-ramp layer
my_training_state = 'MT'  # State USPS abbreviation
state_fips = '30'         # two-digit FIPS code of your state, kept as a string
my_nas_id = 5             # species ID sent to the NAS occurrence query
my_path = f'data/{my_training_state}/'  # leave this alone

In [None]:
# Download road shapefile
# TIGER/Line 2022 primary and secondary roads for the configured state.
road_url = f'https://www2.census.gov/geo/tiger/TIGER2022/PRISECROADS/tl_2022_{state_fips}_prisecroads.zip'
local_path = my_path
print('Downloading shapefile...')
r = requests.get(road_url, timeout=120)
# Stop here with the HTTP error; otherwise an error page would only surface
# later as a confusing BadZipFile.
r.raise_for_status()
# Make sure the target folder exists before unpacking into it.
os.makedirs(local_path, exist_ok=True)
# The context manager closes the archive once everything is unpacked.
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
    z.extractall(path=local_path)  # extract to folder
    # Names of the shapefile components contained in the archive.
    filenames = [y for y in sorted(z.namelist()) if y.endswith(('dbf', 'prj', 'shp', 'shx'))]
print("Done")
print(filenames)
# Download USGS boat access data
# The archive behind this URL bundles several files; only the shapefile
# components listed below are unpacked.
URL_BASE = "https://www.sciencebase.gov/catalog/file/get/63b81b50d34e92aad3cc004d?facet=Boatramps_United_States_final_20230104"

# File extensions worth keeping
desired_extensions = ['.dbf', '.prj', '.shp', '.shx']

# Fetch the archive
response = requests.get(URL_BASE)

if response.status_code != 200:
    # Anything other than HTTP 200 is reported and nothing is extracted
    print(f"Failed to download file. Status code: {response.status_code}")
else:
    # Hold the downloaded bytes in memory rather than writing a temporary file
    zip_file = BytesIO(response.content)

    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        # Destination folder for the extracted files
        os.makedirs(my_path, exist_ok=True)

        # str.endswith accepts a tuple of suffixes
        wanted = tuple(desired_extensions)
        for file_name in zip_ref.namelist():
            if not file_name.endswith(wanted):
                continue
            zip_ref.extract(file_name, my_path)
            print(f"Extracted: {file_name}")

    print("Files extracted successfully to my_path directory.")


In [None]:
def nas_api_call(nas_id):
    """
    Query the NAS occurrence search endpoint for one species.

    Parameters:
    - nas_id: Species ID sent as the 'species_ID' query parameter.

    Returns:
    - DataFrame: The 'results' list of the JSON response, flattened with
      pandas.json_normalize.

    Raises:
    - requests.HTTPError: If the server answers with an error status.
    """
    # Single, well-formed URL (the previous concatenation produced '/api/v2//occurrence').
    url_request = 'http://nas.er.usgs.gov/api/v2/occurrence/search'
    # Let requests build and encode the query string.
    response = requests.get(url_request, params={'species_ID': nas_id}, timeout=None)
    # Surface HTTP failures instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return pd.json_normalize(response.json(), 'results')

def get_intersection_points(line_gdf, polygon_gdf):
    """
    Returns a GeoDataFrame with intersection points between a line and a polygon.

    All lines are merged into one geometry, all polygons into another, and the
    two are intersected. Point results are kept as they are; linear results
    (where a line runs inside a polygon) contribute every vertex as a point.

    Parameters:
    - line_gdf (GeoDataFrame): A GeoDataFrame containing LineString geometries.
    - polygon_gdf (GeoDataFrame): A GeoDataFrame containing Polygon geometries.

    Returns:
    - GeoDataFrame: A GeoDataFrame containing the intersection points as Point
      geometries, in the CRS of line_gdf (empty if nothing intersects).
    """

    # Ensure CRS match
    if line_gdf.crs != polygon_gdf.crs:
        polygon_gdf = polygon_gdf.to_crs(line_gdf.crs)

    # Compute intersection
    intersection = line_gdf.unary_union.intersection(polygon_gdf.unary_union)

    # Extract intersection points
    points = []

    def extract_points(geom):
        """ Recursively extract points from geometry objects. """
        if geom is None or geom.is_empty:
            return
        if isinstance(geom, Point):
            points.append(geom)
        elif isinstance(geom, LineString):
            points.extend(Point(coord) for coord in geom.coords)
        elif isinstance(geom, (MultiPoint, MultiLineString, GeometryCollection)):
            # Multi-part geometries expose no .coords of their own, so each
            # part is handled individually.
            for sub_geom in geom.geoms:
                extract_points(sub_geom)

    extract_points(intersection)

    # Create and return a GeoDataFrame of points
    if points:
        return gpd.GeoDataFrame(geometry=points, crs=line_gdf.crs)
    return gpd.GeoDataFrame(columns=['geometry'], crs=line_gdf.crs)





def get_endpoints(geometry):
    """
    Return the two endpoints of a line geometry.

    Parameters:
    - geometry: Object exposing 'geom_type' and 'coords'.

    Returns:
    - list: [first coordinate, last coordinate] when geom_type is
      'LineString'; an empty list for every other geometry type.
    """
    # Anything that is not a plain LineString has no endpoints here
    if geometry.geom_type != 'LineString':
        return []
    vertices = geometry.coords
    return [vertices[0], vertices[-1]]

def snap_points_to_nearest_poly(poly: gpd.GeoDataFrame, point: gpd.GeoDataFrame, snapdist: float) -> gpd.GeoDataFrame:
    """
    Move each point onto the closest polygon found within a search distance.

    Parameters:
        poly (GeoDataFrame): Polygon layer; reprojected to EPSG:3857 internally.
        point (GeoDataFrame): Point layer; labelled as EPSG:3857 internally.
        snapdist (float): Maximum search distance passed to the nearest join
            and used as the snapping tolerance.

    Returns:
        GeoDataFrame: The nearest-join result, one row per point, with the
        added columns 'pointid', 'distance' and 'nearestpoint' (plus the
        polygon attributes, including 'polyid' and 'polygeom') and with
        'geometry' replaced by the snapped location where a polygon was found.
    """

    def _gap_to_polygon(row):
        # No polygon within snapdist -> no distance
        if row.polygeom is not None:
            return row.geometry.distance(row.polygeom)
        return None

    def _closest_on_polygon(row):
        # Second element of nearest_points lies on the polygon
        if row.polygeom is not None and row.distance is not None:
            return nearest_points(row.geometry, row.polygeom)[1]
        return None

    def _snapped_location(row):
        # Points without a target keep their original geometry
        if row.nearestpoint is not None:
            return snap(row.geometry, row.nearestpoint, snapdist)
        return row.geometry

    # Put both layers on EPSG:3857 (polygons are reprojected, points are labelled)
    poly = poly.to_crs(3857)
    point = point.set_crs(3857)

    # Running identifiers for both layers
    poly["polyid"] = range(poly.shape[0])
    point["pointid"] = range(point.shape[0])

    # Keep the polygon shape as an ordinary column so it survives the join
    poly["polygeom"] = poly.geometry

    # Nearest polygon per point, limited to the snap distance
    sj = gpd.sjoin_nearest(left_df=point, right_df=poly, how="left", max_distance=snapdist)

    sj["distance"] = sj.apply(_gap_to_polygon, axis=1)

    # One row per point: the closest match sorts first, missing distances last
    sj = (
        sj.sort_values(by=["pointid", "distance"], ascending=True, na_position="last")
        .drop_duplicates(subset="pointid", keep="first")
    )

    sj["nearestpoint"] = sj.apply(_closest_on_polygon, axis=1)
    sj["geometry"] = sj.apply(_snapped_location, axis=1)

    return sj

def sjoin_nearest_replace_geom(left_gdf, right_gdf, **kwargs):
    """
    Performs a spatial join (nearest) and replaces the geometry of left_gdf
    with the geometry of the matched right_gdf feature while retaining left_gdf
    attributes and carrying over the 'epointID' column from right_gdf.

    Parameters:
    - left_gdf (GeoDataFrame): The GeoDataFrame with attributes to keep.
    - right_gdf (GeoDataFrame): The GeoDataFrame whose geometry will replace the
      left_gdf geometry. Must contain an 'epointID' column and have unique
      index labels.
    - **kwargs: Additional arguments for gpd.sjoin_nearest (e.g., max_distance).

    Returns:
    - GeoDataFrame: Resulting GeoDataFrame with left_gdf attributes, right_gdf
      geometry, and epointID. Rows without a match (possible when max_distance
      is given) have a missing geometry and a missing epointID.
    """
    # Perform spatial join (nearest)
    joined = gpd.sjoin_nearest(left_gdf, right_gdf, how="left", **kwargs)

    # The join result keeps only the LEFT geometry; the matched right feature is
    # identified by its index label, stored in 'index_right' (or, for a named
    # right index in newer GeoPandas, in a column carrying that name).
    if "index_right" in joined.columns:
        right_index_col = "index_right"
    else:
        right_index_col = right_gdf.index.name

    # Look the matched right geometries up by label; unmatched rows stay missing.
    matched_geoms = right_gdf.geometry.reindex(joined[right_index_col])

    # Replace the left geometry with the nearest right geometry. Values are
    # assigned by position, so repeated left index labels (ties) are harmless.
    joined = joined.set_geometry(
        gpd.GeoSeries(matched_geoms.values, index=joined.index, crs=right_gdf.crs)
    )

    # Keep original left_gdf columns + 'epointID' from right_gdf
    cols_to_keep = list(left_gdf.columns) + ["epointID"]
    return joined[cols_to_keep]

def get_boundary_intersection_points(line_gdf, polygon_gdf):
    """
    Returns a GeoDataFrame with intersection points where a line intersects the boundary of a polygon.

    Crossings are returned as points. Where a line runs along a boundary the
    intersection is linear; every vertex of that shared stretch is returned.

    Parameters:
    - line_gdf (GeoDataFrame): A GeoDataFrame containing LineString geometries.
    - polygon_gdf (GeoDataFrame): A GeoDataFrame containing Polygon geometries.

    Returns:
    - GeoDataFrame: A GeoDataFrame containing the intersection points as Point
      geometries, in the CRS of line_gdf (empty if nothing intersects).
    """

    # Ensure CRS match
    if line_gdf.crs != polygon_gdf.crs:
        polygon_gdf = polygon_gdf.to_crs(line_gdf.crs)

    # Reduce the polygons to their boundary lines
    polygon_boundaries = polygon_gdf.boundary

    # Compute intersection between roads and polygon boundaries
    intersection = line_gdf.unary_union.intersection(polygon_boundaries.unary_union)

    # Extract intersection points
    points = []

    def extract_points(geom):
        """ Recursively extract points from geometry objects. """
        if geom is None or geom.is_empty:
            return
        if isinstance(geom, Point):
            points.append(geom)
        elif isinstance(geom, LineString):
            points.extend(Point(coord) for coord in geom.coords)
        elif isinstance(geom, (MultiPoint, MultiLineString, GeometryCollection)):
            # Multi-part geometries expose no .coords of their own, so each
            # part is handled individually.
            for sub_geom in geom.geoms:
                extract_points(sub_geom)

    extract_points(intersection)

    # Create and return a GeoDataFrame of points
    if points:
        return gpd.GeoDataFrame(geometry=points, crs=line_gdf.crs)
    return gpd.GeoDataFrame(columns=['geometry'], crs=line_gdf.crs)

def nearest_distance(row, gdf2):
    """
    Distance from one row's geometry to the closest location in gdf2.

    Parameters:
    - row: Object with a 'geometry' attribute (e.g. a row passed by DataFrame.apply).
    - gdf2 (GeoDataFrame): Layer to measure against; its geometries are merged
      with unary_union on every call.

    Returns:
    - Distance between row.geometry and the nearest point of the merged gdf2 geometry.
    """
    # nearest_points returns one point per input; the second lies on gdf2
    _, closest = nearest_points(row.geometry, gdf2.unary_union)
    return row.geometry.distance(closest)

In [None]:
# Get locations of your AIS.
# If your AIS is not present in your state of interest you could identify the points where the road layer intersects your state border and use those
pos_data = nas_api_call(my_nas_id)
my_data = pos_data[["decimalLatitude", "decimalLongitude"]]
# Build point geometries (x = longitude, y = latitude), drop incomplete rows, then label the CRS
occurrence_points = gpd.points_from_xy(my_data.decimalLongitude, my_data.decimalLatitude)
pos_data_gdf = gpd.GeoDataFrame(my_data, geometry=occurrence_points)
pos_data_gdf = pos_data_gdf.dropna()
# NOTE(review): set_crs only labels the data; the coordinates look like decimal
# degrees, so labelling them EPSG:3857 may be unintended - confirm.
pos_data_gdf = pos_data_gdf.set_crs(3857)#.to_crs(5070)

In [None]:
# Get points where roads intersect your state boundary
# The state layer (tl_2012_us_state.shp) is not fetched by the download cell
# shown in this notebook, so it has to be in my_path already.
roads_shp = f'{my_path}tl_2022_{state_fips}_prisecroads.shp'
states_shp = f'{my_path}tl_2012_us_state.shp'
# NOTE(review): set_crs(..., allow_override=True) relabels without reprojecting - confirm this is intended.
my_roads = gpd.read_file(roads_shp).set_crs(3857, allow_override = True)
state_boundary = gpd.read_file(states_shp).set_crs(3857, allow_override = True)
# Keep only the state being processed
my_boundary = state_boundary.loc[state_boundary['STUSPS'] == my_training_state]

In [None]:
# Points where the road layer meets the state boundary, numbered from 1
intersection_points_gdf = get_boundary_intersection_points(line_gdf=my_roads, polygon_gdf=my_boundary)
intersection_points_gdf['intID'] = range(1, intersection_points_gdf.shape[0] + 1)

In [None]:
# Distance from every boundary crossing to the nearest occurrence point.
# The occurrence layer is merged once and measured against in a single
# vectorized call, instead of rebuilding the union for every row.
occurrence_union = pos_data_gdf.unary_union
intersection_points_gdf['distance_to_nearest'] = intersection_points_gdf.geometry.distance(occurrence_union)

In [None]:
# Create the plot to spot check your files so far
fig, ax = plt.subplots(figsize=(10, 10))

# Layers are drawn in this order: boundary, roads, intersections
layers = [
    (my_boundary, dict(edgecolor='red', facecolor='none', linewidth=2, label="State Boundary")),
    (my_roads, dict(color='blue', linewidth=1, label="Roads")),
    (intersection_points_gdf, dict(color='yellow', label="Intersections")),
]
for layer, style in layers:
    layer.plot(ax=ax, **style)

# Legend and title
ax.legend()
ax.set_title("Roads and State Boundary", fontsize=14)

# Show the plot
plt.show()

In [None]:
# Identify road endpoints; remove duplicates
# Extract endpoints for all lines. get_endpoints returns an empty list for
# anything that is not a LineString; explode turns those into NaN, which would
# break the DataFrame construction below, so they are dropped here.
endpoints = my_roads['geometry'].apply(get_endpoints).explode().dropna()
# Convert the list of endpoints to a DataFrame
endpoints_df = pd.DataFrame(endpoints.tolist(), columns=['x', 'y'])
# Remove duplicate points
endpoints_df = endpoints_df.drop_duplicates()
# Build a point layer from the unique endpoints and number them from 1
endpoints_gdf = gpd.GeoDataFrame(endpoints_df, geometry=gpd.points_from_xy(endpoints_df['x'], endpoints_df['y'])).set_crs(3857, allow_override = True).drop(columns = ['x', 'y'])
endpoints_gdf['epointID'] = range(1, len(endpoints_gdf) + 1)

In [None]:
# Import buffered water
water_shp = my_path + my_training_state + "_buffered_water.shp"
buffered_water = gpd.read_file(water_shp).set_crs(3857, allow_override = True)
# Every waterbody starts with Present = 0.0 and gets a running waterID from 1
buffered_water["Present"] = 0.0
buffered_water['waterID'] = range(1, buffered_water.shape[0] + 1)

In [None]:
# Load the national boat-ramp layer and keep the ramps of the configured state
ramps = gpd.read_file(my_path + 'Boatramps_United_States_final_20230104.shp').set_crs(3857, allow_override=True)
my_ramps = ramps.loc[ramps['State'] == state_name]
# Work on an explicit copy so the new column is not written into a slice of
# my_ramps (avoids pandas' SettingWithCopyWarning).
ramp_geo = my_ramps[['geometry']].copy()
ramp_geo['rampID'] = range(1, len(ramp_geo) + 1)

In [None]:
# Identify ramps that are within waterbodies and snap unjoinable ramps to the nearest waterbody within maximum distance.
# Left join: every ramp is kept; ramps that fall inside no waterbody get a missing 'index_right'.
ramps_in_water = ramp_geo.sjoin(buffered_water, how="left", predicate="within")
# Ramps without a containing waterbody (missing 'index_right'), copied so they can be modified independently.
ramps_not_in_water = ramps_in_water[ramps_in_water['index_right'].isna()].drop(columns=["index_right"], errors="ignore").copy()
# Ramps that did fall inside a waterbody; the join helper column is removed.
ramps_in_water = ramps_in_water.dropna(subset=["index_right"]).drop(columns=["index_right"], errors="ignore").set_crs(3857, allow_override = True)
# Cast waterID to int64 now that only matched rows remain.
ramps_in_water["waterID"] = ramps_in_water["waterID"].astype("int64")
# Search for the nearest waterbody within a distance of 1000 (units of the layer CRS - TODO confirm they are metres).
snapped_ramps = snap_points_to_nearest_poly(buffered_water, ramps_not_in_water, 1000)
# Use the snapped location as geometry and the joined waterbody ID as waterID.
# 'waterID_right' presumably exists because both join inputs carry a 'waterID' column - verify.
ramps_to_add = snapped_ramps[['rampID', 'waterID_right', 'nearestpoint']].rename(columns={'waterID_right': 'waterID', 'nearestpoint': 'geometry'}).set_crs(3857, allow_override = True)
# Combined ramp layer: ramps inside a waterbody plus the snapped ones.
my_ramps = pd.concat([ramps_in_water, ramps_to_add])

In [None]:
# Identify water without ramps; identify point on polygon perimeter closest to a road endpoint
water_w_ramps_list = my_ramps['waterID'].tolist()
has_ramp = buffered_water['waterID'].isin(water_w_ramps_list)
water_no_ramps = buffered_water[~has_ramp]
# Attach the nearest road endpoint (epointID) to the ramps and to the ramp-less waterbodies
ramps_in_water_sj = sjoin_nearest_replace_geom(left_gdf=my_ramps, right_gdf=endpoints_gdf)
lakes_no_ramp_sj = sjoin_nearest_replace_geom(left_gdf=water_no_ramps, right_gdf=endpoints_gdf)
# Both sets stacked into one layer
my_endpoints = pd.concat([ramps_in_water_sj, lakes_no_ramp_sj])

In [None]:
# Pair every endpoint row with its nearest road/boundary intersection point
joined = my_endpoints.sjoin_nearest(intersection_points_gdf, how="left")

In [None]:
# Calculate road distance
# Sources, targets and the road network, each labelled as EPSG:3857
source_gdf, target_gdf, network_gdf = (
    layer.set_crs(3857, allow_override = True)
    for layer in (my_endpoints, intersection_points_gdf, my_roads)
)

# Ensure all GeoDataFrames use the same CRS
crs_consistent = source_gdf.crs == target_gdf.crs == network_gdf.crs
if not crs_consistent:
    target_gdf = target_gdf.to_crs(source_gdf.crs)
    network_gdf = network_gdf.to_crs(source_gdf.crs)

# Build a graph from the polyline network
def build_network(gdf):
    """
    Build an undirected graph from a line layer.

    Every vertex of every line becomes a node (keyed by its coordinate tuple)
    and every pair of consecutive vertices becomes an edge whose 'weight' is
    the planar (x, y) length of that segment. MultiLineString features
    contribute each of their parts; missing, empty and non-linear geometries
    are skipped.

    Parameters:
    - gdf (GeoDataFrame): Layer with line geometries.

    Returns:
    - networkx.Graph: The segment graph.
    """
    import math  # only needed here

    def _line_parts(geom):
        """Return the LineStrings that make up geom (empty list if none)."""
        if geom is None or geom.is_empty:
            return []
        if isinstance(geom, LineString):
            return [geom]
        if isinstance(geom, MultiLineString):
            return list(geom.geoms)
        return []

    G = nx.Graph()
    for geom in gdf.geometry:
        for line in _line_parts(geom):
            coords = list(line.coords)
            # Consecutive vertex pairs form the edges
            for start, end in zip(coords, coords[1:]):
                G.add_edge(
                    start,
                    end,
                    weight=math.hypot(end[0] - start[0], end[1] - start[1])
                )
    return G

network_graph = build_network(network_gdf)

def find_nearest_node(graph, point):
    """
    Return the graph node closest to a geometry.

    Parameters:
    - graph: Graph whose nodes are coordinate tuples.
    - point: Geometry to measure from.

    Returns:
    - The node with the smallest distance to point (the first one on ties).
    """
    # Linear scan over all nodes; min() keeps the first of equally close nodes
    return min(graph.nodes, key=lambda node: Point(node).distance(point))

# Calculate shortest distances for an array of precomputed nearest neighbor pairs
def calculate_shortest_distances_with_pairs(point_pairs, source_gdf, target_gdf, network_graph):
    """
    Network distance for each (source, target) ID pair.

    For every pair the source geometry (looked up by 'epointID' in source_gdf)
    and the target geometry (looked up by 'intID' in target_gdf) are each
    attached to their nearest graph node, and the weighted shortest path
    between the two nodes is measured.

    Parameters:
    - point_pairs: Iterable of (epointID, intID) pairs.
    - source_gdf (GeoDataFrame): Layer with an 'epointID' column.
    - target_gdf (GeoDataFrame): Layer with an 'intID' column.
    - network_graph (networkx.Graph): Graph with 'weight' edge attributes.

    Returns:
    - list of dict: One record per input pair with keys 'epointID', 'intID'
      and 'network_distance' (float('inf') when no path exists).

    Raises:
    - IndexError: If an ID is not present in its layer.
    """
    # The nearest-node search scans every graph node, and the same IDs recur in
    # many pairs, so node lookups and path lengths are computed once and reused.
    source_nodes = {}
    target_nodes = {}
    path_lengths = {}

    def _node_for(cache, gdf, id_column, feature_id):
        """Nearest graph node for the first row of gdf whose id_column equals feature_id."""
        if feature_id not in cache:
            geom = gdf[gdf[id_column] == feature_id].iloc[0].geometry
            cache[feature_id] = find_nearest_node(network_graph, geom)
        return cache[feature_id]

    results = []

    for pair in point_pairs:
        source_id, target_id = pair

        # Find nearest network nodes for source and target
        source_node = _node_for(source_nodes, source_gdf, "epointID", source_id)
        target_node = _node_for(target_nodes, target_gdf, "intID", target_id)

        # Compute the shortest path distance along the network
        node_pair = (source_node, target_node)
        if node_pair not in path_lengths:
            try:
                path_lengths[node_pair] = nx.shortest_path_length(
                    network_graph, source_node, target_node, weight="weight"
                )
            except nx.NetworkXNoPath:
                path_lengths[node_pair] = float("inf")  # No path found

        # Record the result
        results.append({
            "epointID": source_id,
            "intID": target_id,
            "network_distance": path_lengths[node_pair]
        })

    return results

# (epointID, intID) pairs taken from the nearest-intersection join
point_pairs = joined.loc[:, ['epointID', 'intID']].to_numpy()
#point_pairs = point_pairs[:5] # Test with subset first
# Network distance for every pair
shortest_distances = calculate_shortest_distances_with_pairs(
    point_pairs=point_pairs,
    source_gdf=source_gdf,
    target_gdf=target_gdf,
    network_graph=network_graph,
)

# One row per pair
results_df = pd.DataFrame(data=shortest_distances)

In [None]:
dist_df = pd.merge(results_df, intersection_points_gdf, on = 'intID', how = 'left')
dist_df['total_dist_roads'] = dist_df['network_distance'] + dist_df['distance_to_nearest']
ramps_w_dist = pd.merge(ramps_in_water_sj, dist_df, on = 'epointID', how = 'left')
lakes_no_ramp_w_dist = pd.merge(lakes_no_ramp_sj, dist_df, on = 'epointID', how = 'left')
final_dist_df = pd.concat([ramps_w_dist, lakes_no_ramp_w_dist])

In [None]:
# Identify shortest distance by waterID
# The distance column is named 'total_dist_roads' where it is created above.
# Rows without a waterID or a distance cannot take part in the minimum, and a
# fresh 0..n-1 index makes the idxmin labels refer to exactly one row each.
dist_rows = final_dist_df.dropna(subset=["waterID", "total_dist_roads"]).reset_index(drop=True)
min_dist_to_source = dist_rows.loc[dist_rows.groupby("waterID")["total_dist_roads"].idxmin()]
# Join back to waterbody shapefile (rasterized in the export step)
water_w_dist = pd.merge(buffered_water, min_dist_to_source, on = 'waterID', how = 'left')

In [None]:
def export_road_dist(joined_gdf: gpd.GeoDataFrame, resolution: int = 1000,
                     column_name: str = "total_dist_roads", band_name: str = "dist_roads",
                     crs=None):
    """
    Rasterize a distance column, write it as a single-band GeoTIFF and plot it.

    Parameters:
        joined_gdf (GeoDataFrame): Layer whose geometries are burned into the raster.
        resolution (int): Cell size, in the units of the layer's coordinates.
        column_name (str): Column holding the value burned for each geometry.
        band_name (str): Label used in the output file name, the band
            description and the colorbar label.
        crs: CRS written to the GeoTIFF; defaults to joined_gdf.crs.

    Output:
        Writes '<my_path><my_training_state>_<band_name>_unifested.tif' and
        shows a plot of the raster. Cells not covered by any geometry are 0.

    Raises:
        KeyError: If column_name is not a column of joined_gdf.
    """
    # The body needs these names and no other cell in the notebook binds them,
    # so they are imported here.
    import numpy as np
    import rasterio
    from rasterio.features import rasterize

    if column_name not in joined_gdf.columns:
        raise KeyError(f"Column '{column_name}' not found in joined_gdf")
    if crs is None:
        crs = joined_gdf.crs

    # Raster grid anchored at the upper-left corner of the layer extent
    bounds = joined_gdf.total_bounds  # (minx, miny, maxx, maxy)
    transform = rasterio.transform.from_origin(bounds[0], bounds[3], resolution, resolution)
    # (rows, columns); at least one cell in each direction so the file is writable
    out_shape = (
        max(1, int(np.ceil((bounds[3] - bounds[1]) / resolution))),
        max(1, int(np.ceil((bounds[2] - bounds[0]) / resolution)))
    )

    raster = rasterize(
        [(geom, value) for geom, value in zip(joined_gdf.geometry, joined_gdf[column_name])],
        out_shape=out_shape,
        transform=transform,
        fill=0,
        dtype=rasterio.float32
    )

    output_filename = f"{my_path}{my_training_state}_{band_name}_unifested.tif"

    with rasterio.open(
        output_filename, "w",
        driver="GTiff",
        height=out_shape[0],
        width=out_shape[1],
        count=1,
        dtype=rasterio.float32,
        crs=crs,
        transform=transform
    ) as dst:
        dst.write(raster, 1)
        dst.set_band_description(1, band_name)  # Set band name

    # Plot the raster
    plt.figure(figsize=(10, 6))
    plt.imshow(raster, cmap='viridis', extent=[bounds[0], bounds[2], bounds[1], bounds[3]])
    plt.colorbar(label=f'{band_name} Richness')
    plt.title('Rasterized GeoDataFrame')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')
    plt.show()

In [None]:
# Rasterize, write and plot the per-waterbody distances
export_road_dist(joined_gdf=water_w_dist)