Input data

In [None]:
# Define input and output folders.
# input_folder: directory scanned for the *.shp files to process.
# output_filtered_folder: directory that receives the filtered shapefiles; it
# is a subfolder of input_folder.
# NOTE(review): the directory names are used verbatim as paths, so their
# spelling presumably has to match the folders on disk -- confirm before
# "correcting" any of it.
input_folder = r"D:\River connectivity\Whole Danube Reach in Hungary\Historical Danube-Reprojected\clipped river centerline-segmnets-hymo segmnets-historical"
output_filtered_folder = r"D:\River connectivity\Whole Danube Reach in Hungary\Historical Danube-Reprojected\clipped river centerline-segmnets-hymo segmnets-historical\filtered for graph metrcis calculations- hymo-historical"

Processing code

In [None]:
import os
import geopandas as gpd
import networkx as nx
from shapely.geometry import LineString, MultiLineString
from shapely.ops import linemerge, unary_union

# Create the output folder (and any missing parents) up front; exist_ok=True
# makes re-running the notebook cell harmless when the folder already exists.
os.makedirs(output_filtered_folder, exist_ok=True)

def process_and_filter_shapefile(file_path, output_folder):
    """Keep only the largest connected line network of a shapefile and save it.

    All geometries in the shapefile are dissolved and merged into maximal
    polylines. The polylines are then treated as edges of a graph whose nodes
    are their end coordinates, and only the polylines belonging to the
    connected component with the most nodes are written to
    ``<output_folder>/filtered_<original file name>``.

    Args:
        file_path: Path of the shapefile to read.
        output_folder: Existing directory that receives the filtered
            shapefile.

    Returns:
        None. The result is written to disk and a status line is printed.
        Files that contain no usable line geometry are skipped with a
        message instead of being written.
    """
    # Read the shapefile
    gdf = gpd.read_file(file_path)

    # Dissolve everything into one geometry so touching segments can be merged
    dissolved = unary_union(gdf.geometry)
    if dissolved.is_empty:
        print(f"Skipped {file_path}: no line geometries found")
        return

    # A layer with a single feature dissolves to a bare LineString, which is
    # already "merged"; it is passed through rather than handed to linemerge,
    # which is meant for multi-part / iterable input.
    if isinstance(dissolved, LineString):
        merged = dissolved
    else:
        merged = linemerge(dissolved)

    # Normalise to a flat list of LineStrings
    if isinstance(merged, LineString):
        lines = [merged]
    elif isinstance(merged, MultiLineString):
        lines = list(merged.geoms)
    else:
        lines = []
    if not lines:
        print(f"Skipped {file_path}: no line geometries found")
        return

    # One feature per merged polyline
    sectioned_gdf = gpd.GeoDataFrame(geometry=lines, crs=gdf.crs)

    # Build the connectivity graph: nodes are polyline end coordinates.
    # The graph is used for connectivity only. A simple Graph stores a single
    # edge per node pair, so per-edge attributes cannot identify every line
    # when two lines share both endpoints (e.g. channels around an island).
    G = nx.Graph()
    G.add_edges_from((line.coords[0], line.coords[-1]) for line in lines)

    # Largest connected component, measured by number of nodes
    largest_cc = max(nx.connected_components(G), key=len)

    # Both ends of a line always lie in the same component, so testing the
    # start node is enough. Selecting by mask keeps every parallel line and
    # preserves the order of sectioned_gdf.
    in_mainstem = [line.coords[0] in largest_cc for line in lines]
    mainstem_gdf = sectioned_gdf.loc[in_mainstem]

    # Define the output path and save the filtered shapefile
    output_path = os.path.join(output_folder, f"filtered_{os.path.basename(file_path)}")
    mainstem_gdf.to_file(output_path)
    print(f"Exported filtered shapefile to {output_path}")

# Process each shapefile in the folder.
# The listing is sorted so the processing order (and the printed log) is
# reproducible, and the extension check is case-insensitive so files named
# "*.SHP" are not silently skipped.
for filename in sorted(os.listdir(input_folder)):
    if not filename.lower().endswith(".shp"):
        continue
    shapefile_path = os.path.join(input_folder, filename)
    process_and_filter_shapefile(shapefile_path, output_filtered_folder)

print("Completed processing and exporting filtered shapefiles.")