# Spatial Distributions

In [None]:
import geopandas as gpd
import rasterio
from rasterio.warp import calculate_default_transform, reproject, Resampling
from pathlib import Path
import rasterio
from rasterstats import zonal_stats
import pandas as pd

## Land Cover

In [None]:
# Tributary watershed polygons, loaded and moved into the notebook's working
# CRS in a single step.
tributaries = gpd.read_file(
    Path("..", "Data", "TributaryBoundaries", "tributaries.shp")
).to_crs(epsg=3435)
# Short labels are attached by row position, so this list must follow the
# feature order stored in the shapefile.
tributaries["ID"] = ["LCRW", "IBP", "CalUnion", "CherryCreek"]

# Percent-impervious raster (input) and the path its reprojected copy is
# written to, next to the input file.
src_raster = Path("..", "Data", "LandCover", "PercImpervLilCal.tif")
dst_raster = src_raster.with_name("PercImpervLilCal_reprojected.tif")

# CRS the raster is warped into.
# NOTE(review): EPSG:3435 is NAD83 / Illinois East with US survey feet as its
# linear unit, not metres - keep that in mind for any area or distance maths.
target_crs = "EPSG:3435"

with rasterio.open(src_raster) as src:
    # Grid (affine transform + pixel dimensions) covering the source extent
    # in the target CRS.
    transform, width, height = calculate_default_transform(
        src.crs, target_crs, src.width, src.height, *src.bounds
    )

    # Output profile: the source metadata with the georeferencing replaced.
    kwargs = {
        **src.meta,
        "crs": target_crs,
        "transform": transform,
        "width": width,
        "height": height,
    }

    with rasterio.open(dst_raster, "w", **kwargs) as dst:
        # Warp band by band; nearest-neighbour keeps the original cell values.
        for i in src.indexes:
            reproject(
                source=rasterio.band(src, i),
                destination=rasterio.band(dst, i),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=target_crs,
                resampling=Resampling.nearest,
            )

# Align the polygons with whatever CRS was actually written to the raster.
with rasterio.open(dst_raster) as src:
    tributaries = tributaries.to_crs(src.crs)

# Mean raster value inside each polygon, returned as GeoJSON-like features
# whose properties carry the statistic.
stats = zonal_stats(tributaries, dst_raster, stats="mean", geojson_out=True)

# zonal_stats yields one feature per polygon, in input order, so the means can
# be assigned positionally.
tributaries["Impervious_mean"] = [feat["properties"]["mean"] for feat in stats]

# Summary: one row per tributary.
print(tributaries.loc[:, ["ID", "Impervious_mean"]])

## Soils

In [None]:
# USDA soil polygons, read from disk and reprojected to EPSG:3435 in one step.
soil_cover = gpd.read_file(
    Path("..", "Data", "Soils", "USDA_soils.shp")
).to_crs(epsg=3435)

In [None]:
# Lookup table pairing each soil map-unit symbol (MUSYM) with its name
# (MUName), indexed by the FID column of the spreadsheet.
soil_names = pd.read_excel(
    Path("..") / "Data" / "Soils" / "soil_names.xlsx", sheet_name="Sheet2"
).set_index("FID")

# Keep a single name per symbol so the join below cannot multiply soil
# polygons. The first MUName seen for a MUSYM wins, which is only safe if
# MUName is consistent per MUSYM.
soil_names_clean = soil_names[["MUSYM", "MUName"]].drop_duplicates(subset="MUSYM")

# Attach the names to the soil polygons. Any MUName column left behind by an
# earlier run of this cell is dropped first; otherwise a re-run would produce
# MUName_x / MUName_y and break the later groupby('MUName').
soil_cover = soil_cover.drop(columns="MUName", errors="ignore").merge(
    soil_names_clean, on="MUSYM", how="left"
)

# Work on a reprojected copy in UTM zone 16N (EPSG:32616, metres) so polygon
# areas come out in square metres; to_crs already returns a new object.
gdf = soil_cover.to_crs(epsg=32616)

# Polygon area converted from m² to km².
gdf["area_km2"] = gdf.geometry.area / 1e6

# Total mapped soil area in km².
total_area = gdf["area_km2"].sum()


In [None]:
# Independent copy so the summary below cannot modify the soil layer.
df = gdf.copy()

# Area (km²) summed per soil name; rows without a MUName are left out of the
# grouping.
per_name_area = df.groupby("MUName", dropna=True)["area_km2"].sum()
area_by_group = per_name_area.reset_index()

# Denominator is the area of *all* rows, including any without a name, so the
# percentages only add up to 100 when every polygon has a MUName.
total_area = df["area_km2"].sum()

# Share of the total area per soil name, largest first.
area_by_group = area_by_group.assign(
    percent_of_total=(area_by_group["area_km2"] / total_area) * 100
).sort_values(by="percent_of_total", ascending=False)

In [None]:
# Put the soil polygons in the same CRS as the tributaries so the overlay
# works on matching coordinates.
gdf = gdf.to_crs(tributaries.crs)

# Cut every soil polygon by every tributary polygon; each output row is one
# soil/tributary piece carrying the attributes of both layers.
intersections = gpd.overlay(gdf, tributaries, how='intersection')

# Area of each piece in km². The overlay CRS is the tributaries' CRS, which
# this notebook sets to EPSG:3435 (US survey feet), so taking geometry.area
# there and dividing by 1e6 would NOT give km². Measure in UTM zone 16N
# (EPSG:32616, metres) instead, the same metric CRS used for area_km2.
intersections['intersect_area_km2'] = (
    intersections.geometry.to_crs(epsg=32616).area / 1e6
)

# Total intersected area per tributary (the denominator for the percentages).
total_area_per_poly = intersections.groupby('ID')['intersect_area_km2'].sum().rename('total_area')

# Bring each tributary's total back onto its pieces and express every piece
# as a percentage of that total.
intersections = intersections.merge(total_area_per_poly, on='ID')
intersections['percentage'] = 100 * intersections['intersect_area_km2'] / intersections['total_area']

# Pivot: soil name (MUName) as rows, tributary ID as columns, summed
# percentage as values; combinations that never occur are filled with 0.
summary = intersections.pivot_table(
    index='MUName',
    columns='ID',
    values='percentage',
    aggfunc='sum',
    fill_value=0
)

# Round for presentation and order the rows alphabetically by soil name.
soil_type_distribution = summary.round(2).sort_index()

# Sanity check (run manually): summary.sum(axis=0) should be close to 100 for
# every tributary. NOTE(review): pieces with a missing MUName presumably do
# not appear in the pivot, which would pull a column's sum below 100.

# View the table
soil_type_distribution


## Surficial Geology

In [None]:
# Surficial geology polygons, read from disk and reprojected (unconditionally)
# to EPSG:3435.
geology_cover = gpd.read_file(
    Path("..", "Data", "SurficialGeology", "ISGS_WestLCRW.shp")
).to_crs(epsg=3435)

In [None]:
# Independent copy of the geology layer. Note that this rebinds the shared
# notebook name `gdf`.
gdf = geology_cover.copy()

# Reproject to UTM zone 16N (EPSG:32616) so areas are measured in metres.
surficial_gdf = gdf.to_crs(epsg=32616)

# Polygon area converted from m² to km².
surficial_gdf["area_km2"] = surficial_gdf.geometry.area / 1e6

# Separate copy used for the tabular summary below.
surficial_df = surficial_gdf.copy()

# Area (km²) summed per geologic unit abbreviation; rows without a GUAbbrev
# are left out of the grouping.
per_unit_area = surficial_df.groupby("GUAbbrev", dropna=True)["area_km2"].sum()
area_by_group = per_unit_area.reset_index()

# Denominator is the area of all rows, grouped or not.
total_area = surficial_df["area_km2"].sum()

# Share of the total area per unit, largest first.
area_by_group = area_by_group.assign(
    percent_of_total=(area_by_group["area_km2"] / total_area) * 100
).sort_values(by="percent_of_total", ascending=False)

In [None]:
# Put the geology polygons in the same CRS as the tributaries so the overlay
# works on matching coordinates.
surficial_gdf = surficial_gdf.to_crs(tributaries.crs)

# Cut every geology polygon by every tributary polygon; each output row is
# one geology/tributary piece carrying the attributes of both layers.
intersections = gpd.overlay(surficial_gdf, tributaries, how='intersection')

# Area of each piece in km². The overlay CRS is the tributaries' CRS, which
# this notebook sets to EPSG:3435 (US survey feet), so taking geometry.area
# there and dividing by 1e6 would NOT give km². Measure in UTM zone 16N
# (EPSG:32616, metres) instead, the same metric CRS used for area_km2.
intersections['intersect_area_km2'] = (
    intersections.geometry.to_crs(epsg=32616).area / 1e6
)

# Total intersected area per tributary (the denominator for the percentages).
total_area_per_poly = intersections.groupby('ID')['intersect_area_km2'].sum().rename('total_area')

# Bring each tributary's total back onto its pieces and express every piece
# as a percentage of that total.
intersections = intersections.merge(total_area_per_poly, on='ID')
intersections['percentage'] = 100 * intersections['intersect_area_km2'] / intersections['total_area']

# Pivot: geologic unit (GUAbbrev) as rows, tributary ID as columns, summed
# percentage as values; combinations that never occur are filled with 0.
pivot = intersections.pivot_table(
    index='GUAbbrev',
    columns='ID',
    values='percentage',
    aggfunc='sum',
    fill_value=0
)

# Round for presentation and order the rows alphabetically by unit.
geological_distribution = pivot.round(2).sort_index()

# Sanity check (run manually): pivot.sum(axis=0) should be close to 100 for
# every tributary. NOTE(review): pieces with a missing GUAbbrev presumably do
# not appear in the pivot, which would pull a column's sum below 100.

# View the table
geological_distribution

In [None]:
# Persist both distribution tables to the working directory.
# DataFrame.to_pickle writes the file and returns None, so its result must not
# be assigned back: doing so would replace the table with None and make a
# re-run of this cell (or any later use of the table) fail.
soil_type_distribution.to_pickle('soil_type_distribution.pkl')
geological_distribution.to_pickle('geological_distribution.pkl')
