This notebook generates the map statistics presented in the Gulf sturgeon substrate manuscript.

In [1]:
#########
# Imports
import sys, os
import numpy as np
import geopandas as gpd
import pandas as pd
from glob import glob

# NOTE(review): pd.option_context() only changes display options while it is used
# as a context manager (`with pd.option_context(...):`). Called bare, as here, it
# just builds the context object (its repr is echoed as the cell output) and no
# option is changed. Use pd.set_option(...) if the settings should apply to the
# whole notebook -- but be aware that display.max_rows=None makes every printed
# DataFrame show all of its rows.
pd.option_context('display.max_rows', None, 'display.max_columns', None)

<pandas._config.config.option_context at 0x2c1ce618fb0>

Get the paths to the shapefiles. Shapefiles with `proc` in their name have been post-processed in ArcGIS.

In [2]:
# Top-level folder that holds the substrate shapefiles; the 'Raw' sub-folder is
# searched below.
# NOTE(review): absolute, machine-specific path -- edit it to run elsewhere.
shpTopDir = r'D:\redbo_science\projects\GulfSturgeonProject_2025\ProcessedData\Substrate_Summaries\02_Substrate_Shps_Mosaic_Transects'
shpTopDir = os.path.normpath(shpTopDir)

# Get Raw Shps
# Only files whose name ends in 'proc.shp' are matched. glob() returns matches
# in arbitrary, filesystem-dependent order, so sort them: the row order of the
# merged table built from this list is then the same on every run and machine.
rawShpFiles = sorted(glob(os.path.join(shpTopDir, 'Raw', '*proc.shp')))
print(rawShpFiles)

['D:\\redbo_science\\projects\\GulfSturgeonProject_2025\\ProcessedData\\Substrate_Summaries\\02_Substrate_Shps_Mosaic_Transects\\Raw\\BCH_substrate_shps_mosaic_postproc.shp', 'D:\\redbo_science\\projects\\GulfSturgeonProject_2025\\ProcessedData\\Substrate_Summaries\\02_Substrate_Shps_Mosaic_Transects\\Raw\\BOU_substrate_shps_mosaic_postproc.shp', 'D:\\redbo_science\\projects\\GulfSturgeonProject_2025\\ProcessedData\\Substrate_Summaries\\02_Substrate_Shps_Mosaic_Transects\\Raw\\CHI_substrate_shps_mosaic_postproc.shp', 'D:\\redbo_science\\projects\\GulfSturgeonProject_2025\\ProcessedData\\Substrate_Summaries\\02_Substrate_Shps_Mosaic_Transects\\Raw\\CHU_substrate_shps_mosaic_postproc.shp', 'D:\\redbo_science\\projects\\GulfSturgeonProject_2025\\ProcessedData\\Substrate_Summaries\\02_Substrate_Shps_Mosaic_Transects\\Raw\\LEA_substrate_shps_mosaic_postproc.shp', 'D:\\redbo_science\\projects\\GulfSturgeonProject_2025\\ProcessedData\\Substrate_Summaries\\02_Substrate_Shps_Mosaic_Transects\\R

# Prep Dataframes

In [3]:
def getBasin(river):
    """Return the basin name for a river code.

    'BCH' and 'PRL' map to 'Pearl Basin'; 'BOU', 'LEA', 'PAS', 'CHI' and 'CHU'
    map to 'Pascagoula Basin'. Any other value gives 'UNKNOWN'. The comparison
    is exact (case-sensitive).
    """
    # Guard clauses: leave as soon as the code is found in one of the groups
    if river in ('BCH', 'PRL'):
        return 'Pearl Basin'
    if river in ('BOU', 'LEA', 'PAS', 'CHI', 'CHU'):
        return 'Pascagoula Basin'
    return 'UNKNOWN'

In [4]:
# Function to open dataframe, get river code from name, and populate attributes
def getShp(f):
    """Read one shapefile and tag every row with river, model, basin and area.

    Parameters
    ----------
    f : str
        Path to the shapefile. The river code is the part of the file name
        before the first underscore; the model is the name of the folder that
        directly contains the file.

    Returns
    -------
    The table returned by ``gpd.read_file(f)`` with the columns 'river',
    'model', 'basin', 'Area_m' and 'Area_ha' set (an existing column of the
    same name is overwritten).
    """
    folder, fileName = os.path.split(f)
    river = fileName.split('_')[0]
    model = os.path.basename(folder)

    df = gpd.read_file(f)
    df['river'] = river
    df['model'] = model
    df['basin'] = getBasin(river)

    # Calculate area in hectares
    # NOTE(review): assumes the layer's CRS uses metres, so that geometry.area
    # is in square metres and * 0.0001 converts to hectares -- TODO confirm.
    polyArea = df.geometry.area
    df['Area_m'] = polyArea
    df['Area_ha'] = df['Area_m'] * 0.0001

    return df

In [5]:
# Get raw shapefiles to gdf
# Read every file first and concatenate once. Growing the table with
# pd.concat inside the loop re-copies all rows accumulated so far on each
# iteration and needs a special case for the first file. With an empty file
# list pd.concat raises ValueError here instead of rawAll being left undefined.
rawAll = pd.concat([getShp(f) for f in rawShpFiles], ignore_index=True)

print(rawAll)

       Substrate          Name      Area_m  ORIG_FID  Shape_Leng  Shape_Area  \
0            1.0  Fines Ripple   63.618457         1   37.686595   63.618457   
1            1.0  Fines Ripple  162.101403         2   88.098544  162.101403   
2            1.0  Fines Ripple   37.508845         5   38.489075   37.508845   
3            1.0  Fines Ripple   32.812989        10   54.857478   32.812989   
4            1.0  Fines Ripple   33.158594        34   32.868156   33.158594   
...          ...           ...         ...       ...         ...         ...   
67810        6.0         Other   35.234502     34395   65.827676   35.234502   
67811        6.0         Other   51.867065     34397   72.238404   51.867065   
67812        6.0         Other   28.242339     34398   44.936911   28.242339   
67813        6.0         Other   40.788610     34399   53.086000   40.788610   
67814        6.0         Other   36.838705     34400   41.240707   36.838705   

                                       

# Find the total mapped area for the study. 
Use the Raw shapefiles (merged into `rawAll`).

In [6]:
# Work on the merged raw polygons. This only binds a second name to rawAll;
# no copy is made.
df = rawAll

In [7]:
# Total mapped area
# Sum of the 'Area_ha' column over every row, printed rounded to 0 decimals.
totalArea = df['Area_ha'].to_numpy().sum()
print('Total Mapped Area:', np.around(totalArea, 0))

Total Mapped Area: 7391.0


In [8]:
# Pascagoula basin total area
# Keep only the rows tagged with this basin, then sum their hectares.
inPascagoula = df['basin'] == 'Pascagoula Basin'
pasTotalArea = df.loc[inPascagoula, 'Area_ha'].to_numpy().sum()
print('Pascagoula Mapped Area:', np.around(pasTotalArea, 0))

Pascagoula Mapped Area: 3223.0


In [9]:
# Pearl basin total area
# Keep only the rows tagged with this basin, then sum their hectares.
inPearl = df['basin'] == 'Pearl Basin'
prlTotalArea = df.loc[inPearl, 'Area_ha'].to_numpy().sum()
print('Pearl Mapped Area:', np.around(prlTotalArea, 0))

Pearl Mapped Area: 4169.0


In [10]:
# Find each river's mapped area
# One entry per river code, in order of first appearance in the table:
# the summed 'Area_ha' of that river's rows, rounded to 0 decimals.
riv = df['river'].unique()
rivAreaTotal = {
    r: np.around(df.loc[df['river'] == r, 'Area_ha'].to_numpy().sum(), 0)
    for r in riv
}

for river, area in rivAreaTotal.items():
    print(river, 'Mapped Area:', area)


BCH Mapped Area: 567.0
BOU Mapped Area: 115.0
CHI Mapped Area: 1415.0
CHU Mapped Area: 88.0
LEA Mapped Area: 1172.0
PAS Mapped Area: 433.0
PRL Mapped Area: 3602.0


# Get Polygon Counts and Proportions
Work on Raw shapefile first

In [11]:
# Point df back at the merged raw polygons (same object as rawAll, not a copy).
df = rawAll

In [12]:
# Polygon count and area summary ('Area_m' column) for every river x substrate pair.
# Rows are collected as plain dicts and turned into a DataFrame once at the end,
# instead of concatenating a one-row frame onto a growing (initially empty)
# frame on every iteration.
rivers = df['river'].unique()
substrate = df['Name'].unique()

records = []
for r in rivers:
    for s in substrate:
        # Areas of the polygons of substrate class s in river r
        areas = df.loc[(df['river'] == r) & (df['Name'] == s), 'Area_m']

        records.append({
            'river': r,
            'substrate': s,
            'total_poly': len(areas),
            # Series reductions give a scalar on every pandas version, whereas
            # np.min/np.max on a one-column DataFrame only do so on pandas >= 2.
            'min_poly_m': np.around(areas.min(), 1),
            'max_poly_m': np.around(areas.max(), 1),
            'total_poly_m': np.around(areas.sum(), 1),
        })

# Explicit column list keeps the column order fixed even when there are no rows
shpTotal = pd.DataFrame(
    records,
    columns=['river', 'substrate', 'total_poly', 'min_poly_m', 'max_poly_m', 'total_poly_m'],
)

print(shpTotal)


   river       substrate  total_poly  min_poly_m  max_poly_m  total_poly_m
0    BCH    Fines Ripple         677        28.9    280033.4     3030138.5
1    BCH      Fines Flat        1680        28.4     48325.8     1869999.5
2    BCH  Cobble Boulder         384         4.8      2558.9       32059.3
3    BCH     Hard Bottom         741        28.3      4558.2      208632.2
4    BCH            Wood        3256         3.1      2829.1      443423.5
5    BCH           Other        1037        28.2      1075.3       80819.1
6    BOU    Fines Ripple          41        31.0    430745.5      568199.0
7    BOU      Fines Flat         467        29.2    123545.6      337532.0
8    BOU  Cobble Boulder         146         3.1       568.2        9414.3
9    BOU     Hard Bottom         218        28.3     16031.8      143268.1
10   BOU            Wood         665         3.5      1684.4       80852.6
11   BOU           Other         181        28.3       431.1       13652.4
12   CHI    Fines Ripple 

In [13]:
print('Raw')
print('Model Polygon Summary')
df = shpTotal

# Study-wide polygon count and smallest / largest polygon area.
# The results are stored in minArea / maxArea so that the built-in min() and
# max() functions are not overwritten for the rest of the notebook.
t = np.sum(df['total_poly'].to_numpy())
minArea = np.min(df['min_poly_m'].to_numpy())
maxArea = np.max(df['max_poly_m'].to_numpy())

print('Total Polygons:', t, '\tMin Area:', minArea, '\tMax Area:', maxArea, '\n\n')

# Same summary per river; also collect one wide row per river holding the
# total area of each substrate (one column per substrate).
riverRows = []
rivers = df['river'].unique()
for r in rivers:
    dfSlice = df.loc[df['river'] == r]

    t = np.sum(dfSlice['total_poly'].to_numpy())
    minArea = np.min(dfSlice['min_poly_m'].to_numpy())
    maxArea = np.max(dfSlice['max_poly_m'].to_numpy())

    print(r, '\tTotal Polygons:', t, '\tMin Area:', minArea, '\tMax Area:', maxArea)

    riverRows.append(dfSlice.pivot(index='river', columns=['substrate'])['total_poly_m'])

    print('\n\n')

# Concatenate once; starting from an empty DataFrame and concatenating inside
# the loop is slower and relies on deprecated handling of empty frames.
shpProp = pd.concat(riverRows) if riverRows else pd.DataFrame()

# Capture the substrate columns before the row total is added as a column
substrates = shpProp.columns
shpProp['total_m'] = shpProp.sum(axis=1)

# Convert each substrate's area to a percentage of the river's total area
for s in substrates:
    shpProp[s] = np.around((shpProp[s]/shpProp['total_m'])*100, 1)
# NOTE: Soft and Hard are sums of the already rounded percentages, so they can
# differ by 0.1 from the percentage computed on the unrounded areas.
shpProp['Soft'] = shpProp['Fines Flat'] + shpProp['Fines Ripple']
shpProp['Hard'] = shpProp['Cobble Boulder'] + shpProp['Hard Bottom']
print(shpProp)

# Keep the per-river percentage columns of the Raw model under their own names
rawFR = shpProp['Fines Ripple']
rawFF = shpProp['Fines Flat']
rawCB = shpProp['Cobble Boulder']
rawHB = shpProp['Hard Bottom']
rawWD = shpProp['Wood']
rawSoft = shpProp['Soft']
rawHard = shpProp['Hard']


Raw
Model Polygon Summary
Total Polygons: 67815 	Min Area: 3.0 	Max Area: 3696010.5 


BCH 	Total Polygons: 7775 	Min Area: 3.1 	Max Area: 280033.4



BOU 	Total Polygons: 1718 	Min Area: 3.1 	Max Area: 430745.5



CHI 	Total Polygons: 14901 	Min Area: 3.1 	Max Area: 3696010.5



CHU 	Total Polygons: 1106 	Min Area: 4.0 	Max Area: 294424.1



LEA 	Total Polygons: 9671 	Min Area: 3.2 	Max Area: 1800748.0



PAS 	Total Polygons: 2204 	Min Area: 4.4 	Max Area: 1509827.5



PRL 	Total Polygons: 30440 	Min Area: 3.0 	Max Area: 3693297.5



substrate  Cobble Boulder  Fines Flat  Fines Ripple  Hard Bottom  Other  Wood  \
river                                                                           
BCH                   0.6        33.0          53.5          3.7    1.4   7.8   
BOU                   0.8        29.3          49.3         12.4    1.2   7.0   
CHI                   0.2         9.5          72.0         10.5    1.5   6.2   
CHU                   0.5        10.1          15.8   