## Note

This script uses the output from Mohamed's R script containing depressionDepth_m, depressionAreaFrac and deprCatchAreaFrac.

In [1]:
import pandas as pd
import geopandas as gpd
import networkx as nx
import numpy as np

### Read Data

In [2]:
# GeoData table: tab-separated, first row is the header, first column is the index
geodata = pd.read_csv(
    '../../model/add_HDS_GeoData.txt',
    sep='\t',
    index_col=0,
    header=0,
)

In [3]:
# HDS (depression) parameter table: first row is the header, first column is the index
hds_params = pd.read_csv(
    '../../geospacial/depressions/Milk_HDS_parameters.csv',
    index_col=0,
    header=0,
)

In [4]:
# Catchment shapefile; its 'hru_nhm' and 'seg_nhm' attributes are used below
# to translate HRU ids into river-segment ids.
shapefile= gpd.read_file('../../geospacial/shapefiles/modified_shapefiles/Modified_SMMcat.shp')

In [5]:
# Name of the SLC column used as the Milk "ilake" class. Further down this
# column is overwritten with depressionAreaFrac and the remaining SLC
# fractions are rescaled around it.
milk_ilake= 'SLC_62'

### Remove HDS from St. Mary

In [6]:
# Cast every id that is later used as a join/lookup key to integer so the
# three tables can be matched against each other.
for table in (geodata, hds_params):
    table.index = table.index.astype(int)

for id_column in ('hru_nhm', 'seg_nhm'):
    shapefile[id_column] = shapefile[id_column].astype(int)

In [7]:
# Lookup table: HRU id ('hru_nhm') -> river segment id ('seg_nhm')
hru_seg_dict = shapefile.set_index('hru_nhm')['seg_nhm'].to_dict()

In [8]:
# Re-key the HDS parameters from HRU id to river-segment id using hru_seg_dict.
# NOTE: Index.map with a dict yields NaN for any id missing from the mapping
# instead of raising, so unmatched HRUs are not reported here.
hds_params.index = hds_params.index.map(hru_seg_dict)

In [9]:
# Preview the re-keyed HDS parameter table
hds_params

Unnamed: 0_level_0,depressionDepth_m,depressionAreaFrac,deprCatchAreaFrac,Unnamed: 4
subid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
58183,0.000000,0.000000,0.000000,
58184,0.000000,0.000000,0.000000,
58185,0.000000,0.000000,0.000000,
58186,0.000000,0.000000,0.000000,
58188,0.000000,0.000000,0.000000,
...,...,...,...,...
58671,0.774399,0.264501,1.000000,
58672,1.559696,0.236104,0.980646,
58673,2.651541,0.287069,1.000000,
58674,0.527663,0.112770,1.000000,


In [10]:
# Drop the empty trailing column that shows up as 'Unnamed: 4' in the table
# read from the CSV. errors='ignore' keeps the cell safe to re-run once the
# column has already been removed (otherwise drop raises KeyError).
hds_params = hds_params.drop(columns=['Unnamed: 4'], errors='ignore')

In [11]:
# Build a directed graph of the river network from GeoData: one edge per row, from 'subid' to its downstream 'maindown'
riv_graph = nx.from_pandas_edgelist(
    geodata.reset_index(),  # expose the 'subid' index as a regular column
    source='subid',
    target='maindown',
    create_using=nx.DiGraph,
)

In [12]:
# St. Mary segments: segment 58183 plus every node with a path to it in the
# river graph (i.e. everything that drains into it)
stmary = [58183, *nx.ancestors(riv_graph, 58183)]

In [13]:
# Number of segments collected for St. Mary (58183 itself plus its ancestors)
len(stmary)

40

In [14]:
# Drop every row that belongs to a St. Mary segment so only the Milk is left
is_stmary = hds_params.index.isin(stmary)
hdsdepths_filtered = hds_params[~is_stmary]

In [15]:
# Preview the HDS parameters that remain after removing the St. Mary segments
hdsdepths_filtered

Unnamed: 0_level_0,depressionDepth_m,depressionAreaFrac,deprCatchAreaFrac
subid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
58242,0.335392,0.053234,0.637170
58243,0.271259,0.081200,0.825028
58244,0.362661,0.089438,0.660783
58245,0.090837,0.051981,0.193307
58246,0.140103,0.131132,0.654607
...,...,...,...
58671,0.774399,0.264501,1.000000
58672,1.559696,0.236104,0.980646
58673,2.651541,0.287069,1.000000
58674,0.527663,0.112770,1.000000


### Format GeoData

In [16]:
# Left-join the filtered HDS parameters onto GeoData by index; GeoData rows
# without a match keep NaN in the three added columns.
merged_geodata = geodata.merge(
    hdsdepths_filtered[['depressionDepth_m', 'depressionAreaFrac', 'deprCatchAreaFrac']],
    how='left',
    left_index=True,
    right_index=True,
)

In [17]:
# Preview GeoData with the three HDS columns appended
merged_geodata

Unnamed: 0_level_0,maindown,area,longitude,latitude,elev_mean,slope_mean,rivlen,SLC_1,SLC_2,SLC_3,...,SLC_111,SLC_112,SLC_113,SLC_114,SLC_115,SLC_116,SLC_117,depressionDepth_m,depressionAreaFrac,deprCatchAreaFrac
subid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
58675,-9999,2.730521e+08,-107.976305,48.849060,857.409912,0.00000,129339.991902,0.0,0.0,0.00013,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,1.179411,0.140129,0.813159
58674,-9999,2.727878e+08,-110.181345,48.645243,848.139893,0.00000,105620.001701,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.527663,0.112770,1.000000
58673,-9999,2.876408e+08,-109.130976,49.315396,986.273254,0.00000,188640.001102,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,2.651541,0.287069,1.000000
58672,-9999,9.586554e+08,-108.892603,49.169295,945.485352,0.00000,295200.002203,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,1.559696,0.236104,0.980646
58671,-9999,3.860121e+08,-108.661660,49.257402,919.654114,0.00000,202460.006602,0.0,0.0,0.00000,...,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.774399,0.264501,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58185,58184,5.667200e+06,-112.839964,49.570476,907.278564,0.00163,1671.934918,0.0,0.0,0.00000,...,0.000000,0.029762,0.000000,0.0,0.0,0.0,0.0,,,
58231,58228,1.663663e+08,-113.103429,49.481403,1041.111694,0.00285,37256.337498,0.0,0.0,0.00000,...,0.002104,0.003577,0.000000,0.0,0.0,0.0,0.0,,,
58184,58183,1.750700e+07,-112.859951,49.577578,922.685303,0.00001,4020.941947,0.0,0.0,0.00000,...,0.000000,0.050505,0.000000,0.0,0.0,0.0,0.0,,,
58228,58183,2.611930e+07,-112.932045,49.556180,944.902893,0.00483,11789.842472,0.0,0.0,0.00000,...,0.000000,0.022727,0.002674,0.0,0.0,0.0,0.0,,,


In [18]:
# Rows with no HDS parameters come out of the left merge as NaN; treat them
# as having no depressions by filling with 0.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated and, under pandas
# Copy-on-Write, only modifies a temporary copy and leaves the frame unchanged.
hds_columns = ['depressionDepth_m', 'depressionAreaFrac', 'deprCatchAreaFrac']
merged_geodata[hds_columns] = merged_geodata[hds_columns].fillna(0)

In [19]:
# Rename depressionDepth_m -> hds_depth and deprCatchAreaFrac -> icatch
merged_geodata = merged_geodata.rename(
    columns={
        'depressionDepth_m': 'hds_depth',
        'deprCatchAreaFrac': 'icatch',
    }
)

In [20]:
# Locate the positional range of the SLC columns: the first and last column
# whose name contains 'SLC'. Later cells slice rows with these positions.
filtered_columns = merged_geodata.filter(like='SLC').columns

if filtered_columns.empty:
    print("No columns with 'SLC' in the name found.")
else:
    first_slc_index = merged_geodata.columns.get_loc(filtered_columns[0])
    last_slc_index = merged_geodata.columns.get_loc(filtered_columns[-1])
    print("First column index with 'SLC':", first_slc_index)
    print("Last column index with 'SLC':", last_slc_index)

First column index with 'SLC': 7
Last column index with 'SLC': 123


In [21]:
# Overwrite the ilake SLC column (milk_ilake) with the depression area
# fraction; rows without HDS parameters were filled with 0 above.
merged_geodata[milk_ilake] = merged_geodata['depressionAreaFrac']

### Adjust Milk SLC fractions based on weights

In [22]:
# Rescale every non-ilake SLC fraction so that each row sums to 1 again now
# that the ilake column holds the depression area fraction:
#     new = old / (sum of SLC columns - ilake) * (1 - ilake)
# All rows are handled in one vectorised step instead of writing cell by cell.
slc_block = merged_geodata.iloc[:, first_slc_index:last_slc_index + 1]
other_slc_cols = [
    col for col in merged_geodata.columns
    if col.startswith('SLC_') and col != milk_ilake
]
ilake_frac = merged_geodata[milk_ilake]

# Combined fraction held by the SLC columns other than the ilake one
other_slc_total = slc_block.sum(axis=1) - ilake_frac

# A zero total would divide by zero and write NaN/inf into the table; leave
# those rows untouched and report them instead.
rescalable = other_slc_total != 0
if not rescalable.all():
    skipped_rows = list(merged_geodata.index[~rescalable])
    print(f"Warning: no non-ilake SLC area to rescale in rows {skipped_rows}; left unchanged")

merged_geodata.loc[rescalable, other_slc_cols] = (
    merged_geodata.loc[rescalable, other_slc_cols]
    .div(other_slc_total[rescalable], axis=0)
    .mul(1 - ilake_frac[rescalable], axis=0)
)


### Check that SLCs still sum to 1

In [23]:
# Sanity check: the SLC fractions of every row should add up to 1
slc_span = slice(first_slc_index, last_slc_index + 1)

for index, record in merged_geodata.iterrows():
    row_sum = record.iloc[slc_span].sum()

    # Rows within tolerance need no message
    if np.isclose(row_sum, 1, rtol=1e-6):
        continue

    print(f"Warning: Row {index} does not sum to 1 (Sum: {row_sum})")

In [24]:
# Reorder the columns: 'hds_depth' goes to the position of the first SLC
# column, and 'icatch' goes to the position 'hds_depth' then occupies.
columns = merged_geodata.columns.tolist()

columns.remove('hds_depth')
columns.insert(first_slc_index, 'hds_depth')

# Position is looked up before 'icatch' is taken out of the list
hds_depth_index = columns.index('hds_depth')
columns.remove('icatch')
columns.insert(hds_depth_index, 'icatch')

# Apply the new column order
merged_geodata = merged_geodata.loc[:, columns].copy()

In [25]:
# 'depressionAreaFrac' has already been copied into the ilake SLC column,
# so remove the helper column
merged_geodata = merged_geodata.drop(columns=['depressionAreaFrac'])

In [26]:
# Write the final GeoData table as tab-separated text, index included
merged_geodata.to_csv(
    '../../model/GeoData.txt',
    sep='\t',
    index=True,
)