## Note

This script uses the output from Mohamed's R script containing depressionDepth_m, depressionAreaFrac and deprCatchAreaFrac.

In [1]:
import pandas as pd
import geopandas as gpd
import networkx as nx
import numpy as np

### Read Data

In [2]:
# Load the GeoData table: tab-separated, first row is the header,
# first column becomes the index.
geodata_path = '../../model/add_HDS_GeoData.txt'
geodata = pd.read_csv(geodata_path, sep='\t', header=0, index_col=0)

In [3]:
# Load the depression (HDS) parameter table; first column becomes the index.
hds_params_path = '../../geospacial/depressions/Milk_HDS_parameters.csv'
hds_params = pd.read_csv(hds_params_path, header=0, index_col=0)

In [4]:
# Load the modified catchment shapefile (provides the 'hru_nhm' and 'seg_nhm' ids used below).
shapefile_path = '../../geospacial/shapefiles/modified_shapefiles/Modified_SMMcat.shp'
shapefile = gpd.read_file(shapefile_path)

In [5]:
# Name of the SLC column that represents the ilake class for the Milk
milk_ilake = "SLC_62"

### Remove HDS from St. Mary

In [6]:
# Force every id used for matching to plain integers so that lookups across
# the three tables compare like with like.
geodata.index = geodata.index.astype(int)
hds_params.index = hds_params.index.astype(int)
for id_column in ('hru_nhm', 'seg_nhm'):
    shapefile[id_column] = shapefile[id_column].astype(int)

In [7]:
# Lookup table: 'hru_nhm' id -> 'seg_nhm' id, taken row by row from the shapefile
hru_ids = shapefile['hru_nhm']
segment_ids = shapefile['seg_nhm']
hru_seg_dict = dict(zip(hru_ids, segment_ids))

In [8]:
# Re-index the depression parameters by river segment instead of HRU id.
# Ids missing from hru_seg_dict come out as NaN in the new index.
segment_index = hds_params.index.map(hru_seg_dict)
hds_params.index = segment_index

In [9]:
# Display the parameter table after re-indexing by river segment
hds_params

Unnamed: 0_level_0,depressionDepth_m,depressionAreaFrac,deprCatchAreaFrac,Unnamed: 4
subid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
58183,0.000000,0.000000,0.000000,
58184,0.000000,0.000000,0.000000,
58185,0.000000,0.000000,0.000000,
58186,0.000000,0.000000,0.000000,
58188,0.000000,0.000000,0.000000,
...,...,...,...,...
58671,0.774399,0.264501,1.000000,
58672,1.559696,0.236104,0.980646,
58673,2.651541,0.287069,1.000000,
58674,0.527663,0.112770,1.000000,


In [None]:
# Drop the empty trailing column ('Unnamed: 4') shown in the table above.
# The original read from `new_params`, which is never defined in this
# notebook; the frame that carries this column is `hds_params`.
hds_params = hds_params.drop(columns=['Unnamed: 4'])

In [10]:
# Create a directed graph (DiGraph) of the river network from the `geodata` table
# reset_index() turns the 'subid' index into a regular column, so each row
# becomes one edge subid -> maindown.
edge_table = geodata.reset_index()
riv_graph = nx.from_pandas_edgelist(
    edge_table,
    source='subid',
    target='maindown',
    create_using=nx.DiGraph,
)

In [6]:
# Collect the St. Mary segments: segment 58183 itself plus every node that
# has a path to it in the river graph.
stmary_outlet = 58183
stmary = [stmary_outlet, *nx.ancestors(riv_graph, stmary_outlet)]

In [7]:
# Number of St. Mary segments collected (segment 58183 plus its graph ancestors)
len(stmary)

40

In [8]:
# Remove all rows belonging to St. Mary so that only the Milk is left.
# The original filtered `hds_depths`, which is never defined in this notebook;
# the depression table that is loaded and re-indexed by river segment is
# `hds_params`, so that table is filtered here.
# NOTE(review): later cells still select '_count' and '_mean', which the
# hds_params table displayed above does not contain — confirm the intended columns.
hdsdepths_filtered = hds_params[~hds_params.index.isin(stmary)].copy()
# Expose the segment id as a 'seg_nhm' column, which the following
# set_index('seg_nhm') cell expects.
hdsdepths_filtered['seg_nhm'] = hdsdepths_filtered.index

### Format GeoData

In [9]:
# Index the filtered table by river segment ('seg_nhm'); the column is
# consumed by the index (drop=True is the default, spelled out here).
hdsdepths_filtered = hdsdepths_filtered.set_index(keys='seg_nhm', drop=True)


In [10]:
# Left-join the depression statistics onto GeoData by index, so every GeoData
# row is kept and rows without a match get NaN in the new columns.
# NOTE(review): the hds_params table displayed earlier has no '_count'/'_mean'
# columns — confirm hdsdepths_filtered still provides them.
depression_stats = hdsdepths_filtered[['_count', '_mean']]
merged_geodata = pd.merge(
    geodata,
    depression_stats,
    how='left',
    left_index=True,
    right_index=True,
)

In [11]:
# Replace NaN with 0 in the two merged columns (rows with no depression data).
# Done as one frame-level fillna with a per-column mapping: the previous
# `df[col].fillna(..., inplace=True)` form is chained assignment, which warns
# on recent pandas and does not update the frame under Copy-on-Write.
merged_geodata = merged_geodata.fillna({'_count': 0, '_mean': 0})

In [12]:
# Give the merged '_mean' column its final name, 'hds_depth'
column_renames = {'_mean': 'hds_depth'}
merged_geodata = merged_geodata.rename(columns=column_renames)

In [13]:
# Locate the positional span of the SLC columns: position of the first and
# of the last column whose name contains 'SLC'.
filtered_columns = merged_geodata.filter(like='SLC').columns

if len(filtered_columns) == 0:
    print("No columns with 'SLC' in the name found.")
else:
    all_columns = merged_geodata.columns
    first_slc_index = all_columns.get_loc(filtered_columns[0])
    last_slc_index = all_columns.get_loc(filtered_columns[-1])
    print("First column index with 'SLC':", first_slc_index)
    print("Last column index with 'SLC':", last_slc_index)

First column index with 'SLC': 7
Last column index with 'SLC': 123


In [14]:
# Convert the cell count to an area: count * area of one DEM cell.
# 19.88 x 31.21 is the DEM resolution taken from the raster properties
# (units are not stated here — presumably the same as 'area'; TODO confirm).
dem_cell_area = 19.88 * 31.21
merged_geodata['_count'] = merged_geodata['_count'] * dem_cell_area

In [15]:
# Depression fraction per row: depression area ('_count', already converted
# to an area) divided by the row's 'area'.
depression_area = merged_geodata['_count']
merged_geodata['hds_frac'] = depression_area.div(merged_geodata['area'])

In [16]:
# Overwrite the Milk ilake SLC column with the depression fraction
depression_fraction = merged_geodata['hds_frac']
merged_geodata[milk_ilake] = depression_fraction

### Adjust Milk SLC fractions based on weights

In [17]:
# Rescale every non-ilake SLC fraction so that each row sums to 1 again after
# the ilake column (milk_ilake) was overwritten:
#     new = old / (sum of the other SLCs) * (1 - ilake fraction)
# Computed for all rows at once on arrays instead of writing cell by cell
# inside iterrows(); the arithmetic order matches the original loop.
other_slcs = [
    col for col in merged_geodata.columns
    if col.startswith('SLC_') and col != milk_ilake
]

if other_slcs:
    ilake_frac = merged_geodata[milk_ilake].to_numpy(dtype=float)

    # Row total over the positional SLC span, minus the ilake share
    span_total = (
        merged_geodata.iloc[:, first_slc_index:last_slc_index + 1]
        .sum(axis=1)
        .to_numpy(dtype=float)
    )
    others_total = span_total - ilake_frac
    remaining_frac = 1 - ilake_frac

    # copy=True: the array is modified below and must be writable
    slc_values = merged_geodata[other_slcs].to_numpy(dtype=float, copy=True)

    # A row whose other SLCs sum to 0 has nothing to redistribute; dividing
    # would only produce NaN/inf, so such rows are left untouched (the
    # row-sum check that follows will still report them).
    rescalable = others_total != 0
    slc_values[rescalable] = (
        slc_values[rescalable] / others_total[rescalable, None]
    ) * remaining_frac[rescalable, None]

    merged_geodata[other_slcs] = slc_values


### Check that SLCs still sum to 1

In [18]:
# Verify that the SLC fractions of every row still add up to 1 and report
# each row that does not.
slc_span = slice(first_slc_index, last_slc_index + 1)
for subid, record in merged_geodata.iterrows():
    slc_total = record.iloc[slc_span].sum()

    # Rows within tolerance need no message
    if np.isclose(slc_total, 1, rtol=1e-6):
        continue

    print(f"Warning: Row {subid} does not sum to 1 (Sum: {slc_total})")

In [19]:
    # Move 'hds_depth' so that it sits directly in front of the first SLC column
    ordered = merged_geodata.columns.tolist()
    depth_position = ordered.index('hds_depth')
    depth_column = ordered.pop(depth_position)
    ordered.insert(first_slc_index, depth_column)
    merged_geodata = merged_geodata[ordered]

In [20]:
# Remove the helper columns ('hds_frac' and '_count') that were only needed
# to derive the ilake fraction.
helper_columns = ['hds_frac', '_count']
merged_geodata.drop(columns=helper_columns, inplace=True)

In [21]:
# Write the updated GeoData table as a tab-separated text file, index included
output_path = '../../model/depression_analysis/dimension_GeoData.txt'
merged_geodata.to_csv(output_path, sep='\t', index=True)