In [1]:
import pandas as pd
import sys
import os

In [2]:
# Inputs: the file paths used in the rest of this notebook are built from these values.
# NOTE(review): path_general is an absolute, user-specific path; it is concatenated
# directly with sub-folder names further down, so it must end with '/'.
path_general = '/home/shg096/scratch/West/gistool_outputs/'
case_name = 'West'  # prefix of the '<case_name>_stats_*.csv' file names
minimume_fraction = 0.05 # land cover fractions below this value are set to 0 and the remaining fractions are renormalized

### Sanity check for soil type. The soil type may be set to 0 (unknown) for lakes or other water bodies. In this study such values, as well as any NaN values, are replaced with the majority soil type of the domain.

In [3]:
# Locations of the three gistool statistics tables (soil, land cover, elevation)
path_soil_type = f'{path_general}soil_classes/{case_name}_stats_soil_classes.csv'
path_landcover_type = f'{path_general}landsat/{case_name}_stats_NA_NALCMS_landcover_2020_30m.csv'
path_elevation_mean = f'{path_general}merit_hydro/{case_name}_stats_elv.csv'

# Read each table, order its rows by COMID and renumber the index from 0,
# so that row i refers to the same position in all three tables
soil_type = (
    pd.read_csv(path_soil_type)
    .sort_values(by='COMID')
    .reset_index(drop=True)
)
landcover_type = (
    pd.read_csv(path_landcover_type)
    .sort_values(by='COMID')
    .reset_index(drop=True)
)
elevation_mean = (
    pd.read_csv(path_elevation_mean)
    .sort_values(by='COMID')
    .reset_index(drop=True)
)


# Check that the COMIDs are identical in all three files (i.e. the files come from the same shapefile)

In [4]:
# Sanity check: the soil, land cover and elevation tables must describe exactly the
# same COMIDs. The tables are sorted by COMID when they are loaded, so they can be
# compared row by row.

# the number of rows must match before an element-wise comparison is possible
if len(soil_type) != len(landcover_type) or len(landcover_type) != len(elevation_mean):
    sys.exit('The provided length of soil, land cover and elevation is not identical')

# Compare the COMIDs element by element. Summing the differences is not a valid test:
# positive and negative differences can cancel out (e.g. [1, 4] versus [2, 3]).
comid_mismatch = (
    (soil_type['COMID'].values != landcover_type['COMID'].values).any()
    or (landcover_type['COMID'].values != elevation_mean['COMID'].values).any()
)
if comid_mismatch:
    sys.exit('The COMID of the shapefile in soil, land cover and elevation is not identical')

# soil maps sanity check

### If there are NaN values, replace them with the majority soil type in the domain
### If there are 0 (unidentified) values, replace them with the majority soil type in the domain

In [5]:
# Replace unknown (0) and missing (NaN) entries of the 'majority' soil column with the
# most common valid soil class of the domain, then save the result as a modified copy.
has_nan = soil_type['majority'].isna().any()

if has_nan:
    print("The 'majority' column has NaN values will be replace with majority.")

# Find the most common soil class. Unknown (0) and NaN entries are left out of the
# count: if 0 were the most frequent value it would be picked as the replacement and
# the unknown entries would never be fixed.
is_valid_soil = soil_type['majority'].notna() & (soil_type['majority'] != 0)
valid_soil_classes = soil_type.loc[is_valid_soil, 'majority']
if valid_soil_classes.empty:
    sys.exit("The 'majority' column has no valid (non-zero, non-NaN) soil type to use as replacement")
# mode() returns the tied values in sorted order; the first (smallest) one is used
majority_value = valid_soil_classes.mode().values[0]

# Replace 0 and NaN values with the majority value. The result is assigned back to the
# column: calling fillna(inplace=True) on a selected column acts on a temporary object
# and does not update the DataFrame when pandas Copy-on-Write is active.
soil_type['majority'] = soil_type['majority'].replace(0, majority_value).fillna(majority_value)

# save the modified file next to the original one, with the prefix 'modified_'
# get the file name and its path separated:
path_soil_type_path_name = os.path.dirname(path_soil_type)
path_soil_type_file_name = os.path.basename(path_soil_type)
# os.path.join keeps the output in the same folder even if the directory part is empty
soil_type.to_csv(os.path.join(path_soil_type_path_name, 'modified_'+path_soil_type_file_name), index=False)
# alternative kept from the original notebook (overwrites the input file instead):
#soil_type.to_csv(path_soil_type_path_name+'/'+path_soil_type_file_name, index=False)

# land cover map

### Land cover fractions smaller than a given minimum fraction are set to zero and the remaining fractions are rescaled so that they sum to one

In [6]:
# Land cover sanity check and renormalization:
#   1) fractions smaller than minimume_fraction are set to 0,
#   2) the remaining fractions of every row are rescaled so that they sum to 1.
# Both steps are done on whole columns at once instead of looping over the rows.
frac_columns = [col for col in landcover_type.columns if col.startswith('frac_')]

if frac_columns:
    fractions = landcover_type[frac_columns]

    # Step 1: drop the small fractions. NaN compares as False and is left untouched.
    fractions = fractions.mask(fractions < minimume_fraction, 0)

    # Step 2: normalize by the row sums (NaN values are skipped by sum).
    row_sum = fractions.sum(axis=1)
    has_land_cover = row_sum > 0

    # Rows without any remaining land cover cannot be normalized and are left as they are.
    if not has_land_cover.all():
        print("Number of rows with no land cover fraction above the minimum fraction "
              "(left without normalization): " + str(int((~has_land_cover).sum())))

    # dividing by 1 leaves the rows that cannot be normalized unchanged
    landcover_type[frac_columns] = fractions.div(row_sum.where(has_land_cover, 1), axis=0)

# save the modified file next to the original one, with the prefix 'modified_'
# get the file name and its path separated:
path_landcover_type_path_name = os.path.dirname(path_landcover_type)
path_landcover_type_file_name = os.path.basename(path_landcover_type)
# os.path.join keeps the output in the same folder even if the directory part is empty
landcover_type.to_csv(os.path.join(path_landcover_type_path_name, 'modified_'+path_landcover_type_file_name), index=False)
# alternative kept from the original notebook (overwrites the input file instead):
#landcover_type.to_csv(path_landcover_type_path_name+'/'+path_landcover_type_file_name, index=False)


# Mean elevation is set to zero if it is NaN (this won't affect mizuRoute routing).

### Assumes the shape lies in open water or at sea level.

In [7]:
# Replace missing (NaN) mean elevations with 0 and save the result as a modified copy.
has_nan = elevation_mean['mean'].isna().any()

if has_nan:
    print("The 'mean' column has NaN values will be replace by 0.")

# The result is assigned back to the column: calling fillna(inplace=True) on a selected
# column acts on a temporary object and does not update the DataFrame when pandas
# Copy-on-Write is active.
elevation_mean['mean'] = elevation_mean['mean'].fillna(0)


# save the modified file next to the original one, with the prefix 'modified_'
# get the file name and its path separated:
path_elevation_mean_path_name = os.path.dirname(path_elevation_mean)
path_elevation_mean_file_name = os.path.basename(path_elevation_mean)
# os.path.join keeps the output in the same folder even if the directory part is empty
elevation_mean.to_csv(os.path.join(path_elevation_mean_path_name, 'modified_'+path_elevation_mean_file_name), index=False)
# alternative kept from the original notebook (overwrites the input file instead):
#elevation_mean.to_csv(path_elevation_mean_path_name+'/'+path_elevation_mean_file_name, index=False)