# Creation of GeoClass and GeoData info files

In [1]:
import pandas      as      pd
import numpy       as      np
import geopandas   as      gpd
import sys
from   itertools   import  product
import os
import datetime

In [2]:
# inputs
path_org = '/scratch/mia725/calibration_workflow/Bow_Banff/'

In [4]:
# load the information from the gistool for soil and land cover and find the number of geoclass
soil_type = pd.read_csv(path_org+'gistool-outputs/modified_domain_stats_soil_classes.csv')
landcover_type = pd.read_csv(path_org+'gistool-outputs/modified_domain_stats_NA_NALCMS_landcover_2020_30m.csv')
elevation_mean = pd.read_csv(path_org+'gistool-outputs/modified_domain_stats_elv.csv')

soil_type = soil_type.sort_values(by='COMID').reset_index(drop=True)
landcover_type = landcover_type.sort_values(by='COMID').reset_index(drop=True)
elevation_mean = elevation_mean.sort_values(by='COMID').reset_index(drop=True)

In [5]:
# 
if not os.path.isdir(path_org+'/HYPE'):
    os.makedirs(path_org+'/HYPE')

In [6]:
# find the combination of the majority soil and land cover
combinations_set_all = set()
for index, row in landcover_type.iterrows():
    # get the fraction for land cover for each row
    fractions = [col for col in landcover_type.columns if col.startswith('frac') and row[col] > 0.00]
    # remove frac_ from the list
    fractions = [col.split('_')[1] for col in fractions]
    fractions = [int(name) for name in fractions]

    # get the majority soil type for each row
    majority_soil = [soil_type['majority'].iloc[index].item()]

    # Combine as combination of soil and land cover and keep as a set
    combinations = list(product(fractions, majority_soil))
    combinations_set = set(combinations)
    combinations_set_all.update(combinations_set)

print(combinations_set_all)
print(len(combinations_set_all))

data_list = [{'landcover': item[0], 'soil': item[1]} for item in combinations_set_all]

# Create a pandas DataFrame from the list of dictionaries
combination = pd.DataFrame(data_list)

combination ['SLC'] = 0
combination ['SLC'] = np.arange(len(combination))+1

combination

{(4, 3.0), (16, 3.0), (17, 3.0), (7, 3.0), (18, 3.0), (8, 3.0), (19, 3.0), (10, 3.0), (1, 3.0), (9, 3.0), (11, 3.0), (12, 3.0), (2, 3.0), (13, 3.0), (3, 3.0), (14, 3.0), (15, 3.0), (5, 3.0), (6, 3.0)}
19


Unnamed: 0,landcover,soil,SLC
0,4,3.0,1
1,16,3.0,2
2,17,3.0,3
3,7,3.0,4
4,18,3.0,5
5,8,3.0,6
6,19,3.0,7
7,10,3.0,8
8,1,3.0,9
9,9,3.0,10


In [7]:
landcover_type_prepared = landcover_type.copy()

for i in range(1, len(combination)+1):
    column_name = f'SLC_{i}'
    landcover_type_prepared[column_name] = 0.00

landcover_type_prepared['soil'] = soil_type['majority']

def get_non_zero_columns(row):
    return [col for col in row.index if col.startswith('frac_') and row[col] != 0]

# Apply the function to each row
landcover_type_prepared['non_zero_columns'] = landcover_type_prepared.apply(get_non_zero_columns, axis=1)


for index, row in landcover_type_prepared.iterrows():
    # get the soil type
    soil_type_value = soil_type['majority'].iloc[index]

    for i in row['non_zero_columns']:

        # remove frac from column name 
        land_cover_value = i.replace("frac_", "")
        
        # get the SLC value
        result = combination[(combination['landcover'] == int(land_cover_value)) & (combination['soil'] == int(soil_type_value))]['SLC']
        column_name = 'SLC_'+str(result.values[0])
        landcover_type_prepared.loc[index, column_name] = landcover_type_prepared[i].iloc[index]
        
        

landcover_type_prepared

Unnamed: 0,COMID,lat,lon,frac_1,frac_2,frac_3,frac_4,frac_5,frac_6,frac_7,...,SLC_12,SLC_13,SLC_14,SLC_15,SLC_16,SLC_17,SLC_18,SLC_19,soil,non_zero_columns
0,71028585,51.167592,-115.573618,0.557947,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,...,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,3.0,"[frac_1, frac_2, frac_3, frac_4, frac_5, frac_..."
1,71028597,51.177004,-115.613003,0.451326,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_8, frac_10, frac_16, frac_18]"
2,71028609,51.172666,-115.649826,0.650584,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_10, frac_16]"
3,71028676,51.194885,-115.754929,0.584845,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_10, frac_16]"
4,71028700,51.231534,-115.839235,0.911125,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_16]"
5,71028740,51.267797,-115.895695,0.81209,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_16]"
6,71028852,51.308712,-116.018158,0.81303,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_16]"
7,71028904,51.351015,-116.101124,0.901948,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_16]"
8,71028957,51.391795,-116.124331,0.932756,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_16]"
9,71028974,51.437953,-116.152698,0.816139,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,"[frac_1, frac_10, frac_16]"


In [8]:
riv = gpd.read_file(path_org+'MERIT_geofabric/extracted_rivers.shp')
riv.sort_values(by='COMID').reset_index(drop=True)
riv['lengthm'] = 0.00
riv['lengthm'] = riv['lengthkm'] * 1000

cat = gpd.read_file(path_org+'MERIT_geofabric/extracted_subbasins.shp')
cat.sort_values(by='COMID').reset_index(drop=True)
cat['area'] = 0.00
cat['area'] = cat['unitarea'] * 1000000 # km2 to m2
cat['latitude'] = cat.centroid.y
cat['longitude'] = cat.centroid.x


  cat['latitude'] = cat.centroid.y

  cat['longitude'] = cat.centroid.x


In [9]:
# add information to the geodata dataframe
landcover_type_prepared['NextDownID'] = riv['NextDownID']
landcover_type_prepared['area'] = cat['area']
landcover_type_prepared['latitude'] = cat['latitude']
landcover_type_prepared['longitude'] = cat['longitude']
landcover_type_prepared['elev_mean'] = elevation_mean['mean']
landcover_type_prepared['slope_mean'] = riv['slope']
landcover_type_prepared['rivlen'] = riv['lengthm']
landcover_type_prepared['uparea'] = riv['uparea']

In [10]:
column_name_mapping = {
    'COMID': 'subid',
    'NextDownID': 'maindown',
    'area': 'area',
    'latitude': 'latitude',
    'longitude': 'longitude',
    'elev_mean': 'elev_mean',
    'slope_mean': 'slope_mean',
    'rivlen': 'rivlen'
}

# Rename the columns based on the dictionary
landcover_type_prepared = landcover_type_prepared.rename(columns=column_name_mapping)
landcover_type_prepared

#
slc_columns = [col for col in landcover_type_prepared.columns if col.startswith('SLC_')]

# Sort the columns as per your requirements
column_order = ['subid', 'maindown', 'area', 'latitude', 'longitude', 'elev_mean', 'slope_mean', 'rivlen'] + slc_columns + ['uparea']

landcover_type_prepared = landcover_type_prepared[column_order]
landcover_type_prepared

Unnamed: 0,subid,maindown,area,latitude,longitude,elev_mean,slope_mean,rivlen,SLC_1,SLC_2,...,SLC_11,SLC_12,SLC_13,SLC_14,SLC_15,SLC_16,SLC_17,SLC_18,SLC_19,uparea
0,71028585,-9999,6241355.0,51.167592,-115.573618,1540.534302,0.006318,3080.424631,1e-05,1e-05,...,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,2216.04038
1,71028597,71028585,11788640.0,51.177004,-115.613003,1606.422241,0.020344,4374.718469,0.0,0.222137,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2057.875154
2,71028609,71028597,18727430.0,51.172666,-115.649826,1851.788086,0.00011,2717.985655,0.0,0.276626,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2010.527636
3,71028676,71028609,120125600.0,51.194885,-115.754929,1887.362671,0.001438,19294.403255,0.0,0.28566,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1751.831428
4,71028700,71028676,12535720.0,51.231534,-115.839235,1655.126587,0.001781,4202.046576,0.0,0.088875,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1481.884755
5,71028740,71028700,62586610.0,51.267797,-115.895695,1918.708862,0.00197,7445.781995,0.0,0.18791,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1346.247821
6,71028852,71028740,113253700.0,51.308712,-116.018158,1858.003662,0.003578,15651.534969,0.0,0.18697,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1206.154627
7,71028904,71028852,28688600.0,51.351015,-116.101124,1826.938477,0.002678,4548.938408,0.0,0.098052,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,967.784251
8,71028957,71028904,29018260.0,51.391795,-116.124331,1798.771729,0.002882,4954.0764,0.0,0.067244,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,881.114677
9,71028974,71028957,13984260.0,51.437953,-116.152698,1885.56189,0.004735,1580.261818,0.0,0.072934,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,821.671852


In [11]:
landcover_type_prepared = landcover_type_prepared.sort_values(by='uparea').reset_index(drop=True)
landcover_type_prepared = landcover_type_prepared.drop(columns=['uparea'])
landcover_type_prepared.to_csv(path_org+'HYPE/GeoData.txt', sep='\t', index=False)
landcover_type_prepared

Unnamed: 0,subid,maindown,area,latitude,longitude,elev_mean,slope_mean,rivlen,SLC_1,SLC_2,...,SLC_10,SLC_11,SLC_12,SLC_13,SLC_14,SLC_15,SLC_16,SLC_17,SLC_18,SLC_19
0,71032292,71030270,25591310.0,51.625792,-116.088587,2506.075439,0.013547,493.825993,0.0,0.742346,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,71030896,71030100,26857360.0,51.147736,-115.853184,2289.016113,0.082352,1506.814916,0.0,0.274551,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,71031687,71030287,27933850.0,51.397574,-115.99937,2453.431641,0.024555,2073.364759,0.0,0.766328,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,71031654,71028957,30424570.0,51.452938,-116.113207,2210.857666,0.036514,2762.866768,0.0,0.313606,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,71031643,71029016,30687640.0,51.396484,-116.25443,2289.583252,0.056913,3493.652183,0.0,0.540049,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,71030527,71030071,31641690.0,51.03586,-115.667376,2251.640381,0.046684,3056.158382,0.0,0.475307,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,71031094,71029857,31697740.0,51.255897,-115.614486,2223.414062,0.058983,2188.585843,0.0,0.43614,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,71030784,71030302,35486860.0,51.124546,-115.901305,2190.634033,0.038086,4505.681011,0.0,0.17905,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,71030774,71028597,35558880.0,51.122304,-115.599617,1954.207153,0.038891,4778.277046,0.0,0.211891,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,71030563,71029963,36321530.0,51.059644,-115.72337,2209.296875,0.039513,5278.338646,0.0,0.353228,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
combination

combination = combination.rename(columns={'landcover': 'LULC'})
combination = combination.rename(columns={'soil': 'SOIL TYPE'})
combination = combination[['SLC','LULC','SOIL TYPE']]
combination['Main crop cropid'] = 0
combination['Second crop cropid'] = 0
combination['Crop rotation group'] = 0
combination['Vegetation type'] = 1
combination['Special class code'] = 0
combination['Tile depth'] = 0
combination['Stream depth'] = 2.296
combination['Number of soil layers'] = 3
combination['Soil layer depth 1'] = 0.091
combination['Soil layer depth 2'] = 0.493
combination['Soil layer depth 3'] = 2.296

combination

Unnamed: 0,SLC,LULC,SOIL TYPE,Main crop cropid,Second crop cropid,Crop rotation group,Vegetation type,Special class code,Tile depth,Stream depth,Number of soil layers,Soil layer depth 1,Soil layer depth 2,Soil layer depth 3
0,1,4,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
1,2,16,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
2,3,17,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
3,4,7,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
4,5,18,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
5,6,8,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
6,7,19,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
7,8,10,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
8,9,1,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
9,10,9,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296


In [13]:
# Add commented lines
commented_lines = [
"""! MODIS landcover													
! Add legend (raster value) and discription													
!	original legend (raster_value)	description											
!   1: 'Temperate or sub-polar needleleaf forest',
!   2: 'Sub-polar taiga needleleaf forest',
!   3: 'Tropical or sub-tropical broadleaf evergreen forest',
!   4: 'Tropical or sub-tropical broadleaf deciduous forest',
!   5: 'Temperate or sub-polar broadleaf deciduous forest',
!   6: 'Mixed forest',
!   7: 'Tropical or sub-tropical shrubland',
!   8: 'Temperate or sub-polar shrubland',
!   9: 'Tropical or sub-tropical grassland',
!   10: 'Temperate or sub-polar grassland',
!   11: 'Sub-polar or polar shrubland-lichen-moss',
!   12: 'Sub-polar or polar grassland-lichen-moss',
!   13: 'Sub-polar or polar barren-lichen-moss',
!   14: 'Wetland',
!   15: 'Cropland',
!   16: 'Barren lands',
!   17: 'Urban',
!   18: 'Water',
!   19: 'Snow and Ice',											
!													
!													
!													
! ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------													
!	SoilGrid V1												
!		original legend (raster_value)	description										
!	 C 	    1	 clay										
!	 SIC 	2	 silty clay										
!	 SC 	3	 sandy clay										
!	 CL 	4	 clay loam										
!	 SICL 	5	 silty clay loam										
!	 SCL 	6	 sandy clay loam										
!	 L   	7	 loam										
!	 SIL 	8	 silty loam										
!	 SL 	9	 sandy loam										
!	 SI 	10	 silt										
!	 LS 	11	 loamy sand										
!	 S  	12	 sand										
!													
!													
! ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------	
!          SLC	LULC	SOIL TYPE	Main crop cropid	Second crop cropid	Crop rotation group	Vegetation type	Special class code	Tile depth	Stream depth	Number of soil layers	Soil layer depth 1	Soil layer depth 2	Soil layer depth 3"""
]

# Open the file in write mode
with open(path_org+'/HYPE/GeoClass.txt', 'w') as file:
    # Write the commented lines
    for line in commented_lines:
        file.write(line + '\n')

# writing the `GeoClass.txt` file
with open(path_org+'/HYPE/GeoClass.txt', 'a') as file:
        combination.to_csv(file, sep='\t', index=False, header=False)