In [1]:
# Creation of geo class and geo data info files

In [2]:
import pandas      as      pd
import numpy       as      np
import geopandas   as      gpd
import sys
from   itertools   import  product
import os
import datetime

In [3]:
# inputs
path_org = '/home/shg096/scratch/test/'

In [4]:
# load the information from the gistool for soil and land cover and find the number of geoclass
soil_type = pd.read_csv(path_org+'gistool_outputs/soil_classes/modified_domain_stats_soil_classes.csv')
landcover_type = pd.read_csv(path_org+'gistool_outputs/landsat/modified_domain_stats_NA_NALCMS_landcover_2020_30m.csv')
elevation_mean = pd.read_csv(path_org+'gistool_outputs/merit_hydro/modified_domain_stats_elv.csv')

soil_type = soil_type.sort_values(by='COMID').reset_index(drop=True)
landcover_type = landcover_type.sort_values(by='COMID').reset_index(drop=True)
elevation_mean = elevation_mean.sort_values(by='COMID').reset_index(drop=True)

In [5]:
# 
if not os.path.isdir(path_org+'/HYPE'):
    os.makedirs(path_org+'/HYPE')

In [6]:
# find the combination of the majority soil and land cover
combinations_set_all = set()
for index, row in landcover_type.iterrows():
    # get the fraction for land cover for each row
    fractions = [col for col in landcover_type.columns if col.startswith('frac') and row[col] > 0.00]
    # remove frac_ from the list
    fractions = [col.split('_')[1] for col in fractions]
    fractions = [int(name) for name in fractions]

    # get the majority soil type for each row
    majority_soil = [soil_type['majority'].iloc[index].item()]

    # Combine as combination of soil and land cover and keep as a set
    combinations = list(product(fractions, majority_soil))
    combinations_set = set(combinations)
    combinations_set_all.update(combinations_set)

print(combinations_set_all)
print(len(combinations_set_all))

data_list = [{'landcover': item[0], 'soil': item[1]} for item in combinations_set_all]

# Create a pandas DataFrame from the list of dictionaries
combination = pd.DataFrame(data_list)

combination ['SLC'] = 0
combination ['SLC'] = np.arange(len(combination))+1

combination

{(4, 3.0), (16, 3.0), (17, 3.0), (7, 3.0), (18, 3.0), (8, 3.0), (19, 3.0), (10, 3.0), (1, 3.0), (9, 3.0), (11, 3.0), (12, 3.0), (2, 3.0), (13, 3.0), (3, 3.0), (14, 3.0), (15, 3.0), (5, 3.0), (6, 3.0)}
19


Unnamed: 0,landcover,soil,SLC
0,4,3.0,1
1,16,3.0,2
2,17,3.0,3
3,7,3.0,4
4,18,3.0,5
5,8,3.0,6
6,19,3.0,7
7,10,3.0,8
8,1,3.0,9
9,9,3.0,10


In [7]:
landcover_type_prepared = landcover_type.copy()

for i in range(1, len(combination)+1):
    column_name = f'SLC_{i}'
    landcover_type_prepared[column_name] = 0.00

landcover_type_prepared['soil'] = soil_type['majority']

def get_non_zero_columns(row):
    return [col for col in row.index if col.startswith('frac_') and row[col] != 0]

# Apply the function to each row
landcover_type_prepared['non_zero_columns'] = landcover_type_prepared.apply(get_non_zero_columns, axis=1)


for index, row in landcover_type_prepared.iterrows():
    # get the soil type
    soil_type_value = soil_type['majority'].iloc[index]

    for i in row['non_zero_columns']:

        # remove frac from column name 
        land_cover_value = i.replace("frac_", "")
        
        # get the SLC value
        result = combination[(combination['landcover'] == int(land_cover_value)) & (combination['soil'] == int(soil_type_value))]['SLC']
        column_name = 'SLC_'+str(result.values[0])
        landcover_type_prepared.loc[index, column_name] = landcover_type_prepared[i].iloc[index]
        
        

landcover_type_prepared

Unnamed: 0,COMID,lat,lon,frac_1,frac_2,frac_3,frac_4,frac_5,frac_6,frac_7,...,SLC_12,SLC_13,SLC_14,SLC_15,SLC_16,SLC_17,SLC_18,SLC_19,soil,non_zero_columns
0,71022153,52.276063,-113.868966,0.000010,0.00001,0.00001,0.00001,0.00001,0.00001,0.00001,...,0.00001,0.00001,0.00001,0.00001,0.000010,0.480598,0.00001,0.00001,3.0,"[frac_1, frac_2, frac_3, frac_4, frac_5, frac_..."
1,71022160,52.153275,-113.963718,0.000000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.000000,1.000000,0.00000,0.00000,3.0,[frac_15]
2,71022164,52.071025,-114.040900,0.000000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.000000,0.937372,0.00000,0.00000,3.0,"[frac_15, frac_18]"
3,71022165,52.062767,-114.069721,0.108643,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.068153,0.719236,0.00000,0.00000,3.0,"[frac_1, frac_14, frac_15, frac_18]"
4,71022168,52.054057,-114.096759,0.000000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.000000,1.000000,0.00000,0.00000,3.0,[frac_15]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405,71039048,50.912529,-115.465044,0.314622,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,3.0,"[frac_1, frac_10, frac_16]"
406,71039109,50.931672,-115.554518,0.264265,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,3.0,"[frac_1, frac_8, frac_10, frac_16]"
407,71039121,50.917342,-115.216567,0.318708,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,3.0,"[frac_1, frac_8, frac_10, frac_16]"
408,71039250,50.952414,-115.034746,0.507148,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,...,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,3.0,"[frac_1, frac_10, frac_16]"


In [8]:
riv = gpd.read_file(path_org+'domain/domain_riv.shp')
riv.sort_values(by='COMID').reset_index(drop=True)
riv['lengthm'] = 0.00
riv['lengthm'] = riv['lengthkm'] * 1000

cat = gpd.read_file(path_org+'domain/domain_cat.shp')
cat.sort_values(by='COMID').reset_index(drop=True)
cat['area'] = 0.00
cat['area'] = cat['unitarea'] * 1000000 # km2 to m2
cat['latitude'] = cat.centroid.y
cat['longitude'] = cat.centroid.x


  cat['latitude'] = cat.centroid.y

  cat['longitude'] = cat.centroid.x


In [9]:
# add information to the geodata dataframe
landcover_type_prepared['NextDownID'] = riv['NextDownID']
landcover_type_prepared['area'] = cat['area']
landcover_type_prepared['latitude'] = cat['latitude']
landcover_type_prepared['longitude'] = cat['longitude']
landcover_type_prepared['elev_mean'] = elevation_mean['mean']
landcover_type_prepared['slope_mean'] = riv['slope']
landcover_type_prepared['rivlen'] = riv['lengthm']
landcover_type_prepared['uparea'] = riv['uparea']

In [10]:
column_name_mapping = {
    'COMID': 'subid',
    'NextDownID': 'maindown',
    'area': 'area',
    'latitude': 'latitude',
    'longitude': 'longitude',
    'elev_mean': 'elev_mean',
    'slope_mean': 'slope_mean',
    'rivlen': 'rivlen'
}

# Rename the columns based on the dictionary
landcover_type_prepared = landcover_type_prepared.rename(columns=column_name_mapping)
landcover_type_prepared

#
slc_columns = [col for col in landcover_type_prepared.columns if col.startswith('SLC_')]

# Sort the columns as per your requirements
column_order = ['subid', 'maindown', 'area', 'latitude', 'longitude', 'elev_mean', 'slope_mean', 'rivlen'] + slc_columns + ['uparea']

landcover_type_prepared = landcover_type_prepared[column_order]
landcover_type_prepared

Unnamed: 0,subid,maindown,area,latitude,longitude,elev_mean,slope_mean,rivlen,SLC_1,SLC_2,...,SLC_11,SLC_12,SLC_13,SLC_14,SLC_15,SLC_16,SLC_17,SLC_18,SLC_19,uparea
0,71022153,-9999,4.132882e+07,52.276063,-113.868966,5,0.000972,9650.665556,0.00001,0.000010,...,0.00001,0.00001,0.00001,0.00001,0.00001,0.000010,0.480598,0.00001,0.00001,11718.725317
1,71022160,71022153,1.357867e+08,52.153275,-113.963718,5,0.000848,30018.433408,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,1.000000,0.00000,0.00000,11420.248116
2,71022164,71022160,1.100465e+07,52.071025,-114.040900,5,0.000969,4117.764753,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.937372,0.00000,0.00000,11158.223079
3,71022165,71022164,1.473209e+06,52.062767,-114.069721,5,0.000505,1382.912645,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.068153,0.719236,0.00000,0.00000,11121.798002
4,71022168,71022165,1.172964e+07,52.054057,-114.096759,5,0.000810,3820.904367,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,1.000000,0.00000,0.00000,11050.683368
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405,71039048,71035979,3.033944e+07,50.912529,-115.465044,5,0.036308,3473.762210,0.00000,0.512098,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,30.339439
406,71039109,71036389,4.198761e+07,50.931672,-115.554518,5,0.017588,6307.483695,0.00000,0.491015,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,41.987611
407,71039121,71035238,5.209443e+07,50.917342,-115.216567,5,0.028829,5537.016700,0.00000,0.488937,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,52.094426
408,71039250,71028304,2.795767e+07,50.952414,-115.034746,5,0.032782,2518.299361,0.00000,0.396983,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000,27.957672


In [11]:
landcover_type_prepared = landcover_type_prepared.sort_values(by='uparea').reset_index(drop=True)
landcover_type_prepared = landcover_type_prepared.drop(columns=['uparea'])
landcover_type_prepared.to_csv(path_org+'HYPE/GeoData.txt', sep='\t', index=False)
landcover_type_prepared

Unnamed: 0,subid,maindown,area,latitude,longitude,elev_mean,slope_mean,rivlen,SLC_1,SLC_2,...,SLC_10,SLC_11,SLC_12,SLC_13,SLC_14,SLC_15,SLC_16,SLC_17,SLC_18,SLC_19
0,71031130,71028757,2.507111e+07,51.232173,-114.810458,5,0.063497,218.630117,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000
1,71031429,71030308,2.518819e+07,51.344427,-115.456222,5,0.025596,327.762133,0.00000,0.643451,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000
2,71024578,71022164,2.542043e+07,52.110399,-114.073128,5,0.007512,730.218286,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.942776,0.00000,0.00000
3,71032292,71030270,2.559131e+07,51.625792,-116.088587,5,0.013547,493.825993,0.00000,0.742346,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.000000,0.00000,0.00000
4,71024619,71022248,2.579878e+07,52.096781,-114.716824,5,0.007155,1479.544192,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.750455,0.00000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405,71022168,71022165,1.172964e+07,52.054057,-114.096759,5,0.000810,3820.904367,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,1.000000,0.00000,0.00000
406,71022165,71022164,1.473209e+06,52.062767,-114.069721,5,0.000505,1382.912645,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.068153,0.719236,0.00000,0.00000
407,71022164,71022160,1.100465e+07,52.071025,-114.040900,5,0.000969,4117.764753,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,0.937372,0.00000,0.00000
408,71022160,71022153,1.357867e+08,52.153275,-113.963718,5,0.000848,30018.433408,0.00000,0.000000,...,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.000000,1.000000,0.00000,0.00000


In [12]:
combination

combination = combination.rename(columns={'landcover': 'LULC'})
combination = combination.rename(columns={'soil': 'SOIL TYPE'})
combination = combination[['SLC','LULC','SOIL TYPE']]
combination['Main crop cropid'] = 0
combination['Second crop cropid'] = 0
combination['Crop rotation group'] = 0
combination['Vegetation type'] = 1
combination['Special class code'] = 0
combination['Tile depth'] = 0
combination['Stream depth'] = 2.296
combination['Number of soil layers'] = 3
combination['Soil layer depth 1'] = 0.091
combination['Soil layer depth 2'] = 0.493
combination['Soil layer depth 3'] = 2.296

combination

Unnamed: 0,SLC,LULC,SOIL TYPE,Main crop cropid,Second crop cropid,Crop rotation group,Vegetation type,Special class code,Tile depth,Stream depth,Number of soil layers,Soil layer depth 1,Soil layer depth 2,Soil layer depth 3
0,1,4,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
1,2,16,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
2,3,17,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
3,4,7,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
4,5,18,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
5,6,8,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
6,7,19,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
7,8,10,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
8,9,1,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296
9,10,9,3.0,0,0,0,1,0,0,2.296,3,0.091,0.493,2.296


In [13]:
# Add commented lines
commented_lines = [
"""! MODIS landcover													
! Add legend (raster value) and discription													
!	original legend (raster_value)	description											
!   1: 'Temperate or sub-polar needleleaf forest',
!   2: 'Sub-polar taiga needleleaf forest',
!   3: 'Tropical or sub-tropical broadleaf evergreen forest',
!   4: 'Tropical or sub-tropical broadleaf deciduous forest',
!   5: 'Temperate or sub-polar broadleaf deciduous forest',
!   6: 'Mixed forest',
!   7: 'Tropical or sub-tropical shrubland',
!   8: 'Temperate or sub-polar shrubland',
!   9: 'Tropical or sub-tropical grassland',
!   10: 'Temperate or sub-polar grassland',
!   11: 'Sub-polar or polar shrubland-lichen-moss',
!   12: 'Sub-polar or polar grassland-lichen-moss',
!   13: 'Sub-polar or polar barren-lichen-moss',
!   14: 'Wetland',
!   15: 'Cropland',
!   16: 'Barren lands',
!   17: 'Urban',
!   18: 'Water',
!   19: 'Snow and Ice',											
!													
!													
!													
! ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------													
!	SoilGrid V1												
!		original legend (raster_value)	description										
!	 C 	    1	 clay										
!	 SIC 	2	 silty clay										
!	 SC 	3	 sandy clay										
!	 CL 	4	 clay loam										
!	 SICL 	5	 silty clay loam										
!	 SCL 	6	 sandy clay loam										
!	 L   	7	 loam										
!	 SIL 	8	 silty loam										
!	 SL 	9	 sandy loam										
!	 SI 	10	 silt										
!	 LS 	11	 loamy sand										
!	 S  	12	 sand										
!													
!													
! ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------	
!          SLC	LULC	SOIL TYPE	Main crop cropid	Second crop cropid	Crop rotation group	Vegetation type	Special class code	Tile depth	Stream depth	Number of soil layers	Soil layer depth 1	Soil layer depth 2	Soil layer depth 3"""
]

# Open the file in write mode
with open(path_org+'/HYPE/GeoClass.txt', 'w') as file:
    # Write the commented lines
    for line in commented_lines:
        file.write(line + '\n')

# writing the `GeoClass.txt` file
with open(path_org+'/HYPE/GeoClass.txt', 'a') as file:
        combination.to_csv(file, sep='\t', index=False, header=False)