In [1]:
# Read the CAMELS attribute table, flag the attributes used in published models, and save the result

In [2]:

import glob
import os
import sys
import numpy as np
import pandas as pd


In [48]:
# Load the base CAMELS attribute table; the path is relative to the notebook's
# working directory.
df_baseatt = pd.read_csv(
    filepath_or_buffer='../../../data/camels_attributes_table.csv',
)

# Attributes used in papers
References:
Feng, Dapeng, Kuai Fang, and Chaopeng Shen. "Enhancing streamflow forecast and extracting insights using long‐short term memory networks with data integration at continental scales." Water Resources Research 56, no. 9 (2020): e2019WR026793.

Xie, Kang, Pan Liu, Jianyun Zhang, Dongyang Han, Guoqing Wang, and Chaopeng Shen. "Physics-guided deep learning for rainfall-runoff modeling by considering extreme events and monotonic relationships." Journal of Hydrology 603 (2021): 127043.

Ouyang, Wenyu, Kathryn Lawson, Dapeng Feng, Lei Ye, Chi Zhang, and Chaopeng Shen. "Continental-scale streamflow modeling of basins with reservoirs: Towards a coherent deep-learning-based strategy." Journal of Hydrology 599 (2021): 126455.

In [49]:
# Attribute names adjusted to match the raw CAMELS names

# Attribute set labelled "Feng2020" (presumably the inputs of Feng et al., 2020 --
# confirm against the paper). Maps attribute name -> {"description", "unit"}.
# Rows below are (name, description, unit); insertion order is preserved.
# NOTE(review): "geol_porostiy" and "area_gauges2" are kept exactly as written;
# verify both spellings against the Attribute_text column before "fixing" them.
att_Feng2020 = {
    name: {"description": description, "unit": unit}
    for name, description, unit in (
        ("mean_elev", "Catchment mean elevation", "m"),
        ("mean_slope", "Catchment mean slope", "m/km"),
        ("area_gauges2", "Catchment area (GAGESII estimate)", "km2"),
        ("frac_forest", "Forest fraction", "—"),
        ("lai_max", "Maximum monthly mean of the leaf area index", "—"),
        (
            "lai_diff",
            "Difference between the maximum and minimum monthly mean of the leaf area index",
            "—",
        ),
        (
            "dom_land_cover_frac",
            "Fraction of the catchment area associated with the dominant land cover",
            "—",
        ),
        ("dom_land_cover", "Dominant land cover type", "—"),
        (
            "root_depth_50",
            "Root depth at 50th percentile, extracted from a root depth distribution based on the International Geosphere-Biosphere Programme (IGBP) land cover",
            "m",
        ),
        ("soil_depth_statsgo", "Soil depth", "m"),
        ("soil_porosity", "Volumetric soil porosity", "—"),
        ("soil_conductivity", "Saturated hydraulic conductivity", "cm/hr"),
        ("max_water_content", "Maximum water content", "m"),
        ("geol_1st_class", "Most common geologic class in the catchment basin", "—"),
        (
            "geol_2nd_class",
            "Second most common geologic class in the catchment basin",
            "—",
        ),
        ("geol_porostiy", "Subsurface porosity", "—"),
        ("geol_permeability", "Subsurface permeability", "m2"),
    )
}

# Attribute set labelled "Xie2021" (presumably the inputs of Xie et al., 2021 --
# confirm against the paper). Maps attribute name -> {"description", "unit"}.
# Rows below are (name, description, unit); insertion order is preserved.
# NOTE(review): "area_gauges2" is kept exactly as written; verify the spelling
# against the Attribute_text column before changing it.
att_Xie2021 = {
    name: {"description": description, "unit": unit}
    for name, description, unit in (
        # --- climate ---
        ("p_mean", "Mean daily precipitation", "mm"),
        ("pet_mean", "Mean daily potential evapotranspiration", "mm"),
        ("aridity", "Ratio of mean PET to mean precipitation", "—"),
        ("p_seasonality", "Seasonality and timing of precipitation", "mm"),
        (
            "frac_snow",
            "Fraction of precipitation falling on days with temperatures below 0 °C",
            "—",
        ),
        (
            "high_prec_freq",
            "Frequency of high-precipitation days (≥ 5 times mean daily precipitation)",
            "—",
        ),
        ("high_prec_dur", "Average duration of high-precipitation events", "day"),
        ("low_prec_freq", "Frequency of dry days (≤ 1 mm/day)", "—"),
        ("low_prec_dur", "Average duration of dry periods", "day"),
        # --- topography ---
        ("mean_elev", "Catchment mean elevation", "m"),
        ("mean_slope", "Catchment mean slope", "m/km"),
        ("area_gauges2", "Catchment area", "km2"),
        # --- vegetation / land cover ---
        ("frac_forest", "Forest fraction", "—"),
        ("lai_max", "Maximum monthly mean of leaf area index", "—"),
        (
            "lai_diff",
            "Difference between the max. and min. mean of the leaf area index",
            "—",
        ),
        (
            "dom_land_cover_frac",
            "Fraction of the catchment area associated with the dominant land cover",
            "—",
        ),
        ("dom_land_cover", "Dominant land cover type", "—"),
        # --- soil ---
        ("soil_depth_pelletier", "Depth to bedrock (maximum 50 m)", "m"),
        ("soil_depth_statsgo", "Soil depth (maximum 1.5 m)", "m"),
        ("soil_porosity", "Volumetric porosity", "—"),
        ("soil_conductivity", "Saturated hydraulic conductivity", "cm/hr"),
        ("max_water_content", "Maximum water content of the soil", "m"),
        ("sand_frac", "Fraction of sand in the soil", "—"),
        ("silt_frac", "Fraction of silt in the soil", "—"),
        ("clay_frac", "Fraction of clay in the soil", "—"),
        # --- geology ---
        ("carbonate_rocks_frac", "Fraction of Carbonate sedimentary rocks", "—"),
        ("geol_permeability", "Surface permeability (log10)", "m2"),
    )
}

In [50]:
def _flag_listed_attributes(attribute_text, wanted_names):
    """Build one boolean flag per table row marking the requested attributes.

    Parameters
    ----------
    attribute_text : array-like
        Values of the ``Attribute_text`` column, one entry per table row.
    wanted_names : iterable of str
        Attribute names to look up (iterating a dict yields its keys).

    Returns
    -------
    list of bool
        Same length as ``attribute_text``. ``True`` at the first row whose
        text equals a wanted name, ``False`` everywhere else.

    Notes
    -----
    A name with no matching row is reported on stdout and otherwise skipped.
    If a name occurs in several rows, only the first occurrence is flagged.
    """
    attribute_text = np.asarray(attribute_text)
    flags = [False] * len(attribute_text)
    for name in wanted_names:
        # One scan per name gives both the existence check and the row index.
        matching_rows = np.flatnonzero(attribute_text == name)
        if matching_rows.size == 0:
            print(f'Warning! Cannot find {name} in Attribute_text')
        else:
            flags[matching_rows[0]] = True
    return flags


# The "column already present" guards keep this cell re-runnable without
# overwriting flags that were loaded from (or previously added to) the table.
# NOTE(review): the two output columns are named inconsistently ('Feng2020' vs
# 'att_Xie2021'); kept as-is because the saved CSV's consumers may rely on them.
if 'Feng2020' not in df_baseatt.columns:
    df_baseatt['Feng2020'] = _flag_listed_attributes(
        df_baseatt['Attribute_text'].values, att_Feng2020
    )

if 'att_Xie2021' not in df_baseatt.columns:
    df_baseatt['att_Xie2021'] = _flag_listed_attributes(
        df_baseatt['Attribute_text'].values, att_Xie2021
    )

In [53]:
# Write the flagged attribute table next to the input file; the row index is
# not written.
df_baseatt.to_csv(
    path_or_buf='../../../data/camels_attributes_table_TrainModel.csv',
    index=False,
)