# Hydrogeology attributes

Notebook to create the file `CAMELS_DE_hydrogeology_attributes.csv`.  

Columns of the hydrogeology attributes in CAMELS-GB (for comparison):
- gauge_id
- inter_high_perc
- inter_mod_perc
- inter_low_perc
- frac_high_perc
- frac_mod_perc
- frac_low_perc
- no_gw_perc
- low_nsig_perc
- nsig_low_perc


In [2]:
import os
from glob import glob
import pandas as pd

from camelsp.util import OUTPUT_PATH



In [3]:
# Collect the hydromet timeseries files; every file stands for one station.
timeseries_paths = glob("../output_data/camels_de/timeseries/*.csv")

# The id is the text after the last underscore of the path, cut off at the
# first ".csv"; the ids are kept in sorted order.
camels_ids = sorted(
    path.split("_")[-1].split(".csv")[0] for path in timeseries_paths
)

print(f"Total number of stations in CAMELS-DE v1: {len(camels_ids)}")

Total number of stations in CAMELS-DE v1: 1460


## Read hydrogeology data

The hydrogeology attributes were extracted from the HUEK250 dataset beforehand; here we read and process the extracted files.

In [68]:
# huek250 variables; one file `huek250_<variable>.csv` is read per variable
# and station
variables = ["ch", "kf", "ha", "ga", "vf", "gc"]


def _collapse_shared_columns(df, keyword, camels_id):
    """Replace all columns whose name contains `keyword` by one `keyword` column.

    Parameters
    ----------
    df : pd.DataFrame
        Attributes of a single station.
    keyword : str
        Substring identifying the columns to collapse; also the name of the
        resulting column.
    camels_id : str
        Station id, only used in the assertion message.

    Returns
    -------
    pd.DataFrame
        New frame in which the matching columns are dropped and their common
        value is stored in the column `keyword`.

    Raises
    ------
    AssertionError
        If the matching columns do not hold exactly one distinct value.
    """
    shared_cols = [col for col in df.columns if keyword in col]

    # all matching columns must carry the same value, otherwise collapsing
    # them into one column would silently discard information
    distinct_values = set(df[shared_cols].to_numpy().flatten().tolist())
    assert len(distinct_values) == 1, (
        f"{camels_id}: expected exactly one distinct value in the '{keyword}' "
        f"columns {shared_cols}, found {distinct_values}"
    )

    # add the single column first, then remove the originals (same order of
    # operations as before, so the resulting column layout is unchanged)
    df[keyword] = df[shared_cols[0]]
    return df.drop(columns=shared_cols)


def _read_station_hydrogeology(camels_id, variables):
    """Read all huek250 variable files of one station into a single frame.

    Parameters
    ----------
    camels_id : str
        Station id; selects the station folder below `OUTPUT_PATH`.
    variables : list of str
        huek250 variable names; one CSV is read for each of them.

    Returns
    -------
    pd.DataFrame
        Column `camels_id`, followed by the columns of every variable file
        (without their own `camels_id` column), with the waterbody and
        no_data columns collapsed into one column each.
    """
    # the first part carries the id, every further part is one variable file
    parts = [pd.DataFrame({"camels_id": [camels_id]})]

    for variable in variables:
        path = os.path.join(OUTPUT_PATH, f"raw_catchment_attributes/hydrogeo/huek250/{camels_id}/data/huek250_{variable}.csv")

        # the id is already stored once, drop it from the variable file
        parts.append(pd.read_csv(path).drop(columns="camels_id"))

    # join all parts side by side in a single concat call
    df = pd.concat(parts, axis=1)

    df = _collapse_shared_columns(df, "waterbody", camels_id)
    df = _collapse_shared_columns(df, "no_data", camels_id)

    return df


# read every station first and concatenate once at the end; concatenating
# inside the loop would copy the growing frame on every iteration
station_frames = [_read_station_hydrogeology(camels_id, variables) for camels_id in camels_ids]

# pd.concat raises on an empty list, so fall back to an empty frame when no
# station was found
df_all = pd.concat(station_frames, axis=0) if station_frames else pd.DataFrame()

# round to 2 decimal places
df_all = df_all.round(2)

# save results
df_all.to_csv("../output_data/camels_de/CAMELS_DE_hydrogeology_attributes.csv", index=False)