# Hydrogeology attributes

Notebook to create the file `CAMELS_DE_hydrogeology_attributes.csv`.  

columns in CAMELS-GB:
- gauge_id
- inter_high_perc
- inter_mod_perc
- inter_low_perc
- frac_high_perc
- frac_mod_perc
- frac_low_perc
- no_gw_perc
- low_nsig_perc
- nsig_low_perc


In [2]:
import os
from glob import glob
import pandas as pd

from camelsp.util import OUTPUT_PATH



In [3]:
# get camels_ids from hydromet timeseries
camels_ids = [camels_id.split("_")[-1].split(".csv")[0] for camels_id in glob("../output_data/camels_de/timeseries/*.csv")]

# sort camels_ids
camels_ids = sorted(camels_ids)

print(f"Total number of stations in CAMELS-DE v1: {len(camels_ids)}")

Total number of stations in CAMELS-DE v1: 1460


## Read hydrogeology data

We extracted the hydrogeology attributes from the HUEK250 dataset, read and process here.

In [None]:
# path of extracted dam data
path = os.path.join(OUTPUT_PATH, "raw_catchment_attributes/hydrogeo/huek250/dams_in_germany.csv")



# read dam data
dam_data = pd.read_csv(path)

# filter for camels_ids and sort by camels_id
dam_data = dam_data[dam_data["camels_id"].isin(camels_ids)].sort_values("camels_id").reset_index(drop=True)

# make columns dams_year_last and dams_year_first integer
dam_data["dams_year_first"] = dam_data["dams_year_first"].astype("Int64")
dam_data["dams_year_last"] = dam_data["dams_year_last"].astype("Int64")

dam_data

In [56]:
# dataframe to store all hydrogeo data for all camels_ids
df_all = pd.DataFrame()

# huek250 variables
variables = ["ch", "kf", "ha", "ga", "vf", "gc"]

for camels_id in camels_ids:
    # dataframe to store all hydrogeo data for a single camels_id
    df = pd.DataFrame()
    df["camels_id"] = [camels_id]

    # read all variables
    for variable in variables:
        path = os.path.join(OUTPUT_PATH, f"raw_catchment_attributes/hydrogeo/huek250/{camels_id}/data/huek250_{variable}.csv")
        df_variable = pd.read_csv(path)

        # drop column camels_id
        df_variable.drop("camels_id", axis=1, inplace=True)

        # concatenate dataframes
        df = pd.concat([df, df_variable], axis=1)

    # check that all waterbody columns have the same value
    waterbody_cols = [col for col in df.columns if "waterbody" in col]
    assert len(set(df[waterbody_cols].values.flatten().tolist())) == 1

    # make one waterbody column
    df["waterbody"] = df[waterbody_cols[0]]
    df.drop(waterbody_cols, axis=1, inplace=True)

    # check that all no_data columns have the same value
    no_data_cols = [col for col in df.columns if "no_data" in col]
    assert len(set(df[no_data_cols].values.flatten().tolist())) == 1

    # make one no_data column
    df["no_data"] = df[no_data_cols[0]]
    df.drop(no_data_cols, axis=1, inplace=True)

    # add to df_all
    df_all = pd.concat([df_all, df], axis=0)

df_all    

Unnamed: 0,camels_id,aquitard_perc,aquifer_perc,aquifer_aquitard_mixed_perc,kf_very_high_perc,kf_high_perc,kf_medium_perc,kf_moderate_perc,kf_low_perc,kf_very_low_perc,...,geochemical_rocktype_silicate_perc,geochemical_rocktype_silicate_carbonatic_perc,geochemical_rocktype_carbonatic_perc,geochemical_rocktype_sulfatic_perc,geochemical_rocktype_silicate_organic_components_perc,geochemical_rocktype_anthropogenically_modified_through_filling_perc,geochemical_rocktype_sulfatic_halitic_perc,geochemical_rocktype_halitic_perc,waterbody,no_data
0,DE110000,67.057823,8.803989,24.138189,0.0,0.116629,1.488479,6.402118,27.963286,18.240576,...,47.515999,31.628122,16.440259,4.415620,0.000000,0.000000,0.0,0.0,0.0,0.0
0,DE110010,64.109570,9.939942,25.950488,0.0,0.107057,1.845308,5.876686,26.201569,16.743542,...,43.679947,32.006677,20.260154,4.053223,0.000000,0.000000,0.0,0.0,0.0,0.0
0,DE110020,32.412673,44.607422,22.979905,0.0,5.464995,1.735923,4.922878,15.070393,5.320979,...,13.939976,32.141034,52.632230,1.286761,0.000000,0.000000,0.0,0.0,0.0,0.0
0,DE110030,25.296329,52.082797,22.620875,0.0,12.197100,1.110332,5.577850,13.418512,3.411765,...,8.952346,34.191448,56.033168,0.823039,0.000000,0.000000,0.0,0.0,0.0,0.0
0,DE110040,22.354099,72.085765,5.560136,0.0,72.085765,0.000000,0.000000,0.021066,0.000000,...,0.000000,87.780012,12.219988,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,DEG10580,46.602619,31.917779,21.479601,0.0,0.000000,0.380016,24.615207,17.808603,0.000000,...,41.182915,4.412694,26.618185,27.776317,0.008175,0.001715,0.0,0.0,0.0,0.0
0,DEG10590,54.125446,14.474696,31.399858,0.0,0.000000,4.178961,6.635499,38.333834,0.000000,...,65.570373,0.932341,15.019398,18.025470,0.435653,0.016764,0.0,0.0,0.0,0.0
0,DEG10600,30.803561,28.194220,41.002220,0.0,0.000000,0.941102,19.891599,30.803561,0.000000,...,1.079473,30.721877,60.904975,7.293675,0.000000,0.000000,0.0,0.0,0.0,0.0
0,DEG10610,97.869347,0.984913,1.145739,0.0,0.000000,0.000000,0.984913,25.786298,32.570185,...,83.697778,15.317308,0.984913,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0
