In [1]:
import requests
import os
import zipfile
import shutil

from glob import glob
from tqdm import tqdm
import geopandas as gpd
import pandas as pd

## 1) Download to `in`

Download CAMELS-DE from Zenodo (10.5281/zenodo.12733968) if it does not exist yet.

In [4]:
# Download and unpack CAMELS-DE only when the extracted dataset is not present yet.
data_dir = "../in/camels_de_v0_1_0"
zip_path = "../in/camels_de.zip"

if os.path.exists(data_dir):
    print("Data is already downloaded.")
else:
    # The Zenodo record ID is the numeric suffix of the DOI.
    doi = "10.5281/zenodo.12733968"
    record_id = doi.rsplit(".", 1)[-1]
    url = f"https://zenodo.org/records/{record_id}/files/camels_de.zip?download=1"

    # `../in` may not exist on a fresh checkout; opening the zip for writing would fail.
    os.makedirs(os.path.dirname(zip_path), exist_ok=True)

    try:
        # Stream the archive to disk in chunks instead of buffering the whole
        # response in memory; the timeout prevents hanging on a stalled connection.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()  # Fail early on HTTP errors
            with open(zip_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)

        # Extract into a temporary sibling directory and rename it afterwards, so
        # an interrupted extraction is never mistaken for a complete download by
        # the existence check above.
        partial_dir = data_dir + ".partial"
        shutil.rmtree(partial_dir, ignore_errors=True)
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(partial_dir)
        os.replace(partial_dir, data_dir)
    finally:
        # Remove the downloaded zip file, also when the download or extraction failed
        if os.path.exists(zip_path):
            os.remove(zip_path)

    print("Download complete.")

Data is already downloaded.


## 2) Restructure the data for the FastAPI backend to `out`

In [6]:
# Ensure the output root for the restructured dataset exists (no error if it already does).
os.makedirs(name="../out/camels_de_v0_1_0", exist_ok=True)

First, copy the attribute files unchanged.

In [8]:
# Gather every `*attributes.csv` table plus the simulation benchmark table.
attribute_files = [
    *glob("../in/camels_de_v0_1_0/*attributes.csv"),
    "../in/camels_de_v0_1_0/CAMELS_DE_simulation_benchmark.csv",
]

# Copy each table unchanged into the output directory and report it.
for src_path in attribute_files:
    shutil.copy(src_path, "../out/camels_de_v0_1_0/")
    print(f"Copied {src_path} to ../out/camels_de_v0_1_0/")

Copied ../in/camels_de_v0_1_0/CAMELS_DE_humaninfluence_attributes.csv to ../out/camels_de_v0_1_0/
Copied ../in/camels_de_v0_1_0/CAMELS_DE_topographic_attributes.csv to ../out/camels_de_v0_1_0/
Copied ../in/camels_de_v0_1_0/CAMELS_DE_climatic_attributes.csv to ../out/camels_de_v0_1_0/
Copied ../in/camels_de_v0_1_0/CAMELS_DE_soil_attributes.csv to ../out/camels_de_v0_1_0/
Copied ../in/camels_de_v0_1_0/CAMELS_DE_hydrogeology_attributes.csv to ../out/camels_de_v0_1_0/
Copied ../in/camels_de_v0_1_0/CAMELS_DE_hydrologic_attributes.csv to ../out/camels_de_v0_1_0/
Copied ../in/camels_de_v0_1_0/CAMELS_DE_landcover_attributes.csv to ../out/camels_de_v0_1_0/
Copied ../in/camels_de_v0_1_0/CAMELS_DE_simulation_benchmark.csv to ../out/camels_de_v0_1_0/


Next, copy the CAMELS_DE_catchment_boundaries folder as is.

In [15]:
# Mirror the whole boundaries folder into the output tree (existing directories are reused).
shutil.copytree(
    "../in/camels_de_v0_1_0/CAMELS_DE_catchment_boundaries/",
    "../out/camels_de_v0_1_0/CAMELS_DE_catchment_boundaries/",
    dirs_exist_ok=True,
)

# Also write GeoJSON versions: read each GeoPackage, reproject it to EPSG:4326
# and save it next to the copied GeoPackage in the output tree.
catchments_gpkg = "../in/camels_de_v0_1_0/CAMELS_DE_catchment_boundaries/catchments/CAMELS_DE_catchments.gpkg"
catchments_geojson = "../out/camels_de_v0_1_0/CAMELS_DE_catchment_boundaries/catchments/CAMELS_DE_catchments.geojson"
gdf_catchments = gpd.read_file(catchments_gpkg)
gdf_catchments = gdf_catchments.to_crs("EPSG:4326")
gdf_catchments.to_file(catchments_geojson, driver="GeoJSON")

stations_gpkg = "../in/camels_de_v0_1_0/CAMELS_DE_catchment_boundaries/gauging_stations/CAMELS_DE_gauging_stations.gpkg"
stations_geojson = "../out/camels_de_v0_1_0/CAMELS_DE_catchment_boundaries/gauging_stations/CAMELS_DE_gauging_stations.geojson"
gdf_stations = gpd.read_file(stations_gpkg)
gdf_stations = gdf_stations.to_crs("EPSG:4326")
gdf_stations.to_file(stations_geojson, driver="GeoJSON")

Next, we process the catchment-specific data. We restructure it into one folder per catchment so that the FastAPI backend can access it more easily.

# Collect all gauge IDs from the hydromet timeseries file names
# (CAMELS_DE_hydromet_timeseries_<ID>.csv). The ID is taken from the file name
# only, so directory names and path separators cannot interfere with parsing.
timeseries_prefix = "CAMELS_DE_hydromet_timeseries_"
ids = sorted(
    os.path.splitext(os.path.basename(path))[0][len(timeseries_prefix):]
    for path in glob(f"../in/camels_de_v0_1_0/timeseries/{timeseries_prefix}*.csv")
)

# Read every attribute table once instead of once per catchment. `gauge_id` is
# forced to string so the comparison with the IDs parsed from the file names
# (always strings) cannot silently match nothing if pandas infers a numeric dtype.
attribute_tables = {
    os.path.basename(path): pd.read_csv(path, dtype={"gauge_id": str})
    for path in attribute_files
}

catchments_root = "../out/camels_de_v0_1_0/catchments"

for gauge_id in tqdm(ids):
    catchment_dir = f"{catchments_root}/{gauge_id}"

    # make subdirectories for timeseries, attributes and geometry
    # (os.makedirs also creates the missing parent folders)
    for subdir in ("timeseries", "attributes", "geometry"):
        os.makedirs(f"{catchment_dir}/{subdir}", exist_ok=True)

    # copy the observed and simulated timeseries files
    shutil.copy(
        f"../in/camels_de_v0_1_0/timeseries/CAMELS_DE_hydromet_timeseries_{gauge_id}.csv",
        f"{catchment_dir}/timeseries/CAMELS_DE_hydromet_timeseries_{gauge_id}.csv",
    )
    shutil.copy(
        f"../in/camels_de_v0_1_0/timeseries_simulated/CAMELS_DE_discharge_sim_{gauge_id}.csv",
        f"{catchment_dir}/timeseries/CAMELS_DE_discharge_sim_{gauge_id}.csv",
    )

    # write the rows of each attribute table that belong to this catchment
    for file_name, table in attribute_tables.items():
        catchment_rows = table[table["gauge_id"] == gauge_id].reset_index(drop=True)
        catchment_rows.to_csv(f"{catchment_dir}/attributes/{file_name}", index=False)

    # write the catchment polygon and the gauging station as separate GeoJSON files
    catchment_gdf = gdf_catchments[gdf_catchments["gauge_id"] == gauge_id].reset_index(drop=True)
    catchment_gdf.to_file(f"{catchment_dir}/geometry/CAMELS_DE_catchment_{gauge_id}.geojson", driver="GeoJSON")
    station_gdf = gdf_stations[gdf_stations["gauge_id"] == gauge_id].reset_index(drop=True)
    station_gdf.to_file(f"{catchment_dir}/geometry/CAMELS_DE_gauging_station_{gauge_id}.geojson", driver="GeoJSON")