In [1]:
import sys
from pathlib import Path

# Make the project's "src" directory importable from this notebook.
# The project root is taken to be the parent of the current working directory.
project_root = Path.cwd().parent
src_path = project_root.joinpath("src")
if str(src_path) not in sys.path:
    # Prepend in place so this directory is searched before the existing entries.
    sys.path[:0] = [str(src_path)]
    print(f"Added {src_path} to Python path")

Added /Users/cooper/Desktop/hydro-forecasting/src to Python path


In [2]:
import geopandas as gpd
from shapely.geometry import Point

from hydro_forecasting.data.caravanify_parquet import CaravanifyParquet, CaravanifyParquetConfig

---

In [3]:
def load_basin_ids(
    country: str,
    attributes_dir: str = "/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/attributes",
    timeseries_dir: str = "/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/timeseries/csv",
) -> list[str]:
    """
    Return the gauge IDs of the Central Asia (CA) basins located in one country.

    Args:
        country: Country name. Matching is case-insensitive and ignores
            surrounding whitespace. Only "Tajikistan" and "Kyrgyzstan" are
            supported.
        attributes_dir: Directory handed to ``CaravanifyParquetConfig`` as
            ``attributes_dir``.
        timeseries_dir: Directory handed to ``CaravanifyParquetConfig`` as
            ``timeseries_dir``.

    Returns:
        The unique ``gauge_id`` values whose ``country`` static attribute equals
        the normalised country name. For an unsupported country the message
        "Country not supported" is printed and an empty list is returned.
    """
    supported_countries = {"Tajikistan", "Kyrgyzstan"}

    # str.capitalize() upper-cases the first character and lower-cases the rest,
    # so a separate lower() call is not needed.
    country = country.strip().capitalize()

    if country not in supported_countries:
        print("Country not supported")
        return []

    configs = CaravanifyParquetConfig(
        attributes_dir=attributes_dir,
        timeseries_dir=timeseries_dir,
        gauge_id_prefix="CA",
        use_hydroatlas_attributes=True,
        use_caravan_attributes=True,
        use_other_attributes=True,
    )

    # Load every CA gauge first; the country filter is applied on the static attributes.
    caravan = CaravanifyParquet(configs)
    caravan.load_stations(caravan.get_all_gauge_ids())
    static_data = caravan.get_static_attributes()

    in_country = static_data["country"] == country
    return list(static_data.loc[in_country, "gauge_id"].unique())


# Gauge IDs of the Kyrgyz basins; the cells below reuse `basin_ids`.
basin_ids = load_basin_ids(country="kyrgyzstan")

In [4]:
# Configuration for the Central Asia ("CA") dataset, including the basin shapefiles.
# NOTE(review): shapefile_dir sits under a different root (CAMELS-CH/data/CARAVANIFY)
# than attributes_dir and timeseries_dir (CaravanifyParquet) - confirm this is intended.
config_ca = CaravanifyParquetConfig(
    gauge_id_prefix="CA",
    attributes_dir="/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/attributes",
    timeseries_dir="/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/timeseries/csv",
    shapefile_dir="/Users/cooper/Desktop/CAMELS-CH/data/CARAVANIFY/CA/post_processed/shapefiles",
    use_caravan_attributes=True,
    use_hydroatlas_attributes=True,
    use_other_attributes=True,
)

# Load only the stations listed in `basin_ids`.
caravan_ca = CaravanifyParquet(config_ca)
caravan_ca.load_stations(basin_ids)

# Static attributes of the loaded stations.
static = caravan_ca.get_static_attributes()

In [5]:
# Keep only the gauge identifier and its coordinates, then display the result.
location_columns = ["gauge_id", "gauge_lat", "gauge_lon"]
static = static[location_columns]
static

Unnamed: 0,gauge_id,gauge_lat,gauge_lon
0,CA_15013,42.652788,78.921090
1,CA_15016,42.585012,78.883448
2,CA_15020,42.471215,78.531209
3,CA_15022,42.467058,78.537862
4,CA_15025,42.424441,78.434206
...,...,...,...
57,CA_16169,40.182493,72.081291
58,CA_16176,41.598136,71.655836
59,CA_16487,40.638926,73.691057
60,CA_16510,40.116804,71.719305


In [6]:
# Build one point per gauge with geopandas' vectorised constructor.
# points_from_xy takes x (longitude) first, then y (latitude).
geometry = gpd.points_from_xy(static["gauge_lon"], static["gauge_lat"])

# Create GeoDataFrame; the coordinates are tagged as EPSG:4326.
gdf = gpd.GeoDataFrame(static, geometry=geometry, crs="EPSG:4326")

# Write to shapefile
# NOTE(review): the target directory is named "test_tajik" although these are the
# Kyrgyz gauges - confirm the output location.
gdf.to_file("/Users/cooper/Desktop/kazakhstan-data/shapefiles/test_tajik/kyrgyzstan_gauge_locations.shp")

In [7]:
# Fetch the basin shapes and keep only the rows whose gauge_id is in `basin_ids`.
shapefiles = caravan_ca.get_shapefiles()
selected_mask = shapefiles["gauge_id"].isin(basin_ids)
shapefiles = shapefiles[selected_mask]

# Show the coordinate reference system of the basin shapes before exporting.
print(shapefiles.crs)


# Export the selected basin shapes as a shapefile.
shapefiles.to_file(f"/Users/cooper/Desktop/kazakhstan-data/shapefiles/test_tajik/{'kyrgyzstan'}_basins.shp")

EPSG:4326
