In [1]:
import sys
from pathlib import Path

# Make the project's "src" directory importable from this notebook.
# The parent of the current working directory is treated as the project root.
project_root = Path.cwd().parent
src_path = project_root.joinpath("src")
if sys.path.count(str(src_path)) == 0:
    # Put it first on the import search path and report the change once.
    sys.path.insert(0, str(src_path))
    print(f"Added {src_path} to Python path")

Added /Users/cooper/Desktop/hydro-forecasting/src to Python path


In [2]:
import geopandas as gpd
from shapely.geometry import Point

from hydro_forecasting.data.caravanify_parquet import CaravanifyParquet, CaravanifyParquetConfig

---

In [3]:
def load_basin_ids(
    country: str,
    attributes_dir: str = "/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/attributes",
    timeseries_dir: str = "/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/timeseries/csv",
) -> list[str]:
    """
    Load the gauge IDs of the Central Asia ("CA") basins located in one country.

    Args:
        country: Country name. Matching ignores letter case and surrounding
            whitespace. Only "Tajikistan" and "Kyrgyzstan" are supported.
        attributes_dir: Directory handed to ``CaravanifyParquetConfig`` as
            ``attributes_dir``. Defaults to the path this notebook originally
            hard-coded.
        timeseries_dir: Directory handed to ``CaravanifyParquetConfig`` as
            ``timeseries_dir``. Defaults to the path this notebook originally
            hard-coded.

    Returns:
        The unique ``gauge_id`` values whose ``country`` static attribute equals
        the normalized country name. For an unsupported country a message is
        printed and an empty list is returned.
    """
    supported_countries = ("Tajikistan", "Kyrgyzstan")

    # str.capitalize() already lowercases everything after the first character,
    # so a separate lower() call is unnecessary. strip() tolerates stray
    # whitespace around the name.
    country = country.strip().capitalize()

    if country not in supported_countries:
        print("Country not supported")
        return []

    configs = CaravanifyParquetConfig(
        attributes_dir=attributes_dir,
        timeseries_dir=timeseries_dir,
        gauge_id_prefix="CA",
        use_hydroatlas_attributes=True,
        use_caravan_attributes=True,
        use_other_attributes=True,
    )

    # Load every available gauge, then filter the static attributes by country.
    caravan = CaravanifyParquet(configs)
    ca_basins = caravan.get_all_gauge_ids()
    caravan.load_stations(ca_basins)
    static_data = caravan.get_static_attributes()

    in_country = static_data["country"] == country
    return list(static_data.loc[in_country, "gauge_id"].unique())


# Gauge IDs for Tajikistan; the cells below use them to load stations and to
# filter the shapefiles.
basin_ids = load_basin_ids(country="tajikistan")

In [4]:
# Dataset configuration for the "CA" gauges. Unlike the configuration built
# inside load_basin_ids, this one also sets a shapefile directory.
config_ca = CaravanifyParquetConfig(
    gauge_id_prefix="CA",
    # Data locations.
    attributes_dir="/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/attributes",
    timeseries_dir="/Users/cooper/Desktop/CaravanifyParquet/CA/post_processed/timeseries/csv",
    shapefile_dir="/Users/cooper/Desktop/CAMELS-CH/data/CARAVANIFY/CA/post_processed/shapefiles",
    # Attribute groups to include.
    use_caravan_attributes=True,
    use_hydroatlas_attributes=True,
    use_other_attributes=True,
)

# Load only the previously selected basins, then fetch their static attributes.
caravan_ca = CaravanifyParquet(config_ca)
caravan_ca.load_stations(basin_ids)
static = caravan_ca.get_static_attributes()

In [5]:
# Keep only the gauge identifier and its coordinates, then display the table.
static = static.loc[:, ["gauge_id", "gauge_lat", "gauge_lon"]]
static

Unnamed: 0,gauge_id,gauge_lat,gauge_lon
0,CA_16205,39.857349,70.54477
1,CA_17050,37.492292,71.558719
2,CA_17077,38.149088,69.92568
3,CA_17082,39.012027,70.367636
4,CA_17100,39.273184,71.383511
5,CA_17110,38.723194,70.408124
6,CA_17137,37.545421,68.131404
7,CA_17147,38.896769,68.828991
8,CA_17150,38.710166,68.791481
9,CA_17202,38.631839,68.330996


In [6]:
# Build one point per gauge from its coordinates (x = longitude, y = latitude).
# points_from_xy is the vectorized geopandas constructor and replaces the
# per-row Point(...) loop.
geometry = gpd.points_from_xy(static['gauge_lon'], static['gauge_lat'])

# Create GeoDataFrame; EPSG:4326 declares the coordinates as WGS 84 lon/lat.
gdf = gpd.GeoDataFrame(static, geometry=geometry, crs='EPSG:4326')

# Write to shapefile. The output directory is created first because to_file
# does not create missing parent directories.
gauge_shapefile = Path('/Users/cooper/Desktop/kazakhstan-data/shapefiles/test_tajik/gauge_locations.shp')
gauge_shapefile.parent.mkdir(parents=True, exist_ok=True)
gdf.to_file(gauge_shapefile)

In [8]:
# Fetch the shapefile records from the dataset and keep only the rows whose
# gauge_id is among the selected basins.
shapefiles = caravan_ca.get_shapefiles().loc[
    lambda records: records["gauge_id"].isin(basin_ids)
]

# Write the filtered records to a shapefile.
shapefiles.to_file('/Users/cooper/Desktop/kazakhstan-data/shapefiles/test_tajik/basins.shp')