In [1]:
import xarray
import pandas as pd
import geopandas as gpd

In [2]:
# Base directory that the streamflow file names below are joined onto.
hydrosos_dir = '../../workspaces/app_workspace/hydrosos/streamflow/'

# Files for VPU 122 (paths relative to hydrosos_dir).
all_data_fname = 'vpu_122/combined_all_data_122.nc'
monthly_data_fname = 'vpu_122/combined_monthly_data_122.nc'

# Lookup tables / geometry shared by the cells below.
geometry_data_fname = "global_streams_simplified.gpkg"
stream_order_data_fname = "v2-master-table.parquet"
countries_table_fname = "v2-countries-table.parquet"

## River Geometry + Stream Order

In [3]:
# Read the river geometries from the geopackage (every feature in the file).
gdf_geo = gpd.read_file(hydrosos_dir + geometry_data_fname, engine='pyogrio')

# Read the master table, then keep only the LINKNO / strmOrder columns of the
# rows whose VPUCode is 122, renumbering the index from 0.
df_stream_order = pd.read_parquet(hydrosos_dir + stream_order_data_fname)
df_stream_order = (
    df_stream_order
    .loc[df_stream_order['VPUCode'] == 122, ['LINKNO', 'strmOrder']]
    .reset_index(drop=True)
)

# Join geometry and stream order on LINKNO (pd.merge defaults to an inner join,
# so only rivers present in both tables survive), then expose the key as 'rivid'.
df_river = pd.merge(gdf_geo, df_stream_order, on=['LINKNO'])
df_river = df_river.rename(columns={'LINKNO': 'rivid'})
df_river

In [4]:
# Wrap the merged river table as a GeoDataFrame using its 'geometry' column,
# then save it as GeoJSON under the vpu_122 folder of hydrosos_dir.
gdf_river = gpd.GeoDataFrame(data=df_river, geometry='geometry')
gdf_river.to_file(
    filename=hydrosos_dir + 'vpu_122/river_features_122.geojson',
    driver='GeoJSON',
)

## River Data

In [55]:
# Open the two VPU 122 .nc datasets (full record and monthly record) with xarray.
all_data = xarray.open_dataset(f"{hydrosos_dir}{all_data_fname}")
monthly_data = xarray.open_dataset(f"{hydrosos_dir}{monthly_data_fname}")

## Countries Table

In [38]:
# Distinct country names found in the v2 countries table.
# Only the 'RiverCountry' column is used from this table, so ask the parquet
# reader for that single column instead of loading every column of a table
# with millions of rows and discarding the rest afterwards.
geoglows_countries = pd.read_parquet(
    '../../workspaces/app_workspace/' + countries_table_fname,
    columns=['RiverCountry'],
)
# Keep the first row seen for each country name. The index is deliberately not
# reset, so each label is the row position of that country's first occurrence.
geoglows_countries = geoglows_countries.drop_duplicates('RiverCountry')
geoglows_countries

Unnamed: 0,RiverCountry
0,Egypt
2712,Sudan
8026,Eritrea
13072,Ethiopia
16030,Djibouti
...,...
6664057,Haiti
6664099,Dominican Republic
6669832,Jamaica
6673589,Antigua & Barbuda


In [39]:
# Country names from the public countries GeoJSON: rename the ADMIN column to
# RiverCountry_2 and keep only that column (the geometry is dropped).
countries_list = (
    gpd.read_file("../../public/data/geojson/countries.geojson")
    .rename(columns={"ADMIN": "RiverCountry_2"})[["RiverCountry_2"]]
)
countries_list

Unnamed: 0,RiverCountry_2
0,Aruba
1,Afghanistan
2,Angola
3,Anguilla
4,Albania
...,...
250,Samoa
251,Yemen
252,South Africa
253,Zambia


In [40]:
# Outer-join the two name lists so that names present in only one source are
# kept, with NaN on the side that has no match.
df_merge = pd.merge(
    left=geoglows_countries,
    right=countries_list,
    how='outer',
    left_on='RiverCountry',
    right_on='RiverCountry_2',
)
df_merge

Unnamed: 0,RiverCountry,RiverCountry_2
0,Abyei,
1,Afghanistan,Afghanistan
2,,Akrotiri Sovereign Base Area
3,Aksai Chin,
4,,Aland
...,...,...
281,West Bank,
282,Western Sahara,Western Sahara
283,Yemen,Yemen
284,Zambia,Zambia


In [42]:
# Rows with no RiverCountry_2 value, i.e. names from the countries table that
# found no match in the GeoJSON country list.
df_merge.loc[df_merge['RiverCountry_2'].isna()]

Unnamed: 0,RiverCountry,RiverCountry_2
0,Abyei,
3,Aksai Chin,
12,Antigua & Barbuda,
21,"Bahamas, The",
34,Bosnia & Herzegovina,
43,Burma,
45,CH-IN,
51,Central African Rep,
59,"Congo, Dem Rep of the",
60,"Congo, Rep of the",


In [43]:
# Rows with no RiverCountry value, i.e. names from the GeoJSON country list
# that found no match in the countries table.
df_merge.loc[df_merge['RiverCountry'].isna()]

Unnamed: 0,RiverCountry,RiverCountry_2
2,,Akrotiri Sovereign Base Area
4,,Aland
7,,American Samoa
10,,Anguilla
11,,Antarctica
...,...,...
271,,United States Virgin Islands
272,,United States of America
276,,Vanuatu
277,,Vatican
