In [1]:
# Fetch the GEOGLOWS forecast for a single river ID from the public S3 Zarr store

import zarr
import s3fs
import pandas as pd

def get_forecast_data(river_number, date):
    """Fetch the forecast for one river from the public forecast Zarr store.

    Opens ``s3://geoglows-v2-forecasts/{date}.zarr/`` anonymously, locates
    ``river_number`` in the ``rivid`` array and slices ``Qout`` at that
    position along its last axis.

    Args:
        river_number: River identifier to look up in the ``rivid`` array.
        date: Forecast identifier used to build the Zarr store name
            (e.g. ``'2024040100'``).

    Returns:
        pandas.DataFrame indexed by the raw values of the ``time`` array, with
        one column per entry of the ``ensemble`` array named ``ensemble_<id>``.
        An empty DataFrame is returned when the river is not present or when
        any error occurs while reading the store (the error is printed).
    """
    s3_bucket_url = f's3://geoglows-v2-forecasts/{date}.zarr/'

    s3 = s3fs.S3FileSystem(anon=True)

    try:
        mapper = s3fs.S3Map(root=s3_bucket_url, s3=s3, check=False)
        zarr_group = zarr.open_group(mapper, mode='r')

        rivid_array = zarr_group['rivid'][:]

        # Single vectorised scan: avoids copying the whole id array into a
        # Python list and searching it a second time just to get the index.
        matches = (rivid_array == river_number).nonzero()[0]
        if matches.size == 0:
            return pd.DataFrame()
        river_index = int(matches[0])  # first match, same as list.index()

        # Qout is sliced as (ensemble, time) for the selected river.
        qout_array = zarr_group['Qout'][:, :, river_index]
        time_array = zarr_group['time'][:]
        ensemble_array = zarr_group['ensemble'][:]

        forecast_df = pd.DataFrame(
            qout_array,
            index=[f"ensemble_{i}" for i in ensemble_array],
            columns=time_array,
        )
        # Transpose so rows are time steps and columns are ensemble members.
        return forecast_df.transpose()
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back an empty frame.
        print(f"Error accessing data for RiverNumber {river_number} on {date}: {e}")
        return pd.DataFrame()

def list_s3_contents(bucket_url):
    """List the entries found under ``bucket_url`` using anonymous S3 access.

    Returns whatever the filesystem's ``ls`` call yields; if the listing
    raises, the error is printed and an empty list is returned instead.
    """
    filesystem = s3fs.S3FileSystem(anon=True)

    try:
        return filesystem.ls(bucket_url)
    except Exception as err:
        print(f"Error accessing S3 bucket: {err}")

    # Only reached when the listing failed.
    return []

# Inputs for the lookup: which river to fetch and which forecast store to read
river_number = 110123714  # Replace with the desired river number
date = '2024040100'    # Replace with the desired date

# Pull the forecast for that river/date combination
forecast_data = get_forecast_data(river_number, date)

# Report the outcome: a short preview when data came back, a notice otherwise
if forecast_data.empty:
    print(f"No forecast data found for RiverNumber {river_number} on {date}.")
else:
    print(f"Forecast data for RiverNumber {river_number} on {date}:")
    print(forecast_data.head())




Forecast data for RiverNumber 110123714 on 2024040100:
         ensemble_1    ensemble_2    ensemble_3    ensemble_4    ensemble_5  \
0      1.857319e-09  1.857319e-09  1.857319e-09  1.857319e-09  1.857319e-09   
3600            NaN           NaN           NaN           NaN           NaN   
7200            NaN           NaN           NaN           NaN           NaN   
10800  8.586609e-10  8.586609e-10  8.586609e-10  8.586609e-10  8.586609e-10   
14400           NaN           NaN           NaN           NaN           NaN   

         ensemble_6    ensemble_7    ensemble_8    ensemble_9   ensemble_10  \
0      1.857319e-09  1.857319e-09  1.857319e-09  1.857319e-09  1.857319e-09   
3600            NaN           NaN           NaN           NaN           NaN   
7200            NaN           NaN           NaN           NaN           NaN   
10800  8.586609e-10  8.586609e-10  8.586609e-10  8.586609e-10  8.586609e-10   
14400           NaN           NaN           NaN           NaN           NaN

In [2]:
# Plot the fetched forecast as an Altair line chart and overlay the return-period lines

import zarr
import s3fs
import pandas as pd
import altair as alt

def get_forecast_data(river_number, date):
    """Fetch the forecast for one river in long (tidy) format for plotting.

    Opens ``s3://geoglows-v2-forecasts/{date}.zarr/`` anonymously, locates
    ``river_number`` in the ``rivid`` array and slices ``Qout`` at that
    position along its last axis, then reshapes the result to one row per
    (time step, ensemble member).

    Args:
        river_number: River identifier to look up in the ``rivid`` array.
        date: Forecast identifier used to build the Zarr store name
            (e.g. ``'2024040100'``).

    Returns:
        pandas.DataFrame with columns ``date`` (raw values of the ``time``
        array), ``ensemble`` (``ensemble_<id>`` labels) and ``flow`` (the
        ``Qout`` values). An empty DataFrame is returned when the river is not
        present or when any error occurs while reading the store (the error is
        printed).
    """
    s3_bucket_url = f's3://geoglows-v2-forecasts/{date}.zarr/'

    s3 = s3fs.S3FileSystem(anon=True)

    try:
        mapper = s3fs.S3Map(root=s3_bucket_url, s3=s3, check=False)
        zarr_group = zarr.open_group(mapper, mode='r')

        rivid_array = zarr_group['rivid'][:]

        # Single vectorised scan: avoids copying the whole id array into a
        # Python list and searching it a second time just to get the index.
        matches = (rivid_array == river_number).nonzero()[0]
        if matches.size == 0:
            return pd.DataFrame()
        river_index = int(matches[0])  # first match, same as list.index()

        # Qout is sliced as (ensemble, time) for the selected river.
        qout_array = zarr_group['Qout'][:, :, river_index]
        time_array = zarr_group['time'][:]
        ensemble_array = zarr_group['ensemble'][:]

        wide_df = pd.DataFrame(
            qout_array,
            index=[f"ensemble_{i}" for i in ensemble_array],
            columns=time_array,
        ).transpose()

        # Wide -> long without in-place mutation: the unnamed time index becomes
        # the "index" column, which is kept as the id variable and then renamed.
        return (
            wide_df.reset_index()
            .melt(id_vars=["index"], var_name="ensemble", value_name="flow")
            .rename(columns={"index": "date"})
        )
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back an empty frame.
        print(f"Error accessing data for RiverNumber {river_number} on {date}: {e}")
        return pd.DataFrame()

# Define the river number and date for fetching data
river_number = 110123714  # Replace with the desired river number
date = '2024040100'    # Replace with the desired date

# Fetch the forecast data for the specified river number and date
forecast_data = get_forecast_data(river_number, date)

# Define the return period data
# NOTE(review): these thresholds are hard-coded here, not read from the
# forecast store — confirm they are the intended values for this river.
return_periods = {
    "period": ["2-year", "5-year", "10-year"],
    "flow": [20, 25, 30]
}
return_period_df = pd.DataFrame(return_periods)

if forecast_data.empty:
    # Nothing to draw: skip chart creation instead of saving an empty chart.
    print(f"No forecast data found for RiverNumber {river_number} on {date}.")
else:
    # Display the fetched forecast data
    print(f"Forecast data for RiverNumber {river_number} on {date}:")
    print(forecast_data.head())

    # The 'date' column holds integer offsets (0, 3600, 7200, ...). Encoding those
    # directly as temporal would have them read as epoch milliseconds, so they
    # are converted to real timestamps first.
    # NOTE(review): assumes the offsets are seconds since the forecast start and
    # that `date` is formatted as YYYYMMDDHH — confirm against the store metadata.
    forecast_start = pd.to_datetime(date, format='%Y%m%d%H')

    # Rows without a flow value are dropped so that each ensemble is drawn as a
    # continuous line rather than being broken at every missing time step.
    plot_data = forecast_data.dropna(subset=['flow']).copy()
    plot_data['date'] = forecast_start + pd.to_timedelta(plot_data['date'], unit='s')

    # Plot the data using Altair
    base = alt.Chart(plot_data).mark_line().encode(
        x='date:T',
        y='flow:Q',
        color='ensemble:N'
    ).properties(
        title=f'Forecast Data for RiverNumber {river_number} on {date}'
    )

    return_period_chart = alt.Chart(return_period_df).mark_rule(color='red').encode(
        y='flow:Q',
        size=alt.value(2),
        tooltip=['period:N', 'flow:Q']
    ).properties(
        title='Return Periods'
    )

    # Combine the forecast data chart with the return period chart.
    # The layers share one y scale (the default) so the return-period rules sit
    # at their true position relative to the forecast flows; an independent y
    # scale would put them on a separate axis and make the overlay misleading.
    combined_chart = alt.layer(base, return_period_chart)

    combined_chart.save('forecast_with_return_period.html')
    print("Chart saved as 'forecast_with_return_period.html'")

Forecast data for RiverNumber 110123714 on 2024040100:
    date    ensemble          flow
0      0  ensemble_1  1.857319e-09
1   3600  ensemble_1           NaN
2   7200  ensemble_1           NaN
3  10800  ensemble_1  8.586609e-10
4  14400  ensemble_1           NaN
Chart saved as 'forecast_with_return_period.html'
