In [None]:
%pip install -q cloud-tpu-client https://storage.googleapis.com/tpu-pytorch/wheels/torch_xla-1.13-cp38-cp38m-linux_x86_64.whl
%pip install -q hydrology polars neuralforecast wandb

In [28]:
from hydrology import HydrologyApi, Measure
from datetime import datetime
import polars as pl

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load Dataset

In [29]:


api = HydrologyApi()

level_stations = api.get_stations(Measure.MeasureType.LEVEL, river="River Wear")
rainfall_stations = api.get_stations(
    Measure.MeasureType.RAINFALL, 
    position=(54.774, -1.558), radius=15
).filter(
    ~pl.col("station_name").is_in(
        # Stations with lots of missing data
        [
            "ESH Winning",
            "Stanley Hustledown",
            "Washington",
        ]
    )
)

measures = [
    Measure(station_id, Measure.MeasureType.LEVEL)
    for station_id in level_stations["station_id"]
] + [
    Measure(station_id, Measure.MeasureType.RAINFALL)
    for station_id in rainfall_stations["station_id"]
]

stations = pl.concat(
    [
        level_stations,
        rainfall_stations,
    ],
).unique()

df = api.get_measures(measures, stations, start_date=datetime(2007, 1, 1))

df.head()

measures = ['waterLevel']
params = ({'observedProperty': ['waterLevel'], 'riverName': 'River Wear', 'status.label': 'Active'},)
measures = ['rainfall']
params = ({'observedProperty': ['rainfall'], 'lat': 54.774, 'long': -1.558, 'dist': 15, 'status.label': 'Active'},)


timestamp,Durham New Elvet Bridge level-i-900-m,North Dalton rainfall-t-900-mm,Sunderland Bridge level-i-900-m,Chester Le Street level-i-900-m,Knitlsey Mill rainfall-t-900-mm,Witton Park level-i-900-m,Peterlee rainfall-t-900-mm,Evenwood Gate rainfall-t-900-mm,Fulwell rainfall-t-900-mm,Stanhope level-i-900-m,Tunstall rainfall-t-900-mm,Harpington Hill Farm rainfall-t-900-mm
datetime[μs],f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
2007-01-01 00:00:00,0.726,0.0,0.851,0.824,0.0,1.009,0.0,0.0,0.0,0.859,0.0,0.0
2007-01-01 00:15:00,0.73,0.0,0.863,0.821,0.0,0.997,0.0,0.0,0.0,0.855,0.0,0.0
2007-01-01 00:30:00,0.74,0.0,0.876,0.823,0.0,1.0,0.0,0.0,0.0,0.845,0.0,0.0
2007-01-01 00:45:00,0.744,0.0,0.886,0.819,0.0,1.001,0.0,0.0,0.0,0.826,0.0,0.0
2007-01-01 01:00:00,0.763,0.0,0.894,0.823,0.0,0.993,0.0,0.0,0.0,0.825,0.0,0.0


## Forecasting

In [30]:
train_df = (
    df.rename({"timestamp": "ds", "Durham New Elvet Bridge level-i-900-m": "y"})
    .with_columns(pl.lit("River Wear").alias("unique_id"))
    .to_pandas()
    .reset_index()
    .rename_axis(None, axis=1)
)

In [32]:
from neuralforecast.core import NeuralForecast
from neuralforecast.auto import NHITS
from neuralforecast.losses.pytorch import MQLoss
import wandb
from pytorch_lightning.loggers import WandbLogger

wandb.login()
wandb.init(project="river-level-forecasting")


def make_loss():
    return MQLoss(quantiles=[0.9])


models = [
    NHITS(
        h=4 * 24,  # 1 day
        input_size=1 * 4 * 24,  # 1 day
        hist_exog_list=train_df.columns.drop(["ds", "unique_id", "y"]).to_list(),
        scaler_type="minmax",
        loss=make_loss(),
        max_steps=1000,
        logger=WandbLogger(),
    )
]

nf = NeuralForecast(
    models=models,
    freq="15min",
)

nf.fit(df=train_df)



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777779226502, max=1.0…

Seed set to 1
d:\code\river-level-analysis\.venv\lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
d:\code\river-level-analysis\.venv\lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type          | Params
-----------------------------------------------
0 | loss         | MQLoss        | 1     
1 | padder_train | ConstantPad1d | 0     
2 | scaler       | TemporalNorm  | 0     
3 | blocks 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 6.15 GiB. GPU 