In [1]:
from datetime import timedelta

import polars as pl
from src.hydrology import FloodingApi, HydrologyApi, Parameter


In [2]:
# Hydrology API client, constructed with a 10-second cache_max_age.
hydro_api = HydrologyApi(cache_max_age=timedelta(seconds=10))

# Level stations on the River Wear, tagged with the parameter they report.
level_query = hydro_api.get_stations(Parameter.LEVEL, river='River Wear')
hydro_level_stations = level_query.collect().with_columns(
    pl.lit(Parameter.LEVEL).alias('parameter')
)

# Labels of rainfall stations that are excluded because they have lots of
# missing data.
sparse_rainfall_labels = [
    'ESH Winning',
    'Stanley Hustledown',
    'Washington',
    'Evenwood Gate',
]

# Rainfall stations requested by position and radius; the sparse stations are
# filtered out before the query is collected.
rainfall_query = hydro_api.get_stations(
    Parameter.RAINFALL, position=(54.774, -1.558), radius=15
)
hydro_rainfall_stations = (
    rainfall_query.filter(~pl.col('label').is_in(sparse_rainfall_labels))
    .collect()
    .with_columns(pl.lit(Parameter.RAINFALL).alias('parameter'))
)

hydro_rainfall_stations


label,notation,lat,long,RLOIid,parameter
str,str,f64,f64,str,str
"""North Dalton""","""9fcbf8c6-b643-4f58-a2be-8beff3…",54.823801,-1.365798,"""""","""rainfall"""
"""Peterlee""","""935b389b-7ab8-46e6-9758-f8eb38…",54.761165,-1.3407,"""""","""rainfall"""
"""Harpington Hill Farm""","""bf61ce31-b20e-4593-85dc-a08313…",54.633882,-1.480513,"""""","""rainfall"""
"""Knitlsey Mill""","""524a8fa0-d70b-4a0a-b178-ca765e…",54.829708,-1.807996,"""""","""rainfall"""
"""Fulwell""","""513abf6b-b269-4400-8828-7e833f…",54.939078,-1.393689,"""""","""rainfall"""
"""Tunstall""","""051f1b2a-6aca-4402-8956-5474ad…",54.761793,-1.902523,"""""","""rainfall"""


In [3]:
flooding_api = FloodingApi()

flooding_level_stations = (
    (await flooding_api.get_stations(Parameter.LEVEL, river='River Wear'))
    .with_columns(pl.lit(Parameter.LEVEL).alias('parameter'))
)

flooding_rainfall_stations = (
    (await flooding_api.get_stations(Parameter.RAINFALL, position=(54.774, -1.558), radius=20))
    .with_columns(pl.lit(Parameter.RAINFALL).alias('parameter'))
)

flooding_rainfall_stations

label,notation,RLOIid,lat,long,parameter
str,str,str,f64,f64,str
"""Rainfall station""","""023839""","""""",54.829815,-1.807716,"""rainfall"""
"""Rainfall station""","""018777""","""""",54.860973,-1.670477,"""rainfall"""
"""Rainfall station""","""023164""","""""",54.610418,-1.740626,"""rainfall"""
"""Rainfall station""","""025394""","""""",54.894495,-1.489343,"""rainfall"""
"""Rainfall station""","""032822""","""""",54.633845,-1.480219,"""rainfall"""
"""Rainfall station""","""021028""","""""",54.938987,-1.393566,"""rainfall"""
"""Rainfall station""","""026090""","""""",54.760776,-1.340287,"""rainfall"""
"""Rainfall station""","""025878""","""""",54.823818,-1.36572,"""rainfall"""


## Joining rainfall stations

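There is no shared identifier to join the rainfall stations on (their `RLOIid` is empty and the flooding API labels every one of them "Rainfall station"), so we match each hydrology API station to the closest flooding API station using the stations' latitude and longitude.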

In [4]:
def lat_long_distance(lat1, long1, lat2, long2, r_earth=6373.0):
    """Return the great-circle distance between two points (haversine formula).

    Args:
        lat1: Latitude of the first point, in degrees.
        long1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        long2: Longitude of the second point, in degrees.
        r_earth: Radius of the sphere the points lie on. The result is in the
            same unit; the default of 6373.0 gives kilometres.

    Returns:
        The distance along the sphere's surface, as a float.
    """
    from math import atan2, cos, radians, sin, sqrt

    lat1 = radians(lat1)
    long1 = radians(long1)
    lat2 = radians(lat2)
    long2 = radians(long2)

    dlat = lat2 - lat1
    dlong = long2 - long1

    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlong / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return r_earth * c


def get_closest(lat, long):
    """Find the flooding-API rainfall station nearest to a given position.

    Reads the module-level ``flooding_rainfall_stations`` frame and measures
    each station's distance from (lat, long) with ``lat_long_distance``.

    Args:
        lat: Latitude of the reference point, in degrees.
        long: Longitude of the reference point, in degrees.

    Returns:
        A dict with the nearest station's notation under
        ``'flooding_api_notation'`` and its distance under ``'distance_km'``.
    """
    distance_by_notation = {}
    candidates = flooding_rainfall_stations.select(['notation', 'lat', 'long'])
    for notation, station_lat, station_long in candidates.iter_rows():
        distance_by_notation[notation] = lat_long_distance(
            lat, long, station_lat, station_long
        )

    nearest_notation, nearest_distance = min(
        distance_by_notation.items(), key=lambda item: item[1]
    )
    return {
        'flooding_api_notation': nearest_notation,
        'distance_km': nearest_distance,
    }


# For every hydrology-API rainfall station (in row order), the notation of the
# nearest flooding-API station and the distance to it, as two columns.
closest_matches = (
    pl.Series(
        'closest_station',
        [
            get_closest(lat, long)
            for lat, long in hydro_rainfall_stations.select(['lat', 'long']).iter_rows()
        ],
    )
    .to_frame()
    .unnest(columns=['closest_station'])
)

# Position of stations in the flooding API is within a 100m grid for data
# protection reasons (not that that helps much), so a nearest neighbour only
# counts as the same station when it is at most 0.1 km away.
max_match_distance_km = 0.1

rainfall_stations = (
    hydro_rainfall_stations.with_columns(closest_matches)
    .filter(pl.col('distance_km') <= max_match_distance_km)
    .select(
        'label',
        pl.col('notation').alias('hydrology_api_notation'),
        'parameter',
        'flooding_api_notation',
    )
)

rainfall_stations

label,hydrology_api_notation,parameter,flooding_api_notation
str,str,str,str
"""North Dalton""","""9fcbf8c6-b643-4f58-a2be-8beff3…","""rainfall""","""025878"""
"""Peterlee""","""935b389b-7ab8-46e6-9758-f8eb38…","""rainfall""","""026090"""
"""Harpington Hill Farm""","""bf61ce31-b20e-4593-85dc-a08313…","""rainfall""","""032822"""
"""Knitlsey Mill""","""524a8fa0-d70b-4a0a-b178-ca765e…","""rainfall""","""023839"""
"""Fulwell""","""513abf6b-b269-4400-8828-7e833f…","""rainfall""","""021028"""


## Joining level stations

We can simply join level stations by RLOI ID.

In [5]:
# Pair up level stations from the two APIs on their shared RLOI ID; columns
# that exist in both frames get a '_flooding' suffix on the flooding-API side.
# NOTE(review): the rainfall frames show empty-string RLOIid values; if any
# level station had one on both sides they would match each other — confirm
# level stations always carry an RLOI ID.
matched_level_stations = hydro_level_stations.join(
    flooding_level_stations,
    on='RLOIid',
    how='inner',
    suffix='_flooding',
)

level_stations = matched_level_stations.select(
    'label',
    pl.col('notation').alias('hydrology_api_notation'),
    'parameter',
    pl.col('notation_flooding').alias('flooding_api_notation'),
)

level_stations

label,hydrology_api_notation,parameter,flooding_api_notation
str,str,str,str
"""Stanhope""","""b29c481a-5012-40f5-bb0c-f9370b…","""level""","""024003"""
"""Durham New Elvet Bridge""","""ba3f8598-e654-430d-9bb8-e1652e…","""level""","""0240120"""
"""Witton Park""","""05784319-693a-4d75-b29e-32f01a…","""level""","""024008"""
"""Chester Le Street""","""e7d8bbb6-5bba-4057-9f49-a29948…","""level""","""024009"""
"""Sunderland Bridge""","""ddedb4d9-b2be-47c1-998d-acbc0f…","""level""","""024001"""


## Combining
We can then combine both dataframes to get all of the stations.

In [6]:
# Stack the rainfall and level mappings (identical columns) into one frame,
# rainfall rows first.
station_frames = [rainfall_stations, level_stations]
stations = pl.concat(station_frames, how='vertical')

stations

label,hydrology_api_notation,parameter,flooding_api_notation
str,str,str,str
"""North Dalton""","""9fcbf8c6-b643-4f58-a2be-8beff3…","""rainfall""","""025878"""
"""Peterlee""","""935b389b-7ab8-46e6-9758-f8eb38…","""rainfall""","""026090"""
"""Harpington Hill Farm""","""bf61ce31-b20e-4593-85dc-a08313…","""rainfall""","""032822"""
"""Knitlsey Mill""","""524a8fa0-d70b-4a0a-b178-ca765e…","""rainfall""","""023839"""
"""Fulwell""","""513abf6b-b269-4400-8828-7e833f…","""rainfall""","""021028"""
"""Stanhope""","""b29c481a-5012-40f5-bb0c-f9370b…","""level""","""024003"""
"""Durham New Elvet Bridge""","""ba3f8598-e654-430d-9bb8-e1652e…","""level""","""0240120"""
"""Witton Park""","""05784319-693a-4d75-b29e-32f01a…","""level""","""024008"""
"""Chester Le Street""","""e7d8bbb6-5bba-4057-9f49-a29948…","""level""","""024009"""
"""Sunderland Bridge""","""ddedb4d9-b2be-47c1-998d-acbc0f…","""level""","""024001"""


## Save results

In [7]:
# Write the combined station mapping to 'stations.json'; the path is relative,
# so the file lands in the current working directory.
stations.write_json('stations.json')