In [2]:
from urllib.request import urlopen
from urllib.parse import urlencode
import json
import pandas as pd

def get_all_stations(river):
    api_url = 'https://environment.data.gov.uk/flood-monitoring/id/stations'
    result = urlopen(api_url + '?' + urlencode({'riverName': river})).read().decode('utf-8')
    data = json.loads(result)
    return pd.DataFrame(data['items'])

stations = get_all_stations('River Wear')
stations

Unnamed: 0,@id,RLOIid,catchmentName,dateOpened,easting,label,lat,long,measures,northing,notation,riverName,stageScale,stationReference,status,town,wiskiID,gridReference
0,http://environment.data.gov.uk/flood-monitorin...,8196,Wear,1958-01-01,398400,Stanhope,54.745941,-2.026369,[{'@id': 'http://environment.data.gov.uk/flood...,539000,24003,River Wear,http://environment.data.gov.uk/flood-monitorin...,24003,http://environment.data.gov.uk/flood-monitorin...,Stanhope,24003,
1,http://environment.data.gov.uk/flood-monitorin...,8288,Wear,2005-10-18,427592,Durham New Elvet Bridge,54.776573,-1.57257,[{'@id': 'http://environment.data.gov.uk/flood...,542492,240120,River Wear,http://environment.data.gov.uk/flood-monitorin...,240120,http://environment.data.gov.uk/flood-monitorin...,Durham,240120,NZ275424
2,http://environment.data.gov.uk/flood-monitorin...,8219,Wear,1972-01-01,417300,Witton Park,54.67286,-1.733251,[{'@id': 'http://environment.data.gov.uk/flood...,530900,24008,River Wear,http://environment.data.gov.uk/flood-monitorin...,24008,http://environment.data.gov.uk/flood-monitorin...,Witton Park,24008,
3,http://environment.data.gov.uk/flood-monitorin...,8046,Wear,1976-01-01,428300,Chester-Le-Street,54.854785,-1.560716,[{'@id': 'http://environment.data.gov.uk/flood...,551200,24009,River Wear,http://environment.data.gov.uk/flood-monitorin...,24009,http://environment.data.gov.uk/flood-monitorin...,Chester-Le-Street,24009,
4,http://environment.data.gov.uk/flood-monitorin...,8201,Wear,1957-01-01,426500,Sunderland Bridge,54.734468,-1.589972,[{'@id': 'http://environment.data.gov.uk/flood...,537800,24001,River Wear,http://environment.data.gov.uk/flood-monitorin...,24001,http://environment.data.gov.uk/flood-monitorin...,Sunderland Bridge,24001,


In [8]:
from time import sleep, time
from datetime import date, timedelta
from tqdm import tqdm

def fetch_day(station_reference, day):
  api_url = 'https://environment.data.gov.uk/flood-monitoring/id/stations/{}/readings'.format(
      station_reference)

  query_params = {
      'date': day.strftime('%Y-%m-%d'),
  }

  try:
    result = urlopen(api_url + '?' + urlencode(query_params)
                     ).read().decode('utf-8')
    df = pd.DataFrame(json.loads(result)['items'])[['dateTime', 'value']]
    df['dateTime'] = pd.to_datetime(df['dateTime'])
    return df
  except KeyboardInterrupt:
    raise KeyboardInterrupt
  except:
    return pd.DataFrame()
  
def water_level_incremental_dataset_build(current_df=pd.DataFrame(), max_days=30):
  current_stations = []
  if 'station' in current_df.columns:
    current_stations = list(current_df['station'].unique())
    
  if 'dateTime' in current_df.columns:
    current_df['dateTime'] = pd.to_datetime(current_df['dateTime'])
    
  stations_data = get_all_stations('River Wear')
  all_data = [current_df]
  
  to_fetch = []
  print('Building list of datapoints to fetch')
  for reference, station_name in stations_data[['stationReference', 'label']].values:
    fetched_days = []
    if station_name in current_stations:
      fetched_days = current_df[current_df['station']
                                == station_name]['dateTime'].dt.date.unique()

    for i in range(max_days):
      day = date.today() - timedelta(days=i)
      if day not in fetched_days:
        to_fetch.append((station_name, reference, day))
        
  if to_fetch:
    print('Fetching')
    
    try:
      for station_name, reference, day in tqdm(to_fetch, ncols=80):
        df = fetch_day(reference, day)
        sleep(0.1)
        df['station'] = station_name
        all_data.append(df)
    except KeyboardInterrupt:
      pass
    all_data = pd.concat(all_data)
    all_data.drop_duplicates(inplace=True)
    return all_data
  else:
    print('Up to date')
    return current_df
  
def weather_observations_icremental_dataset_build(current_df = pd.DataFrame()):
  # Only the last 24 hours of data are 
  API_KEY = '9fedcc7c-c5fc-40c3-8495-0fac8d6a8327'
  api_url = 'http://datapoint.metoffice.gov.uk/public/data/'
  
  url = 'http://datapoint.metoffice.gov.uk/public/data/val/wxfcs/all/json/sitelist?key=9fedcc7c-c5fc-40c3-8495-0fac8d6a8327'
  data = urlopen(url).read().decode('utf-8')
  data = json.loads(data)
  df = pd.DataFrame(data['Locations']['Location'])
  return df
  
df = weather_observations_icremental_dataset_build()
  

In [14]:
df[df['name'].str.find('Durham') != -1]

Unnamed: 0,elevation,id,latitude,longitude,name,region,unitaryAuthArea,obsSource,nationalPark
209,14.0,353867,54.8505,-1.5629,Durham I.C.G.,ne,Durham,,
771,56.0,350525,54.7606,-1.5698,Durham Botanic Garden,ne,Durham,,
1374,70.0,351290,54.7751,-1.5833,Durham,ne,Durham,,
3022,102.0,99049,54.767,-1.583,Durham,ne,Durham,,
5521,101.0,354234,54.7113,-1.6922,Willington (Durham),ne,Durham,,
5682,96.0,350569,54.7481,-1.6192,Brandon (Durham),ne,Durham,,


In [None]:
water_level_df = pd.DataFrame()
try:
  water_level_df = pd.read_csv('data/river_levels.csv')
except:
  print('No existing data found')
  
water_level_df = water_level_incremental_dataset_build(water_level_df)
water_level_df.to_csv('data/river_levels.csv', index=False)

Building list of datapoints to fetch
Up to date


In [None]:
weather_df = pd.DataFrame()

try:
  weather_df = pd.read_csv('data/weather.csv')
except:
  print('No existing data found')
  


Unnamed: 0,station,dateTime,value
0,Stanhope,2022-11-16 00:00:00+00:00,0.913
1,Stanhope,2022-11-16 00:15:00+00:00,0.903
2,Stanhope,2022-11-16 00:30:00+00:00,0.895
3,Stanhope,2022-11-16 00:45:00+00:00,0.89
4,Stanhope,2022-11-16 01:00:00+00:00,0.876
