In [1]:
# Imports
import os
import urllib.request
import urllib.error
import json
import pandas as pd
from datetime import datetime
from pandas import json_normalize
from shapely.geometry import Point, Polygon
from fastkml import kml
import xml.etree.ElementTree as ET


In [5]:
# -*- coding: utf-8 -*-
"""
NFRA API - Python examples of use

See https://nrfaapps.ceh.ac.uk/nrfa/nrfa-api.html
for a full description of the API and its capabilities.

This script is for Python 3
For Python 2, use urllib2 instead of urllib.request
i.e.
import urllib2
response = urllib2.urlopen(url).read()

"""
# The base URL to access the NFRA API
base_url = "https://nrfaapps.ceh.ac.uk/nrfa/ws"

# There are three web services available...
# -----------------------------------------------------------------------------
# 1. station-ids
# Returns a list of station identifiers
# -----------------------------------------------------------------------------
# Build request URL
# The response format must be specified
# Here we ask for a JSON object.
# See https://nrfaapps.ceh.ac.uk/nrfa/nrfa-api.html#parameter-format
# for all available formats
query = "format=json-object"
station_ids_url = "{BASE}/station-ids?{QUERY}".format(BASE=base_url,
                                                      QUERY=query)

# Send request and read response
response = urllib.request.urlopen(station_ids_url).read()

# Decode from JSON to Python dictionary
response = json.loads(response)

# See the list of station IDs
station_ids = response['station-ids']
print("1. List of station IDs (first 10):")
print(station_ids[:10])
print()

# Display the number of stations and a snippet of the data
print(f"Total number of stations: {len(station_ids)}")

# Save to a local file
with open('station_ids.json', 'w') as file:
    json.dump({'station-ids': station_ids}, file)

print("Station IDs saved to 'station_ids.json'\n")

1. List of station IDs (first 10):
[1001, 2001, 2002, 3001, 3002, 3003, 3004, 3005, 3006, 4001]

Total number of stations: 1600
Station IDs saved to 'station_ids.json'


### -----------------------------------------------------------------------------
# 2. station-info
### Return metadata for given station(s)
#### -----------------------------------------------------------------------------
#### Build request URL
#### The station ID(s) and response format must be specified.
#### Here we specify two station IDs and request JSON again.

In [8]:
query = "station=9001,13004&format=json-object"
stations_info_url = "{BASE}/station-info?{QUERY}".format(BASE=base_url,
                                                         QUERY=query)

# Send request and read response
response = urllib.request.urlopen(stations_info_url).read()

# Decode from JSON to Python dictionary
response = json.loads(response)

# See info from each station
stations_info = response['data']


# Display the number of stations and their metadata
print(f"Total number of stations in metadata request: {len(stations_info)}")
for station_info in stations_info:
    print(station_info)

# Save to a local file
with open('stations_metadata.json', 'w') as file:
    json.dump({'data': stations_info}, file)

print("Station metadata saved to 'stations_metadata.json'")
# Note, there is an optional query parameter called 'fields'. This allows us to
# specify what data is returned for the stations. Default is 'id' and 'name',
# but many options are available...
# See https://nrfaapps.ceh.ac.uk/nrfa/nrfa-api.html#parameter-fields

Total number of stations in metadata request: 2
{'id': 9001, 'name': 'Deveron at Avochie'}
{'id': 13004, 'name': 'Prosen Water at Prosen Bridge'}
Station metadata saved to 'stations_metadata.json'


In [11]:


# -----------------------------------------------------------------------------
# 3. time-series
# Return time series data for a single station
# -----------------------------------------------------------------------------
# Build request URL
# The station ID, data type and response format must be specified.
# Here we ask for gauged daily flows (gdf) in JSON format.
# See https://nrfaapps.ceh.ac.uk/nrfa/nrfa-api.html#parameter-data-type
# for all available data types, and
# https://nrfaapps.ceh.ac.uk/nrfa/nrfa-api.html#time-series-formats
# for all available time series response formats (note, although we use JSON
# again, the other formats available differ from those for previous requests)
query = "station=9001&data-type=gdf&format=json-object"
stations_info_url = "{BASE}/time-series?{QUERY}".format(BASE=base_url,
                                                        QUERY=query)

# Send request and read response
response = urllib.request.urlopen(stations_info_url).read()

# Decode from JSON to Python dictionary
response = json.loads(response)

# See data from response
print("3. Details of time series:")
print("Time of request: %s" % response["timestamp"])
print("Station: %s" % response['station']['name'])
print("Data type: %s" % response['data-type']['name'])
print("Data time series (first 10):")
print(response['data-stream'][:10])

3. Details of time series:
Time of request: 2024-01-17T17:09:17
Station: Deveron at Avochie
Data type: Gauged Daily Flow
Data time series (first 10):
['1959-10-01', 1.667, '1959-10-02', 1.667, '1959-10-03', 1.891, '1959-10-04', 1.512, '1959-10-05', 1.614]


In [ ]:
# Station Information
# -------------------
# 'id': 'Station Identifier'
# 'name': 'Station Name'
# 'catchmentArea': 'Catchment Area (in km²)'
# 'gridReference': 'Grid Reference'
#   'ngr': 'Grid Reference in String Form'
#   'easting': 'Grid Reference Easting (in metres)'
#   'northing': 'Grid Reference Northing (in metres)'
# 'latLong': 'Latitude and Longitude'
#   'string': 'Textual Representation of Latitude/Longitude'
#   'latitude': 'Latitude (in decimal degrees)'
#   'longitude': 'Longitude (in decimal degrees)'
# 'river': 'Name of the River'
# 'location': 'Name of the Location on the River'
# 'stationLevel': 'Altitude of the Station Above Ordnance Datum'
# 'stationInformation': 'Basic Station Information'
# 'catchmentInformation': 'Basic Catchment Information'
# 'gdfStatistics': 'Gauged Daily Flow Statistics'
# 'peakFlowStatistics': 'Basic Peak-Flow Statistics'
# 'elevation': 'Catchment Elevation Percentile Data'
# 'catchmentRainfall': 'Catchment Rainfall Standard Period Data'
# 'landCover': 'Land Cover Map Data'
#   'lcm2000': 'Land Cover Map Data (2000)'
#   'lcm2007': 'Land Cover Map Data (2007)'
# 'geology': 'Catchment Geology Data'
# 'fehDescriptors': 'FEH Catchment Descriptors'
# 'urbanExtent': 'Urban Extent Data'
# 'spatialLocation': 'Spatial Location Data'
# 'peakFlowMetadata': 'Metadata Related to Peak-Flow Data'
# 'dataSummary': 'Summary of Available Time-Series Data-Types'

# Time Series Data
# ----------------
# 'timestamp': 'Date and Time of Output Creation'
# 'interval': 'Date/Time Range and Period of Returned Data'
# 'station': 'Station Metadata'
#   'id': 'Station Identifier'
#   'name': 'Station Name'
# 'dataType': 'Data Type Information'
#   'id': 'Data-Type Identifier'
#

In [5]:
def download_all_station_data_csv(base_url, station_id, data_folder, data_type='gdf'):
    # Build the URL for the time-series data request for all dates
    time_series_url = f"{base_url}/time-series?station={station_id}&data-type={data_type}&format=json-object"

    # Create the data folder if it does not exist
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)

    try:
        # Fetch the data
        response = urllib.request.urlopen(time_series_url)
        data = json.loads(response.read())

        # Check if 'data-stream' in response and it contains data
        if 'data-stream' in data:
            # Determine if the 'data-stream' is a list of values or a list of [date, value] pairs
            first_entry = next(iter(data['data-stream']), None)
            if first_entry and isinstance(first_entry, list) and len(first_entry) == 2:
                # It's a list of [date, value] pairs
                df = pd.DataFrame(data['data-stream'], columns=['Date', 'Value'])
            else:
                # It's a single list of values; create a DataFrame with a single column
                df = pd.DataFrame(data['data-stream'], columns=['Value'])
                # If there's no 'Date' in the data, we might need to generate it or handle it differently

            # Add additional station and data type information if it's a list of [date, value] pairs
            if 'Date' in df.columns:
                df['Station ID'] = data['station']['id']
                df['Station Name'] = data['station']['name']
                df['Data Type ID'] = data['data-type']['id']
                df['Data Type Name'] = data['data-type']['name']

            # Save the DataFrame to a CSV file
            csv_file_path = os.path.join(data_folder, f"{station_id}_{data_type}_all_data.csv")
            df.to_csv(csv_file_path, index=False)
            print(f"Data for station {station_id} has been saved to {csv_file_path}")
        else:
            print(f"No data available for station {station_id}.")

    except urllib.error.HTTPError as e:
        print(f"HTTP Error for station {station_id}: {e.code}")
    except json.JSONDecodeError as e:
        print(f"JSON Decode Error for station {station_id}: {e.msg}")

# Usage example:
base_url = "https://nrfaapps.ceh.ac.uk/nrfa/ws"
station_id = "1001"  # Replace with the desired station ID
data_folder = "station_data_csv"  # Replace with your data folder path

# Call the function to download all available data for the specified station
download_all_station_data_csv(base_url, station_id, data_folder)

Data for station 1001 has been saved to station_data_csv\1001_gdf_all_data.csv


In [11]:
def fetch_station_ids(base_url):
    url = f"{base_url}/station-ids?format=json-object"
    response = urllib.request.urlopen(url)
    data = json.loads(response.read())
    station_ids = data.get('station-ids', [])
    print("Fetched Station IDs:", station_ids)  # Debugging line
    return station_ids


def download_station_metadata(base_url, data_folder, fields='id,name,lat-long'):
    """
    Downloads station metadata for all stations and saves it as JSON.

    :param base_url: The base URL of the NRFA API.
    :param data_folder: Folder to save the metadata JSON.
    :param fields: Fields to include in the metadata.
    """
    # Create the data folder if it does not exist
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)

    station_ids = fetch_station_ids(base_url)

    all_station_info = []
    for station_id in station_ids:
        metadata_url = f"{base_url}/station-info?station={station_id}&format=json-object&fields={fields}"

        try:
            # Fetch the metadata for each station
            response = urllib.request.urlopen(metadata_url)
            data = json.loads(response.read())
            all_station_info.extend(data.get('data', []))

        except urllib.error.HTTPError as e:
            error_message = e.read().decode()
            print(f"HTTP Error for station {station_id}: {e.code}")
            print(f"Error message: {error_message}")

    # Save the combined metadata to a JSON file
    json_file_path = os.path.join(data_folder, "all_stations_metadata.json")
    with open(json_file_path, 'w') as file:
        json.dump(all_station_info, file, indent=4)
        print(f"Station metadata has been saved to {json_file_path}")

# Usage example
base_url = "https://nrfaapps.ceh.ac.uk/nrfa/ws"
data_folder = "station_metadata"

download_station_metadata(base_url, data_folder)

Fetched Station IDs: [1001, 2001, 2002, 3001, 3002, 3003, 3004, 3005, 3006, 4001, 4003, 4004, 4005, 4006, 4007, 4008, 4009, 4010, 4011, 5001, 5002, 5003, 5004, 6001, 6003, 6006, 6007, 6008, 6009, 6011, 6012, 6013, 7001, 7002, 7003, 7004, 7005, 7006, 7007, 7008, 7009, 7010, 7011, 7012, 8001, 8002, 8003, 8004, 8005, 8006, 8007, 8008, 8009, 8010, 8011, 8013, 8015, 8016, 8017, 8018, 8021, 9001, 9002, 9003, 9004, 9005, 9006, 9007, 9008, 9009, 9010, 10001, 10002, 10003, 11001, 11002, 11003, 11004, 11005, 11006, 12001, 12002, 12003, 12004, 12005, 12006, 12007, 12008, 12009, 13001, 13002, 13003, 13004, 13005, 13007, 13008, 13009, 13010, 13012, 13017, 14001, 14002, 14005, 14006, 14007, 14009, 14010, 15001, 15002, 15003, 15004, 15005, 15006, 15007, 15008, 15010, 15011, 15012, 15013, 15014, 15015, 15016, 15017, 15018, 15021, 15023, 15024, 15025, 15027, 15028, 15029, 15030, 15032, 15034, 15035, 15038, 15039, 15041, 15046, 16001, 16002, 16003, 16004, 16007, 17001, 17002, 17003, 17004, 17005, 17008,

In [2]:

def load_stations_from_json(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
        flattened_data = []
        for station in data:
            flattened_station = {
                'id': station['id'],
                'name': station['name'],
                'latitude': station['lat-long']['latitude'],
                'longitude': station['lat-long']['longitude'],
            }       
            flattened_data.append(flattened_station)
        return flattened_data

In [4]:


def parse_kml_flood_data(kml_file_path):
    with open(kml_file_path, 'r') as file:
        doc = file.read()
    k = kml.KML()
    k.from_string(doc.encode('utf-8'))

    flood_events = []
    for feature in k.features():
        for placemark in feature.features():
            extended_data = placemark.extended_data
            if extended_data:
                schema_data = extended_data.schema_data[0]
                simple_data = {data.name: data.value for data in schema_data.simple_data}
                start_date = simple_data.get("start_date")
                end_date = simple_data.get("end_date")
                # Assuming the coordinates are in the right format to create a Polygon
                flood_polygon = Polygon([(float(coord.split(',')[0]), float(coord.split(',')[1]))
                                         for coord in placemark.geometry.exterior.coords])

                flood_event = {
                    'id': placemark.id,
                    'name': placemark.name,
                    'start_date': start_date,
                    'end_date': end_date,
                    'polygon': flood_polygon
                }
                flood_events.append(flood_event)
    return flood_events



def match_station_with_floods(stations, flood_events):
    station_flood_mapping = []
    
    for station in stations:
        station_point = Point(station['longitude'], station['latitude'])
        for flood_event in flood_events:
            if station_point.within(flood_event['polygon']):
                station_flood_mapping.append({
                    'station_id': station['id'],
                    'station_name': station['name'],
                    'flood_id': flood_event['id'],
                    'flood_name': flood_event['name'],
                    'start_date': flood_event['start_date'],
                    'end_date': flood_event['end_date'],
                })
    return station_flood_mapping


def basic_parse_kml(kml_file_path):
    with open(kml_file_path, 'r') as file:
        doc = file.read()
    k = kml.KML()
    k.from_string(doc.encode('utf-8'))

    for feature in k.features():
        print("Feature Type:", type(feature), "Feature Name:", feature.name)
        for placemark in feature.features():
            print("Placemark Type:", type(placemark), "Placemark ID:", placemark.id, "Placemark Name:", placemark.name)
            if placemark.extended_data:
                for schema_data in placemark.extended_data.schema_data:
                    for simple_data in schema_data.simple_data:
                        print("Extended Data:", simple_data.name, simple_data.value)
            if hasattr(placemark, 'geometry'):
                print("Geometry Type:", type(placemark.geometry))

# flood_events = basic_parse_kml('Recorded_Flood_Outlines.kml')



stations = load_stations_from_json('station_metadata/all_stations_metadata.json')

# flood_events = parse_kml_flood_data('Recorded_Flood_Outlines.kml')

print(stations[:10])


[{'id': 1001, 'name': 'Wick at Tarroul', 'latitude': 58.4762, 'longitude': -3.26706}, {'id': 2001, 'name': 'Helmsdale at Kilphedir', 'latitude': 58.141, 'longitude': -3.70295}, {'id': 2002, 'name': 'Brora at Bruachrobie', 'latitude': 58.01056, 'longitude': -3.87757}, {'id': 3001, 'name': 'Shin at Lairg', 'latitude': 58.02222, 'longitude': -4.40405}, {'id': 3002, 'name': 'Carron at Sgodachail', 'latitude': 57.89321, 'longitude': -4.54668}, {'id': 3003, 'name': 'Oykel at Easter Turnaig', 'latitude': 57.96185, 'longitude': -4.70086}, {'id': 3004, 'name': 'Cassley at Rosehall', 'latitude': 57.9821, 'longitude': -4.58615}, {'id': 3005, 'name': 'Shin at Inveran', 'latitude': 57.94319, 'longitude': -4.41038}, {'id': 3006, 'name': 'Loch Ailsh at Loch Ailsh', 'latitude': 58.04828, 'longitude': -4.85531}, {'id': 4001, 'name': 'Conon at Moy Bridge', 'latitude': 57.55658, 'longitude': -4.53997}]
