In [None]:
import pandas as pd
import numpy as np
import json
import math
import urllib.request
import re
import gc
import dateutil.parser
import dateutil.rrule
import dateutil.tz
import datetime

In [None]:
# Used across most of the plots for people flows
tzUTC = dateutil.tz.gettz('UTC')
tzLocal = dateutil.tz.gettz('Europe/London')

# Midnight at the start of the current day in tzLocal. The calendar date is
# read from the clock in tzLocal (not from the machine's own timezone), so the
# value is the same regardless of where the notebook is executed.
dateToday = datetime.datetime.combine(
    datetime.datetime.now(tzLocal).date(),
    datetime.time.min,
    tzinfo=tzLocal
)

# Currently disabled settings, kept for reference.
#plottableTypes = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

#govChartStart = datetime.datetime.strptime('2020-03-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=tzLocal)
#dateBaselineEnd = datetime.datetime.strptime('2020-03-15T23:59:59Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=tzLocal)

# Traffic count interval; presumably in seconds (i.e. 15 minutes) - TODO confirm
trafficCountInterval = 900

In [None]:
# GeoJSON document whose 'features' list holds the SCOOT metadata records
scootDescriptionUrl = 'https://opendata.hullcc.gov.uk/dataset/30fd3969-556d-4eae-ae4c-f3f9d2cfa9e3/resource/4ff61436-cd61-4716-bdbc-79711f738a6c/download/scoot_data.geojson'

# Download and parse the document; the context manager closes the HTTP
# response once the body has been read instead of leaving it open.
with urllib.request.urlopen(scootDescriptionUrl) as response:
    scootMetadataJson = json.loads(response.read().decode('utf-8'))['features']

# One row per feature, nested keys flattened into dotted column names
# (e.g. 'properties.resource_id'), indexed by the 'properties.name' column.
# json_normalize already returns a DataFrame, so set the index on it directly
# rather than passing it through DataFrame.from_records (deprecated for
# DataFrame input).
scootMetadata = pd.json_normalize(scootMetadataJson).set_index('properties.name')
scootMetadata

In [None]:
# Build one wide frame of per-minute vehicle flows: one column per loop
# (named after the loop id), indexed by measurement time converted to tzLocal.
# Stays None if no loop returned any vehicle flow data.
dfPointTs = None

# Walk the resource-id column directly instead of re-filtering the whole
# metadata frame on every iteration.
for loopId, loopResourceId in scootMetadata['properties.resource_id'].items():
    print('Obtaining timeseries for loop "%s"...' % loopId)

    # Close the HTTP response as soon as the CSV has been parsed
    with urllib.request.urlopen('https://opendata.hullcc.gov.uk/datastore/dump/%s?format=csv' % loopResourceId) as response:
        pdLoopTs = pd.read_csv(response)

    # Skip loops with no usable data before doing any further work on them
    if (pdLoopTs['VehicleFlow'].sum() == 0.0):
        print('  No vehicle flow data available.')
        continue

    # Timestamps are UTC strings with fractional seconds and a literal 'Z';
    # parse the whole column at once, then convert to tzLocal.
    pdLoopTs['MeasurementTime'] = pd.to_datetime(
        pdLoopTs['MeasurementTime'],
        format='%Y-%m-%dT%H:%M:%S.%fZ',
        utc=True
    ).dt.tz_convert(tzLocal)
    pdLoopTs = pdLoopTs.set_index('MeasurementTime')

    # Change to per-minute vehicle flows
    pdLoopTs = (pdLoopTs['VehicleFlow'] / 5.0).to_frame().rename(columns={'VehicleFlow': loopId})

    if dfPointTs is None:
        dfPointTs = pdLoopTs
    else:
        # Outer join keeps every timestamp seen by any loop; loops without a
        # reading at a given time get NaN there.
        dfPointTs = dfPointTs.join(
            pdLoopTs,
            how='outer'
        )
        # Drop the reference to the per-loop frame so it can be reclaimed
        pdLoopTs = None

    gc.collect()

In [None]:
# Incoming data is actually 5 minutes, so align to that (median per 5-minute
# bin), then accept we have lots of gaps and make it data at the
# trafficCountInterval spacing (median per bin), and then linearly interpolate
# to fill gaps where possible.
# limit=2 fills at most two consecutive missing intervals (30 minutes at the
# 900 second interval).
# NOTE(review): the previous comment said "maximum of an hour distance", which
# would need limit=4 - confirm which is intended.
# Finally scale the per-minute flows up to vehicles per interval.
minutesPerInterval = trafficCountInterval / 60.0

dfPointInterpTs = dfPointTs \
    .resample('300s').median() \
    .resample('%ds' % trafficCountInterval).median() \
    .interpolate('linear', limit=2) \
    * minutesPerInterval

# 15 minute timeseries
#dfPointInterpTs.tail(50)

# Cache the result on disk; the ../cache directory must already exist
dfPointInterpTs.to_pickle('../cache/hull-recent-traffic-volumes-pd.pkl')