In [None]:
import datetime
import gc
import json
import math
import os
import pickle
import urllib.request

import dateutil.parser
import dateutil.rrule
import numpy as np
import pandas as pd

In [None]:
# Midnight at the start of the current day; used across all of the plots
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.time.min)

# Levels would have been relatively normal up until Friday 13 March, so the
# baseline is the 365 days that finish at the very end of that date
baselineEnd = datetime.datetime.strptime('2020-03-13T23:59:59Z', '%Y-%m-%dT%H:%M:%SZ')
baselineStart = baselineEnd - (pd.Timedelta(days=365) - pd.Timedelta(seconds=1))

print('Baseline data from %s to %s' % (baselineStart, baselineEnd))

# Width, in seconds, of the bins the occupancy readings are resampled into
resampleFrequency = 900

In [None]:
carParkRequestBase = 'https://api.newcastle.urbanobservatory.ac.uk/api/v2/sensors/entity'

# Fetch a list of all the car parks...
#   carParkTimeseriesIRIs: car park name -> href of its 'archives.friendly' link
#   carParkMetadata:       car park name -> address / postcode / district / capacity / latest
carParkTimeseriesIRIs = {}
carParkMetadata = {}
carParkRequestPage = 1
carParkResponse = None

# Page through the entity listing. Every fetched page is processed; the loop
# stops after the first page that holds one item or fewer.
while carParkResponse is None or len(carParkResponse) > 1:
    carParkRequestUrl = '%s?metric="Occupied%%20spaces"&page=%u' % (carParkRequestBase, carParkRequestPage)

    # The context manager closes the HTTP response once it has been read,
    # rather than leaving the connection open until garbage collection.
    with urllib.request.urlopen(carParkRequestUrl) as carParkRequest:
        carParkResponse = json.loads(carParkRequest.read().decode('utf-8'))['items']

    carParkRequestPage = carParkRequestPage + 1

    for carPark in carParkResponse:
        for feed in carPark['feed']:
            for timeseries in feed['timeseries']:
                for link in timeseries['links']:
                    # Only keep timeseries that have an archive link AND
                    # report a latest reading.
                    if link['rel'] == 'archives.friendly' and 'latest' in timeseries:
                        # The final comma-separated part of the address is
                        # used as the postcode; the text before its first
                        # space is used as the district.
                        carParkPostcode = carPark['meta']['address'].split(',')[-1].strip()

                        carParkTimeseriesIRIs[carPark['meta']['name']] = link['href']
                        carParkMetadata[carPark['meta']['name']] = {
                            'address': carPark['meta']['address'],
                            'postcode': carParkPostcode,
                            'district': carParkPostcode.split(' ')[0],
                            'capacity': feed['meta']['totalSpaces'],
                            'latest': timeseries['latest']['value']
                        }

print('Discovered %u car parks with occupancy data.' % len(carParkTimeseriesIRIs))

# One row per car park, one column per metadata field
carParkMetadata = pd.DataFrame.from_records(carParkMetadata).transpose()
carParkMetadata

In [None]:
# Length, in days, of each window requested from the archive
daysPerRequest = 37

# Accumulates one column of resampled occupancy per car park, indexed by time
dfCarParks = None

print('Requesting car park occupancy in %u day chunks...' % daysPerRequest)

for carParkName, carParkRequestTimeseries in carParkTimeseriesIRIs.items():
    carParkTimeseries = []

    print('  %s' % carParkName)
    print('  [', end='')

    # Walk the baseline period in consecutive windows of daysPerRequest days
    for windowStart in dateutil.rrule.rrule(
        dateutil.rrule.DAILY,
        interval=daysPerRequest,
        dtstart=baselineStart,
        until=baselineEnd
    ):
        windowEnd = windowStart + pd.Timedelta(days=daysPerRequest) - pd.Timedelta(seconds=1)

        # The final window is clipped so it never runs past the baseline
        if windowEnd > baselineEnd:
            windowEnd = baselineEnd

        windowUrl = '%s?startTime=%s&endTime=%s' % (
            carParkRequestTimeseries,
            windowStart.isoformat().replace('+00:00', 'Z'),
            windowEnd.isoformat().replace('+00:00', 'Z')
        )

        # The context manager closes the HTTP response once it has been read,
        # rather than leaving the connection open until garbage collection.
        with urllib.request.urlopen(windowUrl) as windowRequest:
            windowResponse = json.loads(windowRequest.read().decode('utf-8'))['historic']['values']

        carParkTimeseries.extend(windowResponse)
        print('.', end='')

    print(']')

    # A car park with no readings at all in the baseline period has no 'time'
    # column to index on, so skip it instead of failing part-way through.
    if not carParkTimeseries:
        print('  No readings returned for the baseline period; skipping.')
        continue

    # Duration isn't relevant to the car park data
    dfCP = pd.DataFrame.from_records(carParkTimeseries, exclude=['duration'])

    # Times in this API are in ISO8601; the trailing 'Z' is matched literally,
    # so the parsed datetimes carry no timezone information
    dfCP['time'] = dfCP['time'].apply(lambda t: datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S.%fZ"))

    dfCP = dfCP.rename(columns={'value': carParkName})
    dfCP = dfCP.set_index('time')

    # Median per fixed-width bin, then carry the last value forward across at
    # most 12 consecutive empty bins. .ffill() replaces the deprecated
    # fillna(method='ffill') spelling and produces the same result.
    dfCP = dfCP.resample('%us' % resampleFrequency).median().ffill(limit=12)

    if dfCarParks is None:
        dfCarParks = dfCP
    else:
        # Outer join keeps every timestamp seen for any car park
        dfCarParks = dfCarParks.join(dfCP, how='outer')

    gc.collect()

dfCarParks


In [None]:
# Persist the baseline occupancy table so it can be reloaded without
# repeating the download.
cachePath = 'cache/baseline-car-park-occupancy-pd.pkl'

# to_pickle does not create missing parent directories, so make sure the
# cache folder exists before writing.
os.makedirs(os.path.dirname(cachePath), exist_ok=True)
dfCarParks.to_pickle(cachePath)