In [5]:
import pandas as pd
import numpy as np
import json
import math
import urllib.request
import dateutil.parser
import dateutil.rrule
import datetime

In [6]:
# Shared reference values, used across all of the plots.

# Midnight (00:00:00) at the start of the current day, as a naive datetime.
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.time.min)

# Last second of the baseline period, as a naive datetime.
baselineEnd = datetime.datetime(2020, 3, 13, 23, 59, 59)

# Width of each resampling bucket in seconds (15 minutes).
resampleFrequency = 15 * 60

# Car park occupancy across Tyne and Wear

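The data cover only those car parks that have ANPR (automatic number plate recognition) cameras at both the entry and the exit, and are collected through the Tyne and Wear UTMC (Urban Traffic Management and Control) system. They do not include every car park in the region.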


In [7]:
print('Last updated %s' % (datetime.datetime.now().strftime('%d %B %Y %H:%M')))

# Endpoint used to list sensor entities; filtered below to the occupancy metric.
carParkRequestBase = 'https://api.newcastle.urbanobservatory.ac.uk/api/v2/sensors/entity'

# Fetch a list of all the car parks...
# Both dictionaries are keyed by the car park name:
#   carParkTimeseriesIRIs -> URL of the 'archives.friendly' link for the occupancy series
#   carParkMetadata       -> address / postcode / district / capacity / latest value
carParkTimeseriesIRIs = {}
carParkMetadata = {}
carParkRequestPage = 1
carParkResponse = None

# Keep requesting pages until one comes back with one item or fewer.
# NOTE(review): a page holding exactly one item ends the loop as well; presumably
# that can only be the final page -- confirm against the API's pagination behaviour.
while carParkResponse is None or len(carParkResponse) > 1:
    carParkRequestUrl = '%s?metric="Occupied%%20spaces"&page=%u' % (carParkRequestBase, carParkRequestPage)

    # The context manager closes the HTTP response even if reading or parsing fails.
    with urllib.request.urlopen(carParkRequestUrl) as carParkPage:
        carParkResponse = json.loads(carParkPage.read().decode('utf-8'))['items']

    carParkRequestPage = carParkRequestPage + 1

    for carPark in carParkResponse:
        for feed in carPark['feed']:
            for timeseries in feed['timeseries']:
                for link in timeseries['links']:
                    # Only keep series that expose a historic archive link and
                    # have already reported a latest value.
                    if link['rel'] == 'archives.friendly' and 'latest' in timeseries:
                        carParkMeta = carPark['meta']

                        # The postcode is taken to be the last comma-separated part of
                        # the address, and the district the first token of the postcode.
                        carParkPostcode = carParkMeta['address'].split(',')[-1].strip()

                        carParkTimeseriesIRIs[carParkMeta['name']] = link['href']
                        carParkMetadata[carParkMeta['name']] = {
                            'address': carParkMeta['address'],
                            'postcode': carParkPostcode,
                            'district': carParkPostcode.split(' ')[0],
                            'capacity': feed['meta']['totalSpaces'],
                            'latest': timeseries['latest']['value']
                        }

print('Discovered %u car parks with occupancy data.' % len(carParkTimeseriesIRIs))

# One row per car park, one column per metadata field.
carParkMetadata = pd.DataFrame.from_records(carParkMetadata).transpose()
carParkMetadata

Last updated 19 March 2020 09:57
Discovered 28 car parks with occupancy data.


Unnamed: 0,address,postcode,district,capacity,latest
BALTIC,"BALTIC, South Shore Road, Gateshead, NE8 3BA",NE8 3BA,NE8,71,1
Bank Foot Metro,"Bank Foot Metro Station, Station Road, Kenton ...",NE13 8AG,NE13,62,13
Callerton Parkway Metro,"Callerton Parkway Metro Station, Woolsington, ...",NE13 8DF,NE13,192,1
Charles Street,"Charles Street, Gateshead, NE8 1EW",NE8 1EW,NE8,27,4
Church Street,"Church Street, Gateshead, NE8 2AT",NE8 2AT,NE8,43,9
Claremont Road,"Claremont Road, Newcastle upon Tyne, NE2 4AA",NE2 4AA,NE2,225,31
Dean Street,"Dean Street, Newcastle upon Tyne, NE1 1PG",NE1 1PG,NE1,257,119
East Boldon Metro,"East Boldon Metro Station, Station Road, East ...",NE36 0AB,NE36,75,51
Eldon Garden,"Eldon Garden, Percy Street, Newcastle upon Tyn...",NE1 7RT,NE1,449,155
Eldon Square,"Eldon Square, Percy Street, Newcastle upon Tyn...",NE1 7RT,NE1,497,120


In [8]:
# Size, in days, of each chunk of history requested from the API.
daysPerRequest = 5

print('Requesting car park occupancy in %u day chunks...' % (daysPerRequest,))

# Baseline occupancy frame, read from the local pickle cache.
baselineCachePath = 'cache/baseline-car-park-occupancy-pd.pkl'
dfCarParksBaseline = pd.read_pickle(baselineCachePath)

Requesting car park occupancy in 5 day chunks...


In [9]:
# Download the occupancy history recorded after the baseline period for every
# discovered car park, resample it onto a regular grid, and join the results
# into one frame with a column per car park.
dfCarParks = None

# The request windows do not depend on the car park, so build them once.
requestEnd = dateToday + pd.Timedelta(hours=24)
requestWindows = []

for windowStart in dateutil.rrule.rrule(
    dateutil.rrule.DAILY,
    interval=daysPerRequest,
    dtstart=baselineEnd + pd.Timedelta(seconds=1),
    until=requestEnd
):
    windowEnd = windowStart + pd.Timedelta(days=daysPerRequest) - pd.Timedelta(seconds=1)

    # Never ask for data beyond the end of today
    if windowEnd > requestEnd:
        windowEnd = requestEnd

    # NOTE(review): the replace() only changes datetimes carrying a '+00:00'
    # offset; the values built here look timezone-naive, so no 'Z' suffix is
    # added -- confirm the API interprets such times as intended.
    requestWindows.append((
        windowStart.isoformat().replace('+00:00', 'Z'),
        windowEnd.isoformat().replace('+00:00', 'Z')
    ))

for carParkName, carParkRequestTimeseries in carParkTimeseriesIRIs.items():
    carParkTimeseries = []

    print('  %s' % carParkName)
    print('  [', end='')

    for windowStartText, windowEndText in requestWindows:
        windowUrl = '%s?startTime=%s&endTime=%s' % (carParkRequestTimeseries, windowStartText, windowEndText)

        # The context manager closes the HTTP response even if reading or parsing fails.
        with urllib.request.urlopen(windowUrl) as windowPage:
            windowResponse = json.loads(windowPage.read().decode('utf-8'))['historic']['values']

        carParkTimeseries.extend(windowResponse)
        print('.', end='')

    print(']')

    # Duration isn't relevant to the car park data
    dfCP = pd.DataFrame.from_records(carParkTimeseries, exclude=['duration'])

    # Times in this API are in ISO8601; the trailing 'Z' is matched literally,
    # so the parsed datetimes are timezone-naive.
    dfCP['time'] = dfCP['time'].apply(lambda t: datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S.%fZ"))

    # Name the value column after the car park and index the frame by time.
    dfCP = dfCP.rename(columns={'value': carParkName}).set_index('time')

    # Median per bucket, then carry the last value forward over gaps of at most
    # 12 consecutive buckets. ffill() replaces the deprecated fillna(method='ffill').
    dfCP = dfCP.resample('%us' % resampleFrequency).median().ffill(limit=12)

    if dfCarParks is None:
        dfCarParks = dfCP
    else:
        dfCarParks = dfCarParks.join(dfCP, how='outer')

  BALTIC
  [..]
  Bank Foot Metro
  [..]
  Callerton Parkway Metro
  [..]
  Charles Street
  [..]
  Church Street
  [..]
  Claremont Road
  [..]
  Dean Street
  [..]
  East Boldon Metro
  [..]
  Eldon Garden
  [..]
  Eldon Square
  [..]
  Ellison Place
  [..]
  Fellgate Metro
  [..]
  Four Lane Ends Interchange
  [..]
  Gateshead Civic Centre
  [..]
  Gateshead College
  [..]
  Grainger Town
  [..]
  Heworth Interchange (Long Stay)
  [..]
  Heworth Interchange (Short Stay)
  [..]
  Kingston Park Metro
  [..]
  Manors
  [..]
  Mill Road
  [..]
  Northumberland Park Metro
  [..]
  Old Town Hall
  [..]
  Quarryfield Road
  [..]
  Regent Centre Interchange
  [..]
  Sage Gateshead
  [..]
  Stadium of Light Metro
  [..]
  Swinburne Street
  [..]


In [10]:
# Stack the cached baseline on top of the freshly downloaded data.
dfCarParks = pd.concat([dfCarParksBaseline, dfCarParks])

# This cell reassigns dfCarParks from itself, so running it a second time would
# append the baseline again. Dropping repeated timestamps (keeping the last
# occurrence) makes the cell safe to re-run and leaves a first run with no
# overlapping timestamps unchanged.
dfCarParks = dfCarParks[~dfCarParks.index.duplicated(keep='last')].sort_index()
dfCarParks

Unnamed: 0_level_0,BALTIC,Bank Foot Metro,Callerton Parkway Metro,Charles Street,Church Street,Claremont Road,Dean Street,East Boldon Metro,Eldon Garden,Eldon Square,...,Kingston Park Metro,Manors,Mill Road,Northumberland Park Metro,Old Town Hall,Quarryfield Road,Regent Centre Interchange,Sage Gateshead,Stadium of Light Metro,Swinburne Street
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-03-15 00:00:00,0.0,0.0,0.0,0.0,0.0,,,21.0,,,...,0.0,,0.0,22.0,0.0,,2.0,0.0,2.0,0.0
2019-03-15 00:15:00,0.0,0.0,0.0,0.0,0.0,,,21.0,,,...,0.0,,0.0,22.0,0.0,,2.0,0.0,2.0,0.0
2019-03-15 00:30:00,0.0,0.0,0.0,0.0,0.0,,,21.0,,,...,0.5,,0.0,22.0,0.0,,2.0,0.0,2.0,0.0
2019-03-15 00:45:00,0.0,0.0,1.0,0.0,0.0,,,21.0,,,...,0.0,,0.0,22.0,0.0,,2.0,0.0,2.0,0.0
2019-03-15 01:00:00,0.0,0.0,1.0,0.0,0.0,,,21.0,,,...,0.0,,0.0,22.0,0.0,,2.0,0.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-19 08:45:00,9.0,13.5,5.0,1.5,7.0,24.0,78.0,48.0,102.0,58.0,...,6.0,129.0,103.5,48.0,7.0,1.0,89.0,52.5,36.0,5.0
2020-03-19 09:00:00,9.0,13.0,4.0,1.5,7.0,30.0,90.0,49.0,115.0,71.5,...,7.0,136.0,104.0,50.0,8.0,0.0,86.0,60.0,36.0,5.0
2020-03-19 09:15:00,9.0,13.0,4.0,0.0,7.0,30.0,100.0,50.5,125.0,89.0,...,6.0,143.0,104.0,52.0,10.0,0.5,85.0,62.5,38.0,5.0
2020-03-19 09:30:00,0.0,14.0,3.0,0.0,7.0,31.0,110.0,50.0,143.0,105.0,...,6.0,155.0,103.0,51.0,10.0,1.0,83.0,68.0,39.0,6.0


In [11]:
# Write the current dfCarParks frame to the local pickle cache.
# NOTE(review): presumably read back by other notebooks or later runs -- confirm.
dfCarParks.to_pickle('cache/recent-car-park-occupancy-pd.pkl')