In [None]:
import matplotlib
import pandas as pd
import numpy as np
import json
import math
import urllib.request
import dateutil.parser
import dateutil.rrule
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from textwrap import wrap

# Notebook-wide plot defaults: base font size and the timezone used on date axes
matplotlib.rcParams['font.size'] = 13
matplotlib.rcParams['timezone'] = 'Europe/London'

In [None]:
# Shared by every plot in the notebook

# Midnight at the start of the current local date, as a naive datetime
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.time.min)

# Plot colours
colourUp, colourDown = '#f64a8a', '#233067'

# Interval, in seconds, that each timeseries is resampled to
resampleFrequency = 900

# Car parks


In [None]:
carParkRequestBase = 'https://api.newcastle.urbanobservatory.ac.uk/api/v2/sensors/entity'

# Fetch a list of all the car parks...
# Car park name -> href of its 'archives.friendly' timeseries link
carParkTimeseriesIRIs = {}
# Car park name -> descriptive fields; turned into a DataFrame at the end of the cell
carParkMetadata = {}
carParkRequestPage = 1
carParkResponse = None

# Keep requesting pages until one comes back with fewer than two items
while carParkResponse is None or len(carParkResponse) > 1:
  carParkRequestURL = '%s?metric="Occupied%%20spaces"&page=%u' % (carParkRequestBase, carParkRequestPage)

  # The context manager closes the HTTP connection even if reading or parsing fails
  with urllib.request.urlopen(carParkRequestURL) as carParkRequest:
    carParkResponse = json.loads(carParkRequest.read().decode('utf-8'))['items']

  carParkRequestPage = carParkRequestPage + 1

  for carPark in carParkResponse:
    carParkName = carPark['meta']['name']
    carParkAddress = carPark['meta']['address']
    # The last comma-separated part of the address is used as the postcode,
    # and the first space-separated part of that as the district
    carParkPostcode = carParkAddress.split(',')[-1].strip()

    for feed in carPark['feed']:
      for timeseries in feed['timeseries']:
        # Only timeseries that report a latest value are of interest
        if 'latest' not in timeseries:
          continue

        for link in timeseries['links']:
          if link['rel'] == 'archives.friendly':
            carParkTimeseriesIRIs[carParkName] = link['href']
            carParkMetadata[carParkName] = {
                'address': carParkAddress,
                'postcode': carParkPostcode,
                'district': carParkPostcode.split(' ')[0],
                'capacity': feed['meta']['totalSpaces'],
                'latest': timeseries['latest']['value']
            }

print('Discovered %u car parks with occupancy data.' % len(carParkTimeseriesIRIs))

# One row per car park, one column per metadata field
carParkMetadata = pd.DataFrame.from_records(carParkMetadata).transpose()
carParkMetadata

In [None]:
# Total history to request, and the length of each request window, in days
requestDays = 211
daysPerRequest = 37
dfCarParks = None

print('Requesting car park occupancy in %u day chunks...' % daysPerRequest)

for carParkName, carParkRequestTimeseries in carParkTimeseriesIRIs.items():
  carParkTimeseries = []

  print('  %s' % carParkName)
  print('  [', end='')

  # Step through the requested period one window at a time
  for windowStart in dateutil.rrule.rrule(
      dateutil.rrule.DAILY,
      interval=daysPerRequest,
      dtstart=dateToday - pd.Timedelta(days=requestDays),
      until=dateToday + pd.Timedelta(hours=24)
    ):

    # End one second before the next window starts so that windows do not overlap
    windowEnd = windowStart + pd.Timedelta(days=daysPerRequest) - pd.Timedelta(seconds=1)

    windowURL = '%s?startTime=%s&endTime=%s' % (
      carParkRequestTimeseries,
      windowStart.isoformat().replace('+00:00', 'Z'),
      windowEnd.isoformat().replace('+00:00', 'Z')
    )

    # The context manager closes the HTTP connection even if reading or parsing fails
    with urllib.request.urlopen(windowURL) as windowRequest:
      windowResponse = json.loads(windowRequest.read().decode('utf-8'))['historic']['values']

    carParkTimeseries.extend(windowResponse)
    print('.', end='')

  print(']')

  # Without any observations there is no 'time' column to index on, so skip
  # this car park rather than aborting the download of the remaining ones
  if not carParkTimeseries:
    print('  No observations returned, skipping.')
    continue

  # Duration isn't relevant to the car park data
  dfCP = pd.DataFrame.from_records(carParkTimeseries, exclude=['duration'])

  # Times in this API are in ISO8601; the trailing 'Z' is matched literally,
  # so the parsed datetimes are naive
  dfCP['time'] = dfCP['time'].apply(lambda t: datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S.%fZ"))

  dfCP = dfCP.rename(columns={'value': carParkName}).set_index('time')

  # Median per resampling interval, then carry the last value forward over
  # gaps of at most 12 intervals. `.ffill()` replaces the deprecated
  # `fillna(method='ffill')`.
  dfCP = dfCP.resample('%us' % resampleFrequency).median().ffill(limit=12)

  if dfCarParks is None:
    dfCarParks = dfCP
  else:
    dfCarParks = dfCarParks.join(dfCP, how='outer')

dfCarParks


In [None]:
def plotCarParkTimeseries(type='continuous', historicCutOffDays=28):
  """
  Draw a column of subplots, one per car park, covering the most recent days.

  Reads the notebook-level `dfCarParks`, `carParkMetadata`, `dateToday`,
  `resampleFrequency`, `colourUp` and `colourDown`. Car parks with half or
  more of their samples missing in the period are left out; the rest are
  ordered by capacity, largest first. Weekdays and weekends are drawn in
  different colours.

  Parameters:
    type: 'continuous' fills the area under the resampled occupancy;
      'daily' draws one bar per day in vehicle-hours. The name shadows the
      builtin but is kept so existing keyword callers continue to work.
    historicCutOffDays: number of days before `dateToday` to include. Above
      75 days the bottom axis switches to automatically spaced concise labels.

  Raises:
    ValueError: if `type` is neither 'continuous' nor 'daily'.
  """
  if type not in ('continuous', 'daily'):
    raise ValueError("type must be 'continuous' or 'daily', not %r" % (type,))

  dfCarParksRecent = dfCarParks[dfCarParks.index >= dateToday - pd.Timedelta(days=historicCutOffDays)]

  # Fraction of missing samples per car park within the period
  dfCarParkMissingFreq = dfCarParksRecent.isna().mean()
  dfCarParkPlotList = sorted(
    dfCarParkMissingFreq[dfCarParkMissingFreq < 0.5].index,
    key=lambda carPark: carParkMetadata.loc[carPark, 'capacity'],
    reverse=True
  )

  # plt.subplots() cannot create a grid with zero rows
  if not dfCarParkPlotList:
    print('No car parks have enough data in the last %u days to plot.' % historicCutOffDays)
    return

  if type == 'daily':
    # Summing the samples and dividing by the number of samples per hour
    # gives vehicle-hours per day
    dfCarParksRecent = dfCarParksRecent.resample('D').sum() / (3600 / resampleFrequency)

  # dayofweek is 0 for Monday through 6 for Sunday; unlike strftime('%A')
  # it does not depend on the process locale
  isWeekend = dfCarParksRecent.index.dayofweek >= 5
  isWeekday = ~isWeekend

  # squeeze=False always returns a 2-D array, so a single car park still
  # yields an indexable set of axes
  fig, axs = plt.subplots(
    len(dfCarParkPlotList), 1,
    figsize=(18, 1.5 * len(dfCarParkPlotList)),
    squeeze=False
  )
  axs = axs[:, 0]

  for plotIndex, carPark in enumerate(dfCarParkPlotList):
    ax = axs[plotIndex]
    ax.set_title(
      '%s (%s with %u spaces)' % (
        carPark,
        carParkMetadata.loc[carPark, 'district'],
        carParkMetadata.loc[carPark, 'capacity']
      ),
      loc='left',
      fontdict={ 'horizontalalignment': 'left', 'fontsize': 12 }
    )
    ax.margins(x=0, y=0)

    if type == 'daily':
      dfWeekdays = dfCarParksRecent.loc[isWeekday, carPark]
      dfWeekends = dfCarParksRecent.loc[isWeekend, carPark]

      ax.bar(
        dfWeekdays.index,
        dfWeekdays,
        color=colourDown,
        label='Vehicle-hours on weekdays'
      )
      ax.bar(
        dfWeekends.index,
        dfWeekends,
        color=colourUp,
        label='Vehicle-hours on weekends'
      )
    else:
      ax.fill_between(
        dfCarParksRecent.index,
        dfCarParksRecent[carPark],
        color=colourDown,
        where=isWeekday,
        label='Occupied spaces on weekdays'
      )
      ax.fill_between(
        dfCarParksRecent.index,
        dfCarParksRecent[carPark],
        color=colourUp,
        where=isWeekend,
        label='Occupied spaces on weekend'
      )

    # Major tick every Monday, minor tick every day
    ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=1, byweekday=mdates.MO))
    ax.xaxis.set_tick_params(which='major', pad=15)
    ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))

    # Only the bottom subplot carries date labels
    if plotIndex == len(axs) - 1:
      ax.set_xlabel('Date')

      if historicCutOffDays > 75:
        timeLocatorMajor = mdates.AutoDateLocator(minticks=10, maxticks=30)
        conciseZeroFormats = ['', '%Y', '%b', '%d-%b', '%H:%M', '%H:%M']
        # The fourth entry used to be '%d-%b-%Y-%b', which repeated the month
        conciseOffsetFormats = ['', '%Y', '%b-%Y', '%d-%b-%Y', '%d-%b-%Y', '%d-%b-%Y %H:%M']
        ax.xaxis.set_tick_params(which='major', pad=0)
        ax.xaxis.set_major_locator(timeLocatorMajor)
        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator=timeLocatorMajor, zero_formats=conciseZeroFormats, offset_formats=conciseOffsetFormats))
      else:
        dataFormatMajor = mdates.DateFormatter('%a %d %b')
        ax.xaxis.set_major_formatter(dataFormatMajor)
        ax.xaxis.set_minor_formatter(mdates.DateFormatter('%d'))
    else:
      ax.xaxis.set_ticklabels([])

    # A single legend, placed above the top subplot
    if plotIndex == 0:
      ax.legend(
        loc='upper right',
        ncol=2,
        fontsize=11,
        frameon=False,
        bbox_to_anchor=(1.0, 1.35)
      )

  plt.tight_layout()
  fig.subplots_adjust(hspace=0.4)
  plt.show()

## Daily vehicle occupancy during the last six months

The charts below are expressed in vehicle-hours: each parked vehicle is weighted by the duration of its stay, so one vehicle parked for three hours contributes three vehicle-hours.

In [None]:
# Bar chart of vehicle-hours per day over the last 210 days
plotCarParkTimeseries(type='daily', historicCutOffDays=210)

## Recent timeseries

The plots below show the occupancy profile of each car park over the last six weeks, rather than being aggregated to a daily level.

In [None]:
# Filled-area plot of occupancy over the last 42 days
plotCarParkTimeseries(type='continuous', historicCutOffDays=42)