In [16]:
import pandas as pd
import numpy as np
import json
import math
import pickle
import urllib.request
import dateutil.parser
import dateutil.rrule
import datetime
import sys
import gc
import re

In [17]:
# Midnight (00:00:00) at the start of today's date, as a naive local datetime
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.datetime.min.time())

# Load in baseline data that's obtained month-by-month in the baseline data script.
# NOTE: unpickling can execute arbitrary code, so this must only ever be pointed at
# a cache file written by our own baseline script.
# The context manager closes the file handle as soon as the load has finished.
with open('cache/baseline-pedestrian-flow-api-json.pkl', 'rb') as baselineFile:
    peopleDataRaw = pickle.load(baselineFile)

In [18]:
# Scan every record of every variable of every sensor to find the earliest and
# latest observation timestamps (milliseconds since the epoch) in the baseline.
# The seeds below are what remains if there are no records at all.
latestMs = 0
earliestMs = sys.maxsize

for sensor in peopleDataRaw:
    for records in sensor['data'].values():
        for record in records:
            stamp = record['Timestamp']
            # max()/min() keep the current value on ties, like a strict comparison
            latestMs = max(latestMs, stamp)
            earliestMs = min(earliestMs, stamp)

# Convert from milliseconds to naive local-time datetimes
peopleDataRequestSince = datetime.datetime.fromtimestamp(latestMs / 1000)
peopleDataBaselineStart = datetime.datetime.fromtimestamp(earliestMs / 1000)

print('Using baseline profile data for each cross line from %s until %s' % (peopleDataBaselineStart, peopleDataRequestSince))

Using baseline profile data for each cross line from 2019-03-15 00:00:06 until 2020-03-13 23:55:00


In [19]:
# Add to the baseline data the most recent data
peopleRequestBase = 'https://newcastle.urbanobservatory.ac.uk/api/v1.1/sensors/data/json/'
peopleRequestVariables = [
    'Walking North East',
    'Walking North West',
    'Walking South East',
    'Walking North',
    'Walking South',
    'Walking East',
    'Walking West'
]

# Request window: from one second after the last baseline observation until
# midday tomorrow (today's midnight + 1.5 days).
peopleRequestIRI = ('%s?variable=%s&starttime=%s&endtime=%s') % (
    peopleRequestBase,
    ','.join(str(x).replace(' ', '%20') for x in peopleRequestVariables),
    (peopleDataRequestSince + pd.Timedelta(seconds=1)).strftime('%Y%m%d%H%M%S'),
    (dateToday + pd.Timedelta(days=1.5)).strftime('%Y%m%d%H%M%S')
)

print('Loading recent data...')
# Use the response as a context manager so the connection is always closed,
# even if reading or decoding fails.
with urllib.request.urlopen(peopleRequestIRI) as response:
    peopleDataWindow = json.loads(response.read().decode('utf-8'))['sensors']

for sensor in peopleDataWindow:
    # Find the matching baseline sensor once per sensor (not once per variable).
    # A default of None avoids an uncaught StopIteration when the API returns a
    # sensor that the baseline has never seen.
    targetSensor = next(
        (s for s in peopleDataRaw if s['Sensor Name'] == sensor['Sensor Name']),
        None
    )

    if targetSensor is None:
        print('  Skipping %s: not present in the baseline data' % sensor['Sensor Name'])
        continue

    for variable in sensor['data']:
        # Only extend variables the baseline already tracks
        if variable not in targetSensor['data']:
            continue

        targetVariable = targetSensor['data'][variable]
        recentRecords = sensor['data'][variable]

        print('  Found %u recent observations and %u baseline observations for %s on %s' % (
            len(recentRecords),
            len(targetVariable),
            variable,
            targetSensor['Sensor Name']['0']
        ))

        # Only append observations newer than the last one already stored, so
        # re-running this cell does not add the same records a second time.
        if targetVariable:
            lastStoredTimestamp = max(record['Timestamp'] for record in targetVariable)
            recentRecords = [
                record for record in recentRecords
                if record['Timestamp'] > lastStoredTimestamp
            ]

        targetVariable.extend(recentRecords)

Loading recent data...
  Found 1554 recent observations and 95525 baseline observations for Walking South on PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_0
  Found 1554 recent observations and 95525 baseline observations for Walking North on PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_0
  Found 429 recent observations and 32080 baseline observations for Walking East on PER_PEOPLE_BLACKETT-NORTHUMBERLAND-W
  Found 429 recent observations and 32080 baseline observations for Walking West on PER_PEOPLE_BLACKETT-NORTHUMBERLAND-W
  Found 1554 recent observations and 95524 baseline observations for Walking South on PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_1
  Found 1554 recent observations and 95524 baseline observations for Walking North on PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_1
  Found 1554 recent observations and 95791 baseline observations for Walking South on PER_PEOPLE_NORTHUMERLAND_LINE_MID_DISTANCE_HEAD_0
  Found 1554 recent observations and 95791 baseline

In [20]:
# Report how many sensor entries are held in the combined (baseline + recent) data
print('Obtained data from %u sensors.' % (len(peopleDataRaw),))

Obtained data from 15 sensors.


In [21]:
# Resampling interval, in seconds, applied to the pedestrian data for all
# subsequent processing (15 minutes)
peopleCountInterval = 15 * 60

In [22]:
# Map from the API's sensor identifiers to human-readable camera locations.
# Only sensors listed here are processed below; the commented-out entries are
# therefore skipped.
cameraFriendlyNames = {
    #'PER_PEOPLE_BLACKETT-NORTHUMBERLAND-W': 'Blackett St pavement (north side) outside Rox',
    #'PER_PEOPLE_BLACKETT-BOOTS': 'Blackett St outside Boots',
    'PER_PEOPLE_THE_CORE_LINE_0': 'Blue Star Square at Newcastle Helix (east side)',
    'PER_PEOPLE_THE_CORE_LINE_1': 'Blue Star Square at Newcastle Helix (west side)',
    'PER_PEOPLE_USB_LINE_0': 'Science Square at Newcastle Helix',
    'PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_0': 'Northumberland St near Fenwick (west side)',
    'PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_1': 'Northumberland St near Fenwick (east side)',
    'PER_PEOPLE_NORTHUMERLAND_LINE_MID_DISTANCE_HEAD_0': 'Northumberland St near TK Maxx',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_0': 'John Dobson St (west side) pavement near Goldsmiths',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_1': 'John Dobson St crossing island between Blackett St and New Bridge St West',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_2': 'John Dobson St (east side) pavement near The Stack',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_3': 'Pavement (south side) corner John Dobson St and Blackett St',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_4': 'Pavement (south side) corner John Dobson St and New Bridge St West',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_5': 'Blackett St crossing from John Dobson St to Northumberland St (west side)',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_6': 'New Bridge St West crossing John Dobson St to Northumberland St (east side)'
}

# Resampled count frames keyed by friendly camera name; one column per
# 'Walking ...' variable, indexed by timestamp.
peopleCountFrames = {}

for sensor in peopleDataRaw:
    dfSensor = None

    if sensor['Sensor Name']['0'] not in cameraFriendlyNames:
        continue

    cameraName = cameraFriendlyNames[sensor['Sensor Name']['0']]

    for variable in sensor['data'].keys():
        # Skip vehicle counts or bus data
        if 'Walking' not in variable:
            continue

        # A variable with no records would yield a frame without a datetime
        # index, which cannot be resampled; treat it as having no data.
        if not sensor['data'][variable]:
            continue

        # Ignore everything but the timestamp and the value
        dfPeopleTs = pd.DataFrame.from_records(sensor['data'][variable], columns=['Timestamp', 'Value'])

        # Timestamps are milliseconds since 1970 (epoch), so convert them to proper timestamps.
        # Cast explicitly to int64: plain `int` is 32-bit on some platforms (Windows with
        # NumPy < 2), which cannot hold millisecond epoch values.
        dfPeopleTs['Timestamp'] = (dfPeopleTs['Timestamp'].astype('int64') / 1000).apply(datetime.datetime.fromtimestamp)

        # Name the value column after the variable and index the frame by time
        dfPeopleTs = (
            dfPeopleTs
            .rename(columns={'Value': variable})
            .set_index('Timestamp', drop=True)
        )

        if dfSensor is None:
            dfSensor = dfPeopleTs
        else:
            # NOTE: join defaults to a left join, so timestamps that appear only
            # in this variable (and not in the first one) are not carried over.
            dfSensor = dfSensor.join(dfPeopleTs)

    if dfSensor is None:
        print('No data available from "%s" camera.' % cameraName)
        continue
    else:
        print('Data from "%s" camera has been resampled to %u second intervals.' % (cameraName, peopleCountInterval))

    # NOTE(review): np.sum over the raw values propagates NaN instead of skipping
    # it as pandas' own .sum() would - presumably intentional; confirm before simplifying.
    dfSensor = dfSensor.resample('%us' % peopleCountInterval).apply(lambda x: np.sum(x.values))
    peopleCountFrames[cameraName] = dfSensor

# Drop the reference to the raw API data so its memory can be reclaimed.
# (This means the cell cannot be re-run without reloading the earlier cells.)
peopleDataRaw = None
gc.collect() ;

Data from "Northumberland St near TK Maxx" camera has been resampled to 900 second intervals.
No data available from "Blue Star Square at Newcastle Helix (west side)" camera.
Data from "John Dobson St (east side) pavement near The Stack" camera has been resampled to 900 second intervals.
Data from "Science Square at Newcastle Helix" camera has been resampled to 900 second intervals.
Data from "New Bridge St West crossing John Dobson St to Northumberland St (east side)" camera has been resampled to 900 second intervals.
Data from "Northumberland St near Fenwick (west side)" camera has been resampled to 900 second intervals.
No data available from "Blue Star Square at Newcastle Helix (east side)" camera.
Data from "John Dobson St (west side) pavement near Goldsmiths" camera has been resampled to 900 second intervals.
Data from "Northumberland St near Fenwick (east side)" camera has been resampled to 900 second intervals.
Data from "John Dobson St crossing island between Blackett St and N

In [23]:
# If you need to preview the data from one of the cameras...
testCam = list(peopleCountFrames.keys())[0]
print(testCam)
peopleCountFrames[testCam]

Northumberland St near TK Maxx


Unnamed: 0_level_0,Walking North,Walking South
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-03-15 00:00:00,14.0,34.0
2019-03-15 00:15:00,50.0,64.0
2019-03-15 00:30:00,25.0,37.0
2019-03-15 00:45:00,44.0,26.0
2019-03-15 01:00:00,58.0,15.0
...,...,...
2020-03-19 08:15:00,168.0,123.0
2020-03-19 08:30:00,142.0,156.0
2020-03-19 08:45:00,191.0,153.0
2020-03-19 09:00:00,168.0,154.0


In [24]:
# Persist the resampled per-camera frames to the cache file.
# The context manager guarantees the file is flushed and closed once the dump completes.
with open('cache/recent-pedestrian-flows-pd.pkl', 'wb') as framesFile:
    pickle.dump(peopleCountFrames, framesFile)

# Drop the reference to the frames so their memory can be reclaimed
peopleCountFrames = None
gc.collect()

44