In [None]:
import datetime
import gc
import json
import math
import os
import re
import time
import urllib.request

import dateutil.parser
import dateutil.rrule
import dateutil.tz
import numpy as np
import pandas as pd

In [None]:
# Timezone handles: tzUTC is attached to every timestamp parsed in this
# notebook; tzLocal is defined alongside it for Europe/London.
tzUTC, tzLocal = (dateutil.tz.gettz(zoneName) for zoneName in ('UTC', 'Europe/London'))

# Lower bound on the days requested per camera: the download loop starts at
# whichever is later, the camera's own min_date or this instant.
earliestDataNaive = datetime.datetime.strptime('2020-04-20T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ')
earliestData = earliestDataNaive.replace(tzinfo=tzUTC)

In [None]:
# Fetch the JSON index served at visionApiBase; it is turned into the
# pdSources table (with min_date / max_date columns) further down.
visionApiBase = 'https://uo-vision.dev.urbanobservatory.ac.uk/stills/dict'

# The context manager closes the HTTP response (and its socket) as soon as the
# body has been read, instead of leaving it open until garbage collection.
with urllib.request.urlopen(visionApiBase) as visionHttpResponse:
    visionResponse = json.loads(visionHttpResponse.read().decode('utf-8'))

In [98]:
def _parseUtcDay(dayText):
    """Turn a 'YYYY-MM-DD' string into a midnight datetime tagged with tzUTC."""
    return datetime.datetime.strptime(dayText, '%Y-%m-%d').replace(tzinfo=tzUTC)


# Transposed so that the index holds the identifiers the download loop
# iterates over as cameraId, with the per-camera fields as columns.
pdSources = pd.DataFrame.from_records(visionResponse).transpose()

# Both date bounds arrive as plain day strings; make them timezone-aware.
for dateColumn in ('min_date', 'max_date'):
    pdSources[dateColumn] = pdSources[dateColumn].apply(_parseUtcDay)

# Not the last expression of the cell, so this line does not render anything.
pdSources

# Accumulators filled by the download loop:
#   pointTsByInterval: sampling interval in seconds -> wide DataFrame of counts
#   camerasByInterval: camera id -> sampling interval in seconds
pointTsByInterval = {}
camerasByInterval = {}

In [103]:
def _fetchDayCounts(cameraId, day):
    """Download the counts for one camera on one day, retrying until it works.

    Parameters:
        cameraId: location identifier placed in the `location` query parameter.
        day: object with `.isoformat()`; only its first ten characters
            (the YYYY-MM-DD part) are sent as the `date` query parameter.

    Returns:
        The decoded JSON body of the response.

    Every failed attempt prints an 'x' and sleeps for min(attempts, 10)
    seconds before trying again; there is deliberately no attempt limit.
    """
    countsUrl = 'https://uo-vision.dev.urbanobservatory.ac.uk/stills/counts?location=%s&date=%s' % (cameraId, day.isoformat()[0:10])
    attempts = 0
    while True:
        attempts = attempts + 1
        try:
            # Close the connection as soon as the body has been read.
            with urllib.request.urlopen(countsUrl) as countsHttpResponse:
                payload = json.loads(countsHttpResponse.read().decode('utf-8'))
        except Exception:
            # `Exception` rather than a bare except, so that interrupting the
            # kernel (KeyboardInterrupt) actually stops the download.
            payload = None

        # A JSON `null` body is handled like a failed request and retried.
        if payload is not None:
            return payload

        print('x', end='')
        time.sleep(min(attempts, 10))


# Download, clean and resample the counts of every camera listed in pdSources,
# then file the result in pointTsByInterval under the camera's sampling
# interval. Cameras that already have columns in pointTsByInterval are skipped,
# so the cell can be re-run to resume an interrupted download.
for cameraId in pdSources.index:
    cameraPrefix = '%s: ' % cameraId
    if any(
        loadedTs is not None and any(str(column).startswith(cameraPrefix) for column in loadedTs.columns)
        for loadedTs in pointTsByInterval.values()
    ):
        continue

    source = pdSources.loc[cameraId]

    print(cameraId)
    print('  [', end='')

    # Never request days before earliestData, even if the camera is older.
    firstDay = source['min_date'] if source['min_date'] > earliestData else earliestData

    # Collect one frame per day and concatenate once at the end: repeated
    # DataFrame.append is quadratic and was removed in pandas 2.0.
    dayFrames = []
    for day in dateutil.rrule.rrule(
            dateutil.rrule.DAILY,
            interval=1,
            dtstart=firstDay,
            until=source['max_date']
        ):
        sourceOnDay = pd.json_normalize(_fetchDayCounts(cameraId, day))
        if sourceOnDay.empty or 'ts' not in sourceOnDay.columns:
            continue

        sourceOnDay = sourceOnDay.set_index('ts')
        if len(sourceOnDay.columns) == 0:
            continue

        # Timestamps are parsed as 'YYYY-MM-DD HH:MM:SS' and tagged with tzUTC.
        sourceOnDay.index = pd.to_datetime(
            sourceOnDay.index, format='%Y-%m-%d %H:%M:%S'
        ).tz_localize(tzUTC)

        dayFrames.append(sourceOnDay)
        print('.', end='')

    if not dayFrames:
        # No day returned any rows; there is nothing to resample or store.
        print('] No data, skipped')
        continue

    sourceTs = (
        pd.concat(dayFrames)
        .drop(columns=['camera'], errors='ignore')
        .rename(errors='ignore', columns={
            'url': 'Source image',
            'counts.bus': 'Bus',
            'counts.car': 'Car',
            'counts.cyclist': 'Cyclist',
            'counts.motorcyclist': 'Motorcyclist',
            'counts.person': 'Person',
            'counts.truck': 'Truck',
            'counts.van': 'Van'
        })
    )

    # Keep the first row per timestamp, then sort so that the gap statistics
    # and the nearest-neighbour resampling below see a monotonic index.
    sourceTs = sourceTs.loc[~sourceTs.index.duplicated(keep='first')].sort_index()

    # Sampling interval = median gap between samples, rounded to whole minutes.
    # total_seconds() is used because `.seconds` discards whole days; a NaT
    # median (single sample) or a gap that rounds to zero cannot be resampled.
    medianGap = sourceTs.index.to_series().diff().median()
    sourceInterval = 0 if pd.isna(medianGap) else round(medianGap.total_seconds() / 60) * 60
    if sourceInterval <= 0:
        print('] Interval could not be determined, skipped')
        continue
    camerasByInterval[cameraId] = sourceInterval

    # Snap onto a regular grid once; count columns get remaining gaps filled
    # with 0, while the image URL column (when present) is left unfilled and
    # placed last.
    resampledTs = sourceTs.resample('%us' % sourceInterval).nearest()
    countColumns = [column for column in resampledTs.columns if column != 'Source image']
    sourceTs = resampledTs[countColumns].fillna(0)
    if 'Source image' in resampledTs.columns:
        sourceTs = sourceTs.join(resampledTs['Source image'])

    sourceTs = sourceTs.add_prefix(cameraPrefix)

    print('] Interval %u seconds' % sourceInterval)

    # NOTE(review): DataFrame.join defaults to a left join, so rows of this
    # camera that fall outside the time range of the first camera stored for
    # this interval are dropped - confirm whether how='outer' is wanted.
    existingTs = pointTsByInterval.get(sourceInterval)
    if existingTs is None:
        pointTsByInterval[sourceInterval] = sourceTs
    else:
        pointTsByInterval[sourceInterval] = existingTs.join(sourceTs)

GH_A167F1
  [........] Interval 60 seconds
GH_A167G1
  [........] Interval 120 seconds
GH_A167H1
  [........] Interval 300 seconds
GH_A167I1
  [........] Interval 120 seconds
GH_A167J1
  [.x..xxxx.....] Interval 300 seconds
GH_A167K1
  [........] Interval 300 seconds
GH_A167L1
  [........] Interval 300 seconds
GH_A167M1
  [........] Interval 300 seconds
GH_A167O1
  [........] Interval 300 seconds
GH_A167P1
  [........] Interval 300 seconds
GH_A184A1
  [........] Interval 120 seconds
GH_A184A2
  [........] Interval 300 seconds
GH_A184B1
  [........] Interval 120 seconds
GH_A184C1
  [........] Interval 300 seconds
GH_A184E1
  [..xx.xxxx.....] Interval 120 seconds
GH_A184F1
  [........] Interval 300 seconds
GH_A692A1
  [........] Interval 300 seconds
GH_A692B1
  [........] Interval 300 seconds
GH_A694A1
  [........] Interval 300 seconds
GH_A695A1
  [........] Interval 300 seconds
GH_A695C1
  [........] Interval 300 seconds
GH_A695D1
  [....x....] Interval 300 seconds
GH_B1288A1
  [xxxxx..

In [104]:
# Preview the last 20 rows of the table stored under the 300-second interval.
fiveMinuteCounts = pointTsByInterval[300]
fiveMinuteCounts.tail(20)

Unnamed: 0_level_0,CM_A69A1: Truck,CM_A69A1: Car,CM_A69A1: Bus,CM_A69A1: Van,CM_A69A1: Person,CM_A69A1: Cyclist,CM_A69A1: Source image,GH_A1114A1: Person,GH_A1114A1: Van,GH_A1114A1: Car,...,ST_B1344A1: Cyclist,ST_B1344A1: Motorcyclist,ST_B1344A1: Source image,ST_FOWA1: Car,ST_FOWA1: Bus,ST_FOWA1: Van,ST_FOWA1: Person,ST_FOWA1: Truck,ST_FOWA1: Cyclist,ST_FOWA1: Source image
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-04-27 09:15:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,0.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,1.0,0.0,1.0,1.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:20:00+00:00,0.0,9.0,0.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,0.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,0.0,2.0,1.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:25:00+00:00,0.0,2.0,0.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,1.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,2.0,0.0,1.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:30:00+00:00,0.0,9.0,0.0,1.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,1.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,0.0,3.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:35:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,2.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,1.0,0.0,1.0,2.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:40:00+00:00,1.0,5.0,0.0,0.0,1.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,1.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,0.0,1.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:45:00+00:00,0.0,0.0,0.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,1.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,1.0,3.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:50:00+00:00,0.0,7.0,0.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,1.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,0.0,4.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 09:55:00+00:00,2.0,0.0,0.0,0.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,4.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,1.0,2.0,1.0,1.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...
2020-04-27 10:00:00+00:00,0.0,8.0,0.0,1.0,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,1.0,...,0.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...,0.0,0.0,0.0,3.0,1.0,0.0,https://file.newcastle.urbanobservatory.ac.uk/...


In [107]:
# TODO: Join pointTsByInterval to points and output

# Create the destination folders up front so that a fresh checkout does not
# fail on the first write after the (slow) download has already finished.
for outputDirectory in ('../cache', '../output'):
    os.makedirs(outputDirectory, exist_ok=True)

# One pickle (cache) and one CSV (output) per sampling interval, with the
# interval in seconds embedded in the file name.
for interval, intervalTs in pointTsByInterval.items():
    intervalTs.to_pickle('../cache/recent-feature-counts-pd-%usec.pkl' % interval)
    intervalTs.to_csv('../output/recent-feature-counts-pd-%usec.csv' % interval)

In [None]:
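# Use for testing only: ad-hoc plots, deliberately left commented out.
# NOTE(review): dfPointTs is not defined in any cell shown here - presumably
# one of the pointTsByInterval frames; confirm before uncommenting.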

#ax = (dfPointTs[list(filter(lambda cn: 'Person' in cn, dfPointTs.columns))].resample('900s').mean() / 15).sum(axis=1).plot(figsize=(35, 6.5))
#ax = (dfPointTs[list(filter(lambda cn: 'Person' in cn, dfPointTs.columns))].resample('900s').mean() / 15).plot(figsize=(35, 6.5), stacked=True, legend=False)
#ax.set_ylabel('Pedestrians per minute')
#ax = (dfPointTs[list(filter(lambda cn: 'Car' in cn, dfPointTs.columns))].resample('900s').mean() / 15).plot(figsize=(35, 6.5), stacked=True, legend=False)
#ax.set_ylabel('Cars per minute')
#ax = (dfPointTs[list(filter(lambda cn: 'Bus' in cn, dfPointTs.columns))].resample('3600s').mean()).plot(figsize=(35, 6.5), stacked=True, legend=False)
#ax.set_ylabel('Buses per hour')