In [16]:
import pandas as pd
from datetime import date, datetime, timedelta
import untangle # xml
import requests # json
import re # regular expressions
from functools import reduce

In [17]:
# Configuration

# Take the current date once and derive `yesterday` from it, so the two values
# cannot disagree if the cell happens to run across midnight.
today = date.today()
yesterday = today - timedelta(days=1)

# Codes for the upper-tier local authorities (UTLAs) used in this notebook.
# Each code is written exactly once here; the groupings below are built from
# these names so a typo cannot make two lists disagree.
UTLA_BIRMINGHAM = 'E08000025'
UTLA_DUDLEY = 'E08000027'
UTLA_SANDWELL = 'E08000028'
UTLA_SHROPSHIRE = 'E06000051'
UTLA_STAFFORDSHIRE = 'E10000028'
UTLA_STOKE_ON_TRENT = 'E06000021'
UTLA_TELFORD_AND_WREKIN = 'E06000020'
UTLA_WALSALL = 'E08000030'
UTLA_WOLVERHAMPTON = 'E08000031'
UTLA_WORCESTERSHIRE = 'E10000034'

# Every UTLA that is extracted from the downloaded data files.
our_utla_codes = [
    UTLA_BIRMINGHAM,
    UTLA_DUDLEY,
    UTLA_SANDWELL,
    UTLA_SHROPSHIRE,
    UTLA_STAFFORDSHIRE,
    UTLA_STOKE_ON_TRENT,
    UTLA_TELFORD_AND_WREKIN,
    UTLA_WALSALL,
    UTLA_WOLVERHAMPTON,
    UTLA_WORCESTERSHIRE,
]

# Multi-authority groupings.
utlacodes_expressandstar = [
    UTLA_BIRMINGHAM,
    UTLA_DUDLEY,
    UTLA_SANDWELL,
    UTLA_STAFFORDSHIRE,
    UTLA_STOKE_ON_TRENT,
    UTLA_WALSALL,
    UTLA_WOLVERHAMPTON,
]
utlacodes_blackcountry = [
    UTLA_DUDLEY,
    UTLA_SANDWELL,
    UTLA_WALSALL,
    UTLA_WOLVERHAMPTON,
]
utlacodes_blackcountryandbirmingham = [
    UTLA_BIRMINGHAM,
    UTLA_DUDLEY,
    UTLA_SANDWELL,
    UTLA_WALSALL,
    UTLA_WOLVERHAMPTON,
]
utlacodes_sandwellandbirmingham = [UTLA_BIRMINGHAM, UTLA_SANDWELL]
utlacodes_staffordshireandstoke = [UTLA_STAFFORDSHIRE, UTLA_STOKE_ON_TRENT]

# Single-authority groupings (kept as lists so they can be used interchangeably
# with the multi-authority groupings above).
utlacodes_staffordshire = [UTLA_STAFFORDSHIRE]
utlacodes_wolverhampton = [UTLA_WOLVERHAMPTON]
utlacodes_walsall = [UTLA_WALSALL]
utlacodes_dudley = [UTLA_DUDLEY]
utlacodes_sandwell = [UTLA_SANDWELL]
utlacodes_birmingham = [UTLA_BIRMINGHAM]
utlacodes_stoke = [UTLA_STOKE_ON_TRENT]
utlacodes_worcestershire = [UTLA_WORCESTERSHIRE]

In [18]:
# Fetch the container listing (XML) from the public blob store, then walk the
# parsed document down to its Blob entries.
blob_listing = untangle.parse(
    'https://publicdashacc.blob.core.windows.net/publicdata?restype=container&comp=list'
)
xmlblobs = blob_listing.EnumerationResults.Blobs.Blob

In [23]:
# Extract the cdata of the blob names, keep only the names that match the
# data-file pattern, and map each to a dict holding the filename and the date
# parsed out of it.

# Raw string: `\.` must reach the regex engine as written. In a plain string
# literal it is an invalid escape sequence, which newer Python versions warn about.
# Group 1 captures the YYYYMMDD part of the name.
pattern = r'^data_([0-9]{4}(0[0-9]|1[0-2])([0-2][0-9]|3[0-1])).+\.json$'

# Search each name once and reuse the match object for both the filter and the
# date extraction (`match.string` is the filename that was searched).
datafiles = [
    {
        'date': datetime.strptime(match.group(1), '%Y%m%d'),
        'filename': match.string,
    }
    for match in (
        re.search(pattern, blob.Name.cdata)
        for blob in xmlblobs
    )
    if match
]

In [24]:
# Display the parsed list of data files (date + filename) for inspection.
datafiles

[{'date': datetime.datetime(2020, 4, 9, 0, 0),
  'filename': 'data_202004091537.json'},
 {'date': datetime.datetime(2020, 4, 10, 0, 0),
  'filename': 'data_202004101527.json'},
 {'date': datetime.datetime(2020, 4, 11, 0, 0),
  'filename': 'data_202004111452.json'},
 {'date': datetime.datetime(2020, 4, 12, 0, 0),
  'filename': 'data_202004121411.json'},
 {'date': datetime.datetime(2020, 4, 13, 0, 0),
  'filename': 'data_202004131413.json'},
 {'date': datetime.datetime(2020, 4, 14, 0, 0),
  'filename': 'data_202004141435.json'},
 {'date': datetime.datetime(2020, 4, 14, 0, 0),
  'filename': 'data_202004141544.json'},
 {'date': datetime.datetime(2020, 4, 15, 0, 0),
  'filename': 'data_202004151454.json'},
 {'date': datetime.datetime(2020, 4, 16, 0, 0),
  'filename': 'data_202004161444.json'},
 {'date': datetime.datetime(2020, 4, 17, 0, 0),
  'filename': 'data_202004171502.json'},
 {'date': datetime.datetime(2020, 4, 18, 0, 0),
  'filename': 'data_202004181457.json'},
 {'date': datetime.dat

In [29]:
def utla_day_to_dataframe(xmljsonref, timeout=30):
    """Download one data file and attach our UTLAs' daily cases to its entry.

    Parameters
    ----------
    xmljsonref : dict
        An entry carrying a ``'filename'`` key that names the JSON file to
        fetch from ``https://c19pub.azureedge.net/``.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to
        ``requests.get``). Defaults to 30.

    Returns
    -------
    dict
        The same ``xmljsonref`` object, mutated in place: a ``'dataframe'``
        key is added holding a DataFrame with the columns ``code``, ``name``,
        ``date of cases`` and ``cases``, one row per UTLA per day.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with an error status.
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    # build a url to retrieve
    url = 'https://c19pub.azureedge.net/' + xmljsonref['filename']

    # Fail loudly on a hung connection or an error response instead of
    # blocking forever or trying to parse an error page as JSON.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    payload = response.json()

    # One row per (UTLA, day) for the UTLAs listed in our_utla_codes.
    utla_data = [
        [
            code,                   # UTLA code
            data['name']['value'],  # UTLA name
            day['date'],            # date of cases
            day['value'],           # cases
        ]
        for code, data in payload['utlas'].items()
        if code in our_utla_codes
        for day in data['dailyConfirmedCases']
    ]

    xmljsonref['dataframe'] = pd.DataFrame(
        data=utla_data,
        columns=['code', 'name', 'date of cases', 'cases'],
    )

    return xmljsonref

In [30]:
def map_add_reported_day(x):
    """Stamp an entry's dataframe with the entry's own date.

    Writes ``x['date']`` into the ``'date reported'`` column of
    ``x['dataframe']`` (modifying it in place) and returns the same ``x``.
    """
    frame = x['dataframe']
    reported_on = x['date']
    frame['date reported'] = reported_on
    return x

In [31]:
# Column order of the combined frame.
historical_columns = ['code', 'name', 'date of cases', 'date reported', 'cases']

# For each data file: download it, stamp its frame with the reporting date,
# and keep just the frame.
daily_frames = [
    map_add_reported_day(utla_day_to_dataframe(datafile))['dataframe']
    for datafile in datafiles
]

# Concatenate once rather than appending frame by frame: DataFrame.append was
# removed in pandas 2.0, and repeated appends re-copy the accumulated rows
# every time. pd.concat rejects an empty list, so fall back to an empty frame
# with the expected columns when there are no data files.
our_utlas_historical = (
    pd.concat(daily_frames, ignore_index=True)[historical_columns]
    if daily_frames
    else pd.DataFrame(columns=historical_columns)
)

In [32]:
# Display the combined frame: one row per UTLA, per date of cases, per reporting date.
our_utlas_historical

Unnamed: 0,code,name,date of cases,date reported,cases
0,E08000025,Birmingham,2020-03-01,2020-04-09,1
1,E08000025,Birmingham,2020-03-02,2020-04-09,1
2,E08000025,Birmingham,2020-03-05,2020-04-09,1
3,E08000025,Birmingham,2020-03-08,2020-04-09,1
4,E08000025,Birmingham,2020-03-09,2020-04-09,1
...,...,...,...,...,...
5029,E10000034,Worcestershire,2020-04-15,2020-04-20,40
5030,E10000034,Worcestershire,2020-04-16,2020-04-20,35
5031,E10000034,Worcestershire,2020-04-17,2020-04-20,26
5032,E10000034,Worcestershire,2020-04-18,2020-04-20,2


---
# Old style