In [None]:
import pandas as pd

pd.set_option('display.max_columns', None)

In [None]:
from typing import Iterable, Dict, Union, List
from json import dumps
from requests import get
from http import HTTPStatus


StructureType = Dict[str, Union[dict, str]]
FiltersType = Iterable[str]
APIResponseType = Union[List[StructureType], str]


def get_paginated_dataset(filters: FiltersType, structure: StructureType,
                          as_csv: bool = False) -> APIResponseType:
    """
    Extracts paginated data by requesting all of the pages
    and combining the results.

    Parameters
    ----------
    filters: Iterable[str]
        API filters; they are joined with ``;`` into the ``filters``
        query parameter. A single filter may also be given as a plain
        string. See the API documentations for additional information.

    structure: Dict[str, Union[dict, str]]
        Structure parameter; it is sent as compact JSON. See the API
        documentations for additional information.

    as_csv: bool
        Return the data as CSV. [default: ``False``]

    Returns
    -------
    Union[List[StructureType], str]
        With ``as_csv=False``: a list holding the ``data`` items of every
        page, in page order. With ``as_csv=True``: one string in which the
        pages are joined by newlines and only the first page keeps its
        header line.

    Raises
    ------
    RuntimeError
        If a page request answers with an HTTP status of 400 or above.
    """
    # Imported locally under a private alias instead of using the
    # notebook-level name ``get``: a later cell defines its own ``get``
    # (a streaming download helper that returns bytes), which would
    # otherwise replace the function called below.
    from requests import get as http_get

    endpoint = "https://api.coronavirus.data.gov.uk/v1/data"

    # A bare string is an iterable of characters; joining it directly
    # would send "a;r;e;a;..." instead of the single intended filter.
    if isinstance(filters, str):
        filters = [filters]

    api_params = {
        "filters": str.join(";", filters),
        "structure": dumps(structure, separators=(",", ":")),
        "format": "json" if not as_csv else "csv"
    }

    data = list()

    page_number = 1

    while True:
        # Adding page number to query params
        api_params["page"] = page_number

        response = http_get(endpoint, params=api_params, timeout=10)

        if response.status_code >= HTTPStatus.BAD_REQUEST:
            raise RuntimeError(f'Request failed: {response.text}')
        elif response.status_code == HTTPStatus.NO_CONTENT:
            break

        if as_csv:
            csv_content = response.content.decode()

            # Removing CSV header (column names) where page
            # number is greater than 1.
            if page_number > 1:
                data_lines = csv_content.split("\n")[1:]
                csv_content = str.join("\n", data_lines)

            # Skip pages that carry no rows so the joined result does
            # not contain blank lines.
            csv_content = csv_content.strip()
            if csv_content:
                data.append(csv_content)

            # CSV responses carry no pagination block; the loop ends on
            # the NO_CONTENT status handled above.
            page_number += 1
            continue

        current_data = response.json()
        page_data: List[StructureType] = current_data['data']

        data.extend(page_data)

        # The "next" attribute in "pagination" will be `None`
        # when we reach the end.
        if current_data["pagination"]["next"] is None:
            break

        page_number += 1

    if not as_csv:
        return data

    # Concatenating CSV pages
    return str.join("\n", data)


def main():
    """Demo run: fetch the region case series as JSON and as CSV, printing a short summary of each."""
    region_filters = ["areaType=region"]

    # Output field name -> API metric name.
    case_structure = {
        "date": "date",
        "name": "areaName",
        "code": "areaCode",
        "daily": "newCasesBySpecimenDate",
        "cumulative": "cumCasesBySpecimenDate"
    }

    records = get_paginated_dataset(region_filters, case_structure)
    print("JSON:")
    print("Length:", len(records))
    print("Data (first 3 items):", records[:3])

    print("-" * 30)

    csv_text = get_paginated_dataset(region_filters, case_structure, as_csv=True)
    rows = csv_text.split("\n")
    print("CSV:")
    print("Length:", len(rows))
    print("Data (first 3 lines):", rows[:3])

In [None]:
from enum import Enum

class AreaTypeEnum(Enum):
    """
    Area types; each member's value is the string used in an
    ``areaType=...`` filter.

    overview
        Overview data for the United Kingdom
    nation
        Nation data (England, Northern Ireland, Scotland, and Wales)
    region
        Region data
    nhsRegion
        NHS Region data
    utla
        Upper-tier local authority data
    ltla
        Lower-tier local authority data
    """
    OVERVIEW = "overview"
    NATION = "nation"
    REGION = "region"
    NHS_REGION = "nhsRegion"
    UPPER_TIER_LOCAL_AUTHORITY = "utla"
    LOWER_TIER_LOCAL_AUTHORITY = "ltla"

    def __str__(self) -> str:
        # str(member) gives the raw value rather than "AreaTypeEnum.NAME".
        return self.value

In [None]:
import requests
import json
import logging
# import geopandas as gpd

from pathlib import Path


class AuthoritiesBoundaryData:
    """Boundary GeoJSON, read from a local file or downloaded when that file cannot be opened.

    Data taken from https://data.gov.uk/dataset/d1647852-4b75-4ab2-8219-860bfef6ac9d/regions-december-2016-full-clipped-boundaries-in-england

    Published by:
        Office for National Statistics
    Last updated:
        12 June 2017
    Topic:
        Mapping
    Licence:
        Open Government Licence
    Summary:
        This file contains the digital vector boundaries for NHS Region (Geography) (NHSRG) in England as at April 2016.

    NOTE(review): the summary speaks of NHS Regions while the dataset link
    above names "regions-december-2016" - confirm which dataset
    ``source_url`` actually serves.

    Class attributes
    ----------------
    filepath: path of the local GeoJSON cache file.
    source_url: URL the GeoJSON is downloaded from when the cache is missing.
    data: the parsed GeoJSON document.
    """

    filepath = 'uk_authorities.geojson'
    source_url = "http://geoportal1-ons.opendata.arcgis.com/datasets/687f346f5023410ba86615655ff33ca9_1.geojson"
    try:
        # Context manager so the cache file handle is closed after parsing.
        with open(filepath) as f:
            data = json.load(f)
    except IOError:
        logging.error('No region data found - downloading...')
        response = requests.get(source_url, timeout=60)
        # Fail here rather than caching an HTTP error page as GeoJSON.
        response.raise_for_status()
        # Bind ``data`` on the download path as well; previously the
        # attribute only existed when the cache file was already present.
        data = json.loads(response.text)
        with open(filepath, 'w') as f:
            json.dump(data, f)
            logging.info(f'Wrote data to {Path(filepath).absolute()}')


In [None]:
from tqdm import tqdm
from io import BytesIO
import requests

# Streaming, so we can iterate over the response.
def get(url, *args, **kwargs):
    """Download ``url`` in streamed chunks, showing a progress bar, and return the body as bytes.

    Defining this function rebinds the notebook-level name ``get``
    (an earlier cell imports ``get`` from ``requests``).

    Parameters
    ----------
    url:
        Address to download.
    *args, **kwargs:
        Forwarded to ``requests.get``. A ``stream`` keyword is ignored
        because the request is always streamed.

    Returns
    -------
    bytes
        The complete response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IOError
        If a ``content-length`` was announced for a body that is not
        content-encoded and a different number of bytes was received.
    """
    # Streaming is forced below; drop any caller-supplied value so the
    # keyword is not passed twice.
    kwargs.pop('stream', None)
    block_size = 1024  # 1 Kibibyte
    data = BytesIO()
    # Context manager releases the connection even if the download fails.
    with requests.get(url, *args, stream=True, **kwargs) as response:
        # Do not silently return an error page as if it were the data.
        response.raise_for_status()
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        # With a content encoding (e.g. gzip) ``content-length`` counts the
        # encoded bytes while ``iter_content`` yields decoded bytes, so the
        # two numbers are not comparable.
        is_content_encoded = bool(response.headers.get('content-encoding'))
        with tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True) as progress_bar:
            for chunk in response.iter_content(block_size):
                progress_bar.update(len(chunk))
                data.write(chunk)
    size_is_checkable = total_size_in_bytes != 0 and not is_content_encoded
    if size_is_checkable and progress_bar.n != total_size_in_bytes:
        raise IOError("Something went wrong")
    return data.getvalue()

In [None]:
nhs_postcodes = get("https://www.arcgis.com/sharing/rest/content/items/b6e6715fa1984648b5e690b6a8519e53/data")

In [None]:
%time nhs_postcodes.decode('utf-8', errors='ignore').split('\n')[:2]

In [None]:
r = requests.get("https://www.arcgis.com/sharing/rest/content/items/b6e6715fa1984648b5e690b6a8519e53/data")

# ``Response.text`` is a property holding the decoded body, not a method;
# calling it raised "TypeError: 'str' object is not callable".
nhs_postcodes_csv = r.text

In [None]:
# geopandas is only imported further down the notebook (the import next to
# AuthoritiesBoundaryData is commented out), so import it here to keep this
# cell runnable on a fresh kernel.
import geopandas as gpd

gpd.GeoDataFrame.from_features(AuthoritiesBoundaryData.data).geometry.plot()

In [None]:
import requests
import json
import logging
import geopandas as gpd

from pathlib import Path


class BoundaryDataWrapper:
    """Boundary GeoJSON, read from ``counties.geojson`` or downloaded when that file cannot be opened.

    Class attributes
    ----------------
    filepath: path of the local GeoJSON cache file.
    data_url: URL the GeoJSON is downloaded from when the cache is missing.
    data: the parsed GeoJSON document.
    """

    filepath = 'counties.geojson'
    data_url = "http://geoportal1-ons.opendata.arcgis.com/datasets/f99b145881724e15a04a8a113544dfc5_0.geojson?outSR={%22latestWkid%22:27700,%22wkid%22:27700}"
    try:
        # Context manager so the cache file handle is closed after parsing.
        with open(filepath) as f:
            data = json.load(f)
    except IOError:
        logging.error('No regional data found - downloading...')
        response = requests.get(data_url, timeout=60)
        # Fail here rather than caching an HTTP error page as GeoJSON.
        response.raise_for_status()
        # Bind ``data`` on the download path as well; previously the
        # attribute only existed when the cache file was already present.
        data = json.loads(response.text)
        with open(filepath, 'w') as f:
            json.dump(data, f)
            logging.info(f'Wrote data to {Path(filepath).absolute()}')

In [None]:
# The class defined in the previous cell is named BoundaryDataWrapper;
# ``UKCountiesBoundaryDataWrapper`` does not exist in this notebook.
gpd.GeoDataFrame.from_features(BoundaryDataWrapper.data).geometry.plot()

In [None]:
import geopandas as gpd

In [None]:
import pandas as pd
import geopandas as gpd

In [None]:
# Build a GeoDataFrame from the boundary features and display it.
# NOTE(review): ``NHSBoundaryDataWrapper`` is not defined anywhere in the
# visible notebook (the boundary classes shown are AuthoritiesBoundaryData
# and BoundaryDataWrapper), so this cell raises NameError on a fresh
# kernel - confirm which class was intended.
gdf = gpd.GeoDataFrame.from_features(NHSBoundaryDataWrapper.data)

gdf

In [None]:
% matplotlib inline

In [None]:
# ``df`` is not assigned until later cells (and is then a plain DataFrame
# of API records); the GeoDataFrame built above is ``gdf``.
gdf.geometry.plot()

In [None]:
import COVID19Py

In [None]:
COVID19Py.COVID19

In [None]:
# Output field name -> API metric name, for every metric requested.
# The publish-date rate entry was previously keyed "cumCasesBySpecimenDateRate",
# duplicating the specimen-date entry below it, so the publish-date rate
# was never requested.
query_structure = {
    "areaType": "areaType",  # Area type as string
    "areaName": "areaName",  # Area name as string
    "areaCode": "areaCode",  # Area Code as string
    "date": "date",  # Date as string [YYYY-MM-DD]
    "hash": "hash",  # Unique ID as string
    "newCasesByPublishDate": "newCasesByPublishDate",  # New cases by publish date
    "cumCasesByPublishDate": "cumCasesByPublishDate",  # Cumulative cases by publish date
    "cumCasesByPublishDateRate": "cumCasesByPublishDateRate",  # Rate of cumulative cases by publish date per 100k resident population
    "newCasesBySpecimenDate": "newCasesBySpecimenDate",  # New cases by specimen date
    "cumCasesBySpecimenDateRate": "cumCasesBySpecimenDateRate",  # Rate of cumulative cases by specimen date per 100k resident population
    "cumCasesBySpecimenDate": "cumCasesBySpecimenDate",  # Cumulative cases by specimen date
    "maleCases": "maleCases",  # Male cases (by age)
    "femaleCases": "femaleCases",  # Female cases (by age)
    "newPillarOneTestsByPublishDate": "newPillarOneTestsByPublishDate",  # New pillar one tests by publish date
    "cumPillarOneTestsByPublishDate": "cumPillarOneTestsByPublishDate",  # Cumulative pillar one tests by publish date
    "newPillarTwoTestsByPublishDate": "newPillarTwoTestsByPublishDate",  # New pillar two tests by publish date
    "cumPillarTwoTestsByPublishDate": "cumPillarTwoTestsByPublishDate",  # Cumulative pillar two tests by publish date
    "newPillarThreeTestsByPublishDate": "newPillarThreeTestsByPublishDate",  # New pillar three tests by publish date
    "cumPillarThreeTestsByPublishDate": "cumPillarThreeTestsByPublishDate",  # Cumulative pillar three tests by publish date
    "newPillarFourTestsByPublishDate": "newPillarFourTestsByPublishDate",  # New pillar four tests by publish date
    "cumPillarFourTestsByPublishDate": "cumPillarFourTestsByPublishDate",  # Cumulative pillar four tests by publish date
    "newAdmissions": "newAdmissions",  # New admissions
    "cumAdmissions": "cumAdmissions",  # Cumulative number of admissions
    "cumAdmissionsByAge": "cumAdmissionsByAge",  # Cumulative admissions by age
    "cumTestsByPublishDate": "cumTestsByPublishDate",  # Cumulative tests by publish date
    "newTestsByPublishDate": "newTestsByPublishDate",  # New tests by publish date
    "covidOccupiedMVBeds": "covidOccupiedMVBeds",  # COVID-19 occupied beds with mechanical ventilators
    "hospitalCases": "hospitalCases",  # Hospital cases
    "plannedCapacityByPublishDate": "plannedCapacityByPublishDate",  # Planned capacity by publish date
    "newDeaths28DaysByPublishDate": "newDeaths28DaysByPublishDate",  # Deaths within 28 days of positive test
    "cumDeaths28DaysByPublishDate": "cumDeaths28DaysByPublishDate",  # Cumulative deaths within 28 days of positive test
    "cumDeaths28DaysByPublishDateRate": "cumDeaths28DaysByPublishDateRate",  # Rate of cumulative deaths within 28 days of positive test per 100k resident population
    "newDeaths28DaysByDeathDate": "newDeaths28DaysByDeathDate",  # Deaths within 28 days of positive test by death date
    "cumDeaths28DaysByDeathDate": "cumDeaths28DaysByDeathDate",  # Cumulative deaths within 28 days of positive test by death date
    "cumDeaths28DaysByDeathDateRate": "cumDeaths28DaysByDeathDateRate",  # Rate of cumulative deaths within 28 days of positive test by death date per 100k resident population
}

In [None]:
# ``query_filters`` previously existed only as a local variable inside
# ``main()``, so this cell raised NameError. The value below is the filter
# that ``main()`` uses.
query_filters = ["areaType=region"]

csv_data = get_paginated_dataset(query_filters, query_structure, as_csv=True)

In [None]:
import pandas as pd

In [None]:
from typing import Iterable, Dict, Union, List, Optional
from json import dumps
from requests import get
from http import HTTPStatus
from enum import Enum


StructureType = Dict[str, Union[dict, str]]
FiltersType = Iterable[str]
APIResponseType = Union[List[StructureType], str]


class AreaTypeEnum(Enum):
    """
    Area types; each member's value is the string used in an
    ``areaType=...`` filter.

    overview
        Overview data for the United Kingdom
    nation
        Nation data (England, Northern Ireland, Scotland, and Wales)
    region
        Region data
    nhsRegion
        NHS Region data
    utla
        Upper-tier local authority data
    ltla
        Lower-tier local authority data
    """
    OVERVIEW = "overview"
    NATION = "nation"
    REGION = "region"
    NHS_REGION = "nhsRegion"
    UPPER_TIER_LOCAL_AUTHORITY = "utla"
    LOWER_TIER_LOCAL_AUTHORITY = "ltla"

    def __str__(self) -> str:
        # str(member) gives the raw value rather than "AreaTypeEnum.NAME".
        return self.value


class GovUKCoronavirusData:
    """Helpers for querying the paginated ``/v1/data`` endpoint of api.coronavirus.data.gov.uk."""

    # Output field name -> API metric name, used when no structure is given.
    # The publish-date rate entry was previously keyed
    # "cumCasesBySpecimenDateRate", duplicating the specimen-date entry
    # below it, so the publish-date rate was never requested.
    DEFAULT_QUERY_STRUCTURE = {
        "areaType": "areaType",  # Area type as string
        "areaName": "areaName",  # Area name as string
        "areaCode": "areaCode",  # Area Code as string
        "date": "date",  # Date as string [YYYY-MM-DD]
        "hash": "hash",  # Unique ID as string
        "newCasesByPublishDate": "newCasesByPublishDate",  # New cases by publish date
        "cumCasesByPublishDate": "cumCasesByPublishDate",  # Cumulative cases by publish date
        "cumCasesByPublishDateRate": "cumCasesByPublishDateRate",  # Rate of cumulative cases by publish date per 100k resident population
        "newCasesBySpecimenDate": "newCasesBySpecimenDate",  # New cases by specimen date
        "cumCasesBySpecimenDateRate": "cumCasesBySpecimenDateRate",  # Rate of cumulative cases by specimen date per 100k resident population
        "cumCasesBySpecimenDate": "cumCasesBySpecimenDate",  # Cumulative cases by specimen date
        "maleCases": "maleCases",  # Male cases (by age)
        "femaleCases": "femaleCases",  # Female cases (by age)
        "newPillarOneTestsByPublishDate": "newPillarOneTestsByPublishDate",  # New pillar one tests by publish date
        "cumPillarOneTestsByPublishDate": "cumPillarOneTestsByPublishDate",  # Cumulative pillar one tests by publish date
        "newPillarTwoTestsByPublishDate": "newPillarTwoTestsByPublishDate",  # New pillar two tests by publish date
        "cumPillarTwoTestsByPublishDate": "cumPillarTwoTestsByPublishDate",  # Cumulative pillar two tests by publish date
        "newPillarThreeTestsByPublishDate": "newPillarThreeTestsByPublishDate",  # New pillar three tests by publish date
        "cumPillarThreeTestsByPublishDate": "cumPillarThreeTestsByPublishDate",  # Cumulative pillar three tests by publish date
        "newPillarFourTestsByPublishDate": "newPillarFourTestsByPublishDate",  # New pillar four tests by publish date
        "cumPillarFourTestsByPublishDate": "cumPillarFourTestsByPublishDate",  # Cumulative pillar four tests by publish date
        "newAdmissions": "newAdmissions",  # New admissions
        "cumAdmissions": "cumAdmissions",  # Cumulative number of admissions
        "cumAdmissionsByAge": "cumAdmissionsByAge",  # Cumulative admissions by age
        "cumTestsByPublishDate": "cumTestsByPublishDate",  # Cumulative tests by publish date
        "newTestsByPublishDate": "newTestsByPublishDate",  # New tests by publish date
        "covidOccupiedMVBeds": "covidOccupiedMVBeds",  # COVID-19 occupied beds with mechanical ventilators
        "hospitalCases": "hospitalCases",  # Hospital cases
        "plannedCapacityByPublishDate": "plannedCapacityByPublishDate",  # Planned capacity by publish date
        "newDeaths28DaysByPublishDate": "newDeaths28DaysByPublishDate",  # Deaths within 28 days of positive test
        "cumDeaths28DaysByPublishDate": "cumDeaths28DaysByPublishDate",  # Cumulative deaths within 28 days of positive test
        "cumDeaths28DaysByPublishDateRate": "cumDeaths28DaysByPublishDateRate",  # Rate of cumulative deaths within 28 days of positive test per 100k resident population
        "newDeaths28DaysByDeathDate": "newDeaths28DaysByDeathDate",  # Deaths within 28 days of positive test by death date
        "cumDeaths28DaysByDeathDate": "cumDeaths28DaysByDeathDate",  # Cumulative deaths within 28 days of positive test by death date
        "cumDeaths28DaysByDeathDateRate": "cumDeaths28DaysByDeathDateRate",  # Rate of cumulative deaths within 28 days of positive test by death date per 100k resident population
    }

    @classmethod
    def get_latest_data(cls, filters: Optional[FiltersType] = None,
                        structure: Optional[StructureType] = None) -> APIResponseType:
        """
        Fetch every page for the given query as JSON records.

        Parameters
        ----------
        filters: Optional[Iterable[str]]
            API filters. [default: ``areaType=region``]

        structure: Optional[Dict[str, Union[dict, str]]]
            Structure parameter. [default: ``DEFAULT_QUERY_STRUCTURE``]

        Returns
        -------
        Union[List[StructureType], str]
            Whatever ``get_paginated_dataset`` returns; as ``as_csv`` is
            left at its default this is the list of records.
        """
        if filters is None:
            filters = [f'areaType={AreaTypeEnum.REGION.value}']
        if structure is None:
            structure = cls.DEFAULT_QUERY_STRUCTURE
        return cls.get_paginated_dataset(filters, structure)

    @staticmethod
    def get_paginated_dataset(filters: FiltersType, structure: StructureType,
                              as_csv: bool = False) -> APIResponseType:
        """
        Extracts paginated data by requesting all of the pages
        and combining the results.

        Parameters
        ----------
        filters: Iterable[str]
            API filters; they are joined with ``;`` into the ``filters``
            query parameter. A single filter may also be given as a plain
            string. See the API documentations for additional information.

        structure: Dict[str, Union[dict, str]]
            Structure parameter; it is sent as compact JSON. See the API
            documentations for additional information.

        as_csv: bool
            Return the data as CSV. [default: ``False``]

        Returns
        -------
        Union[List[StructureType], str]
            With ``as_csv=False``: a list holding the ``data`` items of
            every page, in page order. With ``as_csv=True``: one string in
            which the pages are joined by newlines and only the first page
            keeps its header line.

        Raises
        ------
        RuntimeError
            If a page request answers with an HTTP status of 400 or above.
        """
        endpoint = "https://api.coronavirus.data.gov.uk/v1/data"

        # A bare string is an iterable of characters; joining it directly
        # would send "a;r;e;a;..." instead of the single intended filter.
        if isinstance(filters, str):
            filters = [filters]

        api_params = {
            "filters": str.join(";", filters),
            "structure": dumps(structure, separators=(",", ":")),
            "format": "json" if not as_csv else "csv"
        }

        data = list()

        page_number = 1

        while True:
            # Adding page number to query params
            api_params["page"] = page_number

            response = get(endpoint, params=api_params, timeout=10)

            if response.status_code >= HTTPStatus.BAD_REQUEST:
                raise RuntimeError(f'Request failed: {response.text}')
            elif response.status_code == HTTPStatus.NO_CONTENT:
                break

            if as_csv:
                csv_content = response.content.decode()

                # Removing CSV header (column names) where page
                # number is greater than 1.
                if page_number > 1:
                    data_lines = csv_content.split("\n")[1:]
                    csv_content = str.join("\n", data_lines)

                data.append(csv_content.strip())
                # CSV responses carry no pagination block; the loop ends
                # on the NO_CONTENT status handled above.
                page_number += 1
                continue

            current_data = response.json()
            page_data: List[StructureType] = current_data['data']

            data.extend(page_data)

            # The "next" attribute in "pagination" will be `None`
            # when we reach the end.
            if current_data["pagination"]["next"] is None:
                break

            page_number += 1

        if not as_csv:
            return data

        # Concatenating CSV pages
        return str.join("\n", data)



def main():
    """Demo run: fetch the region case series as JSON and as CSV, printing a short summary of each.

    The requests go through ``GovUKCoronavirusData.get_paginated_dataset``.
    In this cell the function exists only as that static method; the bare
    name ``get_paginated_dataset`` resolved only if an earlier cell had
    left a module-level function of that name behind.
    """
    query_filters = [
        "areaType=region"
    ]

    # Output field name -> API metric name.
    query_structure = {
        "date": "date",
        "name": "areaName",
        "code": "areaCode",
        "daily": "newCasesBySpecimenDate",
        "cumulative": "cumCasesBySpecimenDate"
    }

    json_data = GovUKCoronavirusData.get_paginated_dataset(query_filters, query_structure)
    print("JSON:")
    print("Length:", len(json_data))
    print("Data (first 3 items):", json_data[:3])

    print("---" * 10)

    csv_data = GovUKCoronavirusData.get_paginated_dataset(query_filters, query_structure, as_csv=True)
    csv_lines = csv_data.split("\n")
    print("CSV:")
    print("Length:", len(csv_lines))
    print("Data (first 3 lines):", csv_lines[:3])

In [None]:
# Fetch every record dated 2020-09-01 using the default structure.
# NOTE(review): no ``areaType`` filter is passed here - confirm the API
# accepts a query without one.
data = GovUKCoronavirusData.get_latest_data(filters=['date=2020-09-01'])

# One row per returned record.
df = pd.DataFrame(data)

In [None]:
df.columns

In [None]:
from uk_covid19 import Cov19API

In [None]:
from tqdm import tqdm

# Collect one frame per area type and concatenate once at the end.
# Re-concatenating onto a growing frame inside the loop copies all earlier
# rows on every iteration and begins by concatenating an empty DataFrame.
area_frames = []
for area in tqdm(AreaTypeEnum):
    api = Cov19API(filters=[f"areaType={area.value}"], structure=GovUKCoronavirusData.DEFAULT_QUERY_STRUCTURE)
    area_frames.append(api.get_dataframe())

df = pd.concat(area_frames, sort=False)

df.areaName.unique()

In [None]:
%matplotlib inline

In [None]:
df[df.covidOccupiedMVBeds.fillna(0) > 0].covidOccupiedMVBeds.iloc[:200].plot.bar()

In [None]:
df.sample(50)

In [None]:
api = Cov19API(filters=["areaType=nation"], structure=GovUKCoronavirusData.DEFAULT_QUERY_STRUCTURE)

In [None]:
gov_df = api.get_dataframe()

In [None]:
gov_df

In [None]:
# Example query URL, kept for reference. As a bare line in a code cell it
# was a SyntaxError, so it is stored as a comment.
# https://api.coronavirus.data.gov.uk/v1/data?filters=areaName=United%2520Kingdom;areaType=overview&structure=%7B%22areaType%22:%22areaType%22,%22areaName%22:%22areaName%22,%22areaCode%22:%22areaCode%22,%22date%22:%22date%22,%22newPillarOneTestsByPublishDate%22:%22newPillarOneTestsByPublishDate%22,%22newPillarTwoTestsByPublishDate%22:%22newPillarTwoTestsByPublishDate%22,%22newPillarThreeTestsByPublishDate%22:%22newPillarThreeTestsByPublishDate%22,%22newPillarFourTestsByPublishDate%22:%22newPillarFourTestsByPublishDate%22,%22newTestsByPublishDate%22:%22newTestsByPublishDate%22,%22cumPillarOneTestsByPublishDate%22:%22cumPillarOneTestsByPublishDate%22,%22cumPillarTwoTestsByPublishDate%22:%22cumPillarTwoTestsByPublishDate%22,%22cumPillarThreeTestsByPublishDate%22:%22cumPillarThreeTestsByPublishDate%22,%22cumPillarFourTestsByPublishDate%22:%22cumPillarFourTestsByPublishDate%22,%22cumTestsByPublishDate%22:%22cumTestsByPublishDate%22%7D&format=json

In [None]:
df[df.areaType == 'utla']