In [1]:
import urllib3
import pandas as pd
import polars as pl
import json as j

In [2]:
def get_ukhsa_data(theme, sub_theme, topic, geography_type, geography, metric,
                   stratum=None, age=None, sex=None, year=None, month=None,
                   epiweek=None, date=None, in_reporting_delay_period=None):
    """Yield metric records from the UKHSA dashboard API, one dict per record.

    This is a generator: requests are only issued as the caller iterates, and
    result pages are fetched one after another until the data is exhausted.

    Parameters
    ----------
    theme, sub_theme, topic, geography_type, geography, metric : str
        Path components substituted into the metrics endpoint URL.
    stratum, age, sex, year, month, epiweek, date, in_reporting_delay_period : optional
        Filters forwarded as query parameters. Any left as ``None`` are
        omitted from the request.

    Yields
    ------
    dict
        One record per result, restricted to the keys listed in
        ``record_fields`` below.

    Raises
    ------
    RuntimeError
        If the API answers with a non-200 status (other than a 404 on a page
        after the first, which is treated as the end of the data).
    KeyError
        If a returned record lacks one of the expected fields.
    """
    import warnings  # local import: only needed for the empty-result warning

    # Keys copied from every API record into the yielded dict, in this order.
    record_fields = (
        'theme', 'sub_theme', 'topic', 'geography_type', 'geography',
        'geography_code', 'metric', 'metric_group', 'stratum', 'sex', 'age',
        'year', 'month', 'epiweek', 'date', 'metric_value',
        'in_reporting_delay_period',
    )

    http = urllib3.PoolManager()

    base_url = 'https://api.ukhsa-dashboard.data.gov.uk/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/{geography_type}/geographies/{geography}/metrics/{metric}'

    request_url = base_url.format(
        theme=theme,
        sub_theme=sub_theme,
        topic=topic,
        geography_type=geography_type,
        geography=geography,
        metric=metric
    )

    page = 1  # pagination starts at the first page

    optional_params = {
        'stratum': stratum,
        'age': age,
        'sex': sex,
        'year': year,
        'month': month,
        'epiweek': epiweek,
        'date': date,
        'in_reporting_delay_period': in_reporting_delay_period,
        'page_size': 365,  # largest page the author found usable; the API default is 5 records per page
        'page': page
    }

    # Only send the filters the caller actually supplied.
    query_params = {name: value for name, value in optional_params.items() if value is not None}

    while True:
        response = http.request('GET', request_url, fields=query_params)

        if response.status != 200:
            # Past the last page the API may answer with an error status (the
            # original loop relied on such a response lacking 'results'), so a
            # 404 after the first page ends the iteration instead of raising.
            if page > 1 and response.status == 404:
                break
            raise RuntimeError(
                f'UKHSA API request failed with HTTP {response.status} for {request_url} '
                f'(query parameters: {query_params})'
            )

        data = j.loads(response.data)
        results = data.get('results', [])

        if not results:
            if page == 1:
                warnings.warn(f'UKHSA API returned no records for {request_url} '
                              f'(query parameters: {query_params})')
            break

        for record in results:
            yield {field: record[field] for field in record_fields}

        # When the payload advertises the next page and there is none, stop
        # here rather than spending a request on a page that does not exist.
        if 'next' in data and data['next'] is None:
            break

        page += 1
        query_params['page'] = page

# Daily PCR test counts for England, with no optional filters applied.
# The call only creates a generator; the HTTP requests happen when it is consumed below.
data = get_ukhsa_data(
    theme='infectious_disease',
    sub_theme='respiratory',
    topic='COVID-19',
    geography_type='Nation',
    geography='England',
    metric='COVID-19_testing_PCRcountByDay',
)

# Drain the generator: each item is a single record dict.
all_data = list(data)

df = pd.DataFrame(all_data)


In [3]:
df

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-08,535.0,False
1,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-09,798.0,False
2,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-10,1170.0,False
3,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-11,1572.0,False
4,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-12,2068.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2060,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-29,21007.0,False
2061,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-30,21027.0,False
2062,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-01,19773.0,False
2063,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-02,17250.0,False


In [4]:
# Same metric as before, restricted to records whose year is 2023.
data_2023 = get_ukhsa_data(
    theme='infectious_disease',
    sub_theme='respiratory',
    topic='COVID-19',
    geography_type='Nation',
    geography='England',
    metric='COVID-19_testing_PCRcountByDay',
    year='2023',
)

# Each item produced by the generator is one record dict.
all_2023_data = list(data_2023)

df_2023 = pd.DataFrame(all_2023_data)

In [5]:
df_2023

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,1,52,2023-01-01,148569.0,False
1,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,1,1,2023-01-02,150798.0,False
2,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,1,1,2023-01-03,158654.0,False
3,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,1,1,2023-01-04,155653.0,False
4,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,1,1,2023-01-05,151557.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,12,52,2023-12-27,36273.0,False
361,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,12,52,2023-12-28,37183.0,False
362,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,12,52,2023-12-29,38217.0,False
363,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2023,12,52,2023-12-30,39055.0,False


Check that pagination carries on past the first page. 2024 was a leap year, so a full year of daily records is 366 rows — one more than the maximum page size of 365.

In [6]:
# NOTE(review): despite the *_2024 names, this query filters on sex='all' and not on
# year, so it returns records for every available year. It still exercises
# pagination, since the result is far larger than one page. Confirm whether
# year='2024' was intended.
data_2024 = get_ukhsa_data(
    theme='infectious_disease',
    sub_theme='respiratory',
    topic='COVID-19',
    geography_type='Nation',
    geography='England',
    metric='COVID-19_testing_PCRcountByDay',
    sex='all',
)

# Each item produced by the generator is one record dict.
all_2024_data = list(data_2024)

df_2024 = pd.DataFrame(all_2024_data)

In [7]:
df_2024

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-08,535.0,False
1,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-09,798.0,False
2,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-10,1170.0,False
3,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-11,1572.0,False
4,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-12,2068.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2060,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-29,21007.0,False
2061,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-30,21027.0,False
2062,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-01,19773.0,False
2063,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-02,17250.0,False


It would be good to have methods or access attributes that give a list of the options at each level. Maybe even some representation of the whole tree (though that might get too big).

In [8]:
# List the entries available at the top ("themes") level of the API.
http2 = urllib3.PoolManager()

url = 'https://api.ukhsa-dashboard.data.gov.uk/themes'

response = http2.request('GET', url)

# Fail with a clear message rather than building a DataFrame from an error payload.
if response.status != 200:
    raise RuntimeError(f'GET {url} returned HTTP {response.status}')

data2 = j.loads(response.data)

df2 = pd.DataFrame(data2)


In [9]:
df2

Unnamed: 0,name,link
0,climate_and_environment,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,immunisation,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,infectious_disease,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [10]:
# List the sub-themes available under the climate_and_environment theme.
http_sub_themes = urllib3.PoolManager()

url_sub_themes = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes'

response_sub_themes = http_sub_themes.request('GET', url_sub_themes)

# Fail with a clear message rather than building a DataFrame from an error payload.
if response_sub_themes.status != 200:
    raise RuntimeError(f'GET {url_sub_themes} returned HTTP {response_sub_themes.status}')

data_sub_themes = j.loads(response_sub_themes.data)

df_sub_themes = pd.DataFrame(data_sub_themes)

In [11]:
df_sub_themes

Unnamed: 0,name,link
0,chemical_exposure,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,seasonal_environmental,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,vectors,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [12]:
# List the topics available under the seasonal_environmental sub-theme.
http_topics = urllib3.PoolManager()

url_topics = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics'

response_topics = http_topics.request('GET', url_topics)

# Fail with a clear message rather than building a DataFrame from an error payload.
if response_topics.status != 200:
    raise RuntimeError(f'GET {url_topics} returned HTTP {response_topics.status}')

data_topics = j.loads(response_topics.data)

df_topics = pd.DataFrame(data_topics)

In [13]:
df_topics

Unnamed: 0,name,link
0,heat-or-sunburn,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,heat-or-sunstroke,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [14]:
# List the geography types available for the heat-or-sunstroke topic.
http_geo = urllib3.PoolManager()

url_geo = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics/heat-or-sunstroke/geography_types'

response_geo = http_geo.request('GET', url_geo)

# Fail with a clear message rather than building a DataFrame from an error payload.
if response_geo.status != 200:
    raise RuntimeError(f'GET {url_geo} returned HTTP {response_geo.status}')

data_geo = j.loads(response_geo.data)

df_geo = pd.DataFrame(data_geo)

In [15]:
df_geo

Unnamed: 0,name,link
0,Nation,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,UKHSA Region,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [16]:
# List the geographies available under the "UKHSA Region" geography type.
# The space in the path segment is percent-encoded by hand as %20.
http_geos = urllib3.PoolManager()

url_geos = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics/heat-or-sunstroke/geography_types/UKHSA%20Region/geographies'

response_geos = http_geos.request('GET', url_geos)

# Fail with a clear message rather than building a DataFrame from an error payload.
if response_geos.status != 200:
    raise RuntimeError(f'GET {url_geos} returned HTTP {response_geos.status}')

data_geos = j.loads(response_geos.data)

df_geos = pd.DataFrame(data_geos)

In [17]:
df_geos

Unnamed: 0,name,link
0,East Midlands,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,East of England,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,London,https://api.ukhsa-dashboard.data.gov.uk/themes...
3,North East,https://api.ukhsa-dashboard.data.gov.uk/themes...
4,North West,https://api.ukhsa-dashboard.data.gov.uk/themes...
5,South East,https://api.ukhsa-dashboard.data.gov.uk/themes...
6,South West,https://api.ukhsa-dashboard.data.gov.uk/themes...
7,West Midlands,https://api.ukhsa-dashboard.data.gov.uk/themes...
8,Yorkshire and Humber,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [18]:
# List the metrics available for heat-or-sunstroke in the South East region.
# Spaces in the path segments are percent-encoded by hand as %20.
http_met = urllib3.PoolManager()

url_met = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics/heat-or-sunstroke/geography_types/UKHSA%20Region/geographies/South%20East/metrics'

response_met = http_met.request('GET', url_met)

# Fail with a clear message rather than building a DataFrame from an error payload.
if response_met.status != 200:
    raise RuntimeError(f'GET {url_met} returned HTTP {response_met.status}')

data_met = j.loads(response_met.data)

df_met = pd.DataFrame(data_met)

Would be nice for this one to return both the "name" and a legible version.

In [19]:
df_met

Unnamed: 0,name,link
0,heat-or-sunstroke_syndromic_emergencyDepartmen...,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,heat-or-sunstroke_syndromic_emergencyDepartmen...,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,heat-or-sunstroke_syndromic_emergencyDepartmen...,https://api.ukhsa-dashboard.data.gov.uk/themes...
3,heat-or-sunstroke_syndromic_GPInHours_averageR...,https://api.ukhsa-dashboard.data.gov.uk/themes...
4,heat-or-sunstroke_syndromic_GPInHours_baseline...,https://api.ukhsa-dashboard.data.gov.uk/themes...
5,heat-or-sunstroke_syndromic_GPInHours_rateByDay,https://api.ukhsa-dashboard.data.gov.uk/themes...


- Want to include some exception handling messages for when there is a bad response or empty data.
- Would be nice to have an options tree printout as a kind of help attribute. Could work out how to build one from `treelib`
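- "epiweek" is the epidemiological week number. Some definitions run the week from Sunday to Saturday, but in the data above the number changes on a Monday (2020-02-09, a Sunday, is week 6; 2020-02-10, a Monday, is week 7), so here weeks appear to run Monday to Sunday, with week 1 being the week that contains the 4th January (ISO 8601 style). TODO: confirm against the API documentation.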
- "sex" can be m, f or all.
- "date" refers to a specific yyyy-mm-dd date
- "age" I'm not sure about, because it mentions age bands but also suggests that age can be filtered using whatever range you like.