In [1]:
import urllib3
import pandas as pd
import polars as pl
import json as j

In [2]:
class UKHSA:
    """Small client for the UKHSA data dashboard API.

    All helpers are static, so they can be called either on the class
    (``UKHSA.dataframe(...)``) or on an instance (``UKHSA().dataframe(...)``).
    """

    # Root of the API; endpoint paths are appended to this.
    base_url = 'https://api.ukhsa-dashboard.data.gov.uk/'

    # Records requested per page. Per the original author's note the API
    # serves 5 records per page by default and 365 is the maximum.
    page_size = 365

    # Keys copied from every API result into the records that are yielded,
    # in the column order of the resulting DataFrame.
    _RECORD_FIELDS = (
        'theme',
        'sub_theme',
        'topic',
        'geography_type',
        'geography',
        'geography_code',
        'metric',
        'metric_group',
        'stratum',
        'sex',
        'age',
        'year',
        'month',
        'epiweek',
        'date',
        'metric_value',
        'in_reporting_delay_period',
    )

    def __init__(self):
        pass

    # Kept separate from "_get_data()" so that it can be reused to build
    # queries in other ways.
    @staticmethod
    def _get_params(theme, sub_theme, topic, geography_type, geography, metric,
                    stratum=None, age=None, sex=None, year=None, month=None,
                    epiweek=None, date=None, in_reporting_delay_period=None):
        """Build the endpoint URL and the query parameters for one metric.

        The six mandatory arguments form the URL path; the optional ones
        become query-string filters and are left out when they are ``None``.

        Returns:
            tuple: ``(url, params)`` where ``url`` is the full endpoint URL and
            ``params`` is a dict of the supplied filters plus ``page_size``
            (taken from ``UKHSA.page_size``) and ``page`` (always 1).
        """
        endpoint = (
            'themes/{theme}/sub_themes/{sub_theme}/topics/{topic}'
            '/geography_types/{geography_type}/geographies/{geography}'
            '/metrics/{metric}'
        ).format(
            theme=theme,
            sub_theme=sub_theme,
            topic=topic,
            geography_type=geography_type,
            geography=geography,
            metric=metric,
        )

        candidate_params = {
            'stratum': stratum,
            'age': age,
            'sex': sex,
            'year': year,
            'month': month,
            'epiweek': epiweek,
            'date': date,
            'in_reporting_delay_period': in_reporting_delay_period,
            'page_size': UKHSA.page_size,  # single source of truth for the page size
            'page': 1,                     # start on the first page of paginated data
        }

        # Only send the filters the caller actually supplied.
        params = {name: value for name, value in candidate_params.items() if value is not None}

        return UKHSA.base_url + endpoint, params

    @staticmethod
    def _get_data(theme, sub_theme, topic, geography_type, geography, metric,
                  stratum=None, age=None, sex=None, year=None, month=None, epiweek=None,
                  date=None, in_reporting_delay_period=None):
        """Yield one dict per record, walking every page of the result set.

        Each yielded dict holds exactly the keys in ``_RECORD_FIELDS``.

        Raises:
            RuntimeError: if the API answers with a non-200 status. The one
                exception is a 404 on a page after the first, which is treated
                as the end of the data.
            KeyError: if a result lacks one of the expected fields.
        """
        url, params = UKHSA._get_params(
            theme, sub_theme, topic, geography_type, geography, metric,
            stratum, age, sex, year, month, epiweek, date, in_reporting_delay_period,
        )

        http = urllib3.PoolManager()
        first_page = params['page']

        while True:
            response = http.request('GET', url, fields=params)

            if response.status != 200:
                # NOTE(review): a 404 past the first page is assumed to mean
                # "no such page", i.e. we ran off the end - confirm against
                # the API. Anything else is a genuine failure and must not be
                # mistaken for an empty or complete result set.
                if response.status == 404 and params['page'] > first_page:
                    break
                raise RuntimeError(
                    'UKHSA API request failed with HTTP status {status} for {url} (page {page})'.format(
                        status=response.status, url=url, page=params['page']
                    )
                )

            payload = j.loads(response.data)
            results = payload.get('results', [])

            if not results:
                break

            for record in results:
                yield {field: record[field] for field in UKHSA._RECORD_FIELDS}

            # When the response says there is no next page, stop here instead
            # of spending a request on a page that does not exist. If the key
            # is absent we keep paging until an empty page or 404 is seen.
            if 'next' in payload and not payload['next']:
                break

            params['page'] += 1

    @staticmethod
    def dataframe(theme, sub_theme, topic, geography_type, geography, metric,
                  stratum=None, age=None, sex=None, year=None, month=None, epiweek=None,
                  date=None, in_reporting_delay_period=None, type=None):
        """Download every record for a metric and return it as a DataFrame.

        Args:
            type: ``'polars'`` returns a polars DataFrame; ``'pandas'``,
                ``None`` or any other value returns a pandas DataFrame.

        Returns:
            A DataFrame with one row per record and one column per entry of
            ``_RECORD_FIELDS``.
        """
        records = list(UKHSA._get_data(
            theme, sub_theme, topic, geography_type, geography, metric,
            stratum, age, sex, year, month, epiweek, date, in_reporting_delay_period,
        ))

        # pandas is the default backend; polars is strictly opt-in.
        if type == 'polars':
            return pl.DataFrame(records)
        return pd.DataFrame(records)
    

In [3]:
# Fetch the daily PCR test counts for England. No `type` argument is given,
# so UKHSA.dataframe falls through to its default branch and returns pandas.
test_pd = UKHSA.dataframe('infectious_disease','respiratory','COVID-19','Nation','England','COVID-19_testing_PCRcountByDay')

test_pd

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-08,535.0,False
1,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-09,798.0,False
2,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-10,1170.0,False
3,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-11,1572.0,False
4,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-12,2068.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2074,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-13,25194.0,False
2075,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-14,24743.0,False
2076,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-15,22783.0,False
2077,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-16,19314.0,False


In [4]:
# Same query as above, but ask for a polars DataFrame via type='polars'.
test_pl = UKHSA.dataframe('infectious_disease','respiratory','COVID-19','Nation','England','COVID-19_testing_PCRcountByDay',type='polars')

test_pl

theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
str,str,str,str,str,str,str,str,str,str,str,i64,i64,i64,str,f64,bool
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,6,"""2020-02-08""",535.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,6,"""2020-02-09""",798.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,7,"""2020-02-10""",1170.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,7,"""2020-02-11""",1572.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,7,"""2020-02-12""",2068.0,false
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,10,42,"""2025-10-13""",25194.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,10,42,"""2025-10-14""",24743.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,10,42,"""2025-10-15""",22783.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,10,42,"""2025-10-16""",19314.0,false


In [5]:
# Same query again with type='pandas' spelled out explicitly.
test_pd2 = UKHSA.dataframe('infectious_disease','respiratory','COVID-19','Nation','England','COVID-19_testing_PCRcountByDay',type='pandas')

test_pd2

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-08,535.0,False
1,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-09,798.0,False
2,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-10,1170.0,False
3,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-11,1572.0,False
4,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-12,2068.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2074,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-13,25194.0,False
2075,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-14,24743.0,False
2076,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-15,22783.0,False
2077,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,42,2025-10-16,19314.0,False


### Listing the available options at each level of the API hierarchy (worked examples)

In [6]:
# Level 1: list the themes exposed by the API.
http2 = urllib3.PoolManager()

url = 'https://api.ukhsa-dashboard.data.gov.uk/themes'

response = http2.request('GET',url)

# Decode the JSON response body before handing it to pandas.
data2 = j.loads(response.data)

df2 = pd.DataFrame(data2)

In [7]:
# Display the themes table built in the previous cell.
df2

Unnamed: 0,name,link
0,climate_and_environment,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,immunisation,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,infectious_disease,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [8]:
# Level 2: list the sub-themes under the 'climate_and_environment' theme.
http_sub_themes = urllib3.PoolManager()

url_sub_themes = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes'

response_sub_themes = http_sub_themes.request('GET',url_sub_themes)

# Decode the JSON response body before handing it to pandas.
data_sub_themes = j.loads(response_sub_themes.data)

df_sub_themes = pd.DataFrame(data_sub_themes)

In [9]:
# Display the sub-themes table built in the previous cell.
df_sub_themes

Unnamed: 0,name,link
0,chemical_exposure,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,seasonal_environmental,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,vectors,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [10]:
# Level 3: list the topics under the 'seasonal_environmental' sub-theme.
http_topics = urllib3.PoolManager()

url_topics = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics'

response_topics = http_topics.request('GET',url_topics)

# Decode the JSON response body before handing it to pandas.
data_topics = j.loads(response_topics.data)

df_topics = pd.DataFrame(data_topics)

In [11]:
# Display the topics table built in the previous cell.
df_topics

Unnamed: 0,name,link
0,heat-or-sunburn,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,heat-or-sunstroke,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [12]:
# Level 4: list the geography types available for the 'heat-or-sunstroke' topic.
http_geo = urllib3.PoolManager()

url_geo = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics/heat-or-sunstroke/geography_types'

response_geo = http_geo.request('GET',url_geo)

# Decode the JSON response body before handing it to pandas.
data_geo = j.loads(response_geo.data)

df_geo = pd.DataFrame(data_geo)

In [13]:
# Display the geography-types table built in the previous cell.
df_geo

Unnamed: 0,name,link
0,Nation,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,UKHSA Region,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [14]:
# Level 5: list the geographies within the 'UKHSA Region' geography type.
# The space in the name is written percent-encoded (%20) in the URL.
http_geos = urllib3.PoolManager()

url_geos = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics/heat-or-sunstroke/geography_types/UKHSA%20Region/geographies'

response_geos = http_geos.request('GET',url_geos)

# Decode the JSON response body before handing it to pandas.
data_geos = j.loads(response_geos.data)

df_geos = pd.DataFrame(data_geos)

In [15]:
# Display the geographies table built in the previous cell.
df_geos

Unnamed: 0,name,link
0,East Midlands,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,East of England,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,London,https://api.ukhsa-dashboard.data.gov.uk/themes...
3,North East,https://api.ukhsa-dashboard.data.gov.uk/themes...
4,North West,https://api.ukhsa-dashboard.data.gov.uk/themes...
5,South East,https://api.ukhsa-dashboard.data.gov.uk/themes...
6,South West,https://api.ukhsa-dashboard.data.gov.uk/themes...
7,West Midlands,https://api.ukhsa-dashboard.data.gov.uk/themes...
8,Yorkshire and Humber,https://api.ukhsa-dashboard.data.gov.uk/themes...


In [16]:
# Level 6: list the metrics available for the 'South East' region.
# Spaces in the path segments are written percent-encoded (%20) in the URL.
http_met = urllib3.PoolManager()

url_met = 'https://api.ukhsa-dashboard.data.gov.uk/themes/climate_and_environment/sub_themes/seasonal_environmental/topics/heat-or-sunstroke/geography_types/UKHSA%20Region/geographies/South%20East/metrics'

response_met = http_met.request('GET',url_met)

# Decode the JSON response body before handing it to pandas.
data_met = j.loads(response_met.data)

df_met = pd.DataFrame(data_met)

In [17]:
# Display the metrics table built in the previous cell.
df_met

Unnamed: 0,name,link
0,heat-or-sunstroke_syndromic_emergencyDepartmen...,https://api.ukhsa-dashboard.data.gov.uk/themes...
1,heat-or-sunstroke_syndromic_emergencyDepartmen...,https://api.ukhsa-dashboard.data.gov.uk/themes...
2,heat-or-sunstroke_syndromic_emergencyDepartmen...,https://api.ukhsa-dashboard.data.gov.uk/themes...
3,heat-or-sunstroke_syndromic_GPInHours_averageR...,https://api.ukhsa-dashboard.data.gov.uk/themes...
4,heat-or-sunstroke_syndromic_GPInHours_baseline...,https://api.ukhsa-dashboard.data.gov.uk/themes...
5,heat-or-sunstroke_syndromic_GPInHours_rateByDay,https://api.ukhsa-dashboard.data.gov.uk/themes...


TODO: It would be nice for the metrics listing to return both the raw "name" and a human-readable version of it.