In [1]:
import urllib3
import pandas as pd
import polars as pl
import json as j

In [None]:
class UKHSA:
    """Small client for the UKHSA data dashboard metrics API.

    Every method is a static method, so the class works purely as a
    namespace: ``UKHSA.dataframe(...)`` and ``UKHSA().dataframe(...)`` are
    equivalent.

    Attributes:
        base_url: Root URL every request path is appended to.
        page_size: Number of records requested per page.
    """

    base_url = 'https://api.ukhsa-dashboard.data.gov.uk/'

    # Take it to the max; per the original author's note the API serves
    # 5 records per page by default.
    page_size = 365

    # Keys copied from every API result record into the yielded row, in the
    # order they become DataFrame columns.
    _record_fields = (
        'theme',
        'sub_theme',
        'topic',
        'geography_type',
        'geography',
        'geography_code',
        'metric',
        'metric_group',
        'stratum',
        'sex',
        'age',
        'year',
        'month',
        'epiweek',
        'date',
        'metric_value',
        'in_reporting_delay_period',
    )

    def __init__(self):
        pass

    # This is being kept separate since I want it to be reusable so that it can be used to build
    # queries in different ways to "_get_data()"
    @staticmethod
    def _get_params(theme, sub_theme, topic, geography_type, geography, metric,
                    stratum=None, age=None, sex=None, year=None, month=None,
                    epiweek=None, date=None, in_reporting_delay_period=None):
        """Build the request URL and query parameters for one metric query.

        The six mandatory arguments become path segments of the URL; each is
        percent-encoded so that values containing spaces, ``/``, ``?`` or
        ``#`` cannot alter the URL structure. Optional filters left as
        ``None`` are omitted from the query parameters.

        Returns:
            tuple: ``(url, params)`` where ``url`` is the full endpoint URL
            and ``params`` is a new dict holding the supplied optional
            filters followed by ``page_size`` (taken from
            ``UKHSA.page_size``) and ``page`` (always ``1``).
        """
        # Local import keeps the class usable without editing the notebook's
        # import cell.
        from urllib.parse import quote

        path_segments = (
            ('themes', theme),
            ('sub_themes', sub_theme),
            ('topics', topic),
            ('geography_types', geography_type),
            ('geographies', geography),
            ('metrics', metric),
        )
        path = '/'.join(
            f'{label}/{quote(str(value), safe="")}'
            for label, value in path_segments
        )
        url = UKHSA.base_url + path

        candidates = {
            'stratum': stratum,
            'age': age,
            'sex': sex,
            'year': year,
            'month': month,
            'epiweek': epiweek,
            'date': date,
            'in_reporting_delay_period': in_reporting_delay_period,
            'page_size': UKHSA.page_size,
            'page': 1,               # start on first page of paginated data.
        }
        params = {
            name: value for name, value in candidates.items() if value is not None
        }

        return url, params

    @staticmethod
    def _get_data(theme, sub_theme, topic, geography_type, geography, metric,
                  stratum=None, age=None, sex=None, year=None, month=None, epiweek=None,
                  date=None, in_reporting_delay_period=None):
        """Yield one dict per record, walking every page of the query.

        Pages are requested starting at ``page=1`` until the API returns a
        page without results, or reports that there is no next page.

        Yields:
            dict: One row containing the keys listed in ``_record_fields``.

        Raises:
            urllib3.exceptions.HTTPError: If the API answers with an error
                status other than 404.
            KeyError: If a result record lacks one of the expected keys.
        """
        url, params = UKHSA._get_params(
            theme, sub_theme, topic, geography_type, geography, metric,
            stratum, age, sex, year, month, epiweek, date,
            in_reporting_delay_period,
        )

        http = urllib3.PoolManager()
        try:
            while True:
                response = http.request('GET', url, fields=params)

                # A 404 is treated as "no (more) data", which keeps the
                # original outcome of simply ending the stream.
                # NOTE(review): presumably the API answers 404 when paging
                # past the last page - confirm against the API docs.
                if response.status == 404:
                    break
                # Any other error used to be swallowed whenever the body was
                # JSON without a 'results' key, silently truncating the data.
                if response.status >= 400:
                    raise urllib3.exceptions.HTTPError(
                        f'UKHSA API request failed with status {response.status}: {url}'
                    )

                payload = j.loads(response.data)
                records = payload.get('results', [])
                if not records:
                    break

                for record in records:
                    yield {field: record[field] for field in UKHSA._record_fields}

                # When the payload explicitly reports no next page, stop here
                # and save the extra request; if the key is absent, keep
                # probing until an empty page comes back.
                if 'next' in payload and payload['next'] is None:
                    break

                params['page'] += 1
        finally:
            # Release pooled connections, also when the consumer abandons the
            # generator early.
            http.clear()

    @staticmethod
    def dataframe(theme, sub_theme, topic, geography_type, geography, metric,
                  stratum=None, age=None, sex=None, year=None, month=None, epiweek=None,
                  date=None, in_reporting_delay_period=None, type=None):
        """Fetch every record for the query and return it as a DataFrame.

        Args:
            type: ``'polars'`` returns a ``polars.DataFrame``; ``'pandas'``,
                ``None`` or any other value returns a ``pandas.DataFrame``.
                (The name shadows the builtin but is kept for backward
                compatibility with existing keyword callers.)

        Returns:
            A DataFrame with one row per record yielded by ``_get_data``.
        """
        rows = list(UKHSA._get_data(
            theme, sub_theme, topic, geography_type, geography, metric,
            stratum, age, sex, year, month, epiweek, date,
            in_reporting_delay_period,
        ))

        if type == 'polars':
            return pl.DataFrame(rows)
        # pandas is the default for 'pandas', None and unrecognised values.
        return pd.DataFrame(rows)
    

In [14]:
# No `type` given, so the result is a pandas DataFrame (the default).
test_pd = UKHSA.dataframe(
    theme='infectious_disease',
    sub_theme='respiratory',
    topic='COVID-19',
    geography_type='Nation',
    geography='England',
    metric='COVID-19_testing_PCRcountByDay',
)

test_pd

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-08,535.0,False
1,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-09,798.0,False
2,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-10,1170.0,False
3,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-11,1572.0,False
4,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-12,2068.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2060,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-29,21007.0,False
2061,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-30,21027.0,False
2062,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-01,19773.0,False
2063,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-02,17250.0,False


In [11]:
# Same query as above, this time requesting a polars DataFrame.
test_pl = UKHSA.dataframe(
    theme='infectious_disease',
    sub_theme='respiratory',
    topic='COVID-19',
    geography_type='Nation',
    geography='England',
    metric='COVID-19_testing_PCRcountByDay',
    type='polars',
)

test_pl

theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
str,str,str,str,str,str,str,str,str,str,str,i64,i64,i64,str,f64,bool
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,6,"""2020-02-08""",535.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,6,"""2020-02-09""",798.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,7,"""2020-02-10""",1170.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,7,"""2020-02-11""",1572.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2020,2,7,"""2020-02-12""",2068.0,false
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,9,40,"""2025-09-29""",21007.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,9,40,"""2025-09-30""",21027.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,10,40,"""2025-10-01""",19773.0,false
"""infectious_disease""","""respiratory""","""COVID-19""","""Nation""","""England""","""E92000001""","""COVID-19_testing_PCRcountByDay""","""testing""","""default""","""all""","""all""",2025,10,40,"""2025-10-02""",17250.0,false


In [12]:
# Same query again with pandas requested explicitly via `type`.
test_pd2 = UKHSA.dataframe(
    theme='infectious_disease',
    sub_theme='respiratory',
    topic='COVID-19',
    geography_type='Nation',
    geography='England',
    metric='COVID-19_testing_PCRcountByDay',
    type='pandas',
)

test_pd2

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,geography_code,metric,metric_group,stratum,sex,age,year,month,epiweek,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-08,535.0,False
1,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,6,2020-02-09,798.0,False
2,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-10,1170.0,False
3,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-11,1572.0,False
4,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2020,2,7,2020-02-12,2068.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2060,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-29,21007.0,False
2061,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,9,40,2025-09-30,21027.0,False
2062,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-01,19773.0,False
2063,infectious_disease,respiratory,COVID-19,Nation,England,E92000001,COVID-19_testing_PCRcountByDay,testing,default,all,all,2025,10,40,2025-10-02,17250.0,False
