# Notebook cell In [7]:
import requests
import json
import time
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, clear_output

"""Data visualization dashboard for interactively exploring COVID-19 time-series data."""

class APIwrapper:
    """Paged, rate-limited client for a single metric endpoint of the UKHSA dashboard API.

    An instance is bound to one endpoint (theme / sub-theme / topic / geography
    type / geography / metric). Pages are downloaded one at a time with
    ``get_page`` or all at once with ``get_all_pages``.

    Attributes:
        count: Total number of items reported by the API in the most recent
            response, or ``None`` before the first successful request.
    """
    # class variables shared among all instances
    _access_point = "https://api.ukhsa-dashboard.data.gov.uk"
    _last_access = 0.0  # time of last api access
    _min_interval = 0.33  # minimum seconds between two requests (max 3 requests/second)
    _timeout = 30  # seconds to wait for the server before giving up

    def __init__(self, theme, sub_theme, topic, geography_type, geography, metric):
        """Init the APIwrapper object, constructing the endpoint from the structure parameters."""
        url_path = (f"/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/"
                    f"{geography_type}/geographies/{geography}/metrics/{metric}")
        # our starting API endpoint
        self._start_url = APIwrapper._access_point + url_path
        # filters/page size of the query in progress; the sentinels below never
        # match a real query, so the first get_page call always starts at page one
        self._filters = None
        self._page_size = -1
        # url of the next page to fetch; None once the last page has been read
        self._next_url = self._start_url
        # will contain the number of items
        self.count = None

    def get_page(self, filters=None, page_size=5):
        """Access the API and download the next page of data.

        Paging restarts from the first page whenever ``filters`` or
        ``page_size`` differ from those of the previous call.

        Args:
            filters: Mapping of query-parameter name to value. Entries whose
                value is ``None`` are not sent. ``None`` means no filters.
            page_size: Number of items per page, between 1 and 365.

        Returns:
            The list found under ``'results'`` in the response, or ``[]`` when
            the last page for the current query has already been fetched.

        Raises:
            ValueError: If ``page_size`` is outside 1..365.
            requests.RequestException: On network failure, timeout or an HTTP
                error status.
        """
        if filters is None:
            filters = {}
        # Check page size is within range
        if page_size > 365:
            raise ValueError("Max supported page size is 365")
        if page_size < 1:
            raise ValueError("Page size must be at least 1")
        # restart from first page if page size or filters have changed
        if filters != self._filters or page_size != self._page_size:
            # keep a copy so a later mutation of the caller's dict is detected as a change
            self._filters = dict(filters)
            self._page_size = page_size
            self._next_url = self._start_url
        # signal the end of data condition
        if self._next_url is None:
            return []  # we already fetched the last page
        # simple rate limiting to avoid bans
        wait = APIwrapper._min_interval - (time.time() - APIwrapper._last_access)
        if wait > 0:
            time.sleep(wait)
        # record the moment the request is actually sent (i.e. after any sleep),
        # otherwise the next call would underestimate how long it has to wait
        APIwrapper._last_access = time.time()
        # build parameter dictionary by removing all the None
        # values from filters and adding page_size
        parameters = {name: value for name, value in filters.items() if value is not None}
        parameters['page_size'] = page_size
        # the page parameter is already included in _next_url.
        response = requests.get(self._next_url, params=parameters,
                                timeout=APIwrapper._timeout)
        # fail loudly on 4xx/5xx instead of tripping over a missing key below
        response.raise_for_status()
        payload = response.json()
        # update url so we'll fetch the next page
        self._next_url = payload['next']
        self.count = payload['count']
        # data are in the nested 'results' list
        return payload['results']

    def get_all_pages(self, filters=None, page_size=365):
        """Access the API and download all remaining pages of data.

        Pages are fetched through ``get_page``, so the same pacing applies and
        the download continues from the current position of the query: if the
        same ``filters``/``page_size`` combination has already been read to the
        end on this instance, the result is an empty list.

        Args:
            filters: Same as for ``get_page``.
            page_size: Same as for ``get_page``.

        Returns:
            A single list with the items of every page fetched.
        """
        data = []  # build up all data here
        while True:
            # use get_page to do the job, including the pacing
            next_page = self.get_page(filters, page_size)
            if not next_page:
                break  # we are done
            data.extend(next_page)
        return data


# Data Loading Function
def load_and_prepare_timeseries_data():
    """Load the cached COVID-19 JSON files and build a daily time-series DataFrame.

    Reads ``cases.json``, ``admissions.json``, ``deaths.json`` and
    ``occupied_beds.json`` from the current working directory. Each file must
    contain a list of records with ``date``, ``metric`` and ``metric_value``
    keys.

    Returns:
        pandas.DataFrame: float-valued frame with columns ``cases``,
        ``admissions``, ``deaths`` and ``occupied_beds``, indexed by every
        calendar day between the earliest and the latest date found. Days or
        metrics without a value hold ``0.0``. The frame is empty when none of
        the files contains a record.

    Raises:
        OSError: If one of the JSON files cannot be opened.
        KeyError: If a record lacks one of the expected keys.
    """
    # DataFrame column -> metric name used in the JSON records
    metrics = {
        'cases': 'COVID-19_cases_casesByDay',
        'admissions': 'COVID-19_healthcare_admissionByDay',
        'deaths': 'COVID-19_deaths_ONSByDay',
        'occupied_beds': 'COVID-19_healthcare_occupiedBedsByDay'
    }
    columns = list(metrics)

    # Wrangling the data: group the values by date, then by metric name.
    # Each column is cached in a file named after it.
    data = {}
    for column in columns:
        with open(f"{column}.json", "rt") as infile:
            dataset = json.load(infile)
        for entry in dataset:
            data.setdefault(entry['date'], {})[entry['metric']] = entry['metric_value']

    if not data:
        # nothing cached yet: return an empty frame instead of failing on dates[0]
        return pd.DataFrame(index=pd.DatetimeIndex([]), columns=columns, dtype=float)

    # Parse every date once and reuse the timestamps for both range and lookup
    timestamps = {date: pd.to_datetime(date) for date in data}
    index = pd.date_range(min(timestamps.values()), max(timestamps.values()), freq='D')

    # Start from an all-zero float frame so the dtype does not depend on how
    # pandas treats object columns in fillna
    timeseriesdf = pd.DataFrame(0.0, index=index, columns=columns)

    # Fill DataFrame
    for date, entry in data.items():
        for column, metric_name in metrics.items():
            value = entry.get(metric_name)
            if value is not None:
                timeseriesdf.loc[timestamps[date], column] = value

    # a NaN read from the JSON itself is still reported as 0.0
    return timeseriesdf.fillna(0.0)


def create_dashboard():
    """Create an interactive dashboard for exploring COVID-19 time-series data.

    The data are loaded with ``load_and_prepare_timeseries_data``. The
    dashboard offers a metric selector, a linear/log scale switch, a button
    that re-selects every metric and a button that downloads fresh data for
    every metric through ``APIwrapper``, stores it in the ``<metric>.json``
    cache files and redraws the plot.

    Returns:
        ipywidgets.VBox: the assembled dashboard, ready to be passed to
        ``display``.
    """
    all_metrics = ('cases', 'admissions', 'deaths', 'occupied_beds')
    # DataFrame column -> metric name of the API endpoint
    api_metric_names = {
        'cases': 'COVID-19_cases_casesByDay',
        'admissions': 'COVID-19_healthcare_admissionByDay',
        'deaths': 'COVID-19_deaths_ONSByDay',
        'occupied_beds': 'COVID-19_healthcare_occupiedBedsByDay'
    }

    # Load data
    timeseries_df = load_and_prepare_timeseries_data()

    # Select multiple metrics to display on the plot
    metric_multiselect = widgets.SelectMultiple(
        options=list(all_metrics),
        value=list(all_metrics),
        description='Metrics:',
        disabled=False,
        layout=widgets.Layout(width='300px')
    )

    # Widget: Scale selection (linear or logarithmic)
    scale_radio = widgets.RadioButtons(
        options=['linear', 'log'],
        value='linear',
        description='Scale:',
        disabled=False,
        layout=widgets.Layout(width='150px')
    )
    # Widget: Button to reset the metric selection
    reload_button = widgets.Button(
        description="Show all Metrics",
        button_style='primary',
        tooltip='Reload and show all metrics',
        icon='refresh'
    )

    # Widget: Button to fetch the latest data
    fetch_button = widgets.Button(
        description="Refresh All Metrics",
        button_style='info',
        tooltip='Fetch the latest data',
        icon='cloud-download'
    )
    # Static description area for metrics
    description_text = """
    - Cases: Daily reported COVID-19 cases in England
    - Admissions: Daily reported hospital admissions due to COVID-19
    - Deaths: Daily reported deaths attributed to COVID-19
    - Occupied Beds: Daily count of hospital beds occupied by COVID-19 patients
    All data from UK Health Security Agency
    """
    description_area = widgets.Textarea(
        value=description_text,
        description='Description:',
        disabled=True,
        layout=widgets.Layout(width='680px', height='150px')
    )

    output = widgets.Output()

    def plot_timeseries(df, log_scale=False,
                        metrics=('cases', 'admissions', 'occupied_beds', 'deaths')):
        """Plot the given metric columns of ``df``, optionally on a log y-axis."""
        plt.figure(figsize=(12, 6))

        # both scales draw the same lines; only the plotting call and title differ
        draw = plt.semilogy if log_scale else plt.plot
        for metric in metrics:
            draw(df.index, df[metric], label=metric.capitalize())
        if log_scale:
            plt.title('Daily COVID-19 Metrics (Log Scale)')
        else:
            plt.title('Daily COVID-19 Metrics')

        plt.xlabel('Date')
        plt.ylabel('Count')
        if metrics:
            # with nothing selected there are no labelled lines for a legend
            plt.legend()
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

    def update_plot(change=None):
        """Redraw the plot inside the output widget from the current widget state."""
        with output:
            clear_output(wait=True)
            plot_timeseries(
                timeseries_df,
                log_scale=(scale_radio.value == 'log'),
                metrics=metric_multiselect.value
            )

    def reload_metrics(change=None):
        """Reset the metric selection to all metrics."""
        metric_multiselect.value = list(all_metrics)
        update_plot()

    def fetch_all_metrics(change=None):
        """Fetch the latest data for all metrics, update the cache files and redraw."""
        nonlocal timeseries_df

        # Filters accepted by get_all_pages; None entries are not sent to the API
        filters = {
            "stratum": None,
            "age": None,
            "sex": None,
            "year": None,
            "month": None,
            "epiweek": None,
            "date": None,
            "in_reporting_delay_period": None
        }

        # Download everything first so a failure half-way through leaves both
        # the cache files and the displayed data untouched
        fetched = {}
        try:
            for metric in all_metrics:
                api = APIwrapper(
                    theme="infectious_disease",
                    sub_theme="respiratory",
                    topic="COVID-19",
                    geography_type="Nation",
                    geography="England",
                    metric=api_metric_names[metric]
                )
                fetched[metric] = api.get_all_pages(filters)
        except (requests.RequestException, ValueError, KeyError) as err:
            # a button callback has no caller to report to: show the problem
            # under the current plot and keep the existing data
            with output:
                print(f"Could not refresh the data: {err}")
            return

        # Save the new data to the respective JSON file
        for metric, data in fetched.items():
            if not data:
                continue  # never replace a usable cache file with an empty download
            with open(f"{metric}.json", "wt") as outfile:
                json.dump(data, outfile)

        # Rebuild the frame from the cache files so a refreshed session shows
        # exactly what a fresh start would (sorted, gap-free daily index)
        timeseries_df = load_and_prepare_timeseries_data()
        update_plot()  # Re-plot with the updated data

    # Observers
    fetch_button.on_click(fetch_all_metrics)
    metric_multiselect.observe(update_plot, names='value')
    scale_radio.observe(update_plot, names='value')
    reload_button.on_click(reload_metrics)

    # Initial plot (update_plot already redirects its drawing to the output widget)
    update_plot()

    # Layout
    dashboard = widgets.VBox([
        widgets.HBox([metric_multiselect, scale_radio, reload_button, fetch_button]),
        output,
        description_area
    ])

    return dashboard

# Usage: build the dashboard and render it. The guard keeps this from running
# when the file is imported as a module instead of being executed directly.
if __name__ == '__main__':
    dashboard = create_dashboard()
    display(dashboard)

# Notebook cell output: VBox(children=(HBox(children=(SelectMultiple(description='Metrics:', index=(0, 1, 2, 3), layout=Layout(width='…