In [1]:
# Import required libraries
import json
import requests
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
import ipywidgets as wdg

In [2]:
%matplotlib inline
# make figures larger
plt.rcParams['figure.dpi'] = 100

In [3]:
# Load initial Json data from disk
jsondata = {}
with open("hospital_admissions.json", "rt") as INFILE:
    jsondata["hospital_admissions"] = json.load(INFILE)
with open("positive_testings.json", "rt") as INFILE:
    jsondata["positive_testings"] = json.load(INFILE)

In [4]:
# Wrangle the data
def wrangle_data(rawdata):
    """
    Processes raw JSON data into a cleaned and structured pandas DataFrame.

    Parameters:
        rawdata (dict): Raw JSON data containing 'hospital_admissions' and 'positive_testings'.

    Returns:
        pandas.DataFrame: Cleaned DataFrame with columns ['hospital_admissions', 'positive_testings'],
                          indexed by weekly date (start of the week).
    """
    # Step 1: Organize raw data into a dictionary structure keyed by date
    data = {}
    for dataset in [rawdata["hospital_admissions"], rawdata["positive_testings"]]:
        for entry in dataset:
            date = entry['date']  
            metric = entry['metric']  
            value = entry['metric_value']  
            
            if date not in data:
                data[date] = {}  # Initialize nested dictionary for each date
            
            # Append values to a list for each metric to handle multiple entries per date
            if metric not in data[date]:
                data[date][metric] = []
            data[date][metric].append(value)

    # Step 2: Compute the average value for each metric on each date
    # Some dates may have multiple entries for the same metric (e.g., multiple positive_testing values).
    # To ensure data consistency, we calculate the average value for each metric on a given date.
    averaged_data = {}
    for date, metrics in data.items():
        averaged_data[date] = {}
        for metric, values in metrics.items():
            # Compute the mean value for all entries of the same metric on the same date
            averaged_data[date][metric] = sum(values) / len(values)

    # Step 3: Create a pandas DataFrame with a datetime index
    # Initialize an empty DataFrame with the expected columns and date-based index
    dates = sorted(averaged_data.keys())  # Sort dates for consistent indexing
    index = pd.to_datetime(dates)  
    df = pd.DataFrame(index=index, columns=['hospital_admissions', 'positive_testings'])

    # Step 4: Map metrics to DataFrame columns and populate with values
    metrics = {
        'hospital_admissions': 'RSV_healthcare_admissionRateByWeek',
        'positive_testings': 'RSV_testing_positivityByWeek'
    }
    for date, entry in averaged_data.items():
        for column, metric_name in metrics.items():
            # Populate the DataFrame with values, using None for missing metrics
            value = entry.get(metric_name, None)
            df.loc[date, column] = value

    # Step 5: Ensure the index reflects weekly frequency, starting on the week's start day
    df.index = pd.to_datetime(df.index).to_period('W').start_time

    # Step 6: Align data columns by finding the latest available start date
    # This step ensures all columns have data available from the same starting date.
    first_non_na_dates = []
    for column in df.columns:
        # Find the earliest date with non-NaN values for each column
        non_na_dates = df.index[~df[column].isna()]
        if not non_na_dates.empty:
            first_non_na_dates.append(non_na_dates.min())

    # Determine the latest start date across all columns to synchronize data availability
    if first_non_na_dates:
        global_start_date = max(first_non_na_dates)
        df = df[df.index >= global_start_date]  # Filter the DataFrame to start from this date

    # Step 7: Fill missing values with 0 to handle gaps in data
    # After filtering, any remaining NaN values are replaced with 0 for consistency
    df.fillna(0.0, inplace=True)

    return df

# Global variable to hold the processed DataFrame
global df
df = wrangle_data(jsondata)

  df.fillna(0.0, inplace=True)


In [5]:
# Download current data
def access_api():
    base_url = "https://api.ukhsa-dashboard.data.gov.uk/themes/infectious_disease/sub_themes/respiratory/topics/RSV/geography_types/Nation/geographies/England/metrics"
    
    metrics = {
        'hospital_admissions': 'RSV_healthcare_admissionRateByWeek',
        'positive_testings': 'RSV_testing_positivityByWeek'
    }
    
    data = {}
    try:
        for key, metric in metrics.items():
            url = f"{base_url}/{metric}"
            params = {'page_size': 100, 'page': 1}  # Start pagination with page 1
            all_results = []
            
            while True:
                response = requests.get(url, params=params)
                if response.status_code != 200:
                    print(f"Error fetching {key}: Status {response.status_code}")
                    break
                
                result = response.json()
                if 'results' in result:
                    all_results.extend(result['results'])
                    if 'next' in result and result['next']:
                        # Move to the next page
                        params['page'] += 1
                    else:
                        break
                else:
                    break
            
            data[key] = all_results
            print(f"Successfully retrieved {len(data[key])} records for {key}")
        
        if all(key in data for key in metrics.keys()):
            print("\nAPI data fetch completed successfully")
            return data
        else:
            print("\nSome data missing, falling back to local data")
            return None
            
    except Exception as e:
        print(f"Error fetching data: {e}")
        return None

In [6]:
# Button callback function
def api_button_callback(button):
    global df
    try:
        # Set button to loading state
        button.description = "Loading..."
        button.icon = "spinner"
        button.button_style = ""
        
        # Fetch fresh data from API
        apidata = access_api()
        
        df = wrangle_data(apidata)
        
        # Force a redraw of the graph
        refresh_graph()
        
        # Update button to success state
        button.description = "Data Updated"
        button.icon = "check"
        button.button_style = "success"
    except Exception as e:
        print(f"Error or fallback: {e}")
        
        # Fallback to local data
        df = wrangle_data(jsondata)
        
        # Update button to warning state
        button.description = "Using Local Data"
        button.icon = "warning"
        button.button_style = "warning"

# Create refresh button
apibutton = wdg.Button(
    description='Refresh Data',
    disabled=False,
    button_style='info',
    tooltip="Click to download current data",
    icon='sync'
)

apibutton.on_click(api_button_callback)
display(apibutton)

Button(button_style='info', description='Refresh Data', icon='sync', style=ButtonStyle(), tooltip='Click to do…

In [15]:
# Graphs and Analysis
def plot_rsv(walk):
    """
    Plots RSV (Respiratory Syncytial Virus) metrics based on the input column name.

    Parameters:
        walk (str): Column name to be plotted. If None, default to plotting 'hospital_admissions' 
                    and 'positive_testings'. 
    """
    try:
        if walk is None:
            # If no specific column is provided, plot default metrics: 'hospital_admissions' and 'positive_testings'
            df[['hospital_admissions', 'positive_testings']].plot(linewidth=1)
            # Set the plot title and y-axis label for the default case
            plt.title("England Weekly RSV Metrics: Hospital Admissions and Positive Testing Rates")
            plt.ylabel("Rate")
        elif walk in df.columns:
            # Check if the provided column name exists in the DataFrame
            df[[walk]].plot(linewidth=1)  # Plot the specified column
            metric_name = walk.replace('_', ' ').title()  # Format the column name for display in the title
            plt.title(f"England Weekly RSV Metrics: {metric_name}")  
            # Set a specific y-axis label based on the selected column
            if walk == 'hospital_admissions':
                plt.ylabel("Hospital Admission Rate per 100,000")
            elif walk == 'positive_testings':
                plt.ylabel("Positive Testing Rate (%)")
        else:
            # If the provided column name is not valid, print an error message
            print(f"Error: '{walk}' is not a valid column. Available columns are: {list(df.columns)}")
            return  # Exit the function early to avoid further errors

        # Set x-axis label and customize the grid and layout for better readability
        plt.xlabel("Week Ending Date")
        plt.grid(True, alpha=0.3)  # Add a light grid for better visual clarity
        plt.xticks(rotation=45)  # Rotate x-axis labels for better visibility
        plt.tight_layout()  # Adjust layout to prevent overlapping of elements
        plt.show()  # Display the plot
    except Exception as e:
        # Handle unexpected errors and print an error message
        print(f"Plotting Error: {e}")


# Dropdown widget with an option to display both initially
whichwalk = wdg.Dropdown(
    options=[
        ('Both (initial view)', None),
        ('Hospital Admissions (weekly)', 'hospital_admissions'),
        ('Positive Testings (weekly)', 'positive_testings')
    ],
    value=None,  # Initial value is `None`, showing both metrics
    description='Options:',
    disabled=False,
)

In [16]:
# Function to update the graph
def refresh_graph():
    valid_columns = [None] + list(df.columns)  # Allow `None` for initial dual-display
    if whichwalk.value not in valid_columns:
        print(f"Invalid column '{whichwalk.value}'. Valid options: {valid_columns}")
    current = whichwalk.value
    other = None if current != 'hospital_admissions' else 'positive_testings'
    whichwalk.value = other  # Force redraw by toggling values
    whichwalk.value = current

# Connect the plotting function and the widget
graph = wdg.interactive_output(plot_rsv, {'walk': whichwalk})

# Display the dropdown and graph
display(whichwalk, graph)

Dropdown(description='Options:', options=(('Both (initial view)', None), ('Hospital Admissions (weekly)', 'hos…

Output()

1. Positive Testing Rate：The percentage of all respiratory syncytial virus (RSV) PCR tests conducted in the 7 days up to and including the specified date that returned a positive result. 

2. Hospital Admission Rate：The number of individuals per 100,000 population admitted to the hospital with confirmed RSV infection in the 7 days up to and including the specified date. 
