# UKHSA respiratory viruses: RSV vs Influenza (England)

## Goal
Use UKHSA dashboard API data to compare **weekly testing positivity** for RSV and influenza in England.

## Data provenance
- Source: UKHSA dashboard API (`https://ukhsa-dashboard.data.gov.uk/access-our-data`)
- Log the final dataset choice in `../docs/DATA_SOURCES.md`.

## Notes
This notebook downloads public, aggregated data (no personal data).


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from epidemiology_project.paths import raw_data_dir, figures_dir
from epidemiology_project.ukhsa import UksHaMetricQuery, fetch_metric


## Download data (UKHSA API)


In [None]:
# Both queries target the same theme, sub-theme and geography; only the topic
# and the metric name differ between RSV and influenza.
england_respiratory = dict(
    theme='infectious_disease',
    sub_theme='respiratory',
    geography_type='Nation',
    geography='England',
)

rsv_q = UksHaMetricQuery(
    topic='RSV',
    metric='RSV_testing_positivityByWeek',
    **england_respiratory,
)
flu_q = UksHaMetricQuery(
    topic='Influenza',
    metric='influenza_testing_positivityByWeek',
    **england_respiratory,
)

# Fetch RSV first, then influenza (same order as the queries were built).
df_rsv, df_flu = fetch_metric(rsv_q), fetch_metric(flu_q)

# Preview the first rows of the two columns used in the rest of the notebook.
df_rsv.loc[:, ['date', 'metric_value']].head()


## Save raw extracts (optional but recommended)


In [None]:
# One destination path per virus, both inside the raw-data directory.
out_rsv, out_flu = (
    raw_data_dir() / csv_name
    for csv_name in (
        'ukhsa_rsv_testing_positivity_weekly_england.csv',
        'ukhsa_influenza_testing_positivity_weekly_england.csv',
    )
)

# Write each extract as fetched, without the DataFrame index column.
for extract, destination in ((df_rsv, out_rsv), (df_flu, out_flu)):
    extract.to_csv(destination, index=False)

(out_rsv, out_flu)


## Clean + align


In [None]:
# Keep only the observation date and value from each extract, naming the value
# column after its virus so both series can sit side by side in one frame.
# Dates are coerced with pd.to_datetime (a no-op when the column is already
# datetime64) so that sorting is chronological, the plot gets a real time
# axis, and the later `.dt.year` access works.
rsv = (
    df_rsv[['date', 'metric_value']]
    .rename(columns={'metric_value': 'rsv_positivity'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
flu = (
    df_flu[['date', 'metric_value']]
    .rename(columns={'metric_value': 'flu_positivity'})
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Outer join: a week present for only one virus is kept, with NaN in the
# other virus's column.
# NOTE(review): this assumes one row per date in each extract -- confirm that
# fetch_metric does not return several strata (e.g. age bands) per week,
# which would multiply rows in this merge.
df = pd.merge(rsv, flu, on='date', how='outer').sort_values('date')
df.tail()


## Plot time series


In [None]:
fig, ax = plt.subplots(figsize=(11, 5))

# Draw RSV first, then influenza, against the shared date axis.
for column, legend_label in (
    ('rsv_positivity', 'RSV positivity (weekly)'),
    ('flu_positivity', 'Influenza positivity (weekly)'),
):
    ax.plot(df['date'], df[column], label=legend_label)

ax.set(
    title='England: weekly testing positivity (UKHSA dashboard API)',
    xlabel='Date',
    ylabel='Positivity (%) or proportion (check units)',
)
ax.grid(True, alpha=0.3)
ax.legend()
fig.tight_layout()

# Save the figure into the final-figures directory and echo the path.
fig_path = figures_dir(final=True) / 'ukhsa_rsv_vs_flu_positivity_england.png'
fig.savefig(fig_path, dpi=200)
fig_path


## Peak week per year (simple summary)


In [None]:
# Calendar year of each observation; the per-year peak summary groups on it.
year_of_observation = df['date'].dt.year
df['year'] = year_of_observation

def peak_by_year(series_col: str, frame: "pd.DataFrame | None" = None) -> "pd.DataFrame":
    """Return, for each year, the row where ``series_col`` is highest.

    Parameters
    ----------
    series_col
        Name of the column to maximise (e.g. ``'rsv_positivity'``).
    frame
        DataFrame holding ``'year'``, ``'date'`` and ``series_col`` columns.
        When omitted, the notebook-level ``df`` is used, so existing
        one-argument calls behave as before.

    Returns
    -------
    DataFrame
        Columns ``['year', 'date', series_col]``, one row per year that has
        at least one non-missing value in ``series_col``, sorted by year.
    """
    source = df if frame is None else frame
    # Rows with a missing value cannot be a peak; dropna already returns a
    # new frame, so no extra copy is needed.
    observed = source.dropna(subset=[series_col])
    # idxmax yields the index label of each year's maximum; .loc then pulls
    # those rows back out (this relies on the index labels being unique).
    peak_labels = observed.groupby('year')[series_col].idxmax()
    return observed.loc[peak_labels, ['year', 'date', series_col]].sort_values('year')

# Per-year peak rows for RSV, then influenza.
peak_rsv, peak_flu = map(peak_by_year, ('rsv_positivity', 'flu_positivity'))

# Show the most recent years of each summary.
(peak_rsv.tail(), peak_flu.tail())
