### Acute Respiratory Infection Cases by Day

In [None]:
import pandas as pd
import numpy as np

# Load the acute respiratory infection data from CSV into a DataFrame
ari_df = pd.read_csv('acute_respiratory_infections.csv')

In [None]:
# Keep only rows whose metric is the emergency-department countsByDay series
ari_df = ari_df.loc[
    ari_df['metric'].eq('acute-respiratory-infection_syndromic_emergencyDepartment_countsByDay')
]

In [None]:
# Reduce to the date and value columns, giving the value column a descriptive name
ari_df = (
    ari_df.loc[:, ['date', 'metric_value']]
    .rename(columns={'metric_value': 'acute_respiratory_infection_counts'})
)

In [None]:
# Parse the 'date' column into pandas datetime values (no DD-MM-YYYY re-formatting is applied here)
ari_df['date'] = pd.to_datetime(ari_df['date'])

ari_df.head()

Unnamed: 0,date,acute_respiratory_infection_counts
1,2025-06-08,149.0
3,2025-06-07,144.0
5,2025-06-06,137.0
8,2025-06-05,149.0
11,2025-06-04,144.0


In [32]:
# Display every row that has a missing value in at least one column
ari_df.loc[ari_df.isna().any(axis='columns')]

Unnamed: 0,date,acute_respiratory_infection_counts


In [33]:
# Helper: replace a missing count with the mean of the preceding days
def fill_with_past_7_days_mean(row, df, column, window_days=7):
    """Return ``row[column]``, or a trailing-window mean when it is missing.

    Args:
        row: A single row; must provide ``'date'`` and ``column``.
        df: Frame with a ``'date'`` column used to look up earlier values.
        column: Name of the value column to fill.
        window_days: Number of days before ``row['date']`` to average over.

    Returns:
        The existing value if it is not missing; otherwise the mean of
        ``column`` over rows of ``df`` dated from ``window_days`` days before
        up to one day before ``row['date']`` (NaN when nothing is available).
    """
    current = row[column]
    if not pd.isna(current):
        return current

    anchor = row['date']
    window_start = anchor - pd.Timedelta(days=window_days)
    window_end = anchor - pd.Timedelta(days=1)
    in_window = df['date'].between(window_start, window_end)  # inclusive on both ends
    return df.loc[in_window, column].mean()

# Impute missing acute_respiratory_infection_counts row by row from the trailing mean
ari_df['acute_respiratory_infection_counts'] = ari_df.apply(
    fill_with_past_7_days_mean,
    axis=1,
    args=(ari_df, 'acute_respiratory_infection_counts'),
)

In [35]:
# Order rows chronologically (oldest first) and preview the result
ari_df = ari_df.sort_values(by='date')
ari_df.head()

Unnamed: 0,date,acute_respiratory_infection_counts
1085,2024-06-09,176.0
1084,2024-06-10,166.0
1083,2024-06-11,155.0
1082,2024-06-12,157.0
1079,2024-06-13,137.0


In [36]:
# Save the cleaned daily counts to CSV, omitting the DataFrame index
ari_df.to_csv('acute_respiratory_illness_cd.csv', index=False)

### Acute Bronchiolitis Cases by Day

In [37]:
# Load the acute bronchiolitis data from CSV and preview the first rows
ab_df = pd.read_csv('acute_bronchiolitis.csv')
ab_df.head()

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,metric,sex,age,stratum,year,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,acute-bronchiolitis,UKHSA Region,West Midlands,acute-bronchiolitis_syndromic_emergencyDepartm...,all,all,default,2025,2025-06-08,9.31,False
1,infectious_disease,respiratory,acute-bronchiolitis,UKHSA Region,West Midlands,acute-bronchiolitis_syndromic_emergencyDepartm...,all,all,default,2025,2025-06-08,8.0,False
2,infectious_disease,respiratory,acute-bronchiolitis,UKHSA Region,West Midlands,acute-bronchiolitis_syndromic_emergencyDepartm...,all,all,default,2025,2025-06-07,9.34,False
3,infectious_disease,respiratory,acute-bronchiolitis,UKHSA Region,West Midlands,acute-bronchiolitis_syndromic_emergencyDepartm...,all,all,default,2025,2025-06-07,5.0,False
4,infectious_disease,respiratory,acute-bronchiolitis,UKHSA Region,West Midlands,acute-bronchiolitis_syndromic_emergencyDepartm...,all,all,default,2025,2025-06-06,9.38,False


In [38]:
# Keep only rows whose metric is the emergency-department countsByDay series
ab_df = ab_df.loc[
    ab_df['metric'].eq('acute-bronchiolitis_syndromic_emergencyDepartment_countsByDay')
]

In [39]:
# Reduce to the date and value columns, giving the value column a descriptive name
ab_df = (
    ab_df.loc[:, ['date', 'metric_value']]
    .rename(columns={'metric_value': 'acute_bronchiolitis_syndromic_counts'})
)

In [40]:
# Parse the 'date' column into pandas datetime values (no DD-MM-YYYY re-formatting is applied here)
ab_df['date'] = pd.to_datetime(ab_df['date'])

ab_df.head()

Unnamed: 0,date,acute_bronchiolitis_syndromic_counts
1,2025-06-08,8.0
3,2025-06-07,5.0
5,2025-06-06,6.0
8,2025-06-05,9.0
11,2025-06-04,10.0


In [41]:
# Display every row that has a missing value in at least one column
ab_df.loc[ab_df.isna().any(axis='columns')]

Unnamed: 0,date,acute_bronchiolitis_syndromic_counts


In [43]:
# Helper: replace a missing count with the mean of the preceding days
def fill_with_past_7_days_mean(row, df, column, window_days=7):
    """Return ``row[column]``, or a trailing-window mean when it is missing.

    Args:
        row: A single row; must provide ``'date'`` and ``column``.
        df: Frame with a ``'date'`` column used to look up earlier values.
        column: Name of the value column to fill.
        window_days: Number of days before ``row['date']`` to average over.

    Returns:
        The existing value if it is not missing; otherwise the mean of
        ``column`` over rows of ``df`` dated from ``window_days`` days before
        up to one day before ``row['date']`` (NaN when nothing is available).
    """
    current = row[column]
    if not pd.isna(current):
        return current

    anchor = row['date']
    window_start = anchor - pd.Timedelta(days=window_days)
    window_end = anchor - pd.Timedelta(days=1)
    in_window = df['date'].between(window_start, window_end)  # inclusive on both ends
    return df.loc[in_window, column].mean()

# Impute missing acute_bronchiolitis_syndromic_counts row by row from the trailing mean
ab_df['acute_bronchiolitis_syndromic_counts'] = ab_df.apply(
    fill_with_past_7_days_mean,
    axis=1,
    args=(ab_df, 'acute_bronchiolitis_syndromic_counts'),
)

In [45]:
# Order rows chronologically (oldest first) and preview the result
ab_df = ab_df.sort_values(by='date')
ab_df.head()

Unnamed: 0,date,acute_bronchiolitis_syndromic_counts
1085,2024-06-09,2.0
1084,2024-06-10,7.0
1083,2024-06-11,6.0
1082,2024-06-12,2.0
1079,2024-06-13,3.0


In [47]:
# Save the cleaned daily counts to CSV, omitting the DataFrame index
ab_df.to_csv('acute_bronchiolitis_syndromic_cd.csv', index=False)

### Influenza-like Syndromic Illness Cases by Day

In [48]:
# Load the influenza-like illness data from CSV and preview the first rows
ili_df = pd.read_csv('influenza_like_illness.csv')
ili_df.head()

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,metric,sex,age,stratum,year,date,metric_value,in_reporting_delay_period
0,infectious_disease,respiratory,influenza-like,UKHSA Region,West Midlands,influenza-like_syndromic_emergencyDepartment_b...,all,all,default,2025,2025-06-08,0.66,False
1,infectious_disease,respiratory,influenza-like,UKHSA Region,West Midlands,influenza-like_syndromic_emergencyDepartment_c...,all,all,default,2025,2025-06-08,2.0,False
2,infectious_disease,respiratory,influenza-like,UKHSA Region,West Midlands,influenza-like_syndromic_emergencyDepartment_b...,all,all,default,2025,2025-06-07,0.67,False
3,infectious_disease,respiratory,influenza-like,UKHSA Region,West Midlands,influenza-like_syndromic_emergencyDepartment_c...,all,all,default,2025,2025-06-07,1.0,False
4,infectious_disease,respiratory,influenza-like,UKHSA Region,West Midlands,influenza-like_syndromic_emergencyDepartment_b...,all,all,default,2025,2025-06-06,0.68,False


In [49]:
# Keep only rows whose metric is the emergency-department countsByDay series
ili_df = ili_df.loc[
    ili_df['metric'].eq('influenza-like_syndromic_emergencyDepartment_countsByDay')
]

In [50]:
# Reduce to the date and value columns, giving the value column a descriptive name
ili_df = (
    ili_df.loc[:, ['date', 'metric_value']]
    .rename(columns={'metric_value': 'influenza_like_syndromic_counts'})
)

In [51]:
# Parse the 'date' column into pandas datetime values (no DD-MM-YYYY re-formatting
# is applied here). The source must be ili_df's own column: reading another
# frame's dates would only line up through index alignment, and would silently
# produce wrong or missing dates whenever the two frames' indexes differ.
ili_df['date'] = pd.to_datetime(ili_df['date'])

ili_df.head()

Unnamed: 0,date,influenza_like_syndromic_counts
1,2025-06-08,2.0
3,2025-06-07,1.0
5,2025-06-06,1.0
8,2025-06-05,3.0
11,2025-06-04,0.0


In [52]:
# Display every row that has a missing value in at least one column
ili_df.loc[ili_df.isna().any(axis='columns')]

Unnamed: 0,date,influenza_like_syndromic_counts


In [53]:
# Helper: replace a missing count with the mean of the preceding days
def fill_with_past_7_days_mean(row, df, column, window_days=7):
    """Return ``row[column]``, or a trailing-window mean when it is missing.

    Args:
        row: A single row; must provide ``'date'`` and ``column``.
        df: Frame with a ``'date'`` column used to look up earlier values.
        column: Name of the value column to fill.
        window_days: Number of days before ``row['date']`` to average over.

    Returns:
        The existing value if it is not missing; otherwise the mean of
        ``column`` over rows of ``df`` dated from ``window_days`` days before
        up to one day before ``row['date']`` (NaN when nothing is available).
    """
    current = row[column]
    if not pd.isna(current):
        return current

    anchor = row['date']
    window_start = anchor - pd.Timedelta(days=window_days)
    window_end = anchor - pd.Timedelta(days=1)
    in_window = df['date'].between(window_start, window_end)  # inclusive on both ends
    return df.loc[in_window, column].mean()

# Impute missing influenza_like_syndromic_counts row by row from the trailing mean
ili_df['influenza_like_syndromic_counts'] = ili_df.apply(
    fill_with_past_7_days_mean,
    axis=1,
    args=(ili_df, 'influenza_like_syndromic_counts'),
)

In [54]:
# Order rows chronologically (oldest first) and preview the result
ili_df = ili_df.sort_values(by='date')
ili_df.head()

Unnamed: 0,date,influenza_like_syndromic_counts
1085,2024-06-09,2.0
1084,2024-06-10,2.0
1083,2024-06-11,0.0
1082,2024-06-12,0.0
1079,2024-06-13,0.0


In [55]:
# Save the cleaned daily counts to CSV, omitting the DataFrame index
ili_df.to_csv('influenza_like_syndromic_cd.csv', index=False)

### Scarlet Fever Cases by Day

In [56]:
# Load the scarlet fever data from CSV and preview the first rows
sf_df = pd.read_csv('scarlet_fever.csv')
sf_df.head()

Unnamed: 0,theme,sub_theme,topic,geography_type,geography,metric,sex,age,stratum,year,date,metric_value,in_reporting_delay_period
0,infectious_disease,childhood_illness,Scarlet-fever,UKHSA Region,West Midlands,scarlet-fever_syndromic_emergencyDepartment_ba...,all,all,default,2025,2025-06-08,0.34,False
1,infectious_disease,childhood_illness,Scarlet-fever,UKHSA Region,West Midlands,scarlet-fever_syndromic_emergencyDepartment_co...,all,all,default,2025,2025-06-08,0.0,False
2,infectious_disease,childhood_illness,Scarlet-fever,UKHSA Region,West Midlands,scarlet-fever_syndromic_emergencyDepartment_ba...,all,all,default,2025,2025-06-07,0.35,False
3,infectious_disease,childhood_illness,Scarlet-fever,UKHSA Region,West Midlands,scarlet-fever_syndromic_emergencyDepartment_co...,all,all,default,2025,2025-06-07,0.0,False
4,infectious_disease,childhood_illness,Scarlet-fever,UKHSA Region,West Midlands,scarlet-fever_syndromic_emergencyDepartment_ba...,all,all,default,2025,2025-06-06,0.35,False


In [57]:
# Keep only rows whose metric is the emergency-department countsByDay series
sf_df = sf_df.loc[
    sf_df['metric'].eq('scarlet-fever_syndromic_emergencyDepartment_countsByDay')
]

In [58]:
# Reduce to the date and value columns, giving the value column a descriptive name
sf_df = (
    sf_df.loc[:, ['date', 'metric_value']]
    .rename(columns={'metric_value': 'scarlet_fever_syndromic_counts'})
)

In [59]:
# Parse the 'date' column into pandas datetime values (no DD-MM-YYYY re-formatting is applied here)
sf_df['date'] = pd.to_datetime(sf_df['date'])

sf_df.head()

Unnamed: 0,date,scarlet_fever_syndromic_counts
1,2025-06-08,0.0
3,2025-06-07,0.0
5,2025-06-06,0.0
8,2025-06-05,1.0
11,2025-06-04,0.0


In [60]:
# Display every row that has a missing value in at least one column
sf_df.loc[sf_df.isna().any(axis='columns')]

Unnamed: 0,date,scarlet_fever_syndromic_counts


In [61]:
# Helper: replace a missing count with the mean of the preceding days
def fill_with_past_7_days_mean(row, df, column, window_days=7):
    """Return ``row[column]``, or a trailing-window mean when it is missing.

    Args:
        row: A single row; must provide ``'date'`` and ``column``.
        df: Frame with a ``'date'`` column used to look up earlier values.
        column: Name of the value column to fill.
        window_days: Number of days before ``row['date']`` to average over.

    Returns:
        The existing value if it is not missing; otherwise the mean of
        ``column`` over rows of ``df`` dated from ``window_days`` days before
        up to one day before ``row['date']`` (NaN when nothing is available).
    """
    current = row[column]
    if not pd.isna(current):
        return current

    anchor = row['date']
    window_start = anchor - pd.Timedelta(days=window_days)
    window_end = anchor - pd.Timedelta(days=1)
    in_window = df['date'].between(window_start, window_end)  # inclusive on both ends
    return df.loc[in_window, column].mean()

# Impute missing scarlet_fever_syndromic_counts row by row from the trailing mean
sf_df['scarlet_fever_syndromic_counts'] = sf_df.apply(
    fill_with_past_7_days_mean,
    axis=1,
    args=(sf_df, 'scarlet_fever_syndromic_counts'),
)

In [62]:
# Order rows chronologically (oldest first) and preview the result
sf_df = sf_df.sort_values(by='date')
sf_df.head()

Unnamed: 0,date,scarlet_fever_syndromic_counts
1085,2024-06-09,3.0
1084,2024-06-10,0.0
1083,2024-06-11,0.0
1082,2024-06-12,0.0
1079,2024-06-13,0.0


In [63]:
# Save the cleaned daily counts to CSV, omitting the DataFrame index
sf_df.to_csv('scarlet_fever_syndromic_cd.csv', index=False)