# Visualise the COVID Infection Survey with seaborn
The ONS (Office for National Statistics) publishes a survey of the estimated COVID infection rate in the nine English regions over a recent six-week period.
The data provides the best estimate (the central value) as well as the upper and lower bounds of the 95% credible interval.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load the survey extract; the 'Date' column is parsed as datetimes on read
survey = pd.read_csv(
    "Datasets/Covid Infection Survey 2021-03-06.csv",
    parse_dates=['Date'],
)
# Summarise the columns (names, dtypes, non-null counts), then preview two rows
survey.info()
survey.head(2)

In [None]:
# Plot the central infection-rate estimate for the London region against the survey date.
# NOTE: an earlier attempt that passed a list of columns as y
# (y=['London-Central', 'London-Upper']) did not work, so only one column is plotted here.
sns.relplot(
    kind='line',
    data=survey,
    x='Date',
    y='London-Central',
)
plt.show()

In [None]:
# Reshape the data from wide to long form so seaborn can plot it.
# Every column other than 'Date' is unpivoted, giving three columns:
#   Date      - the identifier column, kept as-is
#   Attribute - the name of the original column
#   Rate      - the value that column held on that date
survey_long = pd.melt(
    survey,
    id_vars=['Date'],
    var_name='Attribute',
    value_name='Rate',
)

In [None]:
# Split the Attribute column (e.g. 'London-Central') into Region and ValueType, and then remove it.
# The split is made on the LAST hyphen only (rsplit with n=1), so the result always has at most
# two columns; a region name that itself contained a hyphen would otherwise produce extra
# columns and make the two-column assignment below fail.
survey_long[['Region', 'ValueType']] = survey_long['Attribute'].str.rsplit('-', n=1, expand=True)
survey_long = survey_long.drop(columns='Attribute')

In [None]:
# Compare the estimated infection rate over time in two regions on one line chart.
# Step 1: keep only the central estimates.
survey_long_central = survey_long[survey_long['ValueType'].eq('Central')]
# Step 2: narrow those rows down to the two regions being compared.
survey_long_central_filtered = survey_long_central[
    survey_long_central['Region'].isin(['London', 'North East'])
]

# Region drives both the colour (hue) and the line style of each series.
sns.relplot(
    kind='line',
    data=survey_long_central_filtered,
    x='Date',
    y='Rate',
    hue='Region',
    style='Region',
)

In [None]:
# Small multiples: one line-chart panel per Region, wrapped three panels to a row.
# Within each panel, a separate coloured line is drawn for each ValueType.
sns.relplot(
    kind='line',
    data=survey_long,
    x='Date',
    y='Rate',
    hue='ValueType',
    col='Region',
    col_wrap=3,
)