# pandas Example - COVID Cases UK 

A short example to show the Python pandas package for data analysis.

The source data is a CSV file containing the number of COVID cases by date in each of the four countries of the UK. We import this data into a pandas dataframe, filter and group the data, and plot several time-series charts.

In [None]:
# Import the modules
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the case data from a CSV file hosted on GitHub, parsing the 'date' column as dates
COVID_DATA_URL = (
    "https://raw.githubusercontent.com/MarkWilcock/CourseDatasets/"
    "main/Coronavirus%20UK/cases.csv"
)
df_uk = pd.read_csv(COVID_DATA_URL, parse_dates=["date"])
df_uk.head(2)

In [None]:
# info() prints a concise summary of the dataframe: column names, non-null counts and dtypes
df_uk.info()


In [None]:
# Tidy the dataframe: remove the columns we do not need, then give the rest friendlier names
df_uk = df_uk.drop(columns=['areaCode', 'areaType'])
# NOTE: the new names are assigned by position, so exactly four columns must remain, in this order
df_uk.columns = ['Country', 'Date', 'Cases', 'CumulativeCases']
df_uk.head(2)


In [None]:
# describe() reports basic summary statistics (count, mean, min, max, quartiles, ...) for the data
df_uk.describe()

In [None]:
# Total the cases for each country: keep only the columns involved, group, then sum
df_uk_grouped = df_uk[['Country', 'Cases']].groupby('Country').sum()
df_uk_grouped

In [None]:
# Reshape to wide form before plotting: one row per date, one column of cases per country
df_uk_wide = df_uk.pivot(
    index='Date',
    columns='Country',
    values='Cases',
)
df_uk_wide.head(2)

In [None]:
# Plot cases over time, one line per column of the wide dataframe
df_uk_wide.plot()
plt.show()  # render the chart explicitly, consistent with the other plotting cells

##  England only analysis

In [None]:
# Filter the cases to those in England only, then reshape for time-series work
df_eng = (
    df_uk.loc[df_uk['Country'] == 'England']
    # with only one country left, the Country column is redundant
    .drop(columns=['Country'])
    # the Date values are now expected to be unique, so Date can serve as the index
    .set_index('Date')
    .sort_index()
)
df_eng.head(2)


In [None]:
# Plot the complete England series of cases against the date index
df_eng['Cases'].plot()
plt.show()

In [None]:
# Show the year 2021 only, selected by partial string indexing on the date index
df_eng['Cases'].loc['2021'].plot()
plt.show()

In [None]:
# Show March 2021 only, selected by partial string indexing on the date index
df_eng['Cases'].loc['2021-03'].plot()
plt.show()