In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import altair as alt
import seaborn as sns
import numpy as np

### 1. load cross reference data and group by LSOA11CD

In [2]:
cross_ref = pd.read_csv("Output_Area_to_LSOA_to_MSOA_to_Local_Authority_District_(December_2017)_Lookup_with_Area_Classifications_in_Great_Britain.csv")

cross_ref.drop(['OA11CD'], axis=1, inplace=True)
cross_ref.drop(['LAD17CD'], axis=1, inplace=True)

In [3]:
lsoa_to_msoa = cross_ref.groupby(['LSOA11CD','LSOA11NM','MSOA11CD','MSOA11NM']).count().reset_index()
lsoa_to_msoa.count()

LSOA11CD    4835
LSOA11NM    4835
MSOA11CD    4835
MSOA11NM    4835
dtype: int64

### 2. load london only covid data and group it all into one table
From this one dataset (containing data up to 28 May, 2020) we load and then merge the following:
- deaths 
- population over 70
- ethnic_group
- medical_conditions

In [4]:
deaths = pd.read_excel("underlying_data_2020_06_01.xlsx", sheet_name='1 deaths')
population = pd.read_excel("underlying_data_2020_06_01.xlsx", sheet_name='2 population')
ethnicity = pd.read_excel("underlying_data_2020_06_01.xlsx", sheet_name='4 ethnic_group')
health = pd.read_excel("underlying_data_2020_06_01.xlsx", sheet_name='7 medical_conditions')

london_covid_total = pd.merge(deaths, population, left_on='MSOA11CD', right_on='MSOA11CD', how = 'inner')
london_covid_total = pd.merge(london_covid_total, ethnicity, left_on='MSOA11CD', right_on='MSOA11CD', how = 'inner')
london_covid_total = pd.merge(london_covid_total, health, left_on='MSOA11CD', right_on='MSOA11CD', how = 'inner')

london_covid_total.count()

MSOA11CD                         983
MSOA11NM_x                       983
Local authority_x                983
covid_19_deaths                  983
covid_19_deaths_per_thousand     983
MSOA11NM_y                       983
Local authority_y                983
total_population_mid_2018        983
over_70_prop                     983
MSOA11NM_x                       983
Local Authority                  983
all_bame_prop                    983
all_black_prop                   983
pakistani_or_bangladeshi_prop    983
all_indian_prop                  983
MSOA11NM_y                       983
Local authority                  983
total_registered_patients        983
Hypertension                     983
Obesity (18+)                    983
Diabetes                         983
Asthma                           983
Coronary heart disease           983
dtype: int64

In [5]:
london_covid_total.drop(['MSOA11NM_x'], axis=1, inplace=True)
london_covid_total.drop(['MSOA11NM_y'], axis=1, inplace=True)
london_covid_total.drop(['Local authority_x'], axis=1, inplace=True)
london_covid_total.drop(['Local authority_y'], axis=1, inplace=True)
london_covid_total.drop(['Local Authority'], axis=1, inplace=True)
london_covid_total.head()

Unnamed: 0,MSOA11CD,covid_19_deaths,covid_19_deaths_per_thousand,total_population_mid_2018,over_70_prop,all_bame_prop,all_black_prop,pakistani_or_bangladeshi_prop,all_indian_prop,Local authority,total_registered_patients,Hypertension,Obesity (18+),Diabetes,Asthma,Coronary heart disease
0,E02000001,3,0.34459,8706,0.123593,0.213695,0.026169,0.033627,0.029288,City of London,8584,7.95,3.62,2.7,3.08,1.5
1,E02000002,2,0.259067,7720,0.103886,0.350111,0.167232,0.04797,0.039852,Barking and Dagenham,8315,9.26,6.15,4.99,3.91,1.88
2,E02000003,3,0.271469,11051,0.085965,0.453858,0.161075,0.104629,0.080836,Barking and Dagenham,11873,10.33,7.6,5.79,4.64,1.83
3,E02000004,9,1.366535,6586,0.124658,0.19023,0.104982,0.016338,0.007603,Barking and Dagenham,6852,9.34,8.36,4.95,3.58,1.93
4,E02000005,5,0.488806,10229,0.066282,0.337304,0.168769,0.058748,0.035155,Barking and Dagenham,11150,9.37,8.79,5.15,4.26,1.47


### 3. load deprivation data
The domains of deprivation gives us a ranking from 1 to many thousands for deprivation along a number of dimensions:
- income_rank
- environment_rank
- education_rank
- health_rank
- housing_rank
- living_environment_rank

In [6]:
uk_deprivation_data = pd.read_excel("File_2_-_IoD2019_Domains_of_Deprivation.xlsx", sheet_name='IoD2019 Domains',
                                    usecols = "A,B,C,D,G,I,K, M, Q, S", names=['lsoa_code', 'lsoa_name', 
                                                            'local_auth_code', 'local_auth_name',  'income_rank', 
                                                            'employment_rank', 'education_rank', 'health_rank',
                                                            'housing_rank', 'living_environment_rank'])

london_deprivation = uk_deprivation_data[uk_deprivation_data['local_auth_code'].str.contains('E09')]
print(london_deprivation.count())
london_deprivation.head()

lsoa_code                  4835
lsoa_name                  4835
local_auth_code            4835
local_auth_name            4835
income_rank                4835
employment_rank            4835
education_rank             4835
health_rank                4835
housing_rank               4835
living_environment_rank    4835
dtype: int64


Unnamed: 0,lsoa_code,lsoa_name,local_auth_code,local_auth_name,income_rank,employment_rank,education_rank,health_rank,housing_rank,living_environment_rank
0,E01000001,City of London 001A,E09000001,City of London,32831,32742,32842,32113,7319,7789
1,E01000002,City of London 001B,E09000001,City of London,29901,31190,32832,29705,11707,13070
2,E01000003,City of London 001C,E09000001,City of London,18510,15103,26386,17600,2157,4092
3,E01000005,City of London 001E,E09000001,City of London,6029,7833,12370,17907,2217,9397
4,E01000006,Barking and Dagenham 016A,E09000002,Barking and Dagenham,14023,21692,17511,21581,1033,10629


### Now merge deprivation and cross reference to get the link between lsoa and msoa

In [7]:
# Now merge with cross ref and then group by msoa code

print(london_deprivation.count())
london_deprivation_new = pd.merge(london_deprivation, lsoa_to_msoa, left_on='lsoa_code', right_on='LSOA11CD', how = 'inner')

print(london_deprivation_new.count())

london_deprivation_new.head()

lsoa_code                  4835
lsoa_name                  4835
local_auth_code            4835
local_auth_name            4835
income_rank                4835
employment_rank            4835
education_rank             4835
health_rank                4835
housing_rank               4835
living_environment_rank    4835
dtype: int64
lsoa_code                  4835
lsoa_name                  4835
local_auth_code            4835
local_auth_name            4835
income_rank                4835
employment_rank            4835
education_rank             4835
health_rank                4835
housing_rank               4835
living_environment_rank    4835
LSOA11CD                   4835
LSOA11NM                   4835
MSOA11CD                   4835
MSOA11NM                   4835
dtype: int64


Unnamed: 0,lsoa_code,lsoa_name,local_auth_code,local_auth_name,income_rank,employment_rank,education_rank,health_rank,housing_rank,living_environment_rank,LSOA11CD,LSOA11NM,MSOA11CD,MSOA11NM
0,E01000001,City of London 001A,E09000001,City of London,32831,32742,32842,32113,7319,7789,E01000001,City of London 001A,E02000001,City of London 001
1,E01000002,City of London 001B,E09000001,City of London,29901,31190,32832,29705,11707,13070,E01000002,City of London 001B,E02000001,City of London 001
2,E01000003,City of London 001C,E09000001,City of London,18510,15103,26386,17600,2157,4092,E01000003,City of London 001C,E02000001,City of London 001
3,E01000005,City of London 001E,E09000001,City of London,6029,7833,12370,17907,2217,9397,E01000005,City of London 001E,E02000001,City of London 001
4,E01000006,Barking and Dagenham 016A,E09000002,Barking and Dagenham,14023,21692,17511,21581,1033,10629,E01000006,Barking and Dagenham 016A,E02000017,Barking and Dagenham 016


### Now group by MSOA and then merge with London covid data to get one large dataset

In [8]:
london_deprivation_msoa = london_deprivation_new.groupby(['local_auth_code', 
                                                          'local_auth_name', 'MSOA11CD', 'MSOA11NM']).median().reset_index()

print(london_deprivation_msoa.count())
london_deprivation_msoa.head()

local_auth_code            983
local_auth_name            983
MSOA11CD                   983
MSOA11NM                   983
income_rank                983
employment_rank            983
education_rank             983
health_rank                983
housing_rank               983
living_environment_rank    983
dtype: int64


Unnamed: 0,local_auth_code,local_auth_name,MSOA11CD,MSOA11NM,income_rank,employment_rank,education_rank,health_rank,housing_rank,living_environment_rank
0,E09000001,City of London,E02000001,City of London 001,30837.5,30316.5,30856.5,23587.0,2635.5,5940.5
1,E09000002,Barking and Dagenham,E02000002,Barking and Dagenham 001,4670.0,6519.5,7695.5,14751.5,844.5,14801.0
2,E09000002,Barking and Dagenham,E02000003,Barking and Dagenham 002,11740.5,16384.0,15123.5,15674.5,1912.5,10359.0
3,E09000002,Barking and Dagenham,E02000004,Barking and Dagenham 003,12869.0,14473.0,13759.5,15516.0,4320.0,16398.5
4,E09000002,Barking and Dagenham,E02000005,Barking and Dagenham 004,9665.0,10100.0,8974.0,14418.0,1689.0,6036.0


### Now merge with the covid deaths data based on msoa code

In [9]:
london_covid_all = pd.merge(london_covid_total, london_deprivation_msoa, 
                            left_on='MSOA11CD', right_on='MSOA11CD', how = 'inner')

print(london_covid_all.count())
london_covid_all.head()

MSOA11CD                         983
covid_19_deaths                  983
covid_19_deaths_per_thousand     983
total_population_mid_2018        983
over_70_prop                     983
all_bame_prop                    983
all_black_prop                   983
pakistani_or_bangladeshi_prop    983
all_indian_prop                  983
Local authority                  983
total_registered_patients        983
Hypertension                     983
Obesity (18+)                    983
Diabetes                         983
Asthma                           983
Coronary heart disease           983
local_auth_code                  983
local_auth_name                  983
MSOA11NM                         983
income_rank                      983
employment_rank                  983
education_rank                   983
health_rank                      983
housing_rank                     983
living_environment_rank          983
dtype: int64


Unnamed: 0,MSOA11CD,covid_19_deaths,covid_19_deaths_per_thousand,total_population_mid_2018,over_70_prop,all_bame_prop,all_black_prop,pakistani_or_bangladeshi_prop,all_indian_prop,Local authority,...,Coronary heart disease,local_auth_code,local_auth_name,MSOA11NM,income_rank,employment_rank,education_rank,health_rank,housing_rank,living_environment_rank
0,E02000001,3,0.34459,8706,0.123593,0.213695,0.026169,0.033627,0.029288,City of London,...,1.5,E09000001,City of London,City of London 001,30837.5,30316.5,30856.5,23587.0,2635.5,5940.5
1,E02000002,2,0.259067,7720,0.103886,0.350111,0.167232,0.04797,0.039852,Barking and Dagenham,...,1.88,E09000002,Barking and Dagenham,Barking and Dagenham 001,4670.0,6519.5,7695.5,14751.5,844.5,14801.0
2,E02000003,3,0.271469,11051,0.085965,0.453858,0.161075,0.104629,0.080836,Barking and Dagenham,...,1.83,E09000002,Barking and Dagenham,Barking and Dagenham 002,11740.5,16384.0,15123.5,15674.5,1912.5,10359.0
3,E02000004,9,1.366535,6586,0.124658,0.19023,0.104982,0.016338,0.007603,Barking and Dagenham,...,1.93,E09000002,Barking and Dagenham,Barking and Dagenham 003,12869.0,14473.0,13759.5,15516.0,4320.0,16398.5
4,E02000005,5,0.488806,10229,0.066282,0.337304,0.168769,0.058748,0.035155,Barking and Dagenham,...,1.47,E09000002,Barking and Dagenham,Barking and Dagenham 004,9665.0,10100.0,8974.0,14418.0,1689.0,6036.0


In [10]:
london_covid_all.drop(['Local authority'], axis=1, inplace=True)
london_covid_all.drop(['total_registered_patients'], axis=1, inplace=True)

london_covid_all.rename(columns = {'MSOA11CD':'msoa_code'}, inplace = True)
london_covid_all.rename(columns = {'MSOA11NM':'msoa_name'}, inplace = True)
london_covid_all.rename(columns = {'Hypertension':'hypertension'}, inplace = True)
london_covid_all.rename(columns = {'Obesity (18+)':'obesity'}, inplace = True)
london_covid_all.rename(columns = {'Diabetes':'diabetes'}, inplace = True)
london_covid_all.rename(columns = {'Asthma':'asthma'}, inplace = True)
london_covid_all.rename(columns = {'Coronary heart disease':'heart_disease'}, inplace = True)

london_covid_all.head()


Unnamed: 0,msoa_code,covid_19_deaths,covid_19_deaths_per_thousand,total_population_mid_2018,over_70_prop,all_bame_prop,all_black_prop,pakistani_or_bangladeshi_prop,all_indian_prop,hypertension,...,heart_disease,local_auth_code,local_auth_name,msoa_name,income_rank,employment_rank,education_rank,health_rank,housing_rank,living_environment_rank
0,E02000001,3,0.34459,8706,0.123593,0.213695,0.026169,0.033627,0.029288,7.95,...,1.5,E09000001,City of London,City of London 001,30837.5,30316.5,30856.5,23587.0,2635.5,5940.5
1,E02000002,2,0.259067,7720,0.103886,0.350111,0.167232,0.04797,0.039852,9.26,...,1.88,E09000002,Barking and Dagenham,Barking and Dagenham 001,4670.0,6519.5,7695.5,14751.5,844.5,14801.0
2,E02000003,3,0.271469,11051,0.085965,0.453858,0.161075,0.104629,0.080836,10.33,...,1.83,E09000002,Barking and Dagenham,Barking and Dagenham 002,11740.5,16384.0,15123.5,15674.5,1912.5,10359.0
3,E02000004,9,1.366535,6586,0.124658,0.19023,0.104982,0.016338,0.007603,9.34,...,1.93,E09000002,Barking and Dagenham,Barking and Dagenham 003,12869.0,14473.0,13759.5,15516.0,4320.0,16398.5
4,E02000005,5,0.488806,10229,0.066282,0.337304,0.168769,0.058748,0.035155,9.37,...,1.47,E09000002,Barking and Dagenham,Barking and Dagenham 004,9665.0,10100.0,8974.0,14418.0,1689.0,6036.0


### ONS monthly death data
The London death data only extends to 17th April and so doesn't really encompass the entire first wave, and for that we would need data extending to the end of June, 2020. 
- We therefore load additional ONS monthly death data from March to end of July to provide a more comprehensive set of death data that covers the vast majority of deaths within the first wave. THis data is also at MSOA level

In [11]:
ONS_covid_deaths = pd.read_excel("covidlocalareadeprivationupdate.xlsx", sheet_name='Table 5', 
                                 usecols = "A,Q:U", names=['msoa_code', 'march_deaths', 
                                                           'april_deaths', 'may_deaths', 'june_deaths', 'july_deaths'], skiprows=12)

ONS_covid_deaths['ons_total_deaths'] = ONS_covid_deaths.march_deaths + ONS_covid_deaths.april_deaths + ONS_covid_deaths.may_deaths + ONS_covid_deaths.june_deaths + ONS_covid_deaths.july_deaths

print(ONS_covid_deaths.count())
ONS_covid_deaths.head()

msoa_code           7201
march_deaths        7201
april_deaths        7201
may_deaths          7201
june_deaths         7201
july_deaths         7201
ons_total_deaths    7201
dtype: int64


Unnamed: 0,msoa_code,march_deaths,april_deaths,may_deaths,june_deaths,july_deaths,ons_total_deaths
0,E02000001,1,2,1,0,0,4
1,E02000002,1,7,0,0,0,8
2,E02000003,0,7,1,0,0,8
3,E02000004,2,8,2,0,0,12
4,E02000005,2,4,1,0,0,7


#### Merge with london_covid_all

In [12]:
london_covid_all_ons = pd.merge(london_covid_all, ONS_covid_deaths, 
                                left_on='msoa_code', right_on='msoa_code', how = 'inner')

print(london_covid_all_ons.count())
london_covid_all_ons.head(10)

msoa_code                        983
covid_19_deaths                  983
covid_19_deaths_per_thousand     983
total_population_mid_2018        983
over_70_prop                     983
all_bame_prop                    983
all_black_prop                   983
pakistani_or_bangladeshi_prop    983
all_indian_prop                  983
hypertension                     983
obesity                          983
diabetes                         983
asthma                           983
heart_disease                    983
local_auth_code                  983
local_auth_name                  983
msoa_name                        983
income_rank                      983
employment_rank                  983
education_rank                   983
health_rank                      983
housing_rank                     983
living_environment_rank          983
march_deaths                     983
april_deaths                     983
may_deaths                       983
june_deaths                      983
j

Unnamed: 0,msoa_code,covid_19_deaths,covid_19_deaths_per_thousand,total_population_mid_2018,over_70_prop,all_bame_prop,all_black_prop,pakistani_or_bangladeshi_prop,all_indian_prop,hypertension,...,education_rank,health_rank,housing_rank,living_environment_rank,march_deaths,april_deaths,may_deaths,june_deaths,july_deaths,ons_total_deaths
0,E02000001,3,0.34459,8706,0.123593,0.213695,0.026169,0.033627,0.029288,7.95,...,30856.5,23587.0,2635.5,5940.5,1,2,1,0,0,4
1,E02000002,2,0.259067,7720,0.103886,0.350111,0.167232,0.04797,0.039852,9.26,...,7695.5,14751.5,844.5,14801.0,1,7,0,0,0,8
2,E02000003,3,0.271469,11051,0.085965,0.453858,0.161075,0.104629,0.080836,10.33,...,15123.5,15674.5,1912.5,10359.0,0,7,1,0,0,8
3,E02000004,9,1.366535,6586,0.124658,0.19023,0.104982,0.016338,0.007603,9.34,...,13759.5,15516.0,4320.0,16398.5,2,8,2,0,0,12
4,E02000005,5,0.488806,10229,0.066282,0.337304,0.168769,0.058748,0.035155,9.37,...,8974.0,14418.0,1689.0,6036.0,2,4,1,0,0,7
5,E02000007,9,0.889065,10123,0.080707,0.328177,0.227164,0.026846,0.008076,10.12,...,8286.0,11879.0,375.0,14002.0,5,5,2,1,0,13
6,E02000008,10,0.791452,12635,0.072497,0.30357,0.17374,0.035699,0.016337,10.26,...,9793.0,10842.0,1183.0,7318.0,0,11,0,0,0,11
7,E02000009,7,0.603865,11592,0.047533,0.388207,0.188326,0.071352,0.031328,9.74,...,9315.5,10245.0,950.5,7396.0,0,7,0,0,0,7
8,E02000010,6,0.652387,9197,0.064586,0.307951,0.190482,0.03401,0.010795,10.45,...,10318.0,12006.0,902.0,11351.0,0,7,0,0,0,7
9,E02000011,6,0.85923,6983,0.092081,0.33635,0.185227,0.047357,0.027154,10.88,...,11974.0,12000.5,2195.0,11636.0,1,7,2,2,0,12


In [13]:
# first add a field for ons deaths as a proportion of population and then reorder
london_covid_all_ons['ons_deaths_per_thousand'] = (1000*london_covid_all_ons.ons_total_deaths) / london_covid_all_ons.total_population_mid_2018
# now reorder before melting
london_covid_all_ons = london_covid_all_ons[['msoa_code', 'msoa_name', 'local_auth_code', 'local_auth_name', 
                                     'covid_19_deaths', 'covid_19_deaths_per_thousand', 'ons_total_deaths', 
                                     'ons_deaths_per_thousand', 'march_deaths', 'april_deaths', 'may_deaths',
                                     'june_deaths', 'july_deaths', 'total_population_mid_2018',
                                     'over_70_prop', 'all_bame_prop', 'all_black_prop', 'pakistani_or_bangladeshi_prop',
                                     'all_indian_prop', 'hypertension',
                                     'obesity', 'diabetes', 'asthma', 'heart_disease', 'income_rank', 
                                     'education_rank', 'health_rank', 'housing_rank', 'living_environment_rank'
                                    ]]

print(london_covid_all_ons.count())
london_covid_all_ons.head()

msoa_code                        983
msoa_name                        983
local_auth_code                  983
local_auth_name                  983
covid_19_deaths                  983
covid_19_deaths_per_thousand     983
ons_total_deaths                 983
ons_deaths_per_thousand          983
march_deaths                     983
april_deaths                     983
may_deaths                       983
june_deaths                      983
july_deaths                      983
total_population_mid_2018        983
over_70_prop                     983
all_bame_prop                    983
all_black_prop                   983
pakistani_or_bangladeshi_prop    983
all_indian_prop                  983
hypertension                     983
obesity                          983
diabetes                         983
asthma                           983
heart_disease                    983
income_rank                      983
education_rank                   983
health_rank                      983
h

Unnamed: 0,msoa_code,msoa_name,local_auth_code,local_auth_name,covid_19_deaths,covid_19_deaths_per_thousand,ons_total_deaths,ons_deaths_per_thousand,march_deaths,april_deaths,...,hypertension,obesity,diabetes,asthma,heart_disease,income_rank,education_rank,health_rank,housing_rank,living_environment_rank
0,E02000001,City of London 001,E09000001,City of London,3,0.34459,4,0.459453,1,2,...,7.95,3.62,2.7,3.08,1.5,30837.5,30856.5,23587.0,2635.5,5940.5
1,E02000002,Barking and Dagenham 001,E09000002,Barking and Dagenham,2,0.259067,8,1.036269,1,7,...,9.26,6.15,4.99,3.91,1.88,4670.0,7695.5,14751.5,844.5,14801.0
2,E02000003,Barking and Dagenham 002,E09000002,Barking and Dagenham,3,0.271469,8,0.723916,0,7,...,10.33,7.6,5.79,4.64,1.83,11740.5,15123.5,15674.5,1912.5,10359.0
3,E02000004,Barking and Dagenham 003,E09000002,Barking and Dagenham,9,1.366535,12,1.822047,2,8,...,9.34,8.36,4.95,3.58,1.93,12869.0,13759.5,15516.0,4320.0,16398.5
4,E02000005,Barking and Dagenham 004,E09000002,Barking and Dagenham,5,0.488806,7,0.684329,2,4,...,9.37,8.79,5.15,4.26,1.47,9665.0,8974.0,14418.0,1689.0,6036.0


### We now want to normalise the ranking and other fields so they are between 0 and 1
And then replace in london_covid_all

In [21]:
from sklearn.preprocessing import MinMaxScaler
min_max_scaler = MinMaxScaler()

df_numeric = london_covid_all_ons[['hypertension', 'obesity', 'diabetes', 'asthma', 'heart_disease',
                                  'income_rank', 'education_rank', 'health_rank', 
                                   'housing_rank', 'living_environment_rank']].copy()
min_max = min_max_scaler.fit_transform(df_numeric)

num_col_names = df_numeric.columns
df_min_max = pd.DataFrame(min_max, columns=num_col_names)
df_min_max.head(10)

Unnamed: 0,hypertension,obesity,diabetes,asthma,heart_disease,income_rank,education_rank,health_rank,housing_rank,living_environment_rank
0,0.324074,0.210938,0.120575,0.296296,0.332103,0.937041,0.933212,0.678423,0.089869,0.206279
1,0.434343,0.493304,0.373894,0.488426,0.472325,0.100337,0.134818,0.371421,0.028259,0.559091
2,0.524411,0.655134,0.462389,0.657407,0.453875,0.326416,0.390872,0.403492,0.064998,0.382217
3,0.441077,0.739955,0.369469,0.412037,0.490775,0.3625,0.343853,0.397985,0.147816,0.6227
4,0.443603,0.787946,0.391593,0.569444,0.321033,0.260052,0.17889,0.359833,0.05731,0.210082
5,0.506734,0.616071,0.348451,0.481481,0.372694,0.149898,0.155173,0.271612,0.012109,0.527276
6,0.518519,0.799107,0.439159,0.564815,0.357934,0.171769,0.207122,0.23558,0.039904,0.261129
7,0.474747,0.628348,0.384956,0.511574,0.332103,0.121409,0.190662,0.214837,0.031906,0.264235
8,0.534512,0.824777,0.359513,0.418981,0.317343,0.13503,0.225219,0.276025,0.030237,0.421717
9,0.570707,0.741071,0.373894,0.372685,0.332103,0.284849,0.282304,0.275834,0.074716,0.433065


In [23]:
# now merge the datasets, having first dropped the original columns
london_covid_new = london_covid_all_ons[['msoa_code', 'msoa_name', 'local_auth_code', 'local_auth_name', 'covid_19_deaths',
                                        'covid_19_deaths_per_thousand', 'ons_total_deaths', 'ons_deaths_per_thousand', 
                                        'march_deaths', 'april_deaths', 'may_deaths', 'june_deaths', 'july_deaths',
                                        'total_population_mid_2018', 'over_70_prop', 'all_bame_prop', 'all_black_prop',
                                        'pakistani_or_bangladeshi_prop', 'all_indian_prop']].copy()

london_covid_merged = pd.merge(left=london_covid_new, left_index=True,
                  right=df_min_max, right_index=True,
                  how='inner')

london_covid_merged.head()

Unnamed: 0,msoa_code,msoa_name,local_auth_code,local_auth_name,covid_19_deaths,covid_19_deaths_per_thousand,ons_total_deaths,ons_deaths_per_thousand,march_deaths,april_deaths,...,hypertension,obesity,diabetes,asthma,heart_disease,income_rank,education_rank,health_rank,housing_rank,living_environment_rank
0,E02000001,City of London 001,E09000001,City of London,3,0.34459,4,0.459453,1,2,...,0.324074,0.210938,0.120575,0.296296,0.332103,0.937041,0.933212,0.678423,0.089869,0.206279
1,E02000002,Barking and Dagenham 001,E09000002,Barking and Dagenham,2,0.259067,8,1.036269,1,7,...,0.434343,0.493304,0.373894,0.488426,0.472325,0.100337,0.134818,0.371421,0.028259,0.559091
2,E02000003,Barking and Dagenham 002,E09000002,Barking and Dagenham,3,0.271469,8,0.723916,0,7,...,0.524411,0.655134,0.462389,0.657407,0.453875,0.326416,0.390872,0.403492,0.064998,0.382217
3,E02000004,Barking and Dagenham 003,E09000002,Barking and Dagenham,9,1.366535,12,1.822047,2,8,...,0.441077,0.739955,0.369469,0.412037,0.490775,0.3625,0.343853,0.397985,0.147816,0.6227
4,E02000005,Barking and Dagenham 004,E09000002,Barking and Dagenham,5,0.488806,7,0.684329,2,4,...,0.443603,0.787946,0.391593,0.569444,0.321033,0.260052,0.17889,0.359833,0.05731,0.210082


In [36]:
# now load up weekly cases by cases
uk_covid_cases = pd.read_excel("MSOAs_latest.xlsx", sheet_name='AmendedData')
uk_covid_cases.head()


Unnamed: 0,msoa_code,msoa_name,5,6,7,8,9,10,11,12,...,33,34,35,36,37,38,39,40,41,42
0,E02000001,City of London,0,0,0,0,0,0,0,4,...,0,4,0,0,0,0,6,5,5,4
1,E02000002,Marks Gate,0,0,0,0,0,0,0,0,...,0,0,0,0,0,5,7,4,6,10
2,E02000003,Chadwell Heath East,0,0,0,0,0,0,0,0,...,4,0,0,4,10,9,8,9,10,11
3,E02000004,Eastbrookend,0,0,0,0,0,0,0,0,...,0,0,0,0,0,4,9,0,12,10
4,E02000005,Becontree Heath,0,0,0,0,0,0,0,0,...,0,0,0,5,3,3,9,6,6,14


In [None]:
london_covid_all_ons.to_csv('output_data/london_covid_all_ons.csv', index=False)

# Check scatter plots and correlations
This is just to test that we can produce reasonable scatter plots and that the correlations remain the same irrespective of having scaled the data (we can, they do)

In [24]:
col_names = london_covid_merged.columns.tolist()
col_names = col_names[8:29]

london_covid_stats = pd.melt(london_covid_merged, id_vars=['msoa_code', 
                                                        'msoa_name', 
                                                        'local_auth_code', 
                                                        'local_auth_name',
                                                        'covid_19_deaths', 
                                                        'covid_19_deaths_per_thousand',
                                                        'ons_total_deaths',
                                                        'ons_deaths_per_thousand'
                                                       ], 
                        value_vars=col_names, var_name = 'measure', value_name='value')

print(london_covid_stats.count())
london_covid_stats.head()

msoa_code                       20643
msoa_name                       20643
local_auth_code                 20643
local_auth_name                 20643
covid_19_deaths                 20643
covid_19_deaths_per_thousand    20643
ons_total_deaths                20643
ons_deaths_per_thousand         20643
measure                         20643
value                           20643
dtype: int64


Unnamed: 0,msoa_code,msoa_name,local_auth_code,local_auth_name,covid_19_deaths,covid_19_deaths_per_thousand,ons_total_deaths,ons_deaths_per_thousand,measure,value
0,E02000001,City of London 001,E09000001,City of London,3,0.34459,4,0.459453,march_deaths,1.0
1,E02000002,Barking and Dagenham 001,E09000002,Barking and Dagenham,2,0.259067,8,1.036269,march_deaths,1.0
2,E02000003,Barking and Dagenham 002,E09000002,Barking and Dagenham,3,0.271469,8,0.723916,march_deaths,0.0
3,E02000004,Barking and Dagenham 003,E09000002,Barking and Dagenham,9,1.366535,12,1.822047,march_deaths,2.0
4,E02000005,Barking and Dagenham 004,E09000002,Barking and Dagenham,5,0.488806,7,0.684329,march_deaths,2.0


In [30]:
import altair as alt
alt.data_transformers.disable_max_rows()

ignore_values = ['total_population_mid_2018', 'march_deaths', 'april_deaths', 'may_deaths', 'june_deaths', 'july_deaths']

alt.Chart(london_covid_stats[~london_covid_stats.measure.isin(ignore_values)]).mark_point().encode(
    x='value:Q',
    y='ons_deaths_per_thousand:Q',     
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).facet(
    column='measure:N'
).interactive()

In [32]:
london_corr_merged_input = london_covid_merged[['ons_deaths_per_thousand', 'total_population_mid_2018',
                                     'over_70_prop', 'all_bame_prop', 'all_black_prop', 'pakistani_or_bangladeshi_prop',
                                     'all_indian_prop', 'hypertension',
                                     'obesity', 'diabetes', 'asthma', 'heart_disease', 'income_rank', 
                                     'education_rank', 'health_rank', 'housing_rank', 'living_environment_rank']]
london_corr_merged = london_corr_merged_input.corr()

corr_deaths_merged=london_corr_merged[['ons_deaths_per_thousand']]
#sort by the amount of correlation
corr_deaths_merged=corr_deaths_merged.sort_values(by ='ons_deaths_per_thousand',ascending=True)

corr_deaths_merged

Unnamed: 0,ons_deaths_per_thousand
housing_rank,-0.126508
total_population_mid_2018,-0.086453
education_rank,-0.084951
income_rank,-0.065966
pakistani_or_bangladeshi_prop,-0.030106
health_rank,0.023688
all_black_prop,0.060508
all_bame_prop,0.135869
obesity,0.153203
asthma,0.175149


# Old stuff

In [14]:
col_names = london_covid_all_ons.columns.tolist()
col_names = col_names[8:29]

london_covid_stats = pd.melt(london_covid_all_ons, id_vars=['msoa_code', 
                                                        'msoa_name', 
                                                        'local_auth_code', 
                                                        'local_auth_name',
                                                        'covid_19_deaths', 
                                                        'covid_19_deaths_per_thousand',
                                                        'ons_total_deaths',
                                                        'ons_deaths_per_thousand'
                                                       ], 
                        value_vars=col_names, var_name = 'measure', value_name='value')

print(london_covid_stats.count())
london_covid_stats.head()

msoa_code                       20643
msoa_name                       20643
local_auth_code                 20643
local_auth_name                 20643
covid_19_deaths                 20643
covid_19_deaths_per_thousand    20643
ons_total_deaths                20643
ons_deaths_per_thousand         20643
measure                         20643
value                           20643
dtype: int64


Unnamed: 0,msoa_code,msoa_name,local_auth_code,local_auth_name,covid_19_deaths,covid_19_deaths_per_thousand,ons_total_deaths,ons_deaths_per_thousand,measure,value
0,E02000001,City of London 001,E09000001,City of London,3,0.34459,4,0.459453,march_deaths,1.0
1,E02000002,Barking and Dagenham 001,E09000002,Barking and Dagenham,2,0.259067,8,1.036269,march_deaths,1.0
2,E02000003,Barking and Dagenham 002,E09000002,Barking and Dagenham,3,0.271469,8,0.723916,march_deaths,0.0
3,E02000004,Barking and Dagenham 003,E09000002,Barking and Dagenham,9,1.366535,12,1.822047,march_deaths,2.0
4,E02000005,Barking and Dagenham 004,E09000002,Barking and Dagenham,5,0.488806,7,0.684329,march_deaths,2.0


### Now we create groups of similar stats that we can compare with covid death rate to identify correlations
We we will create 3 groups:
- age and ethnicity profile
- health profile
- deprivation profile

- potential field values
msoa_code	msoa_name	local_auth_code	local_auth_name	covid_19_deaths	covid_19_deaths_per_thousand	total_population_mid_2018	over_70_prop	all_bame_prop	all_black_prop	pakistani_or_bangladeshi_prop	all_indian_prop	hypertension	obesity	diabetes	asthma	heart_disease	income_rank	environment_rank	education_rank	health_rank	housing_rank	living_environment_rank

In [None]:
london_demographic = london_covid_stats[['msoa_code', 
                                       'msoa_name',
                                        'measure',
                                        'value',
                                        'covid_19_deaths',
                                        'covid_19_deaths_per_thousand',
                                        'ons_total_deaths',
                                        'ons_deaths_per_thousand']][london_covid_stats.measure.isin(['',
                                                                               'over_70_prop', 
                                                                               'all_black_prop', 
                                                                               'pakistani_or_bangladeshi_prop', 
                                                                               'all_indian_prop'])].copy().reset_index()

london_demographic.head()

In [None]:
import altair as alt

alt.Chart(london_demographic).mark_point().encode(
    x='covid_19_deaths_per_thousand:Q',
    y='value:Q', 
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).facet(
    column='measure:N'
).interactive()

In [None]:
import altair as alt

alt.Chart(london_demographic).mark_point().encode(
    x='ons_deaths_per_thousand:Q',
    y='value:Q', 
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).facet(
    column='measure:N'
).interactive()

In [None]:
london_health = london_covid_stats[['msoa_code', 
                                       'msoa_name',
                                        'measure',
                                        'value',
                                        'covid_19_deaths',
                                        'covid_19_deaths_per_thousand',
                                        'ons_total_deaths',
                                        'ons_deaths_per_thousand']][london_covid_stats.measure.isin(['hypertension',
                                                                               'obesity', 
                                                                               'diabetes',
                                                                               'heart_disease',
                                                                               'asthma'])].copy().reset_index()

london_health.head()

In [None]:
import altair as alt

alt.Chart(london_health).mark_point().encode(
    x='covid_19_deaths_per_thousand:Q',
    y='value:Q', 
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).facet(
    column='measure:N'
).interactive()

In [None]:
import altair as alt

alt.Chart(london_health).mark_point().encode(
    x='ons_deaths_per_thousand:Q',
    y='value:Q', 
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).facet(
    column='measure:N'
).interactive()

In [None]:
london_deprivation = london_covid_stats[['msoa_code', 
                                       'msoa_name',
                                        'measure',
                                        'value',
                                        'covid_19_deaths',
                                        'covid_19_deaths_per_thousand',
                                        'ons_total_deaths',
                                        'ons_deaths_per_thousand']][london_covid_stats.measure.isin(['income_rank',
                                                                               'environment_rank', 
                                                                               'education_rank',
                                                                               'housing_rank',
                                                                               'living_environment_rank'])].copy().reset_index()

london_health.head()

In [None]:
import altair as alt

alt.Chart(london_deprivation).mark_point().encode(
    x='covid_19_deaths_per_thousand:Q',
    y='value:Q', 
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).facet(
    column='measure:N'
).interactive()

In [None]:
import altair as alt

alt.Chart(london_deprivation).mark_point().encode(
    x='ons_deaths_per_thousand:Q',
    y='value:Q', 
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).facet(
    column='measure:N'
).interactive()

### No obvious correlations 
There aren't any obvious correlations between deaths per thousand and the demographic, health or deprivation profiles in the MSOA levels. Let's confirm this by formally checking the correlations

In [None]:
london_corr_input = london_covid_all_ons[['covid_19_deaths_per_thousand', 'total_population_mid_2018',
                                     'over_70_prop', 'all_bame_prop', 'all_black_prop', 'pakistani_or_bangladeshi_prop',
                                     'all_indian_prop', 'hypertension',
                                     'obesity', 'diabetes', 'asthma', 'heart_disease', 'income_rank', 
                                     'education_rank', 'health_rank', 'housing_rank', 'living_environment_rank']]
london_corr = london_corr_input.corr()

corr_deaths=london_corr[['covid_19_deaths_per_thousand']]
#sort by the amount of correlation
corr_deaths=corr_deaths.sort_values(by ='covid_19_deaths_per_thousand',ascending=True)

corr_deaths

### Now repeat for ONS deaths

In [31]:
london_corr_input_ons = london_covid_all_ons[['ons_deaths_per_thousand', 'total_population_mid_2018',
                                     'over_70_prop', 'all_bame_prop', 'all_black_prop', 'pakistani_or_bangladeshi_prop',
                                     'all_indian_prop', 'hypertension',
                                     'obesity', 'diabetes', 'asthma', 'heart_disease', 'income_rank', 
                                     'education_rank', 'health_rank', 'housing_rank', 'living_environment_rank']]
london_corr_ons = london_corr_input_ons.corr()

corr_deaths_ons=london_corr_ons[['ons_deaths_per_thousand']]
#sort by the amount of correlation
corr_deaths_ons=corr_deaths_ons.sort_values(by ='ons_deaths_per_thousand',ascending=True)

corr_deaths_ons

Unnamed: 0,ons_deaths_per_thousand
housing_rank,-0.126508
total_population_mid_2018,-0.086453
education_rank,-0.084951
income_rank,-0.065966
pakistani_or_bangladeshi_prop,-0.030106
health_rank,0.023688
all_black_prop,0.060508
all_bame_prop,0.135869
obesity,0.153203
asthma,0.175149


In [None]:
fig = plt.figure(figsize = (20,10))
plt.subplot(1, 2, 1)

sns.heatmap(
    corr_deaths, 
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True
)

plt.subplot(1, 2, 2)
sns.heatmap(
    corr_deaths_ons, 
    vmin=-1, vmax=1, center=0,
    cmap=sns.diverging_palette(20, 220, n=200),
    square=True
)

plt.show()

In [None]:
# THis is the beginnings of code to create a dataframe with the individual values
# so I can create a bar chart rather than a heatmap (better to compare) - doesn't 
# work yet.

#corr_ons = pd.DataFrame([corr_deaths_ons['ons_deaths_per_thousand'].iloc[0], 
#            corr_deaths_ons['ons_deaths_per_thousand'].iloc[2]])
#            
#corr_ons

#def create_corr_df(label, corr, n):
#    lst = []
#    for x in range(0, n):
#        lst.append((corr[label].iloc[x], ))
#        
#    df = pd.DataFrame(lst)
#    df['name'] = label
#    
#    return df
#        
#corr_ons = create_corr_df('ons_deaths_per_thousand', corr_deaths_ons, 17)
#
#corr_ons



### Conclusions
- Even incorporating death data up until the end of the first wave, it is reasonable to confirm the London Datastore findings that there are no correlations between demographic, health or deprivation factors and the total number of covid deaths in a London boroughs when considered in isolation. So a further avenue of research would be to perform a regression analysis, incrementally adding in each feature to understand which combination of borough characteristics have the biggest correlation with the number of deaths within the borough

### Next steps
The next step is to see whether there was any correlation between neighbouring locations over time between the beginning of March and the end of July. This will show if the virus spread in pockets.

#### Load weekly covid cases by MSOA

In [None]:
uk_covid_cases = pd.read_excel("MSOAs_latest.xlsx", sheet_name='AmendedData')
print(uk_covid_cases.count())
uk_covid_cases.head()

##### Now we want just msoa for London so get a list and then merge

In [None]:
london_covid_codes = london_covid_all_ons[['msoa_code', 'msoa_name', 'local_auth_code', 
                                           'local_auth_name', 'covid_19_deaths', 
                                           'ons_total_deaths','total_population_mid_2018']].copy()
london_covid_cases = pd.merge(london_covid_codes, uk_covid_cases, left_on='msoa_code', right_on='msoa_code', how = 'inner')
print(london_covid_cases.count())
london_covid_cases.head()

In [None]:
london_covid_cases.drop(['msoa_name_y'], axis=1, inplace=True)
london_covid_cases.rename(columns = {'msoa_name_x':'msoa_name'}, inplace = True)
london_covid_cases.head()

In [None]:
col_names = london_covid_cases.columns.tolist()
col_names

In [None]:
col_names = london_covid_cases.columns.tolist()
col_names = col_names[7:45]

london_weekly_cases_msoa = pd.melt(london_covid_cases, id_vars=['msoa_code', 
                                                        'msoa_name', 
                                                        'local_auth_code', 
                                                        'local_auth_name',
                                                        'covid_19_deaths',
                                                        'ons_total_deaths',
                                                        'total_population_mid_2018'], 
                        value_vars=col_names, var_name = 'week_number', value_name='cases')

print(london_weekly_cases_msoa.count())
london_weekly_cases_msoa.head()

In [None]:
temp = london_weekly_cases_msoa.groupby(["msoa_code"]).apply(lambda x: x['cases'][london_weekly_cases_msoa.week_number < 27].sum()).reset_index()
london_weekly_cases_msoa = pd.merge(london_weekly_cases_msoa, temp, left_on='msoa_code', right_on='msoa_code', how = 'inner')
london_weekly_cases_msoa.rename(columns = {0:'cases_to_end_june'}, inplace = True)

london_weekly_cases_msoa.head()

#### Compare deaths to covid cases to see if correlated

In [None]:
import altair as alt

alt.data_transformers.disable_max_rows()

alt.Chart(london_weekly_cases_msoa).mark_point().encode(
    x='ons_total_deaths:Q',
    y='cases_to_end_june:Q', 
    tooltip = ['msoa_name']
).properties(
    width=180,
    height=180
).interactive()

In [None]:
import scipy.stats as stats

corrPearson, pValPearson = stats.pearsonr(london_weekly_cases_msoa.ons_total_deaths, london_weekly_cases_msoa.cases_to_end_june)
corrSpearman,pValSpearman = stats.spearmanr(london_weekly_cases_msoa.ons_total_deaths, london_weekly_cases_msoa.cases_to_end_june)

print("Cased versus deaths: Pearson = " + str(corrPearson) + ", Spearman = " + str(corrSpearman) + "," + str(pValPearson))

### Conclusions
There is MODERATE correlation between cases and deaths and so it will be instructive to see how cases by region progress over time. 

First lets load appropriate shape files and merge with our data

In [None]:
import geopandas as gpd

gb = gpd.read_file("Shapefiles/Middle_Layer_Super_Output_Areas__December_2001__Boundaries_EW_BGC.shp") # a gis format that has geographical boundaries QGIS is a package for looking at shape files
gb.crs = "epsg:27700" # code for the UK national grid

In [None]:
gb.head()

In [None]:
london_covid_geo = pd.merge(gb, london_weekly_cases_msoa, left_on='MSOA01CD', right_on='msoa_code', how = 'inner')

In [None]:
data_geo = alt.InlineData(values = london_covid_geo.to_json(), #geopandas to geojson string
                       format = alt.DataFormat(property='features',type='json'))

alt.Chart(data_geo).mark_geoshape(strokeWidth=1,stroke='lightgray',strokeOpacity=0.2
).encode(
    color=alt.Color('properties.ons_total_deaths:Q'),
    tooltip=['properties.msoa_name:N', 'properties.ons_total_deaths:Q']
).properties(
    projection={'type': 'identity','reflectY': True},
    width=800,
    height=1200,
    title='deaths by msoa - to jul'
)

### Discussion - we can see the pockets of London having with the highest concentration of covid deaths by July but now I want to see cases by week in a waffle chart

In [None]:
london_covid_temp = london_covid_all_ons[['msoa_code', 'msoa_name', 'local_auth_code', 'local_auth_name', 
                                          'covid_19_deaths', 'covid_19_deaths_per_thousand', 'ons_total_deaths',
                                          'ons_deaths_per_thousand', 'march_deaths', 'april_deaths', 'may_deaths',
                                          'june_deaths', 'july_deaths', 'total_population_mid_2018']].copy()

london_cases_temp = london_covid_cases[['msoa_code', 10, 11, 12, 13, 14, 15,
                                          16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]].copy()

london_coords_temp = pd.merge(london_covid_temp, london_cases_temp, 
                              left_on='msoa_code', right_on='msoa_code', how = 'inner')

print(london_coords_temp.count())
london_coords_temp.head()

In [None]:
soa_shape_map = gpd.read_file('Shapefiles/Middle_Layer_Super_Output_Areas__December_2001__Boundaries_EW_BGC.shp')

soa_shape_map_geo = soa_shape_map.to_crs(epsg=4326)

soa_shape_map_geo['long'] = soa_shape_map_geo.geometry.centroid.x
soa_shape_map_geo['lat'] = soa_shape_map_geo.geometry.centroid.y

london_covid_shapes = pd.merge(soa_shape_map_geo, london_coords_temp, left_on='MSOA01CD', right_on='msoa_code', how = 'inner')

print(london_covid_shapes.count())
london_covid_shapes

In [None]:
london_covid_shapes.to_csv('output_data/london_covid_shapes.csv', index=False)

In [None]:
print("max-lon=" + str(london_covid_shapes.long.max()))
print("min-lon=" + str(london_covid_shapes.long.min()))
print("max-lat=" + str(london_covid_shapes.lat.max()))
print("min-lat=" + str(london_covid_shapes.lat.min()))

In [None]:
lon_range = (-0.5, 0.35)
lon_cells = 15

lat_range = (51.2, 51.8)
lat_cells = 13

In [None]:
from shapely.geometry import Polygon

lon_incr = (lon_range[1] - lon_range[0]) / lon_cells
lat_incr = (lat_range[1] - lat_range[0]) / lat_cells
x0, y0 = lon_range[0], lat_range[0]

cell_ids = []
grid_cells = []
for c in range(lon_cells):
    x1 = x0 + lon_incr
    for r in range(lat_cells):
        y1 = y0 + lat_incr
        grid_cells.append(Polygon([(x0,y0),(x0,y1),(x1,y1),(x1,y0)]))
        cell_ids.append('{:02d}_{:02d}'.format(c, r))
        y0 = y1
    x0 = x1
    y0 = lat_range[0]


In [None]:
london_grid_temp = pd.melt(london_covid_shapes, id_vars=['msoa_code', 
                                                        'long', 
                                                        'lat'], 
                        value_vars=[10, 11, 12, 13, 14, 15, 16, 17, 18, 
                                    19, 20, 21, 22, 23, 24, 25, 26], var_name = 'week', value_name='cases')

print(london_grid_temp.count())
london_grid_temp.head()

In [None]:
london_grid_temp['grid_x']   = np.floor((london_grid_temp['long'] - lon_range[0]) / (lon_range[1] - lon_range[0]) * lon_cells).astype(int)
london_grid_temp['grid_y']   = np.floor((london_grid_temp['lat']  - lat_range[0]) / (lat_range[1] - lat_range[0]) * lat_cells).astype(int)
# The cell_id column will be used to link our aggregate data to the grid GeoJSON object for plotting
london_grid_temp['cell_id']  = london_grid_temp[['grid_x','grid_y']].apply(lambda x: '{:02d}_{:02d}'.format(x.grid_x, x.grid_y), axis=1)
london_grid_temp['interval'] = london_grid_temp['week']

london_grid_temp.head()

In [None]:
london_grid = london_grid_temp.groupby(['cell_id', 'grid_x', 'grid_y', 'week']).sum().reset_index()
london_grid['cummulative_deaths'] = london_grid.groupby(['cell_id', 'grid_x', 'grid_y'])['cases'].cumsum()

london_grid.to_csv('output_data/london_grid.csv', index=False)
london_grid.head()

In [None]:
alt.Chart(london_grid).mark_square().encode(
    x='grid_x:O',
    y='grid_y:O',
    color = alt.Color('cummulative_deaths:Q', scale=alt.Scale(scheme='reds')), 
    tooltip = ['cummulative_deaths']
).properties(
    width=180,
    height=180
).facet(
    column='week:O'
).interactive()