In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from geopandas import GeoDataFrame as gdf

## Import Index of Multiple Deprivation (IMD) data at the LSOA level

In [2]:
# Load the LSOA-level IMD 2019 layer from the local GeoJSON file.
imd_geojson_path = 'data/Lower_Super_Output_Area_(LSOA)_IMD_2019__(OSGB1936).geojson'
df_imd = gpd.read_file(imd_geojson_path)

In [3]:
# Inspect the coordinate reference system the layer was loaded with,
# before any reprojection is applied.
df_imd.crs

{'init': 'epsg:4326'}

In [4]:
# Reproject the layer to EPSG:27700.
# The `{'init': 'epsg:...'}` dict form is deprecated by pyproj/geopandas, so the
# EPSG code is passed directly. Reassigning the result (instead of
# `inplace=True`) keeps the transformation explicit.
df_imd = df_imd.to_crs(epsg=27700)

In [5]:
# Confirm the reprojection took effect and report the table dimensions.
# A bare `df_imd.head()` in the middle of a cell is evaluated but never
# rendered (only the last expression is displayed), so it has been removed.
print(df_imd.crs)
df_imd.shape

{'init': 'epsg:27700'}


(34753, 8)

In [6]:
# Count the distinct IMD rank values present across the LSOA rows.
df_imd['IMDRank'].nunique()

32819

In [7]:
# Count the distinct IMD decile values present across the LSOA rows.
# NOTE(review): the recorded result is 11, one more than the ten deciles --
# presumably a placeholder value (e.g. 0) is present on some rows; confirm
# before aggregating.
df_imd['IMDDecil'].nunique()

11

In [9]:
# Preview the first rows of the full table (attribute columns plus geometry).
df_imd.head()

Unnamed: 0,FID,lsoa11cd,lsoa11nm,IMDRank,IMDDecil,Shape__Area,Shape__Length,geometry
0,1,E01000001,City of London 001A,29199,9,343907.41983,3682.43942,"POLYGON ((532106.897 182011.476, 532164.295 18..."
1,2,E01000002,City of London 001B,30379,10,583474.041779,3910.38724,"POLYGON ((532748.627 181787.125, 532673.498 18..."
2,3,E01000003,City of London 001C,14915,5,147839.506081,1834.93132,"POLYGON ((532136.953 182198.368, 532160.057 18..."
3,4,E01000005,City of London 001E,8678,3,491918.093037,3483.179208,"POLYGON ((533809.768 180767.968, 533650.881 18..."
4,5,E01000006,Barking and Dagenham 016A,14486,5,372257.321186,3108.610781,"POLYGON ((545123.928 184315.103, 545273.798 18..."


In [10]:
# Keep only the LSOA code and the two IMD measures; the geometry and the
# remaining attribute columns are dropped here.
imd_columns = ['lsoa11cd', 'IMDRank', 'IMDDecil']
df_imd = df_imd[imd_columns]
df_imd.head()

Unnamed: 0,lsoa11cd,IMDRank,IMDDecil
0,E01000001,29199,9
1,E01000002,30379,10
2,E01000003,14915,5
3,E01000005,8678,3
4,E01000006,14486,5


## Map LSOA data to MSOA

In [11]:
# Load the output-area lookup table that links LSOA codes to MSOA codes.
lookup_csv_path = 'data/Output_Area_to_LSOA_to_MSOA_to_Local_Authority_District_(December_2017)_Lookup_with_Area_Classifications_in_Great_Britain.csv'
msoa_to_lsoa = pd.read_csv(lookup_csv_path)

In [13]:
# Reduce the lookup to unique (LSOA, MSOA) code pairs.
# The selection and de-duplication are chained into one reassignment:
# calling `drop_duplicates(inplace=True)` on a column-sliced frame mutates a
# derived object and is a common trigger for SettingWithCopyWarning.
msoa_to_lsoa = msoa_to_lsoa[['LSOA11CD', 'MSOA11CD']].drop_duplicates()
msoa_to_lsoa.shape

(41729, 2)

In [14]:
# Keep only the lookup rows whose LSOA code appears in the IMD table.
# A boolean filter with reassignment replaces the in-place
# `drop(<index of non-matching rows>)`: it selects the same rows without
# mutating a derived frame in place.
has_imd_record = msoa_to_lsoa['LSOA11CD'].isin(df_imd['lsoa11cd'])
msoa_to_lsoa = msoa_to_lsoa[has_imd_record]

In [15]:
# Check how many lookup rows remain after restricting to LSOAs in the IMD table.
msoa_to_lsoa.shape

(34753, 2)

In [16]:
# Attach the parent MSOA code to every LSOA-level IMD record.
# The key columns are named differently on each side, so both are kept
# in the result.
msoa_imd = df_imd.merge(
    right=msoa_to_lsoa,
    how='inner',
    left_on='lsoa11cd',
    right_on='LSOA11CD',
)
msoa_imd.head()

Unnamed: 0,lsoa11cd,IMDRank,IMDDecil,LSOA11CD,MSOA11CD
0,E01000001,29199,9,E01000001,E02000001
1,E01000002,30379,10,E01000002,E02000001
2,E01000003,14915,5,E01000003,E02000001
3,E01000005,8678,3,E01000005,E02000001
4,E01000006,14486,5,E01000006,E02000017


In [26]:
# Mean IMD rank of the LSOAs in each MSOA; the result is indexed by MSOA11CD.
# NOTE(review): rows with IMDRank == 0 are averaged like any other value --
# presumably these are placeholders for areas without an IMD score; confirm.
msoa1 = msoa_imd.groupby('MSOA11CD')[['IMDRank']].mean()

In [27]:
# Median IMD decile of the LSOAs in each MSOA; the result is indexed by MSOA11CD.
# NOTE(review): rows with IMDDecil == 0 take part in the median -- presumably
# placeholders for areas without an IMD score; confirm.
msoa2 = msoa_imd.groupby('MSOA11CD')[['IMDDecil']].median()

In [32]:
# Combine the per-MSOA mean rank and median decile into one table.
# This rebinds `msoa_imd` from the LSOA-level frame to the MSOA-level
# aggregate, so the two aggregation cells cannot be re-run after this one
# without first rebuilding the LSOA-level merge.
msoa_imd = pd.merge(msoa1, msoa2, how='inner', on='MSOA11CD')

In [30]:
#msoa_imd.to_csv('data/msoa_imd.csv')

In [33]:
# Preview the MSOA-level table (mean IMD rank and median IMD decile per MSOA11CD).
# NOTE(review): the recorded output lists W02... codes with 0.0 for both
# measures -- presumably areas without IMD values; confirm whether they
# should be excluded.
msoa_imd.head()

Unnamed: 0_level_0,IMDRank,IMDDecil
MSOA11CD,Unnamed: 1_level_1,Unnamed: 2_level_1
E02000001,20275.166667,6.5
E02000002,6621.500000,2.0
E02000003,11123.166667,4.0
E02000004,12229.000000,4.5
E02000005,8005.200000,3.0
...,...,...
W02000419,0.000000,0.0
W02000420,0.000000,0.0
W02000421,0.000000,0.0
W02000422,0.000000,0.0


## Rural Urban Classification (RUC)

In [35]:
# Load the 2011 Rural Urban Classification table for LSOAs.
ruc_csv_path = 'data/Rural_Urban_Classification_(2011)_of_Lower_Layer_Super_Output_Areas_in_England_and_Wales.csv'
df_ruc = pd.read_csv(ruc_csv_path)

In [39]:
# Keep only the LSOA code and its classification column.
ruc_columns = ['LSOA11CD', 'RUC11']
df_ruc = df_ruc[ruc_columns]

In [42]:
# merge with MSOA
msoa_ruc = df_ruc.merge(msoa_to_lsoa, on='LSOA11CD', how ='inner')
msoa_ruc.head()

Unnamed: 0,LSOA11CD,RUC11,MSOA11CD
0,E01000205,Urban major conurbation,E02000058
1,E01000001,Urban major conurbation,E02000001
2,E01000206,Urban major conurbation,E02000056
3,E01000207,Urban major conurbation,E02000056
4,E01000002,Urban major conurbation,E02000001


In [49]:
# Collapse to one classification per MSOA by taking the first non-null RUC11
# value found among its LSOAs; the result is indexed by MSOA11CD.
# NOTE(review): `first()` depends on row order, so an MSOA whose LSOAs carry
# different classes gets whichever appears first -- confirm this is intended.
msoa_ruc = msoa_ruc.groupby('MSOA11CD')[['RUC11']].first()

In [51]:
#msoa_ruc.to_csv('data/msoa_ruc.csv')