In [16]:
import pandas as pd
import requests
import json
import numpy as np

In [17]:
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that also serialises NumPy scalars and arrays.

    Pass it as ``cls=NumpyEncoder`` to ``json.dump`` / ``json.dumps``.

    The checks use NumPy's abstract scalar base classes (``np.integer``,
    ``np.floating``) instead of an explicit list of concrete types. The
    explicit list referenced ``np.float_``, an alias that was removed in
    NumPy 2.0, so merely evaluating it raised ``AttributeError`` there.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        Called by ``json`` only for objects it cannot serialise natively.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for any NumPy integer,
            ``float`` for any NumPy float, and a (nested) ``list`` for
            ``np.ndarray``.

        Raises:
            TypeError: for any other unsupported type (raised by the base
                class implementation).
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [2]:
# Data sources used by this notebook.
#
# The two IMD_XLSX lines are alternative .xlsx downloads (per their file names:
# "File 1 - IMD2019 Index of Multiple Deprivation" and "File 2 - IoD2019 Domains
# of Deprivation"). They are kept for reference only; nothing below reads them.
# IMD_XLSX = "https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/833970/File_1_-_IMD2019_Index_of_Multiple_Deprivation.xlsx"
# IMD_XLSX = "https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/833973/File_2_-_IoD2019_Domains_of_Deprivation.xlsx"
# CSV that is actually loaded ("File 7 - All IoD2019 Scores, Ranks, Deciles and
# Population Denominators" per its file name).
IMD_CSV = "https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/845345/File_7_-_All_IoD2019_Scores__Ranks__Deciles_and_Population_Denominators_3.csv"
# LSOA boundary GeoJSON. The commented-out URLs differ only in the trailing
# layer suffix (_3, _0 vs. _2) -- presumably other variants of the same
# boundary dataset; TODO confirm what distinguishes them.
# LSOA_BOUNDARIES = "https://opendata.arcgis.com/datasets/da831f80764346889837c72508f046fa_3.geojson"
# LSOA_BOUNDARIES = "https://opendata.arcgis.com/datasets/da831f80764346889837c72508f046fa_0.geojson"
LSOA_BOUNDARIES = "https://opendata.arcgis.com/datasets/da831f80764346889837c72508f046fa_2.geojson"

In [3]:
# Maps the verbose column headers of the IMD CSV to short snake_case names.
# Used as the `columns=` mapping when the CSV is loaded, so every key must
# match the source header character-for-character.
cols = {
    # Identifiers
    "LSOA code (2011)": "lsoa11cd",
    "LSOA name (2011)": "lsoa11nm",
    "Local Authority District code (2019)": "la19cd",
    "Local Authority District name (2019)": "la19nm",
    # Overall index
    "Index of Multiple Deprivation (IMD) Score": "imd2019_score",
    "Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived)": "imd2019_rank",
    "Index of Multiple Deprivation (IMD) Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_decile",
    # Domains: score / rank / decile for each
    "Income Score (rate)": "imd2019_income_score",
    "Income Rank (where 1 is most deprived)": "imd2019_income_rank",
    "Income Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_income_decile",
    "Employment Score (rate)": "imd2019_employment_score",
    "Employment Rank (where 1 is most deprived)": "imd2019_employment_rank",
    "Employment Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_employment_decile",
    "Education, Skills and Training Score": "imd2019_education_score",
    "Education, Skills and Training Rank (where 1 is most deprived)": "imd2019_education_rank",
    "Education, Skills and Training Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_education_decile",
    "Health Deprivation and Disability Score": "imd2019_health_score",
    "Health Deprivation and Disability Rank (where 1 is most deprived)": "imd2019_health_rank",
    "Health Deprivation and Disability Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_health_decile",
    "Crime Score": "imd2019_crime_score",
    "Crime Rank (where 1 is most deprived)": "imd2019_crime_rank",
    "Crime Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_crime_decile",
    "Barriers to Housing and Services Score": "imd2019_housing_score",
    "Barriers to Housing and Services Rank (where 1 is most deprived)": "imd2019_housing_rank",
    "Barriers to Housing and Services Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_housing_decile",
    "Living Environment Score": "imd2019_environment_score",
    "Living Environment Rank (where 1 is most deprived)": "imd2019_environment_rank",
    "Living Environment Decile (where 1 is most deprived 10% of LSOAs)": "imd2019_environment_decile",
    # IDACI / IDAOPI income indices
    'Income Deprivation Affecting Children Index (IDACI) Score (rate)': "imd2019_income_children_score",
    'Income Deprivation Affecting Children Index (IDACI) Rank (where 1 is most deprived)': "imd2019_income_children_rank",
    'Income Deprivation Affecting Children Index (IDACI) Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_income_children_decile",
    'Income Deprivation Affecting Older People (IDAOPI) Score (rate)': "imd2019_income_older_score",
    'Income Deprivation Affecting Older People (IDAOPI) Rank (where 1 is most deprived)': "imd2019_income_older_rank",
    'Income Deprivation Affecting Older People (IDAOPI) Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_income_older_decile",
    # Sub-domains; the short name is prefixed with the domain it is grouped under here
    'Children and Young People Sub-domain Score': "imd2019_education_childrenyp_score",
    'Children and Young People Sub-domain Rank (where 1 is most deprived)': "imd2019_education_childrenyp_rank",
    'Children and Young People Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_education_childrenyp_decile",
    'Adult Skills Sub-domain Score': "imd2019_education_adultskills_score",
    'Adult Skills Sub-domain Rank (where 1 is most deprived)': "imd2019_education_adultskills_rank",
    'Adult Skills Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_education_adultskills_decile",
    'Geographical Barriers Sub-domain Score': "imd2019_housing_geobarriers_score",
    'Geographical Barriers Sub-domain Rank (where 1 is most deprived)': "imd2019_housing_geobarriers_rank",
    'Geographical Barriers Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_housing_geobarriers_decile",
    'Wider Barriers Sub-domain Score': "imd2019_housing_widerbarriers_score",
    'Wider Barriers Sub-domain Rank (where 1 is most deprived)': "imd2019_housing_widerbarriers_rank",
    'Wider Barriers Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_housing_widerbarriers_decile",
    'Indoors Sub-domain Score': "imd2019_environment_indoors_score",
    'Indoors Sub-domain Rank (where 1 is most deprived)': "imd2019_environment_indoors_rank",
    'Indoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_environment_indoors_decile",
    'Outdoors Sub-domain Score': "imd2019_environment_outdoors_score",
    'Outdoors Sub-domain Rank (where 1 is most deprived)': "imd2019_environment_outdoors_rank",
    'Outdoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs)': "imd2019_environment_outdoors_decile",
    # Population counts
    'Total population: mid 2015 (excluding prisoners)': "population_2015",
    'Dependent Children aged 0-15: mid 2015 (excluding prisoners)': "population_0_15_2015",
    'Population aged 16-59: mid 2015 (excluding prisoners)': "population_16_59_2015",
    'Older population aged 60 and over: mid 2015 (excluding prisoners)': "population_60plus_2015",
    # NOTE: the key below ends with a space before the closing quote. The
    # renamed column shows up in the frame preview further down, so this
    # presumably mirrors the source header exactly -- do not trim it.
    'Working age population 18-59/64: for use with Employment Deprivation Domain (excluding prisoners) ': "population_workingage_2015"
}

In [4]:
# Read the IMD table from its URL, shorten the headers via `cols`,
# and index the rows by LSOA code.
imd = (
    pd.read_csv(IMD_CSV)
    .rename(columns=cols)
    .set_index('lsoa11cd')
)

In [5]:
# Add a "<column>_pc" column for each population band: the band's count divided
# by the LSOA's total population (a 0-1 fraction, not multiplied by 100).
for band in (
    'population_0_15_2015',
    'population_16_59_2015',
    'population_60plus_2015',
    'population_workingage_2015',
):
    imd.loc[:, f"{band}_pc"] = imd[band] / imd["population_2015"]

In [6]:
# Spot-check five randomly chosen rows (no seed is set, so the rows differ on every run).
imd.sample(n=5)

Unnamed: 0_level_0,lsoa11nm,la19cd,la19nm,imd2019_score,imd2019_rank,imd2019_decile,imd2019_income_score,imd2019_income_rank,imd2019_income_decile,imd2019_employment_score,...,imd2019_environment_outdoors_decile,population_2015,population_0_15_2015,population_16_59_2015,population_60plus_2015,population_workingage_2015,population_0_15_2015_pc,population_16_59_2015_pc,population_60plus_2015_pc,population_workingage_2015_pc
lsoa11cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
E01018076,Fenland 010B,E07000010,Fenland,17.082,16915,6,0.109,15121,5,0.085,...,10,2142,377,1184,581,1187,0.176004,0.552754,0.271242,0.554155
E01011336,Leeds 058C,E08000035,Leeds,58.939,1019,1,0.284,2742,1,0.224,...,2,1466,248,808,410,826,0.169168,0.55116,0.279673,0.563438
E01005372,Oldham 034A,E08000004,Oldham,32.309,6964,3,0.192,7173,3,0.142,...,4,1178,263,664,251,664,0.22326,0.563667,0.213073,0.563667
E01012995,East Riding of Yorkshire 041B,E06000011,East Riding of Yorkshire,21.373,13252,5,0.14,11464,4,0.115,...,7,2027,462,1185,380,1186,0.227923,0.584608,0.187469,0.585101
E01024546,Shepway 009D,E07000112,Folkestone and Hythe,20.065,14292,5,0.086,18573,6,0.092,...,9,1687,276,891,520,911,0.163604,0.528156,0.308239,0.540012


In [7]:
# The five LSOAs with the largest share of residents aged 60 and over.
(
    imd[['lsoa11nm', 'la19nm', 'population_60plus_2015_pc']]
    .sort_values('population_60plus_2015_pc', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,lsoa11nm,la19nm,population_60plus_2015_pc
lsoa11cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E01026707,King's Lynn and West Norfolk 017D,King's Lynn and West Norfolk,0.689794
E01020932,Eastbourne 012B,Eastbourne,0.681466
E01019355,South Lakeland 013D,South Lakeland,0.674803
E01019957,East Devon 012B,East Devon,0.668129
E01019913,East Devon 020B,East Devon,0.657706


In [8]:
# The five LSOAs with the largest share of dependent children aged 0-15.
(
    imd[['lsoa11nm', 'la19nm', 'population_0_15_2015_pc']]
    .sort_values('population_0_15_2015_pc', ascending=False)
    .head(5)
)

Unnamed: 0_level_0,lsoa11nm,la19nm,population_0_15_2015_pc
lsoa11cd,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
E01005614,Salford 016E,Salford,0.529848
E01005044,Bury 026E,Bury,0.479339
E01002052,Haringey 029C,Haringey,0.450137
E01033183,Richmondshire 004G,Richmondshire,0.440051
E01031988,Wiltshire 045B,Wiltshire,0.432031


In [30]:
# Load the LSOA boundary FeatureCollection.
# A local copy ("lsoa.geojson") is used when present. On a fresh checkout the
# file does not exist, so it is downloaded from LSOA_BOUNDARIES and cached
# under the same name, which lets the notebook run from a clean kernel.
try:
    # JSON text is UTF-8; be explicit so the read does not depend on the
    # platform's default encoding.
    with open("lsoa.geojson", encoding="utf-8") as fh:
        lsoa = json.load(fh)
except FileNotFoundError:
    r = requests.get(LSOA_BOUNDARIES, timeout=60)
    r.raise_for_status()  # fail loudly instead of parsing an HTTP error page
    lsoa = r.json()
    with open("lsoa.geojson", "w", encoding="utf-8") as fh:
        json.dump(lsoa, fh)

In [31]:
# Attach selected IMD columns to each boundary feature's properties and keep
# only the features whose LSOA code appears in the IMD table.

# Columns to copy: every "*_rank" column, the name columns ending in "nm",
# and the overall decile. (An earlier assignment that selected all numeric
# columns was immediately overwritten by this one, so it has been dropped.)
add_cols = [c for c in imd.columns if c.endswith(('_rank', 'nm'))] + ['imd2019_decile']

# Build the code -> {column: value} lookup once, rather than doing a
# label-based row lookup on the DataFrame for every feature in the loop.
imd_props = imd[add_cols].to_dict(orient='index')

new_lsoa = {
    "type": 'FeatureCollection',
    "features": []
}
for feature in lsoa['features']:
    props = imd_props.get(feature['properties']['lsoa11cd'])
    if props is not None:
        # Updates the feature's own properties dict in place.
        feature['properties'].update(props)
        new_lsoa["features"].append(feature)

# From here on `lsoa` refers to the filtered, enriched collection.
lsoa = new_lsoa

In [32]:
# Pretty-print the properties of the first ten features as a sanity check.
for feature in lsoa['features'][:10]:
    print(json.dumps(feature['properties'], cls=NumpyEncoder, indent=4))

{
    "objectid": 1,
    "lsoa11cd": "E01000001",
    "lsoa11nm": "City of London 001A",
    "lsoa11nmw": "City of London 001A",
    "st_areashape": 133320.7688715,
    "st_lengthshape": 2291.846072147722,
    "la19nm": "City of London",
    "imd2019_rank": 29199,
    "imd2019_income_rank": 32831,
    "imd2019_employment_rank": 32742,
    "imd2019_education_rank": 32842,
    "imd2019_health_rank": 32113,
    "imd2019_crime_rank": 32662,
    "imd2019_housing_rank": 7319,
    "imd2019_environment_rank": 7789,
    "imd2019_income_children_rank": 32806,
    "imd2019_income_older_rank": 32820,
    "imd2019_education_childrenyp_rank": 32777,
    "imd2019_education_adultskills_rank": 32843,
    "imd2019_housing_geobarriers_rank": 22985,
    "imd2019_housing_widerbarriers_rank": 3216,
    "imd2019_environment_indoors_rank": 16364,
    "imd2019_environment_outdoors_rank": 1615,
    "imd2019_decile": 9
}
{
    "objectid": 2,
    "lsoa11cd": "E01000002",
    "lsoa11nm": "City of London 001B",
   

In [33]:
# Write the enriched FeatureCollection to disk; NumpyEncoder handles any
# NumPy values that ended up in the feature properties.
with open('imd2019.geojson', 'w') as out_file:
    json.dump(lsoa, out_file, cls=NumpyEncoder)

In [34]:
# Number of boundary features kept, i.e. those whose LSOA code was found in the IMD table.
len(lsoa["features"])

32844