# Notebook to engineer the Index of Multiple Deprivation (IMD) and its sub-indices per ward, in the UK

In [None]:
import csv
import numpy as np
import pandas as pd

In [None]:
# Load the two raw inputs: the IMD table first, then the LSOA-to-ward lookup.
imd, llsoa_to_ward = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../original_data/imd_uk_2019.csv",
        "../original_data/llsoa_to_ward_2017.csv",
    )
)

### Filtering the data

In [None]:
# LSOA identifiers kept alongside the scores.
identifier_columns = [
    'LSOA code (2011)',
    'LSOA name (2011)',
]

# Overall IMD score followed by the individual domain scores.
domain_score_columns = [
    'Index of Multiple Deprivation (IMD) Score',
    'Income Score (rate)',
    'Employment Score (rate)',
    'Education, Skills and Training Score',
    'Health Deprivation and Disability Score',
    'Crime Score',
    'Barriers to Housing and Services Score',
    'Living Environment Score',
]

# Supplementary income indices (IDACI, IDAOPI) and the sub-domain scores.
subdomain_score_columns = [
    'Income Deprivation Affecting Children Index (IDACI) Score (rate)',
    'Income Deprivation Affecting Older People (IDAOPI) Score (rate)',
    'Children and Young People Sub-domain Score',
    'Adult Skills Sub-domain Score',
    'Geographical Barriers Sub-domain Score',
    'Wider Barriers Sub-domain Score',
    'Indoors Sub-domain Score',
    'Outdoors Sub-domain Score',
]

# Full list of columns retained from the IMD table, in output order.
columns = identifier_columns + domain_score_columns + subdomain_score_columns

# Restrict both tables to the columns used downstream.
imd_per_lsoa = imd.loc[:, columns]
lsoa_to_ward = llsoa_to_ward.loc[:, ['LSOA11CD', 'WD17CD']]

### Computing the median, per ward

In [None]:
# Attach each LSOA's ward code (WD17CD) by joining on the LSOA code. merge
# defaults to an inner join, so LSOAs missing from the lookup are dropped.
imd_per_lsoa_ward = imd_per_lsoa.merge(lsoa_to_ward, left_on="LSOA code (2011)", right_on="LSOA11CD")
# After the join LSOA11CD duplicates 'LSOA code (2011)', so discard it.
imd_per_lsoa_ward = imd_per_lsoa_ward.drop(columns=['LSOA11CD'])

# Every column other than the LSOA identifiers and the ward key is aggregated.
# NOTE(review): assumes all of these score columns are numeric -- confirm
# against the input CSV; a non-numeric column makes median() raise.
score_columns = [
    c for c in imd_per_lsoa_ward.columns
    if c not in ('LSOA code (2011)', 'LSOA name (2011)', 'WD17CD')
]

# One row per ward holding the median of each score over the ward's LSOAs.
# A single groupby replaces the former per-ward DataFrame.append loop, which
# was quadratic and cannot run on pandas >= 2.0 (append was removed there).
# sort=False keeps wards in order of first appearance, as unique() did.
# Rows whose WD17CD is missing are excluded (groupby's default dropna=True).
imd_per_ward = (
    imd_per_lsoa_ward
    .groupby('WD17CD', sort=False)[score_columns]
    .median()
    .reset_index()
)
# Keep the ward code as the last column, matching the previous layout.
imd_per_ward = imd_per_ward[score_columns + ['WD17CD']]

### Moving the ward code column (WD17CD) to the front

In [None]:
# Relocate the ward code so it becomes the first column of the table.
col_name = "WD17CD"
first_col = imd_per_ward[col_name]
del imd_per_ward[col_name]
imd_per_ward.insert(loc=0, column=col_name, value=first_col)

# Preview the first rows to check the new column order.
imd_per_ward.head()

### Saving to .csv

In [None]:
# Write the per-ward table to disk without the DataFrame's row index.
output_path = "../data/imd_per_ward.csv"
imd_per_ward.to_csv(output_path, index=False)