# Importing packages

In [3]:
import pandas as pd
import numpy as np
import openpyxl
import os
import requests
from helpers import data_folder #function to check for data folder and create it if not there
from helpers import data_checker #function to check for a specific file inside the data folder
import json
import folium

data_folder()

Data checker will look for the data folder. If none is found then it will create it.
Successfully created the directory Data 


# Importing Index of Deprivation Data
These data are published separately for England, Scotland, Wales and Northern Ireland.

First we will import the data for England, which is available from the MHCLG website.

In [8]:
if not data_checker('england_imd_2019.xlsx'):
    #this url links to the MHCLG website that hosts the table on English IMD by LSOA. The data come in an xlsx file.
    url = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/833970/File_1_-_IMD2019_Index_of_Multiple_Deprivation.xlsx'
    r = requests.get(url, allow_redirects = True)
    with open (os.path.join('Data', 'england_imd_2019.xlsx'), 'wb') as f:
        f.write(r.content)

Data has already been downloaded


In [20]:
#import data into pandas dataframe
df = pd.read_excel(os.path.join('Data', 'england_imd_2019.xlsx'), sheet_name = 'IMD2019')
df.shape

(32844, 6)

In [21]:
df.head()

Unnamed: 0,LSOA code (2011),LSOA name (2011),Local Authority District code (2019),Local Authority District name (2019),Index of Multiple Deprivation (IMD) Rank,Index of Multiple Deprivation (IMD) Decile
0,E01000001,City of London 001A,E09000001,City of London,29199,9
1,E01000002,City of London 001B,E09000001,City of London,30379,10
2,E01000003,City of London 001C,E09000001,City of London,14915,5
3,E01000005,City of London 001E,E09000001,City of London,8678,3
4,E01000006,Barking and Dagenham 016A,E09000002,Barking and Dagenham,14486,5


## Clean up the data

In [24]:
#first, change column titles to something a bit better...
df = df.rename(columns = {'LSOA code (2011)': 'lsoa_code_2011', 'LSOA name (2011)': 'lsoa_name_211', 'Local Authority District code (2019)': 'local_authority_district_code', 'Local Authority District name (2019)': 'local_authority_district', 'Index of Multiple Deprivation (IMD) Rank': 'imd_rank', 'Index of Multiple Deprivation (IMD) Decile': 'imd_decile'})


In [25]:
df.head()

Unnamed: 0,lsoa_code_2011,lsoa_name_211,local_authority_district_code,local_authority_district,imd_rank,imd_decile
0,E01000001,City of London 001A,E09000001,City of London,29199,9
1,E01000002,City of London 001B,E09000001,City of London,30379,10
2,E01000003,City of London 001C,E09000001,City of London,14915,5
3,E01000005,City of London 001E,E09000001,City of London,8678,3
4,E01000006,Barking and Dagenham 016A,E09000002,Barking and Dagenham,14486,5


In [26]:
df.dtypes

lsoa_code_2011                   object
lsoa_name_211                    object
local_authority_district_code    object
local_authority_district         object
imd_rank                          int64
imd_decile                        int64
dtype: object

## Importing Geographical data and displaying in folium

In [20]:
m = folium.Map(location = [55.09, -1.5], zoom_start = 5) #pretty much centres it on the UK
#m #uncomment to view starting map


In [None]:
url = 'https://opendata.arcgis.com/datasets/c892586698ad4d268f9288f1df20ab77_0.geojson'

folium.TopoJson(json.loads(requests.get(url).text), 'objects.ana')