# Importing packages

In [39]:
import pandas as pd
import numpy as np
import openpyxl
import os
import requests
from helpers import data_folder #function to check for data folder and create it if not there
from helpers import data_checker #function to check for a specific file inside the data folder
import json
import folium
import zipfile

data_folder()

Data checker will look for the data folder. If none is found then it will create it.
Successfully created the directory Data 


# Importing Index of Deprivation Data
These data are published separately for England, Scotland, Wales and Northern Ireland.

First we will import the data for England, which is available from the MHCLG website.

#### First, download the English data

In [40]:
if not data_checker('england_imd_2019.xlsx'):
    #this url links to the MHCLG website that hosts the table on English IMD by LSOA. The data come in an xlsx file.
    url = 'https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/833970/File_1_-_IMD2019_Index_of_Multiple_Deprivation.xlsx'
    r = requests.get(url, allow_redirects = True)
    with open (os.path.join('Data', 'england_imd_2019.xlsx'), 'wb') as f:
        f.write(r.content)

Data has already been downloaded


In [41]:
#import data into pandas dataframe
df = pd.read_excel(os.path.join('Data', 'england_imd_2019.xlsx'), sheet_name = 'IMD2019')
df.shape

(32844, 6)

In [42]:
df.head()

Unnamed: 0,LSOA code (2011),LSOA name (2011),Local Authority District code (2019),Local Authority District name (2019),Index of Multiple Deprivation (IMD) Rank,Index of Multiple Deprivation (IMD) Decile
0,E01000001,City of London 001A,E09000001,City of London,29199,9
1,E01000002,City of London 001B,E09000001,City of London,30379,10
2,E01000003,City of London 001C,E09000001,City of London,14915,5
3,E01000005,City of London 001E,E09000001,City of London,8678,3
4,E01000006,Barking and Dagenham 016A,E09000002,Barking and Dagenham,14486,5


## Clean up the data

In [43]:
#first, change column titles to something a bit better...
df = df.rename(columns = {'LSOA code (2011)': 'lsoa_code_2011', 'LSOA name (2011)': 'lsoa_name_211', 'Local Authority District code (2019)': 'local_authority_district_code', 'Local Authority District name (2019)': 'local_authority_district', 'Index of Multiple Deprivation (IMD) Rank': 'imd_rank', 'Index of Multiple Deprivation (IMD) Decile': 'imd_decile'})


In [44]:
df.head()

Unnamed: 0,lsoa_code_2011,lsoa_name_211,local_authority_district_code,local_authority_district,imd_rank,imd_decile
0,E01000001,City of London 001A,E09000001,City of London,29199,9
1,E01000002,City of London 001B,E09000001,City of London,30379,10
2,E01000003,City of London 001C,E09000001,City of London,14915,5
3,E01000005,City of London 001E,E09000001,City of London,8678,3
4,E01000006,Barking and Dagenham 016A,E09000002,Barking and Dagenham,14486,5


In [49]:
df.dtypes

lsoa_code_2011                   object
lsoa_name_211                    object
local_authority_district_code    object
local_authority_district         object
imd_rank                          int64
imd_decile                        int64
dtype: object

#### Now, get the Scottish data

## Download LSOA to Postcode lookup (additional location labels)
Use data_checker to check whether postcodes.csv is already in the data file. If not, then script will download and extract a lookup csv spreadsheet from the UK government geoportal containing postcodes to LSOA to MSOA info. This will allow us to join data gathered on households to particular areas, and facilitate further analysis and mapping.

In [46]:
if not data_checker('postcodes.csv'):
    print('Downloading LSOA to postcode lookup...')
    #This is data for the whole of the UK:
    lookup_url = 'https://www.arcgis.com/sharing/rest/content/items/940cf89bf08e4459bdf0470bbd345424/data' 
    r = requests.get(lookup_url, allow_redirects=True)
    
    #first, create the path destination for the downloaded zip file
    zip_dir = os.path.join('Data', 'postcodes.zip') 
    
    #now create the zip file and write the content of our requests object (i.e. the zip file)
    with open(zip_dir, 'wb') as zip_f: 
        zip_f.write(r.content)
    print('File downloaded, now extracting data...')
    
    #extract the csv file from the zip archive
    with zipfile.ZipFile(zip_dir, 'r') as zip_f: 
        zip_f.extract('PCD_OA_LSOA_MSOA_LAD_NOV19_UK_LU.csv', path = 'Data')
        
    #get rid of unneeded zip file
    os.remove(zip_dir) 
    
    #rename unnecessarily long csv name to 'postcodes'csv:
    os.rename(os.path.join('Data', 'PCD_OA_LSOA_MSOA_LAD_NOV19_UK_LU.csv'), os.path.join('Data', 'postcodes.csv')) 
    print('Done, file is saved as data/postcodes.csv')
else:
    print('Postcodes.csv already exists in the data file, no need to download it!')

Data has already been downloaded
Postcodes.csv already exists in the data file, no need to download it!


## Now join English IMD data to additional location labels

## Importing Geographical data and displaying in folium

In [47]:
m = folium.Map(location = [55.09, -1.5], zoom_start = 5) #pretty much centres it on the UK
#m #uncomment to view starting map


In [48]:
url = 'https://opendata.arcgis.com/datasets/c892586698ad4d268f9288f1df20ab77_0.geojson'

folium.TopoJson(json.loads(requests.get(url).text), 'objects.ana')

<folium.features.TopoJson at 0x203e5faf970>