In [1]:
import pandas as pd
import numpy as np

#### Opening and Merging XLSXs

In [2]:
dir = "/Users/finn/Documents/GitHub/FM-ds.github.io/Regional_Inequality/Untoutched Data/NUTS3 Incomes/"
paths = ["regionalgrossdisposablehouseholdincomelocalauthoritytlcnortheast.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytlnnorthernireland.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytlmscotland.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytllwales.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytlksouthwest.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytljsoutheast.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytlilondon.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytlheastofengland.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytlgwestmidlands.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytlfeastmidlands.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytleyorksandhumber.xls",
"regionalgrossdisposablehouseholdincomelocalauthoritytldnorthwest.xls"]

Creating and appending DFs to merge:

In [3]:

df = pd.read_excel(dir+paths[0], sheet_name='Table 2', skiprows=1)
df_names = list(df.columns) #prevents a weird issue where there are almost identical column names: e.g. 2016 vs '2016'"merge NUTS3 Data.ipynb"
for path in paths[1:]:
    new_df = pd.read_excel(dir+path, sheet_name='Table 2', skiprows=1, names = df_names)
    #print(list(new_df.columns))
    df = pd.concat([df, new_df], ignore_index=True)
df = df[~df['Region'].astype(str).str.startswith('Note 1')]
df = df[df['Region'].notna()]

years = range(1997,2020)
years = [str(y) for y in list(years)]

df.columns = list(df.columns[:-23])+ years
df
df.to_csv("temp.csv")



In [5]:
len(set(df['LAD code'].values))

374

In [20]:
df.to_pickle('LAD_incomes.pkl')

In [18]:
set(df.Region.values)

{'East Midlands',
 'East of England',
 'London',
 'North East',
 'North West',
 'Northern Ireland',
 'Scotland',
 'South East',
 'South West',
 'Wales',
 'West Midlands',
 'Yorkshire and The Humber'}

#### Associating Income Data with GeoJSON

In [6]:
import json
json_path = "G:\\My Drive\\4th Year\\Data Science\\Regional Inequality\\Cleaned Data\\working lowpoly\\lowpoly.json"

with open(json_path) as f:
    d = json.load(f)

for feature in d['features']:
    for year in years:
        feature['properties']['GDHI_'+str(year)] = int(df[df['Region name']==feature['properties']['LAD21NM']][year])
    

dumping our combined (geo)Json object:

In [7]:
with open('lowpoly.json', 'w') as f:
    json.dump(d, f)

#### Dodgy Validation Code

In [7]:
big1=-1.239631
small1=-1.239631
big2=54.72387
small2=54.72387
for feature in d['features']:
    print(feature['properties']['LAD21NM'])
    for coord in feature['geometry']['coordinates'][0]:
        if len(coord) != 2:
            inner_coords = coord[0]
            for inner_coord in inner_coords:
                if inner_coord[0] > big1:
                    big1 =  coord[0]
                if inner_coord[0] < small1:
                    small1 =  coord[0]
                if inner_coord[1] > big2:
                    big2 =  coord[1]
                if inner_coord[1] < small2:
                    small2 =  coord[1]
        else:    
            print(coord)
            if coord[0] > big1:
                big1 =  coord[0]
            if coord[0] < small1:
                small1 =  coord[0]
            if coord[1] > big2:
                big2 =  coord[1]
            if coord[1] < small2:
                small2 =  coord[1]    
non_matches = 0
for feature in d["features"]:
    non_matches += int((df['Region name'].value_counts()[feature['properties']['LAD21NM']])!=1)
non_matches

Hartlepool
[-1.239631477999978, 54.723875089000046]
[-1.180807560999938, 54.70240667100006]
[-1.198188326999968, 54.68543699600008]
[-1.180545990999974, 54.65991165600008]
[-1.147540267999943, 54.64798683500004]
[-1.157984666999937, 54.62948959600004]
[-1.187314286999936, 54.63172272200006]
[-1.212628622999944, 54.621726729000045]
[-1.299014272999955, 54.62798188800008]
[-1.380898315999957, 54.643917068000064]
[-1.341375058999972, 54.65018898900007]
[-1.346046617999946, 54.664470994000055]
[-1.290477033999935, 54.71839258400007]
[-1.270640929999956, 54.72702718800008]
[-1.239631477999978, 54.723875089000046]
Middlesbrough
[-1.198605146999967, 54.582867597000075]
[-1.166662912999925, 54.554053045000046]
[-1.199063101999968, 54.54449723300007]
[-1.167804462999925, 54.526965362000055]
[-1.146196948999943, 54.50282161200005]
[-1.234865905999925, 54.51031604500008]
[-1.28191211099994, 54.51823549900007]
[-1.285431710999944, 54.53624995400003]
[-1.251121834999935, 54.59152980700003]
[-1.1986

TypeError: 'NoneType' object is not subscriptable

0

#### Pre-Baking Quantiles

In [8]:
[int(x) for x in list(df['2019'].quantile(np.arange(0,1.05,0.05)))]

[13381,
 15867,
 16485,
 17000,
 17337,
 17942,
 18511,
 18828,
 19318,
 19817,
 20236,
 20743,
 21519,
 22196,
 22888,
 23570,
 24607,
 26320,
 28577,
 30351,
 200903]

In [9]:
years = [str(x) for x in list(range(1997,2020))]
quantiles = {}
for year in years:
    quantiles['GDHI_'+year]=[int(x) for x in list(df[year].quantile(np.arange(0,1.05,0.05)))]

Our Quantiles are 
[0, 0.05, 0.1, ... , 0.95, 1]

In [10]:
json.dumps(quantiles)

'{"GDHI_1997": [5875, 8106, 8446, 8738, 8937, 9164, 9349, 9599, 9798, 10126, 10329, 10701, 11009, 11404, 11698, 12121, 12545, 13593, 14506, 15885, 89249], "GDHI_1998": [6385, 8297, 8536, 8871, 9069, 9321, 9505, 9738, 10016, 10249, 10544, 10871, 11236, 11652, 11927, 12295, 12990, 14026, 15013, 16264, 98239], "GDHI_1999": [7091, 8636, 8988, 9323, 9525, 9720, 9886, 10126, 10420, 10649, 11077, 11361, 11774, 12045, 12397, 12727, 13314, 14463, 15609, 17103, 90832], "GDHI_2000": [7747, 9148, 9479, 9828, 10079, 10309, 10504, 10725, 10954, 11260, 11668, 11989, 12362, 12638, 13196, 13508, 14027, 15208, 16577, 18176, 95511], "GDHI_2001": [8114, 9473, 9939, 10256, 10504, 10675, 10918, 11071, 11398, 11772, 12034, 12306, 12789, 13130, 13568, 14030, 14678, 15646, 17034, 18478, 94159], "GDHI_2002": [8838, 9753, 10211, 10608, 10810, 11046, 11234, 11477, 11772, 12127, 12340, 12684, 13277, 13604, 14016, 14450, 15025, 16032, 17457, 18859, 95878], "GDHI_2003": [9181, 10015, 10562, 10879, 11128, 11405, 1161