In [1]:
import pandas as pd
import numpy as np
import pickle

import re

import warnings

import csv

import dash
from dash import dcc, html
from dash.dependencies import Input, Output

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots 

import geopandas as gpd
import shapely.geometry as sg
from shapely.geometry import MultiPolygon, Polygon


import folium
from folium.plugins import DualMap, HeatMap

import plotly.express as px
import plotly.graph_objs as go

from dbfread import DBF

# Defining functions

In [2]:
# defining dissimilarity function
def dissimilarity(df):
    dissimilarity_results = {}
    for col in df.columns:
        col_numerator = []
        for i in range(df.shape[0]):
            col_numerator.append((df.iloc[i,:].sum() / df.sum().sum()) *
                                 np.abs((df.loc[i, col] / df.iloc[i,:].sum() - (df[col].sum() / df.sum().sum()))))
        col_numerator = sum(col_numerator)
        col_denominator = 2 * (df[col].sum() / df.sum().sum()) * (1 - (df[col].sum() / df.sum().sum()))
        dissimilarity_results[col] = round(col_numerator / col_denominator, 3)

        # print(f'dissimilarity {col} = {col_numerator/col_denominator}')
    return dissimilarity_results              
        

In [3]:
#| include: false

# defining simpson function

def simpson(df):
    simpson_series = []
    for i in range(df.shape[0]):
        area_ethnic_fraction_2 = []
        for col in df.columns:
            if col != 'other':
                area_ethnic_fraction_2.append((df.loc[i,col]/(df.iloc[i,:].sum()))**2)
        sum_area_ethnic_fraction_2 = sum(area_ethnic_fraction_2)
        simpson_series.append(sum_area_ethnic_fraction_2)
        
    simpson_series = pd.Series(simpson_series)
    simpson_index = 0
    for i in range(df.shape[0]):
        simpson_index += (simpson_series.iloc[i] * df.iloc[i, :].sum() / df.sum().sum())
    
    area_simpson = []
    for col in df.columns:
        if col != 'other':
            area_simpson.append((df[col].sum()/df.sum().sum())**2)
    area_simpson_index = sum(area_simpson)
    simpson_index = round(simpson_index, 3)
    area_simpson_index = round(area_simpson_index, 3)
    return [simpson_index, area_simpson_index]

In [4]:
# defining moran I function

def moran(df,border):
    positive_weights = []
    for key in border.keys():
        positive_weights.append(len(border[key]))
       
    fraction = {}
    for col in df.columns:
        # df[col] = pd.to_numeric(df[col], errors='coerce')
        fraction[col] = []
        for i in df.index:
            fraction[col].append(df.loc[i, col] / df.loc[i, :].sum())
    
    col_moran = {} 
    for col in df.columns:
        col_moran_list = []
        for i in df.index:
            morani = []
            for common in border[i]:    
                morani.append(((df.loc[i, col] / df.loc[i, :].sum()) - np.mean(fraction[col])) * ((df.loc[common, col] / df.loc[common, :].sum()) - np.mean(fraction[col])))
            row_moran = sum(morani)
            col_moran_list.append(row_moran)
        col_moran[col] = col_moran_list
    moran_results= {}
    for col in df.columns:
        moran_numerator = sum(col_moran[col])*df.shape[0]
        moran_denominator = sum((fraction[col] - np.mean(fraction[col]))**2)*sum(positive_weights)
        # moran_index = moran_numerator/moran_denominator
        moran_results[col] = round(moran_numerator/moran_denominator, 3)
    return moran_results


# Preprocessing

In [5]:
data_2001 = pd.read_csv('Census2001\\census2001.csv')

In [6]:
data_2001.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175434 entries, 0 to 175433
Data columns (total 18 columns):
 #   Column                                Non-Null Count   Dtype 
---  ------                                --------------   ----- 
 0   2001 output area                      175434 non-null  object
 1   mnemonic                              175434 non-null  object
 2   White: British                        175434 non-null  int64 
 3   White: Irish                          175434 non-null  int64 
 4   White: Other                          175434 non-null  int64 
 5   Mixed: White and Black Caribbean      175434 non-null  int64 
 6   Mixed: White and Black African        175434 non-null  int64 
 7   Mixed: White and Asian                175434 non-null  int64 
 8   Mixed: Other                          175434 non-null  int64 
 9   Asian/Asian British: Indian           175434 non-null  int64 
 10  Asian/Asian British: Pakistani        175434 non-null  int64 
 11  Asian/Asian B

In [7]:
data_2001.head()

Unnamed: 0,2001 output area,mnemonic,White: British,White: Irish,White: Other,Mixed: White and Black Caribbean,Mixed: White and Black African,Mixed: White and Asian,Mixed: Other,Asian/Asian British: Indian,Asian/Asian British: Pakistani,Asian/Asian British: Bangladeshi,Asian/Asian British: Other,Black/Black British: Black Caribbean,Black/Black British: Black African,Black/Black British: Other,Chinese/Other: Chinese,Chinese/Other: Other
0,45UBFQ0001,E00159790,271,0,5,0,0,3,0,0,0,0,0,0,0,0,0,0
1,45UBFQ0002,E00159791,318,3,6,0,4,3,0,0,0,0,0,0,0,0,0,3
2,45UBFQ0003,E00159792,298,3,3,3,3,3,0,0,5,0,0,0,0,0,3,0
3,45UBFQ0004,E00159793,310,3,4,0,3,0,0,0,0,0,3,0,4,0,0,0
4,45UBFQ0005,E00159794,289,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [8]:
melt_2001 = data_2001.melt(id_vars=['2001 output area','mnemonic'], value_vars= data_2001.columns[2:], 
                           var_name='Ethnic group', value_name='Observation')
melt_2001.head()

Unnamed: 0,2001 output area,mnemonic,Ethnic group,Observation
0,45UBFQ0001,E00159790,White: British,271
1,45UBFQ0002,E00159791,White: British,318
2,45UBFQ0003,E00159792,White: British,298
3,45UBFQ0004,E00159793,White: British,310
4,45UBFQ0005,E00159794,White: British,289


In [9]:
melt_2001['Ethnic group'].unique()

array(['White: British', 'White: Irish', 'White: Other',
       'Mixed: White and Black Caribbean',
       'Mixed: White and Black African', 'Mixed: White and Asian',
       'Mixed: Other', 'Asian/Asian British: Indian',
       'Asian/Asian British: Pakistani',
       'Asian/Asian British: Bangladeshi', 'Asian/Asian British: Other',
       'Black/Black British: Black Caribbean',
       'Black/Black British: Black African', 'Black/Black British: Other',
       'Chinese/Other: Chinese', 'Chinese/Other: Other'], dtype=object)

In [10]:
melt_2001['ethnicity'] = melt_2001['Ethnic group'].apply( lambda x: x.split(':')[0])
melt_2001['sub_ethnicity'] = melt_2001['Ethnic group'].apply( lambda x: x.split(':')[1])
melt_2001.drop('Ethnic group', axis= 1, inplace= True)
melt_2001['sub_ethnicity'] = melt_2001['sub_ethnicity'].apply(str.lstrip)
melt_2001.head()

Unnamed: 0,2001 output area,mnemonic,Observation,ethnicity,sub_ethnicity
0,45UBFQ0001,E00159790,271,White,British
1,45UBFQ0002,E00159791,318,White,British
2,45UBFQ0003,E00159792,298,White,British
3,45UBFQ0004,E00159793,310,White,British
4,45UBFQ0005,E00159794,289,White,British


In [11]:
melt_2001.ethnicity.unique()

array(['White', 'Mixed', 'Asian/Asian British', 'Black/Black British',
       'Chinese/Other'], dtype=object)

In [12]:
melt_2001['ethnicity'] = melt_2001['ethnicity'].apply(lambda x: 'white' if 'White' in x else
                                                                'asian' if 'Asian' in x else 
                                                                'asian' if 'Chinese' in x else
                                                                'black' if 'Black' in x else 'other')

In [13]:
melt_2001

Unnamed: 0,2001 output area,mnemonic,Observation,ethnicity,sub_ethnicity
0,45UBFQ0001,E00159790,271,white,British
1,45UBFQ0002,E00159791,318,white,British
2,45UBFQ0003,E00159792,298,white,British
3,45UBFQ0004,E00159793,310,white,British
4,45UBFQ0005,E00159794,289,white,British
...,...,...,...,...,...
2806939,36UCHT0004,E00140745,0,asian,Other
2806940,36UCHT0005,E00140746,0,asian,Other
2806941,36UCHT0006,E00140747,0,asian,Other
2806942,36UCHT0007,E00140748,0,asian,Other


In [14]:
melt_2001.ethnicity.unique()

array(['white', 'other', 'asian', 'black'], dtype=object)

In [15]:
lookup_2001= pd.read_csv('Census2001\\lookup2001.csv')
lookup_2011= pd.read_csv('Census2011\\lookup2011.csv', low_memory= False)
lookup_2001.head()

Unnamed: 0,OA01CD,LSOA01CD,LSOA01NM,MSOA01CD,MSOA01NM,ObjectId
0,00AAFA0001,E01000001,City of London 001A,E02000001,City of London 001,1
1,00ABGA0017,E01000031,Barking and Dagenham 002A,E02000003,Barking and Dagenham 002,2
2,00AAFA0002,E01000001,City of London 001A,E02000001,City of London 001,3
3,00AAFA0003,E01000001,City of London 001A,E02000001,City of London 001,4
4,00AAFA0004,E01000001,City of London 001A,E02000001,City of London 001,5


In [16]:
# OA01CD and mnemonic in 2001 census data are not the same, so I found a lookup file to connect these dataset
# I only selected the columns that I need.
connection = pd.read_csv('Census2001\\Output_Area_(2001)_to_Output_Area_(2011)_to_Local_Authority_District_(2011)_Lookup_in_England_and_Wales.csv', usecols=['OA01CD','OA01CDO'])
connection.head()

Unnamed: 0,OA01CD,OA01CDO
0,E00000001,00AAFA0001
1,E00000051,00ABFX0015
2,E00000002,00AAFA0002
3,E00000003,00AAFA0003
4,E00000004,00AAFA0004


In [17]:
connection.shape

(181979, 2)

In [18]:
# there were duplicated rows in the connection dataset so I drop them. Now number of rows in the connection file and 
# data_2001 are the same
connection.drop_duplicates(subset= 'OA01CD', inplace= True)
connection.shape

(175434, 2)

In [19]:
melt_2001 = pd.merge(connection, melt_2001, left_on= 'OA01CD', right_on= 'mnemonic', how= 'left')
melt_2001.drop(['mnemonic','2001 output area'], axis= 1, inplace= True)
melt_2001.head()

Unnamed: 0,OA01CD,OA01CDO,Observation,ethnicity,sub_ethnicity
0,E00000001,00AAFA0001,170,white,British
1,E00000001,00AAFA0001,4,white,Irish
2,E00000001,00AAFA0001,28,white,Other
3,E00000001,00AAFA0001,0,other,White and Black Caribbean
4,E00000001,00AAFA0001,0,other,White and Black African


In [20]:
lookup_2001

Unnamed: 0,OA01CD,LSOA01CD,LSOA01NM,MSOA01CD,MSOA01NM,ObjectId
0,00AAFA0001,E01000001,City of London 001A,E02000001,City of London 001,1
1,00ABGA0017,E01000031,Barking and Dagenham 002A,E02000003,Barking and Dagenham 002,2
2,00AAFA0002,E01000001,City of London 001A,E02000001,City of London 001,3
3,00AAFA0003,E01000001,City of London 001A,E02000001,City of London 001,4
4,00AAFA0004,E01000001,City of London 001A,E02000001,City of London 001,5
...,...,...,...,...,...,...
175429,47UGGM0012,E01032482,Wyre Forest 005D,E02006771,Wyre Forest 005,175430
175430,47UGGM0013,E01032482,Wyre Forest 005D,E02006771,Wyre Forest 005,175431
175431,47UGGM0014,E01032481,Wyre Forest 005C,E02006771,Wyre Forest 005,175432
175432,47UGGM0015,E01032481,Wyre Forest 005C,E02006771,Wyre Forest 005,175433


In [21]:
merged_2001 = pd.merge(melt_2001, lookup_2001, left_on='OA01CDO', right_on='OA01CD', how= 'left')
merged_2001.head(3)

Unnamed: 0,OA01CD_x,OA01CDO,Observation,ethnicity,sub_ethnicity,OA01CD_y,LSOA01CD,LSOA01NM,MSOA01CD,MSOA01NM,ObjectId
0,E00000001,00AAFA0001,170,white,British,00AAFA0001,E01000001,City of London 001A,E02000001,City of London 001,1
1,E00000001,00AAFA0001,4,white,Irish,00AAFA0001,E01000001,City of London 001A,E02000001,City of London 001,1
2,E00000001,00AAFA0001,28,white,Other,00AAFA0001,E01000001,City of London 001A,E02000001,City of London 001,1


In [22]:
column_order = ['OA01CDO', 'OA01CD_x', 'LSOA01CD', 'MSOA01CD', 'LSOA01NM',  
                'MSOA01NM', 'ethnicity', 'sub_ethnicity', 'Observation']
merged_2001 = merged_2001[column_order]
merged_2001.rename(columns={'OA01CD_x':'OA01CD'}, inplace= True)
merged_2001.head(5)

Unnamed: 0,OA01CDO,OA01CD,LSOA01CD,MSOA01CD,LSOA01NM,MSOA01NM,ethnicity,sub_ethnicity,Observation
0,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,white,British,170
1,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,white,Irish,4
2,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,white,Other,28
3,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,other,White and Black Caribbean,0
4,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,other,White and Black African,0


In [23]:
# Creating a new column for LAD names in 2001
merged_2001['LAD01NM'] = merged_2001['MSOA01NM'].apply(lambda x: re.sub(r'\d+', '', x).strip())

In [24]:
# modifying LAD names based on the LAD names in 2011
merged_2001['LAD01NM'] = merged_2001['LAD01NM'].apply(lambda x: 'Northumberland' if x == 'Alnwick' else
                                                                    'Northumberland' if x == 'Berwick-upon-Tweed' else
                                                                    'Northumberland'  if x == 'Blyth Valley' else 
                                                                    'Shropshire'  if x == 'Bridgnorth' else
                                                                    'Bristol, City of'  if x == 'Bristol' else
                                                                    'Cornwall' if x == 'Caradon' else
                                                                    'Cornwall' if x == 'Carrick' else
                                                                    'Northumberland' if x == 'Castle Morpeth' else 
                                                                    'Cheshire West and Chester' if x == 'Chester' else 
                                                                    'County Durham' if x == 'Chester-le-Street' else
                                                                    'Cheshire West and Chester' if x == 'Congleton' else
                                                                    'Cheshire West and Chester' if x == 'Crewe and Nantwich' else
                                                                    'County Durham' if x == 'Derwentside' else
                                                                    'County Durham' if x == 'Durham' else
                                                                    'County Durham' if x == 'Easington' else
                                                                    'Cheshire West and Chester' if x == 'Ellesmere Port & Neston' else 
                                                                    'Herefordshire, County of' if x == 'Herefordshire' else 
                                                                    'Cornwall' if x == 'Kerrier' else
                                                                    'Kingston upon Hull, City of' if x == 'Kingston upon Hull' else
                                                                    'Cheshire East' if x == 'Macclesfield' else
                                                                    'Rhondda Cynon Taf' if x == 'Rhondda, Cynon, Taff' else
                                                                    'Central Bedfordshire' if x == 'Mid Bedfordshire' else
                                                                    'Cornwall' if x == 'North Cornwall' else
                                                                    'Shropshire' if x == 'North Shropshire' else
                                                                    'Wiltshire' if x == 'North Wiltshire' else
                                                                    'Shropshire' if x == 'Oswestry' else
                                                                    'Cornwall' if x == 'Penwith' else
                                                                    'Cornwall' if x == 'Restormel' else
                                                                    'Wiltshire' if x == 'Salisbury' else
                                                                    'County Durham' if x == 'Sedgefield' else
                                                                    'Shropshire' if x == 'Shrewsbury and Atcham' else
                                                                    'Central Bedfordshire' if x == 'South Bedfordshire' else
                                                                    'Central Bedfordshire' if x == 'Bedfordshire' else
                                                                    'Shropshire' if x == 'South Shropshire' else
                                                                    'County Durham' if x == 'Teesdale' else
                                                                    'Northumberland' if x == 'Tynedale' else
                                                                    'Northumberland' if x == 'Wansbeck' else
                                                                    'County Durham' if x == 'Wear Valley' else
                                                                    'Cheshire West and Chester' if x == 'Vale Royal' else
                                                                    'Wiltshire' if x == 'Kennet' else
                                                                    'Wiltshire' if x == 'West Wiltshire' else  x)


In [25]:
merged_2001.head()

Unnamed: 0,OA01CDO,OA01CD,LSOA01CD,MSOA01CD,LSOA01NM,MSOA01NM,ethnicity,sub_ethnicity,Observation,LAD01NM
0,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,white,British,170,City of London
1,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,white,Irish,4,City of London
2,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,white,Other,28,City of London
3,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,other,White and Black Caribbean,0,City of London
4,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,other,White and Black African,0,City of London


In [26]:
lookup_2011.head()

Unnamed: 0,OA11CD,LSOA11CD,LSOA11NM,MSOA11CD,MSOA11NM,LAD11CD,LAD11NM,LAD11NMW,ObjectId
0,E00000001,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,1
1,E00000003,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,2
2,E00000005,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,3
3,E00000007,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,4
4,E00000010,E01000003,City of London 001C,E02000001,City of London 001,E09000001,City of London,,5


In [27]:
merged_2001['sub_ethnicity'].unique()

array(['British', 'Irish', 'Other', 'White and Black Caribbean',
       'White and Black African', 'White and Asian', 'Indian',
       'Pakistani', 'Bangladeshi', 'Black Caribbean', 'Black African',
       'Chinese'], dtype=object)

In [28]:
len(merged_2001['OA01CD'].unique())

175434

In [29]:
len(merged_2001['LSOA01CD'].unique())

34378

In [30]:
len(merged_2001['MSOA01CD'].unique())

7194

In [31]:
len(merged_2001['LAD01NM'].unique())

348

# Ethnicity Dataset

In [32]:
ethnicity_2001 = merged_2001.groupby(['OA01CDO','OA01CD', 'LSOA01CD', 'MSOA01CD', 'LSOA01NM',
                                      'MSOA01NM', 'LAD01NM','ethnicity'])['Observation'].sum().reset_index()
ethnicity_2001.head(3)

Unnamed: 0,OA01CDO,OA01CD,LSOA01CD,MSOA01CD,LSOA01NM,MSOA01NM,LAD01NM,ethnicity,Observation
0,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,City of London,asian,10
1,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,City of London,black,0
2,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,City of London,other,3


In [33]:
ethnicity_2001.ethnicity.unique()

array(['asian', 'black', 'other', 'white'], dtype=object)

In [34]:
ethnicity_2001 = ethnicity_2001.pivot(index = ['OA01CDO','OA01CD', 'LSOA01CD', 'MSOA01CD', 'LSOA01NM',
                                      'MSOA01NM','LAD01NM'], columns = 'ethnicity', values = 'Observation').reset_index().rename_axis(None, axis=1)
ethnicity_2001.head(3)

Unnamed: 0,OA01CDO,OA01CD,LSOA01CD,MSOA01CD,LSOA01NM,MSOA01NM,LAD01NM,asian,black,other,white
0,00AAFA0001,E00000001,E01000001,E02000001,City of London 001A,City of London 001,City of London,10,0,3,202
1,00AAFA0002,E00000002,E01000001,E02000001,City of London 001A,City of London 001,City of London,6,0,3,92
2,00AAFA0003,E00000003,E01000001,E02000001,City of London 001A,City of London 001,City of London,14,0,4,189


In [35]:
ethnicity_2001 = pd.merge(ethnicity_2001, lookup_2011[['LAD11CD', 'LAD11NM']].drop_duplicates(), 
                          left_on= 'LAD01NM', right_on= 'LAD11NM', how= 'left')

ethnicity_2001['total_pop']= ethnicity_2001['white']+ethnicity_2001['asian']+ethnicity_2001['black']+ethnicity_2001['other']
ethnicity_2001.rename(columns={'LAD11CD':'LAD01CD'}, inplace= True)

column_order = ['OA01CDO', 'OA01CD', 'LSOA01CD', 'MSOA01CD', 'LAD01CD', 
                'LSOA01NM', 'MSOA01NM', 'LAD01NM', 'asian', 'black', 'other', 'white', 'total_pop'] 

ethnicity_2001 = ethnicity_2001[column_order]

ethnicity_2001.head(3)

Unnamed: 0,OA01CDO,OA01CD,LSOA01CD,MSOA01CD,LAD01CD,LSOA01NM,MSOA01NM,LAD01NM,asian,black,other,white,total_pop
0,00AAFA0001,E00000001,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,10,0,3,202,215
1,00AAFA0002,E00000002,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,6,0,3,92,101
2,00AAFA0003,E00000003,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,14,0,4,189,207


In [36]:
ethnicity_2001[['asian', 'black', 'other','white']].sum(axis= 0).sum()

52041655

In [37]:
ethnicity_2001.drop(columns=['OA01CDO'], inplace= True)

In [38]:
ethnicity_2001['LAD01CD'].fillna('E06000010', inplace= True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  ethnicity_2001['LAD01CD'].fillna('E06000010', inplace= True)


In [39]:
ethnicity_2001[ethnicity_2001['LAD01NM'].str.contains('Hull')]

Unnamed: 0,OA01CD,LSOA01CD,MSOA01CD,LAD01CD,LSOA01NM,MSOA01NM,LAD01NM,asian,black,other,white,total_pop
64332,E00064333,E01012763,E02002669,E06000010,Kingston upon Hull 018C,Kingston upon Hull 018,"Kingston upon Hull, City of",16,7,6,284,313
64333,E00064334,E01012761,E02002666,E06000010,Kingston upon Hull 015A,Kingston upon Hull 015,"Kingston upon Hull, City of",48,3,14,263,328
64334,E00064335,E01012760,E02002676,E06000010,Kingston upon Hull 025D,Kingston upon Hull 025,"Kingston upon Hull, City of",16,0,3,274,293
64335,E00064336,E01012756,E02002676,E06000010,Kingston upon Hull 025A,Kingston upon Hull 025,"Kingston upon Hull, City of",0,4,6,271,281
64336,E00064337,E01012763,E02002669,E06000010,Kingston upon Hull 018C,Kingston upon Hull 018,"Kingston upon Hull, City of",29,4,17,244,294
...,...,...,...,...,...,...,...,...,...,...,...,...
65161,E00065162,E01012914,E02002663,E06000010,Kingston upon Hull 012C,Kingston upon Hull 012,"Kingston upon Hull, City of",24,9,3,300,336
65162,E00065163,E01012914,E02002663,E06000010,Kingston upon Hull 012C,Kingston upon Hull 012,"Kingston upon Hull, City of",81,20,10,457,568
65163,E00065164,E01012917,E02002659,E06000010,Kingston upon Hull 008F,Kingston upon Hull 008,"Kingston upon Hull, City of",3,0,6,276,285
65164,E00065165,E01012915,E02002659,E06000010,Kingston upon Hull 008E,Kingston upon Hull 008,"Kingston upon Hull, City of",3,0,6,254,263


In [40]:
ethnicity_2001.to_csv('preprocessed files/2001/ethnicity_2001.csv', index= False)

# Sub-Ethnicity Dataset

In [None]:
sub_ethnicity_2001 = merged_2001.groupby(['OA01CDO','OA01CD', 'LSOA01CD', 'MSOA01CD', 'LSOA01NM',
                                      'MSOA01NM','LAD01NM','sub_ethnicity'])['Observation'].sum().reset_index()
sub_ethnicity_2001.head(3)

In [None]:
sub_ethnicity_2001 = sub_ethnicity_2001.pivot(index = ['OA01CDO','OA01CD', 'LSOA01CD', 'MSOA01CD', 'LSOA01NM', 'MSOA01NM','LAD01NM'],
                                              columns = 'sub_ethnicity', values = 'Observation').reset_index().rename_axis(None, axis=1)
sub_ethnicity_2001.head(3)

In [None]:
sub_ethnicity_2001 = pd.merge(sub_ethnicity_2001, lookup_2011[['LAD11CD', 'LAD11NM']].drop_duplicates(), 
                          left_on= 'LAD01NM', right_on= 'LAD11NM', how= 'left')

sub_ethnicity_2001.rename(columns={'LAD11CD':'LAD01CD'}, inplace= True)
sub_ethnicity_2001.drop(columns= ['LAD11NM'], axis= 1, inplace= True)
column_order = ['OA01CDO', 'OA01CD', 'LSOA01CD', 'MSOA01CD', 'LAD01CD', 
                'LSOA01NM', 'MSOA01NM', 'LAD01NM'] 

sub_ethnicity_2001 = sub_ethnicity_2001[column_order + [col for col in sub_ethnicity_2001.columns if col not in column_order]]

sub_ethnicity_2001.head(3)

In [None]:
sub_ethnicity_2001[['Bangladeshi', 'Black African',
       'Black Caribbean', 'British', 'Chinese', 'Indian', 'Irish', 'Other',
       'Pakistani', 'White and Asian', 'White and Black African',
       'White and Black Caribbean']].sum(axis= 0).sum()

In [None]:
sub_ethnicity_2001.to_csv('preprocessed files/2001/sub_ethnicity_2001.csv', index= False)

# Shape Files

In [None]:
shape_2001 = gpd.read_file('Census2001\\shape_2001\\OA_2001_EW_BGC.shp')
# shape_2001.info()

In [None]:
shape_2001 = shape_2001.rename(columns={'geometry': 'OA_geometry'})
shape_2001.head(3)

In [None]:
shape_2001 = shape_2001.merge(ethnicity_2001[['OA01CD', 'LSOA01CD', 'MSOA01CD', 'LAD01CD']], on = 'OA01CD', how= 'left')
shape_column_order = [ 'OA01CD', 'LSOA01CD', 'MSOA01CD', 'LAD01CD', 'GlobalID', 'OA_geometry']
shape_2001 = shape_2001[shape_column_order]
shape_2001 = gpd.GeoDataFrame(shape_2001, geometry='OA_geometry')
shape_2001.head(3)

In [None]:
# Saving OA_2001 into a shape file
shape_2001.to_file('preprocessed files/2001/OA_2001.shp')

In [None]:
OA_2001 = gpd.read_file('preprocessed files/2001/OA_2001.shp').set_geometry('geometry')
OA_2001.plot(alpha=0.5, edgecolor='k')

In [None]:
# creating new culumns in shape file for LSOA geometry
lsoa_geom_2001 = shape_2001.groupby('LSOA01CD')['OA_geometry'].agg(lambda x: x.unary_union)
shape_2001['LSOA_geometry'] = shape_2001['LSOA01CD'].map(lsoa_geom_2001)

In [None]:
# Saving LSOA_2001 into a shape file
LSOA_2001 = shape_2001.groupby('LSOA01CD').agg({'LSOA_geometry': 'first'}).reset_index()
LSOA_2001 = LSOA_2001.merge(ethnicity_2001.groupby('LSOA01CD')[['asian', 'black', 'other', 'white']].sum().reset_index(), on='LSOA01CD' ,how= 'left')

LSOA_2001['total'] = LSOA_2001['white'] + LSOA_2001['asian']+ LSOA_2001['black']+ LSOA_2001['other']

for col in LSOA_2001[['asian', 'black', 'other', 'white']]:
    new_name = col + '_fraction'
    LSOA_2001[new_name] = round(LSOA_2001[col]/LSOA_2001['total'], 3)

LSOA_2001['year'] = 2001
LSOA_2001.rename(columns={'LSOA01CD':'LSOACD'}, inplace = True)
column_order = ['year', 'LSOACD', 'white', 'asian', 'black', 'other',
                'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'total', 'LSOA_geometry',]
LSOA_2001 = LSOA_2001[column_order]

LSOA_2001 = gpd.GeoDataFrame(LSOA_2001, geometry='LSOA_geometry')
LSOA_2001.to_file('preprocessed files/2001/LSOA_2001.shp', driver='ESRI Shapefile')

In [None]:
LSOA_2001 = gpd.read_file('preprocessed files/2001/LSOA_2001.shp').set_geometry('geometry')
LSOA_2001.plot(alpha=0.5, edgecolor='k')

In [None]:
# creating new culumns in shape file for MSOA geometry
msoa_geom_2001 = shape_2001.groupby('MSOA01CD')['LSOA_geometry'].agg(lambda x: x.unary_union)
shape_2001['MSOA_geometry'] = shape_2001['MSOA01CD'].map(msoa_geom_2001)

In [None]:
# Saving MSOA_2001 into a shape file
MSOA_2001 = shape_2001.groupby('MSOA01CD').agg({'MSOA_geometry': 'first'}).reset_index()
MSOA_2001 = MSOA_2001.merge(ethnicity_2001.groupby('MSOA01CD')[['asian', 'black', 'other', 'white']].sum().reset_index(), on='MSOA01CD' ,how= 'left')

MSOA_2001['total'] = MSOA_2001['white'] + MSOA_2001['asian']+ MSOA_2001['black']+ MSOA_2001['other']

for col in MSOA_2001[['asian', 'black', 'other', 'white']]:
    new_name = col + '_fraction'
    MSOA_2001[new_name] = round(MSOA_2001[col]/MSOA_2001['total'], 3)

MSOA_2001['year'] = 2001
MSOA_2001.rename(columns={'MSOA01CD':'MSOACD'}, inplace = True)
column_order = ['year', 'MSOACD', 'white', 'asian', 'black', 'other',
                'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'total', 'MSOA_geometry',]
MSOA_2001 = MSOA_2001[column_order]

MSOA_2001 = gpd.GeoDataFrame(MSOA_2001, geometry='MSOA_geometry')
MSOA_2001.to_file('preprocessed files/2001/MSOA_2001.shp', driver='ESRI Shapefile')

In [None]:
MSOA_2001 = gpd.read_file('preprocessed files/2001/MSOA_2001.shp').set_geometry('geometry')
MSOA_2001.plot(alpha=0.5, edgecolor='k')

In [None]:
# creating new culumns in shape file for LAD geometry
lad_geom_2001 = shape_2001.groupby('LAD01CD')['MSOA_geometry'].agg(lambda x: x.unary_union)
shape_2001['LAD_geometry'] = shape_2001['LAD01CD'].map(lad_geom_2001)

In [None]:
# In order t have better and clearer borders of LAD, I took border info from another files and mixed it with LAD borders of 2021
# For the LADs that geometry was unavailable in new_borders, I took LAD geopmetry from year 2011
new_borders = gpd.read_file('May_2020_Boundaries/LAD_May_2020_Boundaries_UK_BFE_2022_4839426458879395509.geojson')
LAD_2011 = gpd.read_file('preprocessed files/2011/LAD_2011.shp')

In [None]:
# Saving LAD_2001 into a shape file
LAD_2001 = shape_2001.groupby('LAD01CD').agg({'LAD_geometry': 'first'}).reset_index()
LAD_2001 = LAD_2001.merge(ethnicity_2001.groupby(['LAD01CD', 'LAD01NM'])[['asian', 'black', 'other', 'white']].sum().reset_index(), on='LAD01CD' ,how= 'left')
LAD_2001['LAD_pop'] = LAD_2001['white'] + LAD_2001['asian']+ LAD_2001['black']+ LAD_2001['other']

for col in LAD_2001[['asian', 'black', 'other', 'white']]:
    new_name = col + '_fraction'
    LAD_2001[new_name] = round(LAD_2001[col]/LAD_2001['LAD_pop'], 3)

LAD_2001['year'] = 2001
LAD_2001.rename(columns={'LAD01CD':'LADCD', 'LAD01NM':'LADNM'}, inplace = True)
column_order = ['year', 'LADCD', 'LADNM', 'white', 'asian', 'black', 'other',
                'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'LAD_pop', 'LAD_geometry']
LAD_2001 = LAD_2001[column_order]

LAD_2001 =  pd.merge(LAD_2001, new_borders[['lad20nm', 'geometry']], left_on= 'LADNM',right_on='lad20nm', how= 'left')
LAD_2001 =  LAD_2001.merge(LAD_2011[['LADNM','LADCD', 'geometry']], on= 'LADNM', how= 'left')

LAD_2001['geometry']= LAD_2001['geometry_x'].fillna(LAD_2001['geometry_y'])
LAD_2001.drop(columns=['LAD_geometry','lad20nm','geometry_x', 'LADCD_y', 'geometry_y'], axis= 1,inplace= True)
LAD_2001.rename(columns={'LADCD_x':'LADCD','geometry':'LAD_geometry'}, inplace= True)

LAD_2001 = gpd.GeoDataFrame(LAD_2001, geometry='LAD_geometry')
LAD_2001.to_file('preprocessed files/2001/LAD_2001.shp', driver='ESRI Shapefile')

In [None]:
LAD_2001 = gpd.read_file('preprocessed files/2001/LAD_2001.shp').set_geometry('geometry')
LAD_2001.set_geometry('geometry')
LAD_2001.plot(alpha=0.5, edgecolor='k')

# Indexes

## Simpson

In [None]:
# Simpson Index at coutry level in 2001
OA_simp_2001 = simpson(ethnicity_2001[['white','asian','black','other']])
LSOA_simp_2001 = simpson(ethnicity_2001.groupby(['LSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
MSOA_simp_2001 = simpson(ethnicity_2001.groupby(['MSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
LAD_simp_2001 = simpson(ethnicity_2001.groupby(['LAD01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])

country_dic_sim_2001 = {'OA':OA_simp_2001[0], 'LSOA':LSOA_simp_2001[0],
                        'MSOA':MSOA_simp_2001[0], 'LAD':LAD_simp_2001[0], 'country':LAD_simp_2001[1]}

flat_dict = [{'year': 2001, 'total_population': ethnicity_2001['total_pop'].sum(),
              'white_frac': round(ethnicity_2001['white'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'asian_frac': round(ethnicity_2001['asian'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'black_frac': round(ethnicity_2001['black'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'other_frac': round(ethnicity_2001['other'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'OA':OA_simp_2001[0], 'LSOA':LSOA_simp_2001[0],'MSOA':MSOA_simp_2001[0], 'LAD':LAD_simp_2001[0], 'country':LAD_simp_2001[1]}]

country_simpson_2001 = pd.DataFrame(flat_dict)
country_simpson_2001.to_csv('preprocessed files/2001/country_simpson_2001.csv', index= False)
country_simpson_2001

In [None]:
#| include: false
# Creating LAD level simpson index dataset for year 2001.
# Calculating OA, LSOA and MSOA level simpson index inside each LAD
LAD_list_2001= ethnicity_2001['LAD01CD'].unique()
LAD_dic = {}
for LAD in LAD_list_2001:
    df = ethnicity_2001[ethnicity_2001['LAD01CD'] == LAD].reset_index(drop= True)
    OA_LADsimp_2001 = simpson(df[['white','asian','black','other']])
    LSOA_LADsimp_2001 = simpson(df.groupby(['LSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
    MSOA_LADsimp_2001 = simpson(df.groupby(['MSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
    LAD_dic[LAD] = {'OA':OA_LADsimp_2001, 'LSOA':LSOA_LADsimp_2001,'MSOA':MSOA_LADsimp_2001}

flat_dict = [{'year': 2001, 'LADCD': key, 
              'OA': value['OA'][0], 'LSOA': value['LSOA'][0],
              'MSOA': value['MSOA'][0], 'LAD': value['MSOA'][1]} for key, value in LAD_dic.items()]

LAD_simpson_2001 = pd.DataFrame(flat_dict)

LAD_simpson_2001 = LAD_simpson_2001.merge(
                         LAD_2001.groupby(['LADCD','LADNM'])[['LAD_pop','white_frac','asian_frac','black_frac','other_frac']].sum().reset_index(),
                         on= 'LADCD', how='left')

LAD_simpson_2001.rename(columns = {'LAD_pop': 'LAD_population'} ,inplace= True)

column_order = ['year', 'LADNM', 'LADCD', 'LAD_population',
                'white_frac', 'asian_frac', 'black_frac', 'other_frac', 
                'OA', 'LSOA', 'MSOA', 'LAD']

LAD_simpson_2001 = LAD_simpson_2001[column_order]
LAD_simpson_2001.to_csv('preprocessed files/2001/LAD_simpson_2001.csv', index= False)
LAD_simpson_2001.head()

## Borders

In [None]:
LAD_borders_2001 = {}

for idx, row in shape_2001.groupby('LAD01CD').agg({'LAD_geometry': 'first'}).iterrows():
    borders = []
    for idx2, row2 in shape_2001.groupby('LAD01CD').agg({'LAD_geometry': 'first'}).iterrows():
        if idx != idx2:
            if row.LAD_geometry.intersects(row2.LAD_geometry):
                borders.append(idx2)
    LAD_borders_2001[idx] = borders

In [None]:
with open('preprocessed files/2001/LAD_borders_2001.pkl', 'wb') as f:
    pickle.dump(LAD_borders_2001, f)

In [None]:
MSOA_borders_2001 = {}
for lad_code in shape_2001['LAD01CD'].unique():
    df_lad = shape_2001[shape_2001['LAD01CD'] == lad_code]
    MSOA_borders_2001[lad_code] = {}
    df_msoa =df_lad.groupby('MSOA01CD').agg({'MSOA_geometry': 'first'})
    for idx, row in df_msoa.iterrows():
        borders = []
        for idx2, row2 in df_msoa.iterrows():
            if idx != idx2:
                if row['MSOA_geometry'].intersects(row2['MSOA_geometry']):
                    borders.append(idx2)
        MSOA_borders_2001[lad_code][idx] = borders


In [None]:
with open('preprocessed files/2001/MSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(MSOA_borders_2001, f)

In [None]:
MSOA_borders_2001

In [None]:
LSOA_borders_2001 = {}
for lad_code in shape_2001['LAD01CD'].unique():
    df_lad = shape_2001[shape_2001['LAD01CD'] == lad_code]
    LSOA_borders_2001[lad_code] = {}
    df_lsoa =df_lad.groupby('LSOA01CD').agg({'LSOA_geometry': 'first'})
    for idx, row in df_lsoa.iterrows():
        borders = []
        for idx2, row2 in df_lsoa.iterrows():
            if idx != idx2:
                if row['LSOA_geometry'].intersects(row2['LSOA_geometry']):
                    borders.append(idx2)
        LSOA_borders_2001[lad_code][idx] = borders


In [None]:
with open('preprocessed files/2001/LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(LSOA_borders_2001, f)

In [None]:
OA_borders_2001 = {}
for lad_code in shape_2001['LAD01CD'].unique():
    df_lad = shape_2001[shape_2001['LAD01CD'] == lad_code]
    OA_borders_2001[lad_code] = {}
    df_oa = df_lad.set_index('OA01CD')
    for idx, row in df_oa.iterrows():
        borders = []
        for idx2, row2 in df_oa.iterrows():
            if idx != idx2:
                if row['OA_geometry'].intersects(row2['OA_geometry']):
                    borders.append(idx2)
        OA_borders_2001[lad_code][idx] = borders


In [None]:
with open('preprocessed files/2001/OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(OA_borders_2001, f)

## Dissimilarity

In [None]:
# Creating country level dissimilarity index dataset for year 2001.
# Calculating OA, LSOA, MSOA and LAD level dissimilarity index inside england

OA_diss_2001 = dissimilarity(ethnicity_2001[['white','asian','black','other']])
LSOA_diss_2001 = dissimilarity(ethnicity_2001.groupby(['LSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
MSOA_diss_2001 = dissimilarity(ethnicity_2001.groupby(['MSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
LAD_diss_2001 = dissimilarity(ethnicity_2001.groupby(['LAD01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
country_dic_diss_2001 = {'OA_level':OA_diss_2001, 'LSOA_level':LSOA_diss_2001,
                        'MSOA_level':MSOA_diss_2001, 'LAD_level':LAD_diss_2001}

flat_dict = [{'year': 2001, 'total_population': ethnicity_2001['total_pop'].sum(),
              'white_frac': round(ethnicity_2001['white'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'asian_frac': round(ethnicity_2001['asian'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'black_frac': round(ethnicity_2001['black'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'other_frac': round(ethnicity_2001['other'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'OA_white': country_dic_diss_2001['OA_level']['white'], 'LSOA_white': country_dic_diss_2001['LSOA_level']['white'],
              'MSOA_white': country_dic_diss_2001['MSOA_level']['white'], 'LAD_white': country_dic_diss_2001['LAD_level']['white'],
              'OA_asian': country_dic_diss_2001['OA_level']['asian'], 'LSOA_asian': country_dic_diss_2001['LSOA_level']['asian'],
              'MSOA_asian': country_dic_diss_2001['MSOA_level']['asian'], 'LAD_asian': country_dic_diss_2001['LAD_level']['asian'],
              'OA_black': country_dic_diss_2001['OA_level']['black'], 'LSOA_black': country_dic_diss_2001['LSOA_level']['black'],
              'MSOA_black': country_dic_diss_2001['MSOA_level']['black'], 'LAD_black': country_dic_diss_2001['LAD_level']['black'],
              'OA_other': country_dic_diss_2001['OA_level']['other'], 'LSOA_other': country_dic_diss_2001['LSOA_level']['other'],
              'MSOA_other': country_dic_diss_2001['MSOA_level']['other'], 'LAD_other': country_dic_diss_2001['LAD_level']['other']}]


country_dissimilarity_2001 = pd.DataFrame(flat_dict)

In [None]:
# Creating london level dissimilarity index dataset for year 2001.
# Calculating OA, LSOA, MSOA and LAD level dissimilarity index inside london
df = ethnicity_2001[ethnicity_2001['']]

OA_diss_london_2001 = dissimilarity(ethnicity_2001[['white','asian','black','other']])
LSOA_diss_london_2001 = dissimilarity(ethnicity_2001.groupby(['LSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
MSOA_diss_london_2001 = dissimilarity(ethnicity_2001.groupby(['MSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
LAD_diss_london_2001 = dissimilarity(ethnicity_2001.groupby(['LAD01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
country_dic_diss_2001 = {'OA_level':OA_diss_2001, 'LSOA_level':LSOA_diss_2001,
                        'MSOA_level':MSOA_diss_2001, 'LAD_level':LAD_diss_2001}

flat_dict = [{'year': 2001, 'total_population': ethnicity_2001['total_pop'].sum(),
              'white_frac': round(ethnicity_2001['white'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'asian_frac': round(ethnicity_2001['asian'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'black_frac': round(ethnicity_2001['black'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'other_frac': round(ethnicity_2001['other'].sum()/ethnicity_2001['total_pop'].sum(),3),
              'OA_white': country_dic_diss_2001['OA_level']['white'], 'LSOA_white': country_dic_diss_2001['LSOA_level']['white'],
              'MSOA_white': country_dic_diss_2001['MSOA_level']['white'], 'LAD_white': country_dic_diss_2001['LAD_level']['white'],
              'OA_asian': country_dic_diss_2001['OA_level']['asian'], 'LSOA_asian': country_dic_diss_2001['LSOA_level']['asian'],
              'MSOA_asian': country_dic_diss_2001['MSOA_level']['asian'], 'LAD_asian': country_dic_diss_2001['LAD_level']['asian'],
              'OA_black': country_dic_diss_2001['OA_level']['black'], 'LSOA_black': country_dic_diss_2001['LSOA_level']['black'],
              'MSOA_black': country_dic_diss_2001['MSOA_level']['black'], 'LAD_black': country_dic_diss_2001['LAD_level']['black'],
              'OA_other': country_dic_diss_2001['OA_level']['other'], 'LSOA_other': country_dic_diss_2001['LSOA_level']['other'],
              'MSOA_other': country_dic_diss_2001['MSOA_level']['other'], 'LAD_other': country_dic_diss_2001['LAD_level']['other']}]


country_dissimilarity_2001 = pd.DataFrame(flat_dict)

In [None]:
country_dissimilarity_2001.to_csv('preprocessed files/2001/country_dissimilarity_2001.csv', index= False)

In [None]:
# Creating LAD level dissimilarity index dataset for year 2001.
# Calculating OA, LSOA and MSOA level dissimilarity index inside each LAD
LAD_list_2001= ethnicity_2001['LAD01CD'].unique()
LAD_dic_diss_2001 = {}
for LAD in LAD_list_2001:
    df = ethnicity_2001[ethnicity_2001['LAD01CD'] == LAD].reset_index(drop= True)
    OA_diss_2001 = dissimilarity(df[['white','asian','black','other']])
    LSOA_diss_2001 = dissimilarity(df.groupby(['LSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
    MSOA_diss_2001 = dissimilarity(df.groupby(['MSOA01CD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
    LAD_dic_diss_2001[LAD] = {'OA_level':OA_diss_2001, 'LSOA_level':LSOA_diss_2001, 'MSOA_level':MSOA_diss_2001,}

flat_dict = [{'year': 2001, 'LADCD': key,
              'OA_white': value['OA_level']['white'], 'LSOA_white': value['LSOA_level']['white'], 'MSOA_white': value['MSOA_level']['white'],
              'OA_asian': value['OA_level']['asian'], 'LSOA_asian': value['LSOA_level']['asian'], 'MSOA_asian': value['MSOA_level']['asian'],
              'OA_black': value['OA_level']['black'], 'LSOA_black': value['LSOA_level']['black'], 'MSOA_black': value['MSOA_level']['black'],
              'OA_other': value['OA_level']['other'], 'LSOA_other': value['LSOA_level']['other'], 'MSOA_other': value['MSOA_level']['other'],
              } for key, value in LAD_dic_diss_2001.items()]

LAD_dissimilarity_2001 = pd.DataFrame(flat_dict)

LAD_dissimilarity_2001 = LAD_dissimilarity_2001.merge(
                         LAD_2001.groupby(['LADCD','LADNM'])[['LAD_pop','white_frac','asian_frac','black_frac','other_frac']].sum().reset_index(),
                         on= 'LADCD', how='left')
LAD_dissimilarity_2001.rename(columns = {'LAD_pop': 'LAD_population'} ,inplace= True)

# LAD_dissimilarity_2001.drop(['LAD'], axis= 1, inplace= True)
column_order = ['year', 'LADNM', 'LADCD', 'LAD_population',
                'white_frac', 'asian_frac', 'black_frac', 'other_frac',
                'OA_white', 'LSOA_white', 'MSOA_white',
                'OA_asian', 'LSOA_asian', 'MSOA_asian',
                'OA_black', 'LSOA_black', 'MSOA_black',
                'OA_other', 'LSOA_other', 'MSOA_other']

LAD_dissimilarity_2001 = LAD_dissimilarity_2001[column_order]
LAD_dissimilarity_2001.head()

In [None]:
LAD_dissimilarity_2001.to_csv('preprocessed files/2001/LAD_dissimilarity_2001.csv', index= False)

In [None]:
LAD_2001 = gpd.read_file('preprocessed files/2001/LAD_2001.shp')

## Moran

In [None]:
with open('preprocessed files/2001/OA_borders_2001.pkl', 'rb') as f:
    OA_borders_2001 = pickle.load(f)
with open('preprocessed files/2001/LSOA_borders_2001.pkl', 'rb') as f:
    LSOA_borders_2001 = pickle.load(f)
with open('preprocessed files/2001/MSOA_borders_2001.pkl', 'rb') as f:
    MSOA_borders_2001 = pickle.load(f)
with open('preprocessed files/2001/LAD_borders_2001.pkl', 'rb') as f:
    LAD_borders_2001 = pickle.load(f)

In [None]:
# Creating LAD level dissimilarity index dataset for year 2001.
# Calculating OA, LSOA and MSOA level dissimilarity index inside each LAD
LAD_list_2001= ethnicity_2001['LAD01CD'].unique()
LAD_dic_mor_2001 = {}
for LAD in LAD_list_2001:
    df = ethnicity_2001[ethnicity_2001['LAD01CD'] == LAD].reset_index(drop= True)
    OA_mor_2001 = moran(df[['OA01CD','white','asian','black','other']].set_index('OA01CD'),OA_borders_2001[LAD])
    LSOA_mor_2001 = moran(df.groupby(['LSOA01CD'])[['white','asian','black','other']].sum()[['white','asian','black','other']],LSOA_borders_2001[LAD])
    MSOA_mor_2001 = moran(df.groupby(['MSOA01CD'])[['white','asian','black','other']].sum()[['white','asian','black','other']],MSOA_borders_2001[LAD])
    LAD_dic_mor_2001[LAD] = {'OA_level':OA_mor_2001, 'LSOA_level':LSOA_mor_2001, 'MSOA_level':MSOA_mor_2001}

flat_dict = [{'year': 2001, 'LADCD': key,
              'OA_white': value['OA_level']['white'], 'LSOA_white': value['LSOA_level']['white'], 'MSOA_white': value['MSOA_level']['white'],
              'OA_asian': value['OA_level']['asian'], 'LSOA_asian': value['LSOA_level']['asian'], 'MSOA_asian': value['MSOA_level']['asian'],
              'OA_black': value['OA_level']['black'], 'LSOA_black': value['LSOA_level']['black'], 'MSOA_black': value['MSOA_level']['black'],
              'OA_other': value['OA_level']['other'], 'LSOA_other': value['LSOA_level']['other'], 'MSOA_other': value['MSOA_level']['other'],
              } for key, value in LAD_dic_mor_2001.items()]

LAD_moran_2001 = pd.DataFrame(flat_dict)

LAD_moran_2001 = LAD_moran_2001.merge(
                         LAD_2001.groupby(['LADCD','LADNM'])[['LAD_pop','white_frac','asian_frac','black_frac','other_frac']].sum().reset_index(),
                         on= 'LADCD', how='left')
LAD_moran_2001.rename(columns = {'LAD_pop': 'LAD_population'} ,inplace= True)

column_order = ['year', 'LADNM', 'LADCD', 'LAD_population', 
                'white_frac', 'asian_frac', 'black_frac', 'other_frac',
                'OA_white', 'LSOA_white', 'MSOA_white',
                'OA_asian', 'LSOA_asian', 'MSOA_asian',
                'OA_black', 'LSOA_black', 'MSOA_black',
                'OA_other', 'LSOA_other', 'MSOA_other']

LAD_moran_2001 = LAD_moran_2001[column_order]
LAD_moran_2001.head()

In [None]:
LAD_moran_2001.to_csv('preprocessed files/2001/LAD_moran_2001.csv', index= False)

In [None]:
ethnicity_2001.groupby(['LSOA01CD'])['total_pop'].sum().describe()

In [None]:
ethnicity_2001.groupby(['MSOA01CD'])['total_pop'].sum().describe()

In [None]:
ethnicity_2001.groupby(['LAD01CD'])['total_pop'].sum().describe()

In [None]:
ethnicity_2001.groupby(['MSOA01CD'])['LSOA01CD'].count().describe()

# Cities

## OAs

In [41]:
OA_2001 = gpd.read_file('preprocessed files/2001/OA_2001.shp')
OA_2011 = gpd.read_file('preprocessed files/2011/OA_2011.shp')
OA_2021 = gpd.read_file('preprocessed files/2021/OA_2021.shp')

In [42]:
# Creating city datasets
birmingham_2001= ethnicity_2001[ethnicity_2001['LAD01NM'].str.contains('Birmingham')]
birmingham_2001['year']= '2001'
birmingham_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
                        'LSOA01NM':'LSOANM', 'MSOA01NM':'MSOANM', 'LAD01NM':'LADNM'}, inplace=True)

birmingham_2001 = pd.merge(birmingham_2001,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
birmingham_2001['geometry'] = birmingham_2001['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(birmingham_2001['geometry'])
birmingham_2001['geometry'] = birmingham_2001['OACD'].map(OA_2001.set_index('OA01CD')['geometry']).fillna(birmingham_2001['geometry'])

cols = list(birmingham_2001.columns)
cols.insert(0, cols.pop(cols.index('year')))
birmingham_2001 = birmingham_2001[cols]
# birmingham_2001 = birmingham_2001.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
birmingham_2001 = birmingham_2001.set_geometry('geometry')

leicester_2001= ethnicity_2001[ethnicity_2001['LAD01NM'].str.contains('Leicester') &
                                ~ethnicity_2001['LAD01NM'].str.contains('North West Leicestershire')]
leicester_2001['year']= '2001'
leicester_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
                        'LSOA01NM':'LSOANM', 'MSOA01NM':'MSOANM', 'LAD01NM':'LADNM'}, inplace=True)

leicester_2001 = pd.merge(leicester_2001,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
leicester_2001['geometry'] = leicester_2001['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(leicester_2001['geometry'])
leicester_2001['geometry'] = leicester_2001['OACD'].map(OA_2001.set_index('OA01CD')['geometry']).fillna(leicester_2001['geometry'])

cols = list(leicester_2001.columns)
cols.insert(0, cols.pop(cols.index('year')))
leicester_2001 = leicester_2001[cols]
# leicester_2001 = leicester_2001.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
leicester_2001 = leicester_2001.set_geometry('geometry')

bradford_2001= ethnicity_2001[ethnicity_2001['LAD01NM'].str.contains('Bradford')]
bradford_2001['year']= '2001'
bradford_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
                        'LSOA01NM':'LSOANM', 'MSOA01NM':'MSOANM', 'LAD01NM':'LADNM'}, inplace=True)

bradford_2001 = pd.merge(bradford_2001,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
bradford_2001['geometry'] = bradford_2001['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(bradford_2001['geometry'])
bradford_2001['geometry'] = bradford_2001['OACD'].map(OA_2001.set_index('OA01CD')['geometry']).fillna(bradford_2001['geometry'])

cols = list(bradford_2001.columns)
cols.insert(0, cols.pop(cols.index('year')))
bradford_2001 = bradford_2001[cols]
# bradford_2001 = bradford_2001.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
bradford_2001 = bradford_2001.set_geometry('geometry')

blackburn_2001= ethnicity_2001[ethnicity_2001['LAD01NM'].str.contains('Blackburn')]
blackburn_2001['year']= '2001'
blackburn_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
                        'LSOA01NM':'LSOANM', 'MSOA01NM':'MSOANM', 'LAD01NM':'LADNM'}, inplace=True)

blackburn_2001 = pd.merge(blackburn_2001,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
blackburn_2001['geometry'] = blackburn_2001['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(blackburn_2001['geometry'])
blackburn_2001['geometry'] = blackburn_2001['OACD'].map(OA_2001.set_index('OA01CD')['geometry']).fillna(blackburn_2001['geometry'])

cols = list(blackburn_2001.columns)
cols.insert(0, cols.pop(cols.index('year')))
blackburn_2001 = blackburn_2001[cols]
# blackburn_2001 = blackburn_2001.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
blackburn_2001 = blackburn_2001.set_geometry('geometry')

oldham_2001= ethnicity_2001[ethnicity_2001['LAD01NM'].str.contains('Oldham')]
oldham_2001['year']= '2001'
oldham_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
                        'LSOA01NM':'LSOANM', 'MSOA01NM':'MSOANM', 'LAD01NM':'LADNM'}, inplace=True)

oldham_2001 = pd.merge(oldham_2001,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
oldham_2001['geometry'] = oldham_2001['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(oldham_2001['geometry'])
oldham_2001['geometry'] = oldham_2001['OACD'].map(OA_2001.set_index('OA01CD')['geometry']).fillna(oldham_2001['geometry'])

cols = list(oldham_2001.columns)
cols.insert(0, cols.pop(cols.index('year')))
oldham_2001 = oldham_2001[cols]
# oldham_2001 = oldham_2001.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
oldham_2001 = oldham_2001.set_geometry('geometry')

pendle_2001= ethnicity_2001[ethnicity_2001['LAD01NM'].str.contains('Pendle')]
pendle_2001['year']= '2001'
pendle_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
                        'LSOA01NM':'LSOANM', 'MSOA01NM':'MSOANM', 'LAD01NM':'LADNM'}, inplace=True)

pendle_2001 = pd.merge(pendle_2001,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
pendle_2001['geometry'] = pendle_2001['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(pendle_2001['geometry'])
pendle_2001['geometry'] = pendle_2001['OACD'].map(OA_2001.set_index('OA01CD')['geometry']).fillna(pendle_2001['geometry'])

cols = list(pendle_2001.columns)
cols.insert(0, cols.pop(cols.index('year')))
pendle_2001 = pendle_2001[cols]
# pendle_2001 = pendle_2001.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
pendle_2001 = pendle_2001.set_geometry('geometry')

london_2001= ethnicity_2001[ethnicity_2001.LAD01CD.str.extract('([a-zA-Z]+)([^a-zA-Z]+)', expand=True)[1].astype(int).between(9000001,9000034)]
london_2001['year']= '2001'
london_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
                        'LSOA01NM':'LSOANM', 'MSOA01NM':'MSOANM', 'LAD01NM':'LADNM'}, inplace=True)

london_2001 = pd.merge(london_2001,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
london_2001['geometry'] = london_2001['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(london_2001['geometry'])
london_2001['geometry'] = london_2001['OACD'].map(OA_2001.set_index('OA01CD')['geometry']).fillna(london_2001['geometry'])

cols = list(london_2001.columns)
cols.insert(0, cols.pop(cols.index('year')))
london_2001 = london_2001[cols]
# london_2001 = london_2001.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
london_2001 = london_2001.set_geometry('geometry')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  birmingham_2001['year']= '2001'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  birmingham_2001.rename(columns={'OA01CD':'OACD', 'LSOA01CD':'LSOACD', 'MSOA01CD':'MSOACD', 'LAD01CD':'LADCD',
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leicester_2001['year']= '2001'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the

In [43]:
# Additional Columns
ethnicity_list = ['asian', 'black', 'other', 'white']
col_order = ['year', 'OACD', 'LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM', 'LADNM', 'OA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    birmingham_2001[f'{ethnicity}_fraction'] = round(birmingham_2001[ethnicity]/birmingham_2001['total_pop'],3)
OA_simpson = []
for OA in birmingham_2001['OACD']:
    df = birmingham_2001[birmingham_2001['OACD']== OA]
    birmingham_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(birmingham_OA_simpson)

# Reordering the columns
birmingham_2001['OA_simpson']= OA_simpson
birmingham_2001 = birmingham_2001[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    leicester_2001[f'{ethnicity}_fraction'] = round(leicester_2001[ethnicity]/leicester_2001['total_pop'],3)
OA_simpson = []
for OA in leicester_2001['OACD']:
    df = leicester_2001[leicester_2001['OACD']== OA]
    leicester_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(leicester_OA_simpson)

# Reordering the columns
leicester_2001['OA_simpson']= OA_simpson
leicester_2001 = leicester_2001[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    bradford_2001[f'{ethnicity}_fraction'] = round(bradford_2001[ethnicity]/bradford_2001['total_pop'],3)
OA_simpson = []
for OA in bradford_2001['OACD']:
    df = bradford_2001[bradford_2001['OACD']== OA]
    bradford_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(bradford_OA_simpson)

# Reordering the columns
bradford_2001['OA_simpson']= OA_simpson
bradford_2001 = bradford_2001[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    blackburn_2001[f'{ethnicity}_fraction'] = round(blackburn_2001[ethnicity]/blackburn_2001['total_pop'],3)
OA_simpson = []
for OA in blackburn_2001['OACD']:
    df = blackburn_2001[blackburn_2001['OACD']== OA]
    blackburn_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(blackburn_OA_simpson)

# Reordering the columns
blackburn_2001['OA_simpson']= OA_simpson
blackburn_2001 = blackburn_2001[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    oldham_2001[f'{ethnicity}_fraction'] = round(oldham_2001[ethnicity]/oldham_2001['total_pop'],3)
OA_simpson = []
for OA in oldham_2001['OACD']:
    df = oldham_2001[oldham_2001['OACD']== OA]
    oldham_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(oldham_OA_simpson)

# Reordering the columns
oldham_2001['OA_simpson']= OA_simpson
oldham_2001 = oldham_2001[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    pendle_2001[f'{ethnicity}_fraction'] = round(pendle_2001[ethnicity]/pendle_2001['total_pop'],3)
OA_simpson = []
for OA in pendle_2001['OACD']:
    df = pendle_2001[pendle_2001['OACD']== OA]
    pendle_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(pendle_OA_simpson)

# Reordering the columns
pendle_2001['OA_simpson']= OA_simpson
pendle_2001 = pendle_2001[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    london_2001[f'{ethnicity}_fraction'] = round(london_2001[ethnicity]/london_2001['total_pop'],3)
OA_simpson = []
for OA in london_2001['OACD']:
    df = london_2001[london_2001['OACD']== OA]
    london_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(london_OA_simpson)

# Reordering the columns
london_2001['OA_simpson']= OA_simpson
london_2001 = london_2001[col_order]

In [44]:
birmingham_2001.to_csv('preprocessed files/2001/birmingham_2001.csv', index= False)
leicester_2001.to_csv('preprocessed files/2001/leicester_2001.csv', in1.to_csv('preprocessed files/2001/bradford_2001.csv', index= False)dex= False)
bradford_200
blackburn_2001.to_csv('preprocessed files/2001/blackburn_2001.csv', index= False)
oldham_2001.to_csv('preprocessed files/2001/oldham_2001.csv', index= False)
pendle_2001.to_csv('preprocessed files/2001/pendle_2001.csv', index= False)
london_2001.to_csv('preprocessed files/2001/london_2001.csv', index= False)

## LSOAs

In [45]:
LSOA_2001 = gpd.read_file('preprocessed files/2001/LSOA_2001.shp')
LSOA_2011 = gpd.read_file('preprocessed files/2011/LSOA_2011.shp')
LSOA_2021 = gpd.read_file('preprocessed files/2021/LSOA_2021.shp')


In [46]:
# Creatin city LSOA datasets
birmingham_LSOA_2001 = birmingham_2001.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
birmingham_LSOA_2001['geometry'] = birmingham_LSOA_2001['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(birmingham_LSOA_2001['geometry'])
birmingham_LSOA_2001['geometry'] = birmingham_LSOA_2001['LSOACD'].map(LSOA_2001.set_index('LSOACD')['geometry']).fillna(birmingham_LSOA_2001['geometry'])
birmingham_LSOA_2001= birmingham_LSOA_2001.set_geometry('geometry')

leicester_LSOA_2001 = leicester_2001.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
leicester_LSOA_2001['geometry'] = leicester_LSOA_2001['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(leicester_LSOA_2001['geometry'])
leicester_LSOA_2001['geometry'] = leicester_LSOA_2001['LSOACD'].map(LSOA_2001.set_index('LSOACD')['geometry']).fillna(leicester_LSOA_2001['geometry'])
leicester_LSOA_2001= leicester_LSOA_2001.set_geometry('geometry')

bradford_LSOA_2001 = bradford_2001.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
bradford_LSOA_2001['geometry'] = bradford_LSOA_2001['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(bradford_LSOA_2001['geometry'])
bradford_LSOA_2001['geometry'] = bradford_LSOA_2001['LSOACD'].map(LSOA_2001.set_index('LSOACD')['geometry']).fillna(bradford_LSOA_2001['geometry'])
bradford_LSOA_2001= bradford_LSOA_2001.set_geometry('geometry')

blackburn_LSOA_2001 = blackburn_2001.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
blackburn_LSOA_2001['geometry'] = blackburn_LSOA_2001['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(blackburn_LSOA_2001['geometry'])
blackburn_LSOA_2001['geometry'] = blackburn_LSOA_2001['LSOACD'].map(LSOA_2001.set_index('LSOACD')['geometry']).fillna(blackburn_LSOA_2001['geometry'])
blackburn_LSOA_2001= blackburn_LSOA_2001.set_geometry('geometry')

oldham_LSOA_2001 = oldham_2001.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
oldham_LSOA_2001['geometry'] = oldham_LSOA_2001['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(oldham_LSOA_2001['geometry'])
oldham_LSOA_2001['geometry'] = oldham_LSOA_2001['LSOACD'].map(LSOA_2001.set_index('LSOACD')['geometry']).fillna(oldham_LSOA_2001['geometry'])
oldham_LSOA_2001= oldham_LSOA_2001.set_geometry('geometry')

pendle_LSOA_2001 = pendle_2001.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
pendle_LSOA_2001['geometry'] = pendle_LSOA_2001['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(pendle_LSOA_2001['geometry'])
pendle_LSOA_2001['geometry'] = pendle_LSOA_2001['LSOACD'].map(LSOA_2001.set_index('LSOACD')['geometry']).fillna(pendle_LSOA_2001['geometry'])
pendle_LSOA_2001= pendle_LSOA_2001.set_geometry('geometry')

london_LSOA_2001 = london_2001.groupby(['LSOACD','LADNM','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
london_LSOA_2001['geometry'] = london_LSOA_2001['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(london_LSOA_2001['geometry'])
london_LSOA_2001['geometry'] = london_LSOA_2001['LSOACD'].map(LSOA_2001.set_index('LSOACD')['geometry']).fillna(london_LSOA_2001['geometry'])
london_LSOA_2001= london_LSOA_2001.set_geometry('geometry')


In [47]:
col_order = ['year', 'LSOACD', 'LSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']

# Creating LSOA_simpson and fraction column
birmingham_LSOA_2001['total_pop'] = (birmingham_LSOA_2001['white'] + birmingham_LSOA_2001['asian'] +
                                     birmingham_LSOA_2001['black'] + birmingham_LSOA_2001['other'])
for ethnicity in ethnicity_list:
    birmingham_LSOA_2001[f'{ethnicity}_fraction'] = round(birmingham_LSOA_2001[ethnicity]/birmingham_LSOA_2001['total_pop'],3)
LSOA_simpson = []
for LSOA in birmingham_LSOA_2001['LSOACD']:
    df = birmingham_LSOA_2001[birmingham_LSOA_2001['LSOACD']== LSOA]
    birmingham_LSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LSOA_simpson.append(birmingham_LSOA_simpson[0])

# Reordering the columns
birmingham_LSOA_2001['LSOA_simpson']= LSOA_simpson
birmingham_LSOA_2001 = birmingham_LSOA_2001[col_order]

# Creating LSOA_simpson and fraction column
leicester_LSOA_2001['total_pop'] = (leicester_LSOA_2001['white'] + leicester_LSOA_2001['asian'] +
                                    leicester_LSOA_2001['black'] + leicester_LSOA_2001['other'])
for ethnicity in ethnicity_list:
    leicester_LSOA_2001[f'{ethnicity}_fraction'] = round(leicester_LSOA_2001[ethnicity]/leicester_LSOA_2001['total_pop'],3)
LSOA_simpson = []
for LSOA in leicester_LSOA_2001['LSOACD']:
    df = leicester_LSOA_2001[leicester_LSOA_2001['LSOACD']== LSOA]
    leicester_LSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LSOA_simpson.append(leicester_LSOA_simpson[0])

# Reordering the columns
leicester_LSOA_2001['LSOA_simpson']= LSOA_simpson
leicester_LSOA_2001 = leicester_LSOA_2001[col_order]

# Creating LSOA_simpson and fraction column
bradford_LSOA_2001['total_pop'] = (bradford_LSOA_2001['white'] + bradford_LSOA_2001['asian'] +
                                   bradford_LSOA_2001['black'] + bradford_LSOA_2001['other'])
for ethnicity in ethnicity_list:
    bradford_LSOA_2001[f'{ethnicity}_fraction'] = round(bradford_LSOA_2001[ethnicity]/bradford_LSOA_2001['total_pop'],3)
LSOA_simpson = []
for LSOA in bradford_LSOA_2001['LSOACD']:
    df = bradford_LSOA_2001[bradford_LSOA_2001['LSOACD']== LSOA]
    bradford_LSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LSOA_simpson.append(bradford_LSOA_simpson[0])

# Reordering the columns
bradford_LSOA_2001['LSOA_simpson']= LSOA_simpson
bradford_LSOA_2001 = bradford_LSOA_2001[col_order]

# Creating LSOA_simpson and fraction column
blackburn_LSOA_2001['total_pop'] = (blackburn_LSOA_2001['white'] + blackburn_LSOA_2001['asian'] +
                                    blackburn_LSOA_2001['black'] + blackburn_LSOA_2001['other'])
for ethnicity in ethnicity_list:
    blackburn_LSOA_2001[f'{ethnicity}_fraction'] = round(blackburn_LSOA_2001[ethnicity]/blackburn_LSOA_2001['total_pop'],3)
LSOA_simpson = []
for LSOA in blackburn_LSOA_2001['LSOACD']:
    df = blackburn_LSOA_2001[blackburn_LSOA_2001['LSOACD']== LSOA]
    blackburn_LSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LSOA_simpson.append(blackburn_LSOA_simpson[0])

# Reordering the columns
blackburn_LSOA_2001['LSOA_simpson']= LSOA_simpson
blackburn_LSOA_2001 = blackburn_LSOA_2001[col_order]

# Creating LSOA_simpson and fraction column
oldham_LSOA_2001['total_pop'] = (oldham_LSOA_2001['white'] + oldham_LSOA_2001['asian'] +
                                 oldham_LSOA_2001['black'] + oldham_LSOA_2001['other'])
for ethnicity in ethnicity_list:
    oldham_LSOA_2001[f'{ethnicity}_fraction'] = round(oldham_LSOA_2001[ethnicity]/oldham_LSOA_2001['total_pop'],3)
LSOA_simpson = []
for LSOA in oldham_LSOA_2001['LSOACD']:
    df = oldham_LSOA_2001[oldham_LSOA_2001['LSOACD']== LSOA]
    oldham_LSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LSOA_simpson.append(oldham_LSOA_simpson[0])

# Reordering the columns
oldham_LSOA_2001['LSOA_simpson']= LSOA_simpson
oldham_LSOA_2001 = oldham_LSOA_2001[col_order]

# Creating LSOA_simpson and fraction column
pendle_LSOA_2001['total_pop'] = (pendle_LSOA_2001['white'] + pendle_LSOA_2001['asian'] +
                                 pendle_LSOA_2001['black'] + pendle_LSOA_2001['other'])
for ethnicity in ethnicity_list:
    pendle_LSOA_2001[f'{ethnicity}_fraction'] =  round(pendle_LSOA_2001[ethnicity]/pendle_LSOA_2001['total_pop'],3)
LSOA_simpson = []
for LSOA in pendle_LSOA_2001['LSOACD']:
    df = pendle_LSOA_2001[pendle_LSOA_2001['LSOACD']== LSOA]
    pendle_LSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LSOA_simpson.append(pendle_LSOA_simpson[0])

# Reordering the columns
pendle_LSOA_2001['LSOA_simpson']= LSOA_simpson
pendle_LSOA_2001 = pendle_LSOA_2001[col_order]

# Creating LSOA_simpson and fraction column
london_LSOA_2001['total_pop'] = (london_LSOA_2001['white'] + london_LSOA_2001['asian'] +
                                     london_LSOA_2001['black'] + london_LSOA_2001['other'])
for ethnicity in ethnicity_list:
    london_LSOA_2001[f'{ethnicity}_fraction'] = round(london_LSOA_2001[ethnicity]/london_LSOA_2001['total_pop'],3)
LSOA_simpson = []
for LSOA in london_LSOA_2001['LSOACD']:
    df = london_LSOA_2001[london_LSOA_2001['LSOACD']== LSOA]
    london_LSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LSOA_simpson.append(london_LSOA_simpson[0])

# Reordering the columns
london_LSOA_2001['LSOA_simpson']= LSOA_simpson
col_order = ['year', 'LSOACD','LADNM', 'LSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']
london_LSOA_2001 = london_LSOA_2001[col_order]

## MSOA

In [55]:
MSOA_2001 = gpd.read_file('preprocessed files/2001/MSOA_2001.shp')
MSOA_2011 = gpd.read_file('preprocessed files/2011/MSOA_2011.shp')
MSOA_2021 = gpd.read_file('preprocessed files/2021/MSOA_2021.shp')

In [56]:
# Creatin city MSOA datasets
birmingham_MSOA_2001 = birmingham_2001.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
birmingham_MSOA_2001['geometry'] = birmingham_MSOA_2001['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(birmingham_MSOA_2001['geometry'])
birmingham_MSOA_2001['geometry'] = birmingham_MSOA_2001['MSOACD'].map(MSOA_2001.set_index('MSOACD')['geometry']).fillna(birmingham_MSOA_2001['geometry'])
birmingham_MSOA_2001= birmingham_MSOA_2001.set_geometry('geometry')

leicester_MSOA_2001 = leicester_2001.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
leicester_MSOA_2001['geometry'] = leicester_MSOA_2001['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(leicester_MSOA_2001['geometry'])
leicester_MSOA_2001['geometry'] = leicester_MSOA_2001['MSOACD'].map(MSOA_2001.set_index('MSOACD')['geometry']).fillna(leicester_MSOA_2001['geometry'])
leicester_MSOA_2001= leicester_MSOA_2001.set_geometry('geometry')

bradford_MSOA_2001 = bradford_2001.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
bradford_MSOA_2001['geometry'] = bradford_MSOA_2001['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(bradford_MSOA_2001['geometry'])
bradford_MSOA_2001['geometry'] = bradford_MSOA_2001['MSOACD'].map(MSOA_2001.set_index('MSOACD')['geometry']).fillna(bradford_MSOA_2001['geometry'])
bradford_MSOA_2001= bradford_MSOA_2001.set_geometry('geometry')

blackburn_MSOA_2001 = blackburn_2001.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
blackburn_MSOA_2001['geometry'] = blackburn_MSOA_2001['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(blackburn_MSOA_2001['geometry'])
blackburn_MSOA_2001['geometry'] = blackburn_MSOA_2001['MSOACD'].map(MSOA_2001.set_index('MSOACD')['geometry']).fillna(blackburn_MSOA_2001['geometry'])
blackburn_MSOA_2001= blackburn_MSOA_2001.set_geometry('geometry')

oldham_MSOA_2001 = oldham_2001.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
oldham_MSOA_2001['geometry'] = oldham_MSOA_2001['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(oldham_MSOA_2001['geometry'])
oldham_MSOA_2001['geometry'] = oldham_MSOA_2001['MSOACD'].map(MSOA_2001.set_index('MSOACD')['geometry']).fillna(oldham_MSOA_2001['geometry'])
oldham_MSOA_2001= oldham_MSOA_2001.set_geometry('geometry')

pendle_MSOA_2001 = pendle_2001.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
pendle_MSOA_2001['geometry'] = pendle_MSOA_2001['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(pendle_MSOA_2001['geometry'])
pendle_MSOA_2001['geometry'] = pendle_MSOA_2001['MSOACD'].map(MSOA_2001.set_index('MSOACD')['geometry']).fillna(pendle_MSOA_2001['geometry'])
pendle_MSOA_2001= pendle_MSOA_2001.set_geometry('geometry')

london_MSOA_2001 = london_2001.groupby(['MSOACD','LADNM','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
london_MSOA_2001['geometry'] = london_MSOA_2001['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(london_MSOA_2001['geometry'])
london_MSOA_2001['geometry'] = london_MSOA_2001['MSOACD'].map(MSOA_2001.set_index('MSOACD')['geometry']).fillna(london_MSOA_2001['geometry'])
london_MSOA_2001= london_MSOA_2001.set_geometry('geometry')


In [57]:
col_order = ['year', 'MSOACD', 'MSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']

# Creating LMSOA_simpson and fraction column
birmingham_MSOA_2001['total_pop'] = (birmingham_MSOA_2001['white'] + birmingham_MSOA_2001['asian'] +
                                     birmingham_MSOA_2001['black'] + birmingham_MSOA_2001['other'])
for ethnicity in ethnicity_list:
    birmingham_MSOA_2001[f'{ethnicity}_fraction'] = round(birmingham_MSOA_2001[ethnicity]/birmingham_MSOA_2001['total_pop'],3)
MSOA_simpson = []
for MSOA in birmingham_MSOA_2001['MSOACD']:
    df = birmingham_MSOA_2001[birmingham_MSOA_2001['MSOACD']== MSOA]
    birmingham_MSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    MSOA_simpson.append(birmingham_MSOA_simpson[0])

# Reordering the columns
birmingham_MSOA_2001['MSOA_simpson']= MSOA_simpson
birmingham_MSOA_2001 = birmingham_MSOA_2001[col_order]

# Creating LMSOA_simpson and fraction column
leicester_MSOA_2001['total_pop'] = (leicester_MSOA_2001['white'] + leicester_MSOA_2001['asian'] +
                                    leicester_MSOA_2001['black'] + leicester_MSOA_2001['other'])
for ethnicity in ethnicity_list:
    leicester_MSOA_2001[f'{ethnicity}_fraction'] = round(leicester_MSOA_2001[ethnicity]/leicester_MSOA_2001['total_pop'],3)
MSOA_simpson = []
for MSOA in leicester_MSOA_2001['MSOACD']:
    df = leicester_MSOA_2001[leicester_MSOA_2001['MSOACD']== MSOA]
    leicester_MSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    MSOA_simpson.append(leicester_MSOA_simpson[0])

# Reordering the columns
leicester_MSOA_2001['MSOA_simpson']= MSOA_simpson
leicester_MSOA_2001 = leicester_MSOA_2001[col_order]

# Creating LMSOA_simpson and fraction column
bradford_MSOA_2001['total_pop'] = (bradford_MSOA_2001['white'] + bradford_MSOA_2001['asian'] +
                                   bradford_MSOA_2001['black'] + bradford_MSOA_2001['other'])
for ethnicity in ethnicity_list:
    bradford_MSOA_2001[f'{ethnicity}_fraction'] = round(bradford_MSOA_2001[ethnicity]/bradford_MSOA_2001['total_pop'],3)
MSOA_simpson = []
for MSOA in bradford_MSOA_2001['MSOACD']:
    df = bradford_MSOA_2001[bradford_MSOA_2001['MSOACD']== MSOA]
    bradford_MSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    MSOA_simpson.append(bradford_MSOA_simpson[0])

# Reordering the columns
bradford_MSOA_2001['MSOA_simpson']= MSOA_simpson
bradford_MSOA_2001 = bradford_MSOA_2001[col_order]

# Creating LMSOA_simpson and fraction column
blackburn_MSOA_2001['total_pop'] = (blackburn_MSOA_2001['white'] + blackburn_MSOA_2001['asian'] +
                                    blackburn_MSOA_2001['black'] + blackburn_MSOA_2001['other'])
for ethnicity in ethnicity_list:
    blackburn_MSOA_2001[f'{ethnicity}_fraction'] = round(blackburn_MSOA_2001[ethnicity]/blackburn_MSOA_2001['total_pop'],3)
MSOA_simpson = []
for MSOA in blackburn_MSOA_2001['MSOACD']:
    df = blackburn_MSOA_2001[blackburn_MSOA_2001['MSOACD']== MSOA]
    blackburn_MSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    MSOA_simpson.append(blackburn_MSOA_simpson[0])

# Reordering the columns
blackburn_MSOA_2001['MSOA_simpson']= MSOA_simpson
blackburn_MSOA_2001 = blackburn_MSOA_2001[col_order]

# Creating LMSOA_simpson and fraction column
oldham_MSOA_2001['total_pop'] = (oldham_MSOA_2001['white'] + oldham_MSOA_2001['asian'] +
                                 oldham_MSOA_2001['black'] + oldham_MSOA_2001['other'])
for ethnicity in ethnicity_list:
    oldham_MSOA_2001[f'{ethnicity}_fraction'] = round(oldham_MSOA_2001[ethnicity]/oldham_MSOA_2001['total_pop'],3)
MSOA_simpson = []
for MSOA in oldham_MSOA_2001['MSOACD']:
    df = oldham_MSOA_2001[oldham_MSOA_2001['MSOACD']== MSOA]
    oldham_MSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    MSOA_simpson.append(oldham_MSOA_simpson[0])

# Reordering the columns
oldham_MSOA_2001['MSOA_simpson']= MSOA_simpson
oldham_MSOA_2001 = oldham_MSOA_2001[col_order]

# Creating LMSOA_simpson and fraction column
pendle_MSOA_2001['total_pop'] = (pendle_MSOA_2001['white'] + pendle_MSOA_2001['asian'] +
                                 pendle_MSOA_2001['black'] + pendle_MSOA_2001['other'])
for ethnicity in ethnicity_list:
    pendle_MSOA_2001[f'{ethnicity}_fraction'] = round(pendle_MSOA_2001[ethnicity]/pendle_MSOA_2001['total_pop'],3)
MSOA_simpson = []
for MSOA in pendle_MSOA_2001['MSOACD']:
    df = pendle_MSOA_2001[pendle_MSOA_2001['MSOACD']== MSOA]
    pendle_MSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    MSOA_simpson.append(pendle_MSOA_simpson[0])

# Reordering the columns
pendle_MSOA_2001['MSOA_simpson']= MSOA_simpson
pendle_MSOA_2001 = pendle_MSOA_2001[col_order]

# Creating LMSOA_simpson and fraction column
london_MSOA_2001['total_pop'] = (london_MSOA_2001['white'] + london_MSOA_2001['asian'] +
                                 london_MSOA_2001['black'] + london_MSOA_2001['other'])
for ethnicity in ethnicity_list:
    london_MSOA_2001[f'{ethnicity}_fraction'] = round(london_MSOA_2001[ethnicity]/london_MSOA_2001['total_pop'],3)
MSOA_simpson = []
for MSOA in london_MSOA_2001['MSOACD']:
    df = london_MSOA_2001[london_MSOA_2001['MSOACD']== MSOA]
    london_MSOA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    MSOA_simpson.append(london_MSOA_simpson[0])

# Reordering the columns
london_MSOA_2001['MSOA_simpson']= MSOA_simpson
col_order = ['year', 'MSOACD','LADNM', 'MSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']
london_MSOA_2001 = london_MSOA_2001[col_order]

## LAD

In [51]:
LAD_2001 = gpd.read_file('preprocessed files/2001/LAD_2001.shp')
LAD_2011 = gpd.read_file('preprocessed files/2011/LAD_2011.shp')
LAD_2021 = gpd.read_file('preprocessed files/2021/LAD_2021.shp')

In [52]:
london_LAD_2001 = london_2001.groupby(['LADCD','LADNM','year'])[['white','asian','black','other']].sum().reset_index().merge(LAD_2021[['LADCD','geometry']], on='LADCD', how= 'left')
london_LAD_2001['geometry'] = london_LAD_2001['LADCD'].map(LAD_2011.set_index('LADCD')['geometry']).fillna(london_LAD_2001['geometry'])
london_LAD_2001['geometry'] = london_LAD_2001['LADCD'].map(LAD_2021.set_index('LADCD')['geometry']).fillna(london_LAD_2001['geometry'])
london_LAD_2001= london_LAD_2001.set_geometry('geometry')

In [53]:
# Creating LAD_simpson and fraction column
london_LAD_2001['total_pop'] = (london_LAD_2001['white'] + london_LAD_2001['asian'] +
                                london_LAD_2001['black'] + london_LAD_2001['other'])
# Creating LAD_simpson and fraction column
for ethnicity in ethnicity_list:
    london_LAD_2001[f'{ethnicity}_fraction'] = round(london_LAD_2001[ethnicity]/london_LAD_2001['total_pop'],3)
LAD_simpson = []
for LAD in london_LAD_2001['LADCD']:
    df = london_LAD_2001[london_LAD_2001['LADCD']== LAD]
    london_LAD_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))
    LAD_simpson.append(london_LAD_simpson[0])

# Reordering the columns
london_LAD_2001['LAD_simpson']= LAD_simpson
col_order = ['year', 'LADCD','LADNM', 'LAD_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']
london_LAD_2001 = london_LAD_2001[col_order]


# City Borders

## Birmingham

In [54]:
# # Creating shared border dictionary at OA level- Birmingham
# birmingham_OA_borders_2001 = {}

# for idx1, row1 in birmingham_2001.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in birmingham_2001.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     birmingham_OA_borders_2001[idx1] = borders
    
# with open('preprocessed files/2001/birmingham_OA_borders_2001.pkl', 'wb') as f:
#     pickle.dump(birmingham_OA_borders_2001, f)

In [55]:
# Creating shared border dictionary at LSOA level- Birmingham
birmingham_LSOA_borders_2001 = {}

for idx1, row1 in birmingham_LSOA_2001.set_index('LSOACD').iterrows():
    borders = []
    for idx2, row2 in birmingham_LSOA_2001.set_index('LSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    birmingham_LSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/birmingham_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(birmingham_LSOA_borders_2001, f)

In [56]:
# Creating shared border dictionary at LSOA_OA level- Birmingham
birmingham_LSOA_OA_borders_2001 = {}
for lsoa in birmingham_2001['LSOACD'].unique():
    lsoa_df = birmingham_2001[birmingham_2001['LSOACD'] == lsoa]
    oa_borders = {}
    for idx1, row1 in lsoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in lsoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        oa_borders[idx1] = borders
    birmingham_LSOA_OA_borders_2001[lsoa] = oa_borders
with open('preprocessed files/2001/birmingham_LSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(birmingham_LSOA_OA_borders_2001, f)

In [57]:
# Adding moran index to the city LSOA dataset- Birmingham
birmingham_LSOA_OA_moran = {}
for lsoa in birmingham_2001['LSOACD'].unique():
    lsoa_df = birmingham_2001[birmingham_2001['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),birmingham_LSOA_OA_borders_2001[lsoa])
    birmingham_LSOA_OA_moran[lsoa] = {'OA_moran':OA_moran}

flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'], 
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'], 
              } for key, value in birmingham_LSOA_OA_moran.items()]

birmingham_LSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

birmingham_LSOA_2001 = birmingham_LSOA_2001.merge(birmingham_LSOA_OA_moran_2001[['LSOACD','white_moran','asian_moran']], 
                                                  on= 'LSOACD', how= 'left')
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

birmingham_LSOA_2001=birmingham_LSOA_2001[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)


In [58]:
# Creating shared border dictionary at MSOA_OA level- Birmingham
birmingham_MSOA_OA_borders_2001 = {}
for msoa in birmingham_2001['MSOACD'].unique():
    msoa_df = birmingham_2001[birmingham_2001['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    birmingham_MSOA_OA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/birmingham_MSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(birmingham_MSOA_OA_borders_2001, f)

In [59]:
# Creating shared border dictionary at MSOA_LSOA level- Birmingham
birmingham_MSOA_LSOA = pd.merge(birmingham_LSOA_2001,birmingham_2001.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(), on = 'LSOACD', how = 'left')
birmingham_MSOA_LSOA_borders_2001 = {}
for msoa in birmingham_MSOA_LSOA['MSOACD'].unique():
    msoa_df = birmingham_MSOA_LSOA[birmingham_MSOA_LSOA['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('LSOACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('LSOACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    birmingham_MSOA_LSOA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/birmingham_MSOA_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(birmingham_MSOA_LSOA_borders_2001, f)

In [60]:

# MSOA_OA based moran- Birmingham
birmingham_MSOA_OA_moran = {}
for msoa in birmingham_2001['MSOACD'].unique():
    msoa_df = birmingham_2001[birmingham_2001['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),birmingham_MSOA_OA_borders_2001[msoa])
    birmingham_MSOA_OA_moran[msoa] = {'OA_moran':OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'], 
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'], 
              } for key, value in birmingham_MSOA_OA_moran.items()]

birmingham_MSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

birmingham_MSOA_2001 = birmingham_MSOA_2001.merge(birmingham_MSOA_OA_moran_2001[['MSOACD','white_moran_OA','asian_moran_OA']], 
                                                  on= 'MSOACD', how= 'left')
# MSOA_LSOA based moran- Birmingham
birmingham_MSOA_LSOA_moran = {}
for msoa in birmingham_2001['MSOACD'].unique():
    msoa_df = birmingham_2001[birmingham_2001['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum().reset_index().set_index('LSOACD')
    LSOA_moran = moran(msoa_df,birmingham_MSOA_LSOA_borders_2001[msoa])
    birmingham_MSOA_LSOA_moran[msoa] = {'LSOA_moran':LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'], 
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'], 
              } for key, value in birmingham_MSOA_LSOA_moran.items()]

birmingham_MSOA_LSOA_moran_2001 = pd.DataFrame(flat_dict)

birmingham_MSOA_2001 = birmingham_MSOA_2001.merge(birmingham_MSOA_LSOA_moran_2001[['MSOACD','white_moran_LSOA','asian_moran_LSOA']], 
                                                  on= 'MSOACD', how= 'left')
# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

birmingham_MSOA_2001=birmingham_MSOA_2001[col_order]


## Leicester

In [61]:
# # Creating shared border dictionary at OA level- Leicester
# leicester_OA_borders_2001 = {}

# for idx1, row1 in leicester_2001.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in leicester_2001.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     leicester_OA_borders_2001[idx1] = borders
    
# with open('preprocessed files/2001/leicester_OA_borders_2001.pkl', 'wb') as f:
#     pickle.dump(leicester_OA_borders_2001, f)

In [62]:
# Creating shared border dictionary at LSOA level- Leicester
leicester_LSOA_borders_2001 = {}

for idx1, row1 in leicester_LSOA_2001.set_index('LSOACD').iterrows():
    borders = []
    for idx2, row2 in leicester_LSOA_2001.set_index('LSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    leicester_LSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/leicester_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(leicester_LSOA_borders_2001, f)

In [63]:
# Creating shared border dictionary at LSOA_OA level- Leicester
leicester_LSOA_OA_borders_2001 = {}
for lsoa in leicester_2001['LSOACD'].unique():
    lsoa_df = leicester_2001[leicester_2001['LSOACD'] == lsoa]
    oa_borders = {}
    for idx1, row1 in lsoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in lsoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        oa_borders[idx1] = borders
    leicester_LSOA_OA_borders_2001[lsoa] = oa_borders
with open('preprocessed files/2001/leicester_LSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(leicester_LSOA_OA_borders_2001, f)

In [64]:
# Adding moran index to the city LSOA dataset- Leicester
leicester_LSOA_OA_moran = {}
for lsoa in leicester_2001['LSOACD'].unique():
    lsoa_df = leicester_2001[leicester_2001['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),leicester_LSOA_OA_borders_2001[lsoa])
    leicester_LSOA_OA_moran[lsoa] = {'OA_moran':OA_moran}

flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'], 
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'], 
              } for key, value in leicester_LSOA_OA_moran.items()]

leicester_LSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

leicester_LSOA_2001 = leicester_LSOA_2001.merge(leicester_LSOA_OA_moran_2001[['LSOACD','white_moran','asian_moran']], 
                                                  on= 'LSOACD', how= 'left')
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

leicester_LSOA_2001=leicester_LSOA_2001[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)


In [65]:
# Creating shared border dictionary at MSOA_OA level- Leicester
leicester_MSOA_OA_borders_2001 = {}
for msoa in leicester_2001['MSOACD'].unique():
    msoa_df = leicester_2001[leicester_2001['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    leicester_MSOA_OA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/leicester_MSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(leicester_MSOA_OA_borders_2001, f)

In [66]:
# Creating shared border dictionary at MSOA_LSOA level- Leicester
leicester_MSOA_LSOA = pd.merge(leicester_LSOA_2001,leicester_2001.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(), on = 'LSOACD', how = 'left')
leicester_MSOA_LSOA_borders_2001 = {}
for msoa in leicester_MSOA_LSOA['MSOACD'].unique():
    msoa_df = leicester_MSOA_LSOA[leicester_MSOA_LSOA['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('LSOACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('LSOACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    leicester_MSOA_LSOA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/leicester_MSOA_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(leicester_MSOA_LSOA_borders_2001, f)

In [67]:
# MSOA_OA based moran- Leicester
leicester_MSOA_OA_moran = {}
for msoa in leicester_2001['MSOACD'].unique():
    msoa_df = leicester_2001[leicester_2001['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),leicester_MSOA_OA_borders_2001[msoa])
    leicester_MSOA_OA_moran[msoa] = {'OA_moran':OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'], 
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'], 
              } for key, value in leicester_MSOA_OA_moran.items()]

leicester_MSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

leicester_MSOA_2001 = leicester_MSOA_2001.merge(leicester_MSOA_OA_moran_2001[['MSOACD','white_moran_OA','asian_moran_OA']], 
                                                  on= 'MSOACD', how= 'left')
# MSOA_LSOA based moran- Leicester
leicester_MSOA_LSOA_moran = {}
for msoa in leicester_2001['MSOACD'].unique():
    msoa_df = leicester_2001[leicester_2001['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum().reset_index().set_index('LSOACD')
    LSOA_moran = moran(msoa_df,leicester_MSOA_LSOA_borders_2001[msoa])
    leicester_MSOA_LSOA_moran[msoa] = {'LSOA_moran':LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'], 
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'], 
              } for key, value in leicester_MSOA_LSOA_moran.items()]

leicester_MSOA_LSOA_moran_2001 = pd.DataFrame(flat_dict)

leicester_MSOA_2001 = leicester_MSOA_2001.merge(leicester_MSOA_LSOA_moran_2001[['MSOACD','white_moran_LSOA','asian_moran_LSOA']], 
                                                  on= 'MSOACD', how= 'left')
# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

leicester_MSOA_2001=leicester_MSOA_2001[col_order]


## Bradford

In [68]:
# # Creating shared border dictionary at OA level- Bradford
# bradford_OA_borders_2001 = {}

# for idx1, row1 in bradford_2001.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in bradford_2001.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     bradford_OA_borders_2001[idx1] = borders
    
# with open('preprocessed files/2001/bradford_OA_borders_2001.pkl', 'wb') as f:
#     pickle.dump(bradford_OA_borders_2001, f)

In [69]:
# Creating shared border dictionary at LSOA level- Bradford
bradford_LSOA_borders_2001 = {}

for idx1, row1 in bradford_LSOA_2001.set_index('LSOACD').iterrows():
    borders = []
    for idx2, row2 in bradford_LSOA_2001.set_index('LSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    bradford_LSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/bradford_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(bradford_LSOA_borders_2001, f)

In [70]:
# Creating shared border dictionary at LSOA_OA level- Bradford
bradford_LSOA_OA_borders_2001 = {}
for lsoa in bradford_2001['LSOACD'].unique():
    lsoa_df = bradford_2001[bradford_2001['LSOACD'] == lsoa]
    oa_borders = {}
    for idx1, row1 in lsoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in lsoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        oa_borders[idx1] = borders
    bradford_LSOA_OA_borders_2001[lsoa] = oa_borders
with open('preprocessed files/2001/bradford_LSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(bradford_LSOA_OA_borders_2001, f)

In [71]:
# Adding moran index to the city LSOA dataset- Bradford
bradford_LSOA_OA_moran = {}
for lsoa in bradford_2001['LSOACD'].unique():
    lsoa_df = bradford_2001[bradford_2001['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),bradford_LSOA_OA_borders_2001[lsoa])
    bradford_LSOA_OA_moran[lsoa] = {'OA_moran':OA_moran}

flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'], 
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'], 
              } for key, value in bradford_LSOA_OA_moran.items()]

bradford_LSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

bradford_LSOA_2001 = bradford_LSOA_2001.merge(bradford_LSOA_OA_moran_2001[['LSOACD','white_moran','asian_moran']], 
                                                  on= 'LSOACD', how= 'left')
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

bradford_LSOA_2001=bradford_LSOA_2001[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator

In [72]:
# Creating shared border dictionary at MSOA_OA level- Bradford
bradford_MSOA_OA_borders_2001 = {}
for msoa in bradford_2001['MSOACD'].unique():
    msoa_df = bradford_2001[bradford_2001['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    bradford_MSOA_OA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/bradford_MSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(bradford_MSOA_OA_borders_2001, f)

In [73]:
# Creating shared border dictionary at MSOA_LSOA level- Bradford
bradford_MSOA_LSOA = pd.merge(bradford_LSOA_2001,bradford_2001.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(), on = 'LSOACD', how = 'left')
bradford_MSOA_LSOA_borders_2001 = {}
for msoa in bradford_MSOA_LSOA['MSOACD'].unique():
    msoa_df = bradford_MSOA_LSOA[bradford_MSOA_LSOA['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('LSOACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('LSOACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    bradford_MSOA_LSOA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/bradford_MSOA_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(bradford_MSOA_LSOA_borders_2001, f)

In [74]:
# MSOA_OA based moran- Bradford
bradford_MSOA_OA_moran = {}
for msoa in bradford_2001['MSOACD'].unique():
    msoa_df = bradford_2001[bradford_2001['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),bradford_MSOA_OA_borders_2001[msoa])
    bradford_MSOA_OA_moran[msoa] = {'OA_moran':OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'], 
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'], 
              } for key, value in bradford_MSOA_OA_moran.items()]

bradford_MSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

bradford_MSOA_2001 = bradford_MSOA_2001.merge(bradford_MSOA_OA_moran_2001[['MSOACD','white_moran_OA','asian_moran_OA']], 
                                                  on= 'MSOACD', how= 'left')
# MSOA_LSOA based moran- Bradford
bradford_MSOA_LSOA_moran = {}
for msoa in bradford_2001['MSOACD'].unique():
    msoa_df = bradford_2001[bradford_2001['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum().reset_index().set_index('LSOACD')
    LSOA_moran = moran(msoa_df,bradford_MSOA_LSOA_borders_2001[msoa])
    bradford_MSOA_LSOA_moran[msoa] = {'LSOA_moran':LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'], 
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'], 
              } for key, value in bradford_MSOA_LSOA_moran.items()]

bradford_MSOA_LSOA_moran_2001 = pd.DataFrame(flat_dict)

bradford_MSOA_2001 = bradford_MSOA_2001.merge(bradford_MSOA_LSOA_moran_2001[['MSOACD','white_moran_LSOA','asian_moran_LSOA']], 
                                                  on= 'MSOACD', how= 'left')
# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

bradford_MSOA_2001=bradford_MSOA_2001[col_order]


## Blackburn

In [75]:
# # Creating shared border dictionary at OA level- Blackburn
# blackburn_OA_borders_2001 = {}

# for idx1, row1 in blackburn_2001.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in blackburn_2001.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     blackburn_OA_borders_2001[idx1] = borders
    
# with open('preprocessed files/2001/blackburn_OA_borders_2001.pkl', 'wb') as f:
#     pickle.dump(blackburn_OA_borders_2001, f)

In [76]:
# Creating shared border dictionary at LSOA level- Blackburn
blackburn_LSOA_borders_2001 = {}

for idx1, row1 in blackburn_LSOA_2001.set_index('LSOACD').iterrows():
    borders = []
    for idx2, row2 in blackburn_LSOA_2001.set_index('LSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    blackburn_LSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/blackburn_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(blackburn_LSOA_borders_2001, f)

In [77]:
# Creating shared border dictionary at LSOA_OA level- Blackburn
blackburn_LSOA_OA_borders_2001 = {}
for lsoa in blackburn_2001['LSOACD'].unique():
    lsoa_df = blackburn_2001[blackburn_2001['LSOACD'] == lsoa]
    oa_borders = {}
    for idx1, row1 in lsoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in lsoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        oa_borders[idx1] = borders
    blackburn_LSOA_OA_borders_2001[lsoa] = oa_borders
with open('preprocessed files/2001/blackburn_LSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(blackburn_LSOA_OA_borders_2001, f)

In [78]:
# Adding moran index to the city LSOA dataset- Blackburn
blackburn_LSOA_OA_moran = {}
for lsoa in blackburn_2001['LSOACD'].unique():
    lsoa_df = blackburn_2001[blackburn_2001['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),blackburn_LSOA_OA_borders_2001[lsoa])
    blackburn_LSOA_OA_moran[lsoa] = {'OA_moran':OA_moran}

flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'], 
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'], 
              } for key, value in blackburn_LSOA_OA_moran.items()]

blackburn_LSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

blackburn_LSOA_2001 = blackburn_LSOA_2001.merge(blackburn_LSOA_OA_moran_2001[['LSOACD','white_moran','asian_moran']], 
                                                  on= 'LSOACD', how= 'left')
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

blackburn_LSOA_2001=blackburn_LSOA_2001[col_order]


  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator

In [79]:
# Creating shared border dictionary at MSOA_OA level- Blackburn
blackburn_MSOA_OA_borders_2001 = {}
for msoa in blackburn_2001['MSOACD'].unique():
    msoa_df = blackburn_2001[blackburn_2001['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    blackburn_MSOA_OA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/blackburn_MSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(blackburn_MSOA_OA_borders_2001, f)

In [80]:
# Creating shared border dictionary at MSOA_LSOA level- Blackburn
blackburn_MSOA_LSOA = pd.merge(blackburn_LSOA_2001,blackburn_2001.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(), on = 'LSOACD', how = 'left')
blackburn_MSOA_LSOA_borders_2001 = {}
for msoa in blackburn_MSOA_LSOA['MSOACD'].unique():
    msoa_df = blackburn_MSOA_LSOA[blackburn_MSOA_LSOA['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('LSOACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('LSOACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    blackburn_MSOA_LSOA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/blackburn_MSOA_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(blackburn_MSOA_LSOA_borders_2001, f)

In [81]:
# MSOA_OA based moran- Blackburn
blackburn_MSOA_OA_moran = {}
for msoa in blackburn_2001['MSOACD'].unique():
    msoa_df = blackburn_2001[blackburn_2001['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),blackburn_MSOA_OA_borders_2001[msoa])
    blackburn_MSOA_OA_moran[msoa] = {'OA_moran':OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'], 
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'], 
              } for key, value in blackburn_MSOA_OA_moran.items()]

blackburn_MSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

blackburn_MSOA_2001 = blackburn_MSOA_2001.merge(blackburn_MSOA_OA_moran_2001[['MSOACD','white_moran_OA','asian_moran_OA']], 
                                                  on= 'MSOACD', how= 'left')
# MSOA_LSOA based moran- Blackburn
blackburn_MSOA_LSOA_moran = {}
for msoa in blackburn_2001['MSOACD'].unique():
    msoa_df = blackburn_2001[blackburn_2001['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum().reset_index().set_index('LSOACD')
    LSOA_moran = moran(msoa_df,blackburn_MSOA_LSOA_borders_2001[msoa])
    blackburn_MSOA_LSOA_moran[msoa] = {'LSOA_moran':LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'], 
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'], 
              } for key, value in blackburn_MSOA_LSOA_moran.items()]

blackburn_MSOA_LSOA_moran_2001 = pd.DataFrame(flat_dict)

blackburn_MSOA_2001 = blackburn_MSOA_2001.merge(blackburn_MSOA_LSOA_moran_2001[['MSOACD','white_moran_LSOA','asian_moran_LSOA']], 
                                                  on= 'MSOACD', how= 'left')
# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

blackburn_MSOA_2001=blackburn_MSOA_2001[col_order]


  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)


## Oldham

In [82]:
# # Creating shared border dictionary at OA level- Oldham
# oldham_OA_borders_2001 = {}

# for idx1, row1 in oldham_2001.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in oldham_2001.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     oldham_OA_borders_2001[idx1] = borders
    
# with open('preprocessed files/2001/oldham_OA_borders_2001.pkl', 'wb') as f:
#     pickle.dump(oldham_OA_borders_2001, f)

In [83]:
# Creating shared border dictionary at LSOA level- Oldham
oldham_LSOA_borders_2001 = {}

for idx1, row1 in oldham_LSOA_2001.set_index('LSOACD').iterrows():
    borders = []
    for idx2, row2 in oldham_LSOA_2001.set_index('LSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    oldham_LSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/oldham_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(oldham_LSOA_borders_2001, f)

In [84]:
# Creating shared border dictionary at LSOA_OA level- Oldham
oldham_LSOA_OA_borders_2001 = {}
for lsoa in oldham_2001['LSOACD'].unique():
    lsoa_df = oldham_2001[oldham_2001['LSOACD'] == lsoa]
    oa_borders = {}
    for idx1, row1 in lsoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in lsoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        oa_borders[idx1] = borders
    oldham_LSOA_OA_borders_2001[lsoa] = oa_borders
with open('preprocessed files/2001/oldham_LSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(oldham_LSOA_OA_borders_2001, f)

In [85]:
# Adding moran index to the city LSOA dataset- Oldham
oldham_LSOA_OA_moran = {}
for lsoa in oldham_2001['LSOACD'].unique():
    lsoa_df = oldham_2001[oldham_2001['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),oldham_LSOA_OA_borders_2001[lsoa])
    oldham_LSOA_OA_moran[lsoa] = {'OA_moran':OA_moran}

flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'], 
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'], 
              } for key, value in oldham_LSOA_OA_moran.items()]

oldham_LSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

oldham_LSOA_2001 = oldham_LSOA_2001.merge(oldham_LSOA_OA_moran_2001[['LSOACD','white_moran','asian_moran']], 
                                                  on= 'LSOACD', how= 'left')
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

oldham_LSOA_2001=oldham_LSOA_2001[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator

In [86]:
# Creating shared border dictionary at MSOA_OA level- Oldham
oldham_MSOA_OA_borders_2001 = {}
for msoa in oldham_2001['MSOACD'].unique():
    msoa_df = oldham_2001[oldham_2001['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    oldham_MSOA_OA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/oldham_MSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(oldham_MSOA_OA_borders_2001, f)

In [87]:
# Creating shared border dictionary at MSOA_LSOA level- Oldham
oldham_MSOA_LSOA = pd.merge(oldham_LSOA_2001,oldham_2001.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(), on = 'LSOACD', how = 'left')
oldham_MSOA_LSOA_borders_2001 = {}
for msoa in oldham_MSOA_LSOA['MSOACD'].unique():
    msoa_df = oldham_MSOA_LSOA[oldham_MSOA_LSOA['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('LSOACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('LSOACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    oldham_MSOA_LSOA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/oldham_MSOA_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(oldham_MSOA_LSOA_borders_2001, f)

In [88]:
# MSOA_OA based moran- Oldham
oldham_MSOA_OA_moran = {}
for msoa in oldham_2001['MSOACD'].unique():
    msoa_df = oldham_2001[oldham_2001['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),oldham_MSOA_OA_borders_2001[msoa])
    oldham_MSOA_OA_moran[msoa] = {'OA_moran':OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'], 
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'], 
              } for key, value in oldham_MSOA_OA_moran.items()]

oldham_MSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

oldham_MSOA_2001 = oldham_MSOA_2001.merge(oldham_MSOA_OA_moran_2001[['MSOACD','white_moran_OA','asian_moran_OA']], 
                                                  on= 'MSOACD', how= 'left')

# MSOA_LSOA based moran- Oldham
oldham_MSOA_LSOA_moran = {}
for msoa in oldham_2001['MSOACD'].unique():
    msoa_df = oldham_2001[oldham_2001['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum().reset_index().set_index('LSOACD')
    LSOA_moran = moran(msoa_df,oldham_MSOA_LSOA_borders_2001[msoa])
    oldham_MSOA_LSOA_moran[msoa] = {'LSOA_moran':LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'], 
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'], 
              } for key, value in oldham_MSOA_LSOA_moran.items()]

oldham_MSOA_LSOA_moran_2001 = pd.DataFrame(flat_dict)

oldham_MSOA_2001 = oldham_MSOA_2001.merge(oldham_MSOA_LSOA_moran_2001[['MSOACD','white_moran_LSOA','asian_moran_LSOA']], 
                                                  on= 'MSOACD', how= 'left')

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

oldham_MSOA_2001=oldham_MSOA_2001[col_order]

## Pendle

In [89]:
# # Creating shared border dictionary at OA level- Pendle
# pendle_OA_borders_2001 = {}

# for idx1, row1 in pendle_2001.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in pendle_2001.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     pendle_OA_borders_2001[idx1] = borders
    
# with open('preprocessed files/2001/pendle_OA_borders_2001.pkl', 'wb') as f:
#     pickle.dump(pendle_OA_borders_2001, f)

In [90]:
# Creating shared border dictionary at LSOA level- Pendle
pendle_LSOA_borders_2001 = {}

for idx1, row1 in pendle_LSOA_2001.set_index('LSOACD').iterrows():
    borders = []
    for idx2, row2 in pendle_LSOA_2001.set_index('LSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    pendle_LSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/pendle_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(pendle_LSOA_borders_2001, f)

In [91]:
# Creating shared border dictionary at LSOA_OA level- Pendle
pendle_LSOA_OA_borders_2001 = {}
for lsoa in pendle_2001['LSOACD'].unique():
    lsoa_df = pendle_2001[pendle_2001['LSOACD'] == lsoa]
    oa_borders = {}
    for idx1, row1 in lsoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in lsoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        oa_borders[idx1] = borders
    pendle_LSOA_OA_borders_2001[lsoa] = oa_borders
with open('preprocessed files/2001/pendle_LSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(pendle_LSOA_OA_borders_2001, f)


In [92]:
# Adding moran index to the city LSOA dataset- Pendle
pendle_LSOA_OA_moran = {}
for lsoa in pendle_2001['LSOACD'].unique():
    lsoa_df = pendle_2001[pendle_2001['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),pendle_LSOA_OA_borders_2001[lsoa])
    pendle_LSOA_OA_moran[lsoa] = {'OA_moran':OA_moran}

flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'], 
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'], 
              } for key, value in pendle_LSOA_OA_moran.items()]

pendle_LSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

pendle_LSOA_2001 = pendle_LSOA_2001.merge(pendle_LSOA_OA_moran_2001[['LSOACD','white_moran','asian_moran']], 
                                                  on= 'LSOACD', how= 'left')
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

pendle_LSOA_2001=pendle_LSOA_2001[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator

In [93]:
# Creating shared border dictionary at MSOA_OA level- Pendle
pendle_MSOA_OA_borders_2001 = {}
for msoa in pendle_2001['MSOACD'].unique():
    msoa_df = pendle_2001[pendle_2001['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    pendle_MSOA_OA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/pendle_MSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(pendle_MSOA_OA_borders_2001, f)


In [94]:
# Creating shared border dictionary at MSOA_LSOA level- Pendle
pendle_MSOA_LSOA = pd.merge(pendle_LSOA_2001,pendle_2001.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(), on = 'LSOACD', how = 'left')
pendle_MSOA_LSOA_borders_2001 = {}
for msoa in pendle_MSOA_LSOA['MSOACD'].unique():
    msoa_df = pendle_MSOA_LSOA[pendle_MSOA_LSOA['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('LSOACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('LSOACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    pendle_MSOA_LSOA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/pendle_MSOA_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(pendle_MSOA_LSOA_borders_2001, f)

In [95]:
# MSOA_OA based moran- Pendle
pendle_MSOA_OA_moran = {}
for msoa in pendle_2001['MSOACD'].unique():
    msoa_df = pendle_2001[pendle_2001['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),pendle_MSOA_OA_borders_2001[msoa])
    pendle_MSOA_OA_moran[msoa] = {'OA_moran':OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'], 
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'], 
              } for key, value in pendle_MSOA_OA_moran.items()]

pendle_MSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

pendle_MSOA_2001 = pendle_MSOA_2001.merge(pendle_MSOA_OA_moran_2001[['MSOACD','white_moran_OA','asian_moran_OA']], 
                                                  on= 'MSOACD', how= 'left')

# MSOA_LSOA based moran- Pendle
pendle_MSOA_LSOA_moran = {}
for msoa in pendle_2001['MSOACD'].unique():
    msoa_df = pendle_2001[pendle_2001['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum().reset_index().set_index('LSOACD')
    LSOA_moran = moran(msoa_df,pendle_MSOA_LSOA_borders_2001[msoa])
    pendle_MSOA_LSOA_moran[msoa] = {'LSOA_moran':LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'], 
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'], 
              } for key, value in pendle_MSOA_LSOA_moran.items()]

pendle_MSOA_LSOA_moran_2001 = pd.DataFrame(flat_dict)

pendle_MSOA_2001 = pendle_MSOA_2001.merge(pendle_MSOA_LSOA_moran_2001[['MSOACD','white_moran_LSOA','asian_moran_LSOA']], 
                                                  on= 'MSOACD', how= 'left')

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

pendle_MSOA_2001=pendle_MSOA_2001[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)


## London

In [96]:
# # Creating shared border dictionary at OA level- London
# london_OA_borders_2001 = {}

# for idx1, row1 in london_2001.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in london_2001.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     london_OA_borders_2001[idx1] = borders
    
# with open('preprocessed files/2001/london_OA_borders_2001.pkl', 'wb') as f:
#     pickle.dump(london_OA_borders_2001, f)


In [97]:
# Creating shared border dictionary at LSOA level- London
london_LSOA_borders_2001 = {}

for idx1, row1 in london_LSOA_2001.set_index('LSOACD').iterrows():
    borders = []
    for idx2, row2 in london_LSOA_2001.set_index('LSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    london_LSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/london_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(london_LSOA_borders_2001, f)

In [98]:
# Creating shared border dictionary at LSOA_OA level- London
london_LSOA_OA_borders_2001 = {}
for lsoa in london_2001['LSOACD'].unique():
    lsoa_df = london_2001[london_2001['LSOACD'] == lsoa]
    oa_borders = {}
    for idx1, row1 in lsoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in lsoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        oa_borders[idx1] = borders
    london_LSOA_OA_borders_2001[lsoa] = oa_borders
with open('preprocessed files/2001/london_LSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(london_LSOA_OA_borders_2001, f)

In [99]:
# Adding moran index to the city LSOA dataset- London
london_LSOA_OA_moran = {}
for lsoa in london_2001['LSOACD'].unique():
    lsoa_df = london_2001[london_2001['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),london_LSOA_OA_borders_2001[lsoa])
    london_LSOA_OA_moran[lsoa] = {'OA_moran':OA_moran}

flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'], 
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'], 
              } for key, value in london_LSOA_OA_moran.items()]

london_LSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

london_LSOA_2001 = london_LSOA_2001.merge(london_LSOA_OA_moran_2001[['LSOACD','white_moran','asian_moran']], 
                                                  on= 'LSOACD', how= 'left')
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

london_LSOA_2001=london_LSOA_2001[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator

In [59]:
# Creating shared border dictionary at MSOA level- London
london_MSOA_borders_2001 = {}

for idx1, row1 in london_MSOA_2001.set_index('MSOACD').iterrows():
    borders = []
    for idx2, row2 in london_MSOA_2001.set_index('MSOACD').iterrows():
        if idx1 != idx2:
            if row1.geometry.intersects(row2.geometry):
                borders.append(idx2)
    london_MSOA_borders_2001[idx1] = borders

with open('preprocessed files/2001/london_MSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(london_MSOA_borders_2001, f)

In [100]:
# Creating shared border dictionary at MSOA_OA level- London
london_MSOA_OA_borders_2001 = {}
for msoa in london_2001['MSOACD'].unique():
    msoa_df = london_2001[london_2001['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('OACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('OACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    london_MSOA_OA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/london_MSOA_OA_borders_2001.pkl', 'wb') as f:
    pickle.dump(london_MSOA_OA_borders_2001, f)

In [101]:
# Creating shared border dictionary at MSOA_LSOA level- London
london_MSOA_LSOA = pd.merge(london_LSOA_2001,london_2001.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(), on = 'LSOACD', how = 'left')
london_MSOA_LSOA_borders_2001 = {}
for msoa in london_MSOA_LSOA['MSOACD'].unique():
    msoa_df = london_MSOA_LSOA[london_MSOA_LSOA['MSOACD'] == msoa]
    lsoa_borders = {}
    for idx1, row1 in msoa_df.set_index('LSOACD').iterrows():
        borders = []
        for idx2, row2 in msoa_df.set_index('LSOACD').iterrows():
            if idx1 != idx2:
                if row1.geometry.intersects(row2.geometry):
                    borders.append(idx2)
        lsoa_borders[idx1] = borders
    london_MSOA_LSOA_borders_2001[msoa] = lsoa_borders
with open('preprocessed files/2001/london_MSOA_LSOA_borders_2001.pkl', 'wb') as f:
    pickle.dump(london_MSOA_LSOA_borders_2001, f)

In [102]:
# MSOA_OA based moran- London
london_MSOA_OA_moran = {}
for msoa in london_2001['MSOACD'].unique():
    msoa_df = london_2001[london_2001['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),london_MSOA_OA_borders_2001[msoa])
    london_MSOA_OA_moran[msoa] = {'OA_moran':OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'], 
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'], 
              } for key, value in london_MSOA_OA_moran.items()]

london_MSOA_OA_moran_2001 = pd.DataFrame(flat_dict)

london_MSOA_2001 = london_MSOA_2001.merge(london_MSOA_OA_moran_2001[['MSOACD','white_moran_OA','asian_moran_OA']], 
                                                  on= 'MSOACD', how= 'left')

# MSOA_LSOA based moran- London
london_MSOA_LSOA_moran = {}
for msoa in london_2001['MSOACD'].unique():
    msoa_df = london_2001[london_2001['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum().reset_index().set_index('LSOACD')
    LSOA_moran = moran(msoa_df,london_MSOA_LSOA_borders_2001[msoa])
    london_MSOA_LSOA_moran[msoa] = {'LSOA_moran':LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'], 
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'], 
              } for key, value in london_MSOA_LSOA_moran.items()]

london_MSOA_LSOA_moran_2001 = pd.DataFrame(flat_dict)

london_MSOA_2001 = london_MSOA_2001.merge(london_MSOA_LSOA_moran_2001[['MSOACD','white_moran_LSOA','asian_moran_LSOA']], 
                                                  on= 'MSOACD', how= 'left')

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

london_MSOA_2001=london_MSOA_2001[col_order]

In [116]:
# Creating simpson dissimilarity index dataset for year 2001.
# Calculating OA, LSOA, MSOA and LAD level simpson index inside London
OA_simp_london_2001 = simpson(london_2001[['white','asian','black','other']])
LSOA_simp_london_2001 = simpson(london_2001.groupby(['LSOACD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
MSOA_simp_london_2001 = simpson(london_2001.groupby(['MSOACD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
LAD_simp_london_2001 = simpson(london_2001.groupby(['LADCD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])

country_dic_sim_2001 = {'OA_simpson':OA_simp_london_2001[0], 'LSOA_simpson':LSOA_simp_london_2001[0],
                        'MSOA_simpson':MSOA_simp_london_2001[0], 'LAD_simpson':LAD_simp_london_2001[0], 'london':LAD_simp_london_2001[1]}

flat_dict = [{'year': 2001, 'total_population': london_2001['total_pop'].sum(),
              'white': london_2001['white'].sum(),
              'asian': london_2001['asian'].sum(),
              'black': london_2001['black'].sum(),
              'other': london_2001['other'].sum(),              
              'white_frac': round(london_2001['white'].sum()/london_2001['total_pop'].sum(),3),
              'asian_frac': round(london_2001['asian'].sum()/london_2001['total_pop'].sum(),3),
              'black_frac': round(london_2001['black'].sum()/london_2001['total_pop'].sum(),3),
              'other_frac': round(london_2001['other'].sum()/london_2001['total_pop'].sum(),3),
              'OA_simpson':OA_simp_london_2001[0], 'LSOA_simpson':LSOA_simp_london_2001[0],'MSOA_simpson':MSOA_simp_london_2001[0], 
              'LAD_simpson':LAD_simp_london_2001[0], 'London_simpson':LAD_simp_london_2001[1]}]

london_simpson_2001 = pd.DataFrame(flat_dict)
london_simpson_2001

Unnamed: 0,year,total_population,white,asian,black,other,white_frac,asian_frac,black_frac,other_frac,OA_simpson,LSOA_simpson,MSOA_simpson,LAD_simpson,London_simpson
0,2001,7171998,5103176,1059689,782851,226282,0.712,0.148,0.109,0.032,0.618,0.609,0.602,0.574,0.54


In [120]:
# Creating London dissimilarity index dataset for year 2001.
# Calculating OA, LSOA, MSOA and LAD level dissimilarity index inside London

OA_diss_london_2001 = dissimilarity(london_2001[['white','asian','black','other']])
LSOA_diss_london_2001 = dissimilarity(london_2001.groupby(['LSOACD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
MSOA_diss_london_2001 = dissimilarity(london_2001.groupby(['MSOACD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
LAD_diss_london_2001 = dissimilarity(london_2001.groupby(['LADCD'])[['white','asian','black','other']].sum().reset_index()[['white','asian','black','other']])
london_dic_diss_2001 = {'OA_level':OA_diss_london_2001, 'LSOA_level':LSOA_diss_london_2001,
                        'MSOA_level':MSOA_diss_london_2001, 'LAD_level':LAD_diss_london_2001}

flat_dict = [{'year': 2001,
              'OA_white_diss': london_dic_diss_2001['OA_level']['white'], 'LSOA_white_diss': london_dic_diss_2001['LSOA_level']['white'],
              'MSOA_white_diss': london_dic_diss_2001['MSOA_level']['white'], 'LAD_white_diss': london_dic_diss_2001['LAD_level']['white'],
              'OA_asian_diss': london_dic_diss_2001['OA_level']['asian'], 'LSOA_asian_diss': london_dic_diss_2001['LSOA_level']['asian'],
              'MSOA_asian_diss': london_dic_diss_2001['MSOA_level']['asian'], 'LAD_asian_diss': london_dic_diss_2001['LAD_level']['asian'],
              'OA_black_diss': london_dic_diss_2001['OA_level']['black'], 'LSOA_black_diss': london_dic_diss_2001['LSOA_level']['black'],
              'MSOA_black_diss': london_dic_diss_2001['MSOA_level']['black'], 'LAD_black_diss': london_dic_diss_2001['LAD_level']['black'],
              'OA_other_diss': london_dic_diss_2001['OA_level']['other'], 'LSOA_other_diss': london_dic_diss_2001['LSOA_level']['other'],
              'MSOA_other_diss': london_dic_diss_2001['MSOA_level']['other'], 'LAD_other_diss': london_dic_diss_2001['LAD_level']['other']}]


london_dissimilarity_2001 = pd.DataFrame(flat_dict)
london_dissimilarity_2001

Unnamed: 0,year,OA_white_diss,LSOA_white_diss,MSOA_white_diss,LAD_white_diss,OA_asian_diss,LSOA_asian_diss,MSOA_asian_diss,LAD_asian_diss,OA_black_diss,LSOA_black_diss,MSOA_black_diss,LAD_black_diss,OA_other_diss,LSOA_other_diss,MSOA_other_diss,LAD_other_diss
0,2001,0.407,0.382,0.363,0.264,0.436,0.409,0.394,0.32,0.455,0.427,0.408,0.322,0.271,0.192,0.165,0.126


In [61]:
with open('preprocessed files/2001/london_OA_borders_2001.pkl', 'rb') as f:
    london_OA_borders_2001 = pickle.load(f)
with open('preprocessed files/2001/london_LSOA_borders_2001.pkl', 'rb') as f:
    london_LSOA_borders_2001 = pickle.load(f)
with open('preprocessed files/2001/london_MSOA_borders_2001.pkl', 'rb') as f:
    london_MSOA_borders_2001 = pickle.load(f)

In [63]:
dic_mor_london_2001= {}

OA_mor_london_2001 = moran(london_2001[['OACD','white','asian','black','other']].set_index('OACD'),london_OA_borders_2001)
LSOA_mor_london_2001 = moran(london_2001.groupby(['LSOACD'])[['white','asian','black','other']].sum()[['white','asian','black','other']],london_LSOA_borders_2001)
MSOA_mor_london_2001 = moran(london_2001.groupby(['MSOACD'])[['white','asian','black','other']].sum()[['white','asian','black','other']],london_MSOA_borders_2001)
dic_mor_london_2001 = {'OA':OA_mor_london_2001, 'LSOA':LSOA_mor_london_2001, 'MSOA':MSOA_mor_london_2001}

flat_dict = {}
for level, values in dic_mor_london_2001.items():
    for ethnicity, value in values.items():
        flat_dict[f"{level}_{ethnicity}_mor"] = value

london_moran_2001 = pd.DataFrame([flat_dict])
london_moran_2001['year']= 2001
london_moran_2001

In [131]:
london_indexes_2001 = pd.merge(london_simpson_2001,london_dissimilarity_2001, how='left')
london_indexes_2001 = london_indexes_2001.merge(london_moran_2001, how='left')
london_indexes_2001

Unnamed: 0,year,total_population,white,asian,black,other,white_frac,asian_frac,black_frac,other_frac,...,OA_black_mor,OA_other_mor,LSOA_white_mor,LSOA_asian_mor,LSOA_black_mor,LSOA_other_mor,MSOA_white_mor,MSOA_asian_mor,MSOA_black_mor,MSOA_other_mor
0,2001,7171998,5103176,1059689,782851,226282,0.712,0.148,0.109,0.032,...,0.759,0.318,0.847,0.856,0.803,0.599,0.804,0.804,0.793,0.728


In [132]:
london_indexes_2001['LADCD'] = 'E00000000'
london_indexes_2001['LADNM'] = 'London'
col_order = ['year', 'LADCD', 'LADNM', 'white', 'asian', 'black', 'other', 'total_population', 
             'white_frac', 'asian_frac', 'black_frac', 'other_frac', 
             'OA_simpson', 'LSOA_simpson', 'MSOA_simpson', 'LAD_simpson', 'London_simpson',
             'OA_white_diss', 'LSOA_white_diss', 'MSOA_white_diss',
             'OA_asian_diss', 'LSOA_asian_diss', 'MSOA_asian_diss', 
             'OA_black_diss', 'LSOA_black_diss', 'MSOA_black_diss', 
             'OA_other_diss', 'LSOA_other_diss', 'MSOA_other_diss',
             'OA_white_mor', 'LSOA_white_mor','MSOA_white_mor', 
             'OA_asian_mor', 'LSOA_asian_mor', 'MSOA_asian_mor',
             'OA_black_mor', 'LSOA_black_mor', 'MSOA_black_mor', 
             'OA_other_mor', 'LSOA_other_mor', 'MSOA_other_mor']
london_indexes_2001 = london_indexes_2001[col_order]

In [103]:
birmingham_LSOA_2001.to_csv('preprocessed files/2001/birmingham_LSOA_2001.csv', index= False)
leicester_LSOA_2001.to_csv('preprocessed files/2001/leicester_LSOA_2001.csv', index= False)
bradford_LSOA_2001.to_csv('preprocessed files/2001/bradford_LSOA_2001.csv', index= False)
blackburn_LSOA_2001.to_csv('preprocessed files/2001/blackburn_LSOA_2001.csv', index= False)
oldham_LSOA_2001.to_csv('preprocessed files/2001/oldham_LSOA_2001.csv', index= False)
pendle_LSOA_2001.to_csv('preprocessed files/2001/pendle_LSOA_2001.csv', index= False)
london_LSOA_2001.to_csv('preprocessed files/2001/london_LSOA_2001.csv', index= False)

In [104]:
birmingham_MSOA_2001.to_csv('preprocessed files/2001/birmingham_MSOA_2001.csv', index= False)
leicester_MSOA_2001.to_csv('preprocessed files/2001/leicester_MSOA_2001.csv', index= False)
bradford_MSOA_2001.to_csv('preprocessed files/2001/bradford_MSOA_2001.csv', index= False)
blackburn_MSOA_2001.to_csv('preprocessed files/2001/blackburn_MSOA_2001.csv', index= False)
oldham_MSOA_2001.to_csv('preprocessed files/2001/oldham_MSOA_2001.csv', index= False)
pendle_MSOA_2001.to_csv('preprocessed files/2001/pendle_MSOA_2001.csv', index= False)
london_MSOA_2001.to_csv('preprocessed files/2001/london_MSOA_2001.csv', index= False)
london_LAD_2001.to_csv('preprocessed files/2001/london_LAD_2001.csv', index= False)
london_indexes_2001.to_csv('preprocessed files/2001/london_indexes_2001.csv', index= False)