In [1]:
import pandas as pd
import numpy as np
import pickle

import re

import warnings

import csv

import dash
from dash import dcc, html
from dash.dependencies import Input, Output

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots 

import geopandas as gpd
import shapely.geometry as sg
from shapely.geometry import MultiPolygon, Polygon


import folium
from folium.plugins import DualMap, HeatMap

import plotly.express as px
import plotly.graph_objs as go

from dbfread import DBF

# Defining functions

In [2]:
# defining dissimilarity function
def dissimilarity(df):
    """Compute a dissimilarity (segregation) index for every column of ``df``.

    Each row of ``df`` is one area and each column one population group; the
    cells hold head counts.  For a group ``g`` the index is::

        sum_i (t_i / T) * |x_ig / t_i - X_g / T|  /  (2 * P_g * (1 - P_g))

    where ``t_i`` is the total of row ``i``, ``T`` the grand total, ``X_g``
    the column total and ``P_g = X_g / T``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric counts, areas in rows and groups in columns.  Rows are
        processed by position, so any index is accepted.

    Returns
    -------
    dict
        ``{column name: index rounded to 3 decimals}``.  The value is NaN
        when the denominator is zero (the group is absent or is the whole
        population) or when ``df`` holds no population at all.
    """
    counts = df.to_numpy(dtype=float)
    area_totals = counts.sum(axis=1)
    grand_total = area_totals.sum()
    if grand_total == 0:
        return {col: float('nan') for col in df.columns}

    # Areas without population carry zero weight; dropping them avoids the
    # 0/0 share that would otherwise turn the whole index into NaN.
    populated = area_totals > 0
    counts = counts[populated]
    area_totals = area_totals[populated]
    area_weights = area_totals / grand_total

    dissimilarity_results = {}
    for position, col in enumerate(df.columns):
        overall_share = counts[:, position].sum() / grand_total
        local_share = counts[:, position] / area_totals
        col_numerator = np.sum(area_weights * np.abs(local_share - overall_share))
        col_denominator = 2 * overall_share * (1 - overall_share)
        if col_denominator == 0:
            dissimilarity_results[col] = float('nan')
        else:
            dissimilarity_results[col] = round(col_numerator / col_denominator, 3)
    return dissimilarity_results
        

In [3]:
#| include: false

# defining simpson function

def simpson(df):
    """Compute Simpson concentration indexes from a table of group counts.

    Each row of ``df`` is one area and each column one population group.
    A column named ``'other'`` is left out of the squared shares, but it
    still counts towards the area and overall totals.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric counts, areas in rows and groups in columns.  Rows are
        processed by position, so any index is accepted.

    Returns
    -------
    list
        ``[simpson_index, area_simpson_index]``, both rounded to 3 decimals:

        * ``simpson_index`` - population-weighted mean over areas of the sum
          of squared group shares within the area;
        * ``area_simpson_index`` - sum of squared group shares computed on
          the column totals, i.e. for all rows pooled together.

        Both are NaN when ``df`` holds no population at all.
    """
    counts = df.to_numpy(dtype=float)
    area_totals = counts.sum(axis=1)
    grand_total = area_totals.sum()
    if grand_total == 0:
        return [float('nan'), float('nan')]

    # Positions of the columns that enter the squared shares.
    counted = [position for position, col in enumerate(df.columns) if col != 'other']

    # Areas without population carry zero weight; dropping them avoids the
    # 0/0 share that would otherwise turn the whole index into NaN.
    populated = area_totals > 0
    local_shares = counts[populated][:, counted] / area_totals[populated][:, None]
    local_index = (local_shares ** 2).sum(axis=1)
    simpson_index = np.sum(local_index * area_totals[populated] / grand_total)

    pooled_shares = counts[:, counted].sum(axis=0) / grand_total
    area_simpson_index = np.sum(pooled_shares ** 2)

    return [round(simpson_index, 3), round(area_simpson_index, 3)]

In [4]:
# defining moran I function

def moran(df, border):
    """Compute Moran's I of each group's share, using binary adjacency weights.

    For every column the statistic is::

        N * sum_i sum_{j in border[i]} (f_i - mean) * (f_j - mean)
        ---------------------------------------------------------
                    W * sum_i (f_i - mean) ** 2

    where ``f_i`` is the column's share of row ``i``'s total, ``mean`` is the
    unweighted mean of those shares, ``N`` is the number of rows of ``df`` and
    ``W`` is the total number of neighbour entries in ``border``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric counts indexed by area code, one column per group.
    border : dict
        ``{area code: list of neighbouring area codes}``.  Every label of
        ``df.index`` must be a key, and every listed neighbour must be a
        label of ``df.index`` (a ``KeyError`` is raised otherwise).

    Returns
    -------
    dict
        ``{column name: Moran's I rounded to 3 decimals}``.
    """
    # W: every neighbour entry has weight 1 (all keys of `border` are counted).
    total_weight = sum(len(neighbours) for neighbours in border.values())

    # Shares and their deviations from the column mean, computed once
    # instead of inside the neighbour loop.
    counts = df.to_numpy(dtype=float)
    shares = counts / counts.sum(axis=1, keepdims=True)
    deviations = shares - shares.mean(axis=0)

    # Translate the (area, neighbour) label pairs into row positions.
    position = {label: pos for pos, label in enumerate(df.index)}
    sources, targets = [], []
    for label in df.index:
        for neighbour in border[label]:
            sources.append(position[label])
            targets.append(position[neighbour])
    sources = np.asarray(sources, dtype=int)
    targets = np.asarray(targets, dtype=int)

    cross_products = (deviations[sources] * deviations[targets]).sum(axis=0)
    squared_deviations = (deviations ** 2).sum(axis=0)

    moran_results = {}
    for pos, col in enumerate(df.columns):
        moran_numerator = cross_products[pos] * df.shape[0]
        moran_denominator = squared_deviations[pos] * total_weight
        moran_results[col] = round(moran_numerator / moran_denominator, 3)
    return moran_results


# Preprocessing

In [5]:
# Load the 2011 census ethnicity table.
# skiprows / skipfooter drop the 7 leading and 8 trailing lines of the file
# (presumably non-tabular header/footer text - confirm against the raw file);
# skipfooter requires the python parsing engine.
# Forward slashes keep the path usable on every OS, like the other paths in this notebook.
data_2011 = pd.read_csv('Census2011/census2011.csv', encoding='latin1', delimiter=',',
                        skiprows=7, skipfooter=8, engine='python')

In [6]:
data_2011.head()

Unnamed: 0,2011 output area,White: English/Welsh/Scottish/Northern Irish/British,White: Irish,White: Gypsy or Irish Traveller,White: Other White,Mixed/multiple ethnic groups: White and Black Caribbean,Mixed/multiple ethnic groups: White and Black African,Mixed/multiple ethnic groups: White and Asian,Mixed/multiple ethnic groups: Other Mixed,Asian/Asian British: Indian,Asian/Asian British: Pakistani,Asian/Asian British: Bangladeshi,Asian/Asian British: Chinese,Asian/Asian British: Other Asian,Black/African/Caribbean/Black British: African,Black/African/Caribbean/Black British: Caribbean,Black/African/Caribbean/Black British: Other Black,Other ethnic group: Arab,Other ethnic group: Any other ethnic group
0,E00000001,150,7,0,18,3,0,4,3,2,0,0,4,0,0,0,0,0,3
1,E00000003,177,2,1,26,0,1,7,1,17,0,3,3,3,3,0,0,0,6
2,E00000005,254,14,0,53,0,2,5,5,9,1,0,10,5,2,0,2,0,5
3,E00000007,55,0,0,40,0,0,2,0,4,2,0,10,4,0,0,0,0,6
4,E00000010,62,3,0,15,0,0,1,2,2,1,3,5,1,0,4,1,2,0


In [7]:
data_2011.shape

(181408, 19)

In [8]:
# Reshape from wide (one column per ethnic group) to long:
# one row per (output area, ethnic group) with the count in 'Observation'.
ethnic_group_columns = data_2011.columns[1:]  # everything after the output-area code
melt_2011 = pd.melt(
    data_2011,
    id_vars=['2011 output area'],
    value_vars=ethnic_group_columns,
    var_name='Ethnic group',
    value_name='Observation',
)
melt_2011.head()

Unnamed: 0,2011 output area,Ethnic group,Observation
0,E00000001,White: English/Welsh/Scottish/Northern Irish/B...,150
1,E00000003,White: English/Welsh/Scottish/Northern Irish/B...,177
2,E00000005,White: English/Welsh/Scottish/Northern Irish/B...,254
3,E00000007,White: English/Welsh/Scottish/Northern Irish/B...,55
4,E00000010,White: English/Welsh/Scottish/Northern Irish/B...,62


In [9]:
melt_2011['Ethnic group'].unique()

array(['White: English/Welsh/Scottish/Northern Irish/British',
       'White: Irish', 'White: Gypsy or Irish Traveller',
       'White: Other White',
       'Mixed/multiple ethnic groups: White and Black Caribbean',
       'Mixed/multiple ethnic groups: White and Black African',
       'Mixed/multiple ethnic groups: White and Asian',
       'Mixed/multiple ethnic groups: Other Mixed',
       'Asian/Asian British: Indian', 'Asian/Asian British: Pakistani',
       'Asian/Asian British: Bangladeshi', 'Asian/Asian British: Chinese',
       'Asian/Asian British: Other Asian',
       'Black/African/Caribbean/Black British: African',
       'Black/African/Caribbean/Black British: Caribbean',
       'Black/African/Caribbean/Black British: Other Black',
       'Other ethnic group: Arab',
       'Other ethnic group: Any other ethnic group'], dtype=object)

In [10]:
# Labels look like '<broad group>: <detailed group>'; split them on the colon.
# The text before the colon becomes 'ethnicity', the text after it 'sub_ethnicity'
# (still carrying its leading space at this point).
ethnic_group_parts = melt_2011['Ethnic group'].str.split(':')
melt_2011['ethnicity'] = ethnic_group_parts.str[0]
melt_2011['sub_ethnicity'] = ethnic_group_parts.str[1]
melt_2011 = melt_2011.drop(columns='Ethnic group')
melt_2011.head()

Unnamed: 0,2011 output area,Observation,ethnicity,sub_ethnicity
0,E00000001,150,White,English/Welsh/Scottish/Northern Irish/British
1,E00000003,177,White,English/Welsh/Scottish/Northern Irish/British
2,E00000005,254,White,English/Welsh/Scottish/Northern Irish/British
3,E00000007,55,White,English/Welsh/Scottish/Northern Irish/British
4,E00000010,62,White,English/Welsh/Scottish/Northern Irish/British


In [11]:
melt_2011['Observation'].sum()

56075912

In [12]:
melt_2011['ethnicity'].unique()

array(['White', 'Mixed/multiple ethnic groups', 'Asian/Asian British',
       'Black/African/Caribbean/Black British', 'Other ethnic group'],
      dtype=object)

In [13]:
def _broad_ethnicity(label):
    """Collapse a census broad-group label onto white / asian / black / other."""
    # Checked in this order; a label containing none of the keywords
    # (e.g. 'Mixed/multiple ethnic groups', 'Other ethnic group') becomes 'other'.
    for keyword, group in (('White', 'white'), ('Asian', 'asian'), ('Black', 'black')):
        if keyword in label:
            return group
    return 'other'


# NOTE: the match is case-sensitive, so running this cell a second time on the
# already-mapped lower-case values would turn every row into 'other'.
melt_2011['ethnicity'] = melt_2011['ethnicity'].map(_broad_ethnicity)

In [14]:
def _simplify_sub_ethnicity(label):
    """Shorten the long White sub-group labels; every other label is returned unchanged."""
    if '/British' in label:
        return 'British'
    # Any label containing 'Irish' matches here, so 'Gypsy or Irish Traveller'
    # is folded into 'Irish' as well.
    if 'Irish' in label:
        return 'Irish'
    return label


melt_2011['sub_ethnicity'] = melt_2011['sub_ethnicity'].map(_simplify_sub_ethnicity)

In [15]:
# Remove the leading whitespace left over from splitting the label on ':'.
melt_2011['sub_ethnicity'] = melt_2011['sub_ethnicity'].str.lstrip()

In [16]:
melt_2011.head()

Unnamed: 0,2011 output area,Observation,ethnicity,sub_ethnicity
0,E00000001,150,white,British
1,E00000003,177,white,British
2,E00000005,254,white,British
3,E00000007,55,white,British
4,E00000010,62,white,British


In [17]:
# Lookup table that links every 2011 output area (OA) to its LSOA, MSOA and LAD.
# Forward slashes keep the path usable on every OS, like the other paths in this notebook.
lookup_2011 = pd.read_csv('Census2011/lookup2011.csv', low_memory=False)
lookup_2011.head()

Unnamed: 0,OA11CD,LSOA11CD,LSOA11NM,MSOA11CD,MSOA11NM,LAD11CD,LAD11NM,LAD11NMW,ObjectId
0,E00000001,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,1
1,E00000003,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,2
2,E00000005,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,3
3,E00000007,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,4
4,E00000010,E01000003,City of London 001C,E02000001,City of London 001,E09000001,City of London,,5


In [18]:
# Attach the LSOA / MSOA / LAD codes and names to every observation through its output-area code.
# Left join: every melted row is kept, even if its output area were missing from the lookup.
merged_2011 = melt_2011.merge(
    lookup_2011,
    how='left',
    left_on='2011 output area',
    right_on='OA11CD',
)
merged_2011.head(3)

Unnamed: 0,2011 output area,Observation,ethnicity,sub_ethnicity,OA11CD,LSOA11CD,LSOA11NM,MSOA11CD,MSOA11NM,LAD11CD,LAD11NM,LAD11NMW,ObjectId
0,E00000001,150,white,British,E00000001,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,1
1,E00000003,177,white,British,E00000003,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,2
2,E00000005,254,white,British,E00000005,E01000001,City of London 001A,E02000001,City of London 001,E09000001,City of London,,3


In [19]:
# Keep only the columns needed downstream, ordered codes -> names -> ethnicity -> count.
# '2011 output area' is dropped here; 'OA11CD' from the lookup carries the same code.
column_order = [
    'OA11CD', 'LSOA11CD', 'MSOA11CD', 'LAD11CD',
    'LSOA11NM', 'MSOA11NM', 'LAD11NM',
    'ethnicity', 'sub_ethnicity', 'Observation',
]
merged_2011 = merged_2011.loc[:, column_order]
merged_2011.head()

Unnamed: 0,OA11CD,LSOA11CD,MSOA11CD,LAD11CD,LSOA11NM,MSOA11NM,LAD11NM,ethnicity,sub_ethnicity,Observation
0,E00000001,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,white,British,150
1,E00000003,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,white,British,177
2,E00000005,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,white,British,254
3,E00000007,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,white,British,55
4,E00000010,E01000003,E02000001,E09000001,City of London 001C,City of London 001,City of London,white,British,62


In [20]:
merged_2011['sub_ethnicity'].unique()

array(['British', 'Irish', 'Other White', 'White and Black Caribbean',
       'White and Black African', 'White and Asian', 'Other Mixed',
       'Indian', 'Pakistani', 'Bangladeshi', 'Chinese', 'Other Asian',
       'African', 'Caribbean', 'Other Black', 'Arab',
       'Any other ethnic group'], dtype=object)

In [21]:
len(merged_2011['OA11CD'].unique())

181408

In [22]:
len(merged_2011['LSOA11CD'].unique())

34753

In [23]:
len(merged_2011['MSOA11CD'].unique())

7201

In [24]:
len(merged_2011['LAD11CD'].unique())

348

# Ethnicity Dataset

In [25]:
# Total count per output area and broad ethnic group (sub-groups summed together).
# The area codes and names are all part of the key so that they survive the aggregation.
area_keys = ['OA11CD', 'LSOA11CD', 'MSOA11CD', 'LAD11CD', 'LSOA11NM', 'MSOA11NM', 'LAD11NM']
ethnicity_2011 = (
    merged_2011
    .groupby(area_keys + ['ethnicity'])['Observation']
    .sum()
    .reset_index()
)
ethnicity_2011.head(3)

Unnamed: 0,OA11CD,LSOA11CD,MSOA11CD,LAD11CD,LSOA11NM,MSOA11NM,LAD11NM,ethnicity,Observation
0,E00000001,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,asian,6
1,E00000001,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,black,0
2,E00000001,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,other,13


In [26]:
# Long -> wide: one row per output area with one count column per broad ethnic group.
# NOTE: this rebinds ethnicity_2011, so the cell cannot be re-run without re-running the groupby cell first.
area_keys = ['OA11CD', 'LSOA11CD', 'MSOA11CD', 'LAD11CD', 'LSOA11NM', 'MSOA11NM', 'LAD11NM']
ethnicity_wide = ethnicity_2011.pivot(index=area_keys, columns='ethnicity', values='Observation')
ethnicity_2011 = ethnicity_wide.reset_index().rename_axis(None, axis=1)

# Population of the output area = sum of the four group counts.
ethnicity_2011['total_pop'] = (
    ethnicity_2011['white'] + ethnicity_2011['asian'] + ethnicity_2011['black'] + ethnicity_2011['other']
)

ethnicity_2011.head(3)

Unnamed: 0,OA11CD,LSOA11CD,MSOA11CD,LAD11CD,LSOA11NM,MSOA11NM,LAD11NM,asian,black,other,white,total_pop
0,E00000001,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,6,0,13,175,194
1,E00000003,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,26,3,15,206,250
2,E00000005,E01000001,E02000001,E09000001,City of London 001A,City of London 001,City of London,25,4,17,321,367


In [27]:
ethnicity_2011.shape

(181408, 12)

In [28]:
ethnicity_2011[['asian', 'black', 'other','white']].sum(axis= 0).sum()

56075912

In [29]:
ethnicity_2011.to_csv('preprocessed files/2011/ethnicity_2011.csv', index= False)

# Sub-Ethnicity Dataset

In [None]:
# Total count per output area and detailed (sub) ethnic group.
# The area codes and names are all part of the key so that they survive the aggregation.
area_keys = ['OA11CD', 'LSOA11CD', 'MSOA11CD', 'LAD11CD', 'LSOA11NM', 'MSOA11NM', 'LAD11NM']
sub_ethnicity_2011 = (
    merged_2011
    .groupby(area_keys + ['sub_ethnicity'])['Observation']
    .sum()
    .reset_index()
)
sub_ethnicity_2011.head(3)

In [None]:
# Long -> wide: one row per output area with one count column per detailed ethnic group.
# NOTE: this rebinds sub_ethnicity_2011, so the cell cannot be re-run without re-running the groupby cell first.
area_keys = ['OA11CD', 'LSOA11CD', 'MSOA11CD', 'LAD11CD', 'LSOA11NM', 'MSOA11NM', 'LAD11NM']
sub_ethnicity_wide = sub_ethnicity_2011.pivot(index=area_keys, columns='sub_ethnicity', values='Observation')
sub_ethnicity_2011 = sub_ethnicity_wide.reset_index().rename_axis(None, axis=1)
sub_ethnicity_2011.head(3)

In [None]:
sub_ethnicity_2011.to_csv('preprocessed files/2011/sub_ethnicity_2011.csv', index= False)

# Shape Files

In [None]:
# Load the 2011 output-area boundary shapefile.
# Forward slashes keep the path usable on every OS, like the other paths in this notebook.
shape_2011 = gpd.read_file('Census2011/shape_2011/OA_2011_EW_BGC_V2.shp')
# shape_2011.info()

In [None]:
# Rename the geometry column to 'OA_geometry' so that LSOA / MSOA / LAD geometries
# can later sit next to it under their own names.
# rename_geometry also updates the GeoDataFrame's active-geometry pointer; a plain
# .rename(columns=...) would leave it pointing at the no-longer-existing 'geometry' column.
shape_2011 = shape_2011.rename_geometry('OA_geometry')
shape_2011.head(3)

In [None]:
# Attach the LSOA / MSOA / LAD codes to every output-area geometry.
area_codes = ethnicity_2011[['OA11CD', 'LSOA11CD', 'MSOA11CD', 'LAD11CD']]
shape_2011 = shape_2011.merge(area_codes, on='OA11CD', how='left')

# 'LAD11CD_x' is the left-hand (shapefile) copy of the LAD code: the '_x' suffix is what
# merge adds when both frames carry a column of the same name. Only that copy is kept.
shape_column_order = ['OA11CD', 'LSOA11CD', 'MSOA11CD', 'LAD11CD_x', 'GlobalID', 'OA_geometry']
shape_2011 = shape_2011[shape_column_order].rename(columns={'LAD11CD_x': 'LAD11CD'})

# Re-wrap as a GeoDataFrame with 'OA_geometry' as the active geometry.
shape_2011 = gpd.GeoDataFrame(shape_2011, geometry='OA_geometry')
shape_2011.head(3)

In [None]:
# Saving OA_2011 into a shape file
shape_2011.to_file('preprocessed files/2011/OA_2011.shp')

In [None]:
myshape = gpd.read_file('preprocessed files/2011/OA_2011.shp').set_geometry('geometry')
myshape.plot(alpha=0.5, edgecolor='k')

In [None]:
# Create a new column in the shape table holding the LSOA geometry:
# union the OA geometries that share an LSOA code, then map the result
# back onto every OA row through its LSOA code.
lsoa_geom_2011 = shape_2011.groupby('LSOA11CD')['OA_geometry'].agg(lambda x: x.unary_union)
shape_2011['LSOA_geometry'] = shape_2011['LSOA11CD'].map(lsoa_geom_2011)

In [None]:
# Build the 2011 LSOA layer and save it as a shapefile:
# one geometry per LSOA, the four ethnic-group counts, their total and their shares.
lsoa_shapes = shape_2011.groupby('LSOA11CD').agg({'LSOA_geometry': 'first'}).reset_index()
lsoa_counts = ethnicity_2011.groupby('LSOA11CD')[['asian', 'black', 'other', 'white']].sum().reset_index()
LSOA_2011 = lsoa_shapes.merge(lsoa_counts, on='LSOA11CD', how='left')

LSOA_2011['total'] = LSOA_2011['white'] + LSOA_2011['asian'] + LSOA_2011['black'] + LSOA_2011['other']

# Share of each group in the LSOA population, rounded to 3 decimals.
for group in ['asian', 'black', 'other', 'white']:
    LSOA_2011[group + '_fraction'] = round(LSOA_2011[group] / LSOA_2011['total'], 3)

LSOA_2011['year'] = 2011
LSOA_2011 = LSOA_2011.rename(columns={'LSOA11CD': 'LSOACD'})

column_order = [
    'year', 'LSOACD', 'white', 'asian', 'black', 'other',
    'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction',
    'total', 'LSOA_geometry',
]
LSOA_2011 = gpd.GeoDataFrame(LSOA_2011[column_order], geometry='LSOA_geometry')
LSOA_2011.to_file('preprocessed files/2011/LSOA_2011.shp', driver='ESRI Shapefile')

In [None]:
LSOA_2011 = gpd.read_file('preprocessed files/2011/LSOA_2011.shp').set_geometry('geometry')
LSOA_2011.plot(alpha=0.5, edgecolor='k')

In [None]:
# Create a new column in the shape table holding the MSOA geometry:
# union the LSOA geometries that share an MSOA code, then map the result
# back onto every OA row through its MSOA code.
# The input still has one row per OA, so each LSOA geometry enters the union
# once for every OA it contains.
msoa_geom_2011 = shape_2011.groupby('MSOA11CD')['LSOA_geometry'].agg(lambda x: x.unary_union)
shape_2011['MSOA_geometry'] = shape_2011['MSOA11CD'].map(msoa_geom_2011)

In [None]:
# Build the 2011 MSOA layer and save it as a shapefile:
# one geometry per MSOA, the four ethnic-group counts, their total and their shares.
msoa_shapes = shape_2011.groupby('MSOA11CD').agg({'MSOA_geometry': 'first'}).reset_index()
msoa_counts = ethnicity_2011.groupby('MSOA11CD')[['asian', 'black', 'other', 'white']].sum().reset_index()
MSOA_2011 = msoa_shapes.merge(msoa_counts, on='MSOA11CD', how='left')

MSOA_2011['total'] = MSOA_2011['white'] + MSOA_2011['asian'] + MSOA_2011['black'] + MSOA_2011['other']

# Share of each group in the MSOA population, rounded to 3 decimals.
for group in ['asian', 'black', 'other', 'white']:
    MSOA_2011[group + '_fraction'] = round(MSOA_2011[group] / MSOA_2011['total'], 3)

MSOA_2011['year'] = 2011
MSOA_2011 = MSOA_2011.rename(columns={'MSOA11CD': 'MSOACD'})

column_order = [
    'year', 'MSOACD', 'white', 'asian', 'black', 'other',
    'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction',
    'total', 'MSOA_geometry',
]
MSOA_2011 = gpd.GeoDataFrame(MSOA_2011[column_order], geometry='MSOA_geometry')
MSOA_2011.to_file('preprocessed files/2011/MSOA_2011.shp', driver='ESRI Shapefile')

In [None]:
MSOA_2011 = gpd.read_file('preprocessed files/2011/MSOA_2011.shp').set_geometry('geometry')
MSOA_2011.plot(alpha=0.5, edgecolor='k')

In [None]:
# Create a new column in the shape table holding the LAD geometry:
# union the MSOA geometries that share a LAD code, then map the result
# back onto every OA row through its LAD code.
# The input still has one row per OA, so each MSOA geometry enters the union
# once for every OA it contains.
lad_geom_2011 = shape_2011.groupby('LAD11CD')['MSOA_geometry'].agg(lambda x: x.unary_union)
shape_2011['LAD_geometry'] = shape_2011['LAD11CD'].map(lad_geom_2011)

In [None]:
# To get better and clearer LAD borders, load LAD boundaries from a separate
# boundary file (the May 2020 LAD boundaries).
# In the cell that builds LAD_2011 these are matched to the LADs by name; for any LAD
# without a match in new_borders, the geometry built from the 2011 output areas is used instead.
new_borders = gpd.read_file('May_2020_Boundaries/LAD_May_2020_Boundaries_UK_BFE_2022_4839426458879395509.geojson')

In [None]:
# Build the 2011 LAD layer and save it as a shapefile:
# one geometry per LAD, its name, the four ethnic-group counts, their total and their shares.
lad_shapes = shape_2011.groupby('LAD11CD').agg({'LAD_geometry': 'first'}).reset_index()
lad_counts = ethnicity_2011.groupby(['LAD11CD', 'LAD11NM'])[['asian', 'black', 'other', 'white']].sum().reset_index()
LAD_2011 = lad_shapes.merge(lad_counts, on='LAD11CD', how='left')

LAD_2011['LAD_pop'] = LAD_2011['white'] + LAD_2011['asian'] + LAD_2011['black'] + LAD_2011['other']

# Share of each group in the LAD population, rounded to 3 decimals.
for group in ['asian', 'black', 'other', 'white']:
    LAD_2011[group + '_fraction'] = round(LAD_2011[group] / LAD_2011['LAD_pop'], 3)

LAD_2011['year'] = 2011
LAD_2011 = LAD_2011.rename(columns={'LAD11CD': 'LADCD', 'LAD11NM': 'LADNM'})

column_order = [
    'year', 'LADCD', 'LADNM', 'white', 'asian', 'black', 'other',
    'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction',
    'LAD_pop', 'LAD_geometry',
]

# Bring in the boundary-file geometry by LAD *name* (left join keeps every 2011 LAD).
LAD_2011 = LAD_2011[column_order].merge(
    new_borders[['lad20nm', 'geometry']],
    left_on='LADNM',
    right_on='lad20nm',
    how='left',
)

# Where no boundary-file geometry was matched, fall back to the geometry built from the 2011 areas,
# then keep a single geometry column under the name 'LAD_geometry'.
LAD_2011['geometry'] = LAD_2011['geometry'].fillna(LAD_2011['LAD_geometry'])
LAD_2011 = LAD_2011.drop(columns=['LAD_geometry', 'lad20nm']).rename(columns={'geometry': 'LAD_geometry'})

LAD_2011 = gpd.GeoDataFrame(LAD_2011, geometry='LAD_geometry')
LAD_2011.to_file('preprocessed files/2011/LAD_2011.shp', driver='ESRI Shapefile')

In [None]:
# Reload the saved LAD layer and plot it as a visual check.
# NOTE(review): this rebinds LAD_2011 to the frame read back from disk. The later index cells
# select 'white_frac', 'asian_frac', ... from it rather than the '*_fraction' names written above -
# presumably because the shapefile format shortens field names to 10 characters; confirm before
# renaming any of these columns.
LAD_2011 = gpd.read_file('preprocessed files/2011/LAD_2011.shp').set_geometry('geometry')
LAD_2011.plot(alpha=0.5, edgecolor='k')

# Indexes

## Simpson

In [None]:
#| include: false
# Simpson index at country level in 2011, computed on OAs, LSOAs, MSOAs and LADs.
ethnic_cols = ['white', 'asian', 'black', 'other']

OA_simp_2011 = simpson(ethnicity_2011[ethnic_cols])
LSOA_simp_2011 = simpson(ethnicity_2011.groupby(['LSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])
MSOA_simp_2011 = simpson(ethnicity_2011.groupby(['MSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])
LAD_simp_2011 = simpson(ethnicity_2011.groupby(['LAD11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])

# Element [0] is the population-weighted index over the areas of that level;
# element [1] is the index of all areas pooled together, reported as 'country'.
country_dic_sim_2011 = {
    'OA': OA_simp_2011[0],
    'LSOA': LSOA_simp_2011[0],
    'MSOA': MSOA_simp_2011[0],
    'LAD': LAD_simp_2011[0],
    'country': LAD_simp_2011[1],
}

# One summary row: year, population, group shares, then the indexes.
country_population = ethnicity_2011['total_pop'].sum()
country_row = {'year': 2011, 'total_population': country_population}
for group in ethnic_cols:
    country_row[group + '_frac'] = round(ethnicity_2011[group].sum() / country_population, 3)
country_row.update(country_dic_sim_2011)
flat_dict = [country_row]

country_simpson_2011 = pd.DataFrame(flat_dict)
country_simpson_2011.to_csv('preprocessed files/2011/country_simpson_2011.csv', index=False)
country_simpson_2011

In [None]:
#| include: false
# LAD-level Simpson index dataset for 2011:
# inside each LAD, compute the index over its OAs, its LSOAs and its MSOAs.
ethnic_cols = ['white', 'asian', 'black', 'other']

LAD_list_2011 = ethnicity_2011['LAD11CD'].unique()
LAD_dic = {}
for LAD in LAD_list_2011:
    df = ethnicity_2011.loc[ethnicity_2011['LAD11CD'] == LAD].reset_index(drop=True)
    OA_LADsimp_2011 = simpson(df[ethnic_cols])
    LSOA_LADsimp_2011 = simpson(df.groupby(['LSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])
    MSOA_LADsimp_2011 = simpson(df.groupby(['MSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])
    LAD_dic[LAD] = {'OA': OA_LADsimp_2011, 'LSOA': LSOA_LADsimp_2011, 'MSOA': MSOA_LADsimp_2011}

# One record per LAD. 'LAD' takes the second element of simpson()'s result (the index of
# the pooled column totals), read here from the MSOA-level call.
flat_dict = []
for lad_code, levels in LAD_dic.items():
    flat_dict.append({
        'year': 2011,
        'LADCD': lad_code,
        'OA': levels['OA'][0],
        'LSOA': levels['LSOA'][0],
        'MSOA': levels['MSOA'][0],
        'LAD': levels['MSOA'][1],
    })

LAD_simpson_2011 = pd.DataFrame(flat_dict)

# Add LAD name, population and group shares from the LAD layer.
lad_profile = (
    LAD_2011
    .groupby(['LADCD', 'LADNM'])[['LAD_pop', 'white_frac', 'asian_frac', 'black_frac', 'other_frac']]
    .sum()
    .reset_index()
)
LAD_simpson_2011 = LAD_simpson_2011.merge(lad_profile, on='LADCD', how='left')
LAD_simpson_2011 = LAD_simpson_2011.rename(columns={'LAD_pop': 'LAD_population'})

column_order = [
    'year', 'LADNM', 'LADCD', 'LAD_population',
    'white_frac', 'asian_frac', 'black_frac', 'other_frac',
    'OA', 'LSOA', 'MSOA', 'LAD',
]

LAD_simpson_2011 = LAD_simpson_2011[column_order]
LAD_simpson_2011.to_csv('preprocessed files/2011/LAD_simpson_2011.csv', index=False)
LAD_simpson_2011.head()

## Borders

In [None]:
# Adjacency of local authority districts: for every LAD, list the LADs whose geometry intersects it.
# The one-geometry-per-LAD table is built once here; previously the groupby over all
# output-area rows was repeated inside the outer loop, once per LAD.
lad_shapes = shape_2011.groupby('LAD11CD').agg({'LAD_geometry': 'first'})

LAD_borders_2011 = {}
for idx, row in lad_shapes.iterrows():
    LAD_borders_2011[idx] = [
        idx2
        for idx2, row2 in lad_shapes.iterrows()
        if idx != idx2 and row.LAD_geometry.intersects(row2.LAD_geometry)
    ]

In [None]:
with open('preprocessed files/2011/LAD_borders_2011.pkl', 'wb') as f:
    pickle.dump(LAD_borders_2011, f)

In [None]:
# Adjacency of MSOAs, searched separately inside each LAD:
# {LAD code: {MSOA code: [codes of the other MSOAs of that LAD whose geometry intersects it]}}
MSOA_borders_2011 = {}
for lad_code in shape_2011['LAD11CD'].unique():
    lad_rows = shape_2011.loc[shape_2011['LAD11CD'] == lad_code]
    # One geometry per MSOA of this LAD.
    msoa_shapes = lad_rows.groupby('MSOA11CD').agg({'MSOA_geometry': 'first'})['MSOA_geometry']
    MSOA_borders_2011[lad_code] = {
        code: [
            other_code
            for other_code, other_shape in msoa_shapes.items()
            if other_code != code and shape.intersects(other_shape)
        ]
        for code, shape in msoa_shapes.items()
    }


In [None]:
with open('preprocessed files/2011/MSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(MSOA_borders_2011, f)

In [None]:

# Trial run of the LSOA adjacency search for a single LAD ('E09000001', City of London in the lookup table).
# The flat {LSOA code: [neighbour codes]} dict built here is replaced by the per-LAD
# dict of the same name that a later cell builds for every LAD.
LSOA_borders_2011 = {}
df_lad = shape_2011[shape_2011['LAD11CD'] == 'E09000001']
# One geometry per LSOA of this LAD.
df_lsoa =df_lad.groupby('LSOA11CD').agg({'LSOA_geometry': 'first'})
# Bare expression in the middle of the cell: it is not the cell's last statement, so nothing is displayed.
df_lsoa
for idx, row in df_lsoa.iterrows():
    borders = []
    for idx2, row2 in df_lsoa.iterrows():
        # Compare every LSOA with every other LSOA of the LAD (never with itself).
        if idx != idx2:
            if row['LSOA_geometry'].intersects(row2['LSOA_geometry']):
                borders.append(idx2)
    LSOA_borders_2011[idx] = borders

In [None]:
# Grow every LSOA geometry by a very small buffer before the LSOA adjacency search below -
# presumably so that neighbours separated by tiny gaps still intersect; TODO confirm the intent.
# The distance is expressed in the coordinate units of the layer.
# NOTE(review): the column is overwritten in place, so re-running this cell buffers the geometries again.
shape_2011['LSOA_geometry'] = shape_2011['LSOA_geometry'].buffer(0.000001)


In [None]:
# Adjacency of LSOAs, searched separately inside each LAD:
# {LAD code: {LSOA code: [codes of the other LSOAs of that LAD whose geometry intersects it]}}
LSOA_borders_2011 = {}
for lad_code in shape_2011['LAD11CD'].unique():
    lad_rows = shape_2011.loc[shape_2011['LAD11CD'] == lad_code]
    # One geometry per LSOA of this LAD.
    lsoa_shapes = lad_rows.groupby('LSOA11CD').agg({'LSOA_geometry': 'first'})['LSOA_geometry']
    LSOA_borders_2011[lad_code] = {
        code: [
            other_code
            for other_code, other_shape in lsoa_shapes.items()
            if other_code != code and shape.intersects(other_shape)
        ]
        for code, shape in lsoa_shapes.items()
    }


In [None]:
with open('preprocessed files/2011/LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(LSOA_borders_2011, f)

In [None]:
# Adjacency of output areas, searched separately inside each LAD:
# {LAD code: {OA code: [codes of the other OAs of that LAD whose geometry intersects it]}}
OA_borders_2011 = {}
for lad_code in shape_2011['LAD11CD'].unique():
    lad_rows = shape_2011.loc[shape_2011['LAD11CD'] == lad_code]
    # Rows are already one per OA, so indexing by the OA code is enough.
    oa_shapes = lad_rows.set_index('OA11CD')['OA_geometry']
    OA_borders_2011[lad_code] = {
        code: [
            other_code
            for other_code, other_shape in oa_shapes.items()
            if other_code != code and shape.intersects(other_shape)
        ]
        for code, shape in oa_shapes.items()
    }


In [None]:
with open('preprocessed files/2011/OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(OA_borders_2011, f)

## Dissimilarity

In [None]:
# Country-level dissimilarity index dataset for 2011:
# the index of every ethnic group computed over all OAs, LSOAs, MSOAs and LADs.
ethnic_cols = ['white', 'asian', 'black', 'other']

OA_diss_2011 = dissimilarity(ethnicity_2011[ethnic_cols])
LSOA_diss_2011 = dissimilarity(ethnicity_2011.groupby(['LSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])
MSOA_diss_2011 = dissimilarity(ethnicity_2011.groupby(['MSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])
LAD_diss_2011 = dissimilarity(ethnicity_2011.groupby(['LAD11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols])
country_dic_diss_2011 = {
    'OA_level': OA_diss_2011,
    'LSOA_level': LSOA_diss_2011,
    'MSOA_level': MSOA_diss_2011,
    'LAD_level': LAD_diss_2011,
}

# One summary row: year, population, group shares, then '<level>_<group>' index columns
# (all levels of one group first, then the next group).
country_population = ethnicity_2011['total_pop'].sum()
country_row = {'year': 2011, 'total_population': country_population}
for group in ethnic_cols:
    country_row[group + '_frac'] = round(ethnicity_2011[group].sum() / country_population, 3)
for group in ethnic_cols:
    for level in ['OA', 'LSOA', 'MSOA', 'LAD']:
        country_row[level + '_' + group] = country_dic_diss_2011[level + '_level'][group]
flat_dict = [country_row]

country_dissimilarity_2011 = pd.DataFrame(flat_dict)
country_dissimilarity_2011



In [None]:
country_dissimilarity_2011.to_csv('preprocessed files/2011/country_dissimilarity_2011.csv', index= False)

In [None]:
# LAD-level dissimilarity index dataset for 2011:
# inside each LAD, compute the index of every ethnic group over its OAs, its LSOAs and its MSOAs.
ethnic_cols = ['white', 'asian', 'black', 'other']

LAD_list_2011 = ethnicity_2011['LAD11CD'].unique()
LAD_dic_diss_2011 = {}
for LAD in LAD_list_2011:
    df = ethnicity_2011[ethnicity_2011['LAD11CD'] == LAD].reset_index(drop=True)
    # The per-LAD results go straight into the dict. They are deliberately NOT bound to
    # OA_diss_2011 / LSOA_diss_2011 / MSOA_diss_2011, which hold the country-level results
    # and would otherwise end up holding the values of the last LAD in the loop.
    LAD_dic_diss_2011[LAD] = {
        'OA_level': dissimilarity(df[ethnic_cols]),
        'LSOA_level': dissimilarity(df.groupby(['LSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols]),
        'MSOA_level': dissimilarity(df.groupby(['MSOA11CD'])[ethnic_cols].sum().reset_index()[ethnic_cols]),
    }

# One record per LAD with '<level>_<group>' columns (all levels of one group first, then the next group).
flat_dict = []
for lad_code, levels in LAD_dic_diss_2011.items():
    record = {'year': 2011, 'LADCD': lad_code}
    for group in ethnic_cols:
        for level in ['OA', 'LSOA', 'MSOA']:
            record[level + '_' + group] = levels[level + '_level'][group]
    flat_dict.append(record)

LAD_dissimilarity_2011 = pd.DataFrame(flat_dict)

# Add LAD name, population and group shares from the LAD layer.
LAD_dissimilarity_2011 = LAD_dissimilarity_2011.merge(
    LAD_2011.groupby(['LADCD', 'LADNM'])[['LAD_pop', 'white_frac', 'asian_frac', 'black_frac', 'other_frac']].sum().reset_index(),
    on='LADCD', how='left')
LAD_dissimilarity_2011 = LAD_dissimilarity_2011.rename(columns={'LAD_pop': 'LAD_population'})

column_order = ['year', 'LADNM', 'LADCD', 'LAD_population',
                'white_frac', 'asian_frac', 'black_frac', 'other_frac',
                'OA_white', 'LSOA_white', 'MSOA_white',
                'OA_asian', 'LSOA_asian', 'MSOA_asian',
                'OA_black', 'LSOA_black', 'MSOA_black',
                'OA_other', 'LSOA_other', 'MSOA_other']

LAD_dissimilarity_2011 = LAD_dissimilarity_2011[column_order]
LAD_dissimilarity_2011.head()

In [None]:
LAD_dissimilarity_2011.to_csv('preprocessed files/2011/LAD_dissimilarity_2011.csv', index= False)

In [None]:
LAD_2011 = gpd.read_file('preprocessed files/2011/LAD_2011.shp')

## Moran

In [None]:
with open('preprocessed files/2011/OA_borders_2011.pkl', 'rb') as f:
    OA_borders_2011 = pickle.load(f)
with open('preprocessed files/2011/LSOA_borders_2011.pkl', 'rb') as f:
    LSOA_borders_2011 = pickle.load(f)
with open('preprocessed files/2011/MSOA_borders_2011.pkl', 'rb') as f:
    MSOA_borders_2011 = pickle.load(f)
with open('preprocessed files/2011/LAD_borders_2011.pkl', 'rb') as f:
    LAD_borders_2011 = pickle.load(f)

In [None]:
# LAD-level Moran's I dataset for 2011:
# inside each LAD, compute Moran's I of every ethnic group over its OAs, its LSOAs and its MSOAs,
# using the adjacency dicts of that LAD.
ethnic_cols = ['white', 'asian', 'black', 'other']

LAD_list_2011 = ethnicity_2011['LAD11CD'].unique()
LAD_dic_mor_2011 = {}
for LAD in LAD_list_2011:
    df = ethnicity_2011.loc[ethnicity_2011['LAD11CD'] == LAD].reset_index(drop=True)
    # moran() looks areas up by code, so every table is indexed by the area code of its level.
    oa_counts = df[['OA11CD'] + ethnic_cols].set_index('OA11CD')
    lsoa_counts = df.groupby(['LSOA11CD'])[ethnic_cols].sum()
    msoa_counts = df.groupby(['MSOA11CD'])[ethnic_cols].sum()
    OA_mor_2011 = moran(oa_counts, OA_borders_2011[LAD])
    LSOA_mor_2011 = moran(lsoa_counts, LSOA_borders_2011[LAD])
    MSOA_mor_2011 = moran(msoa_counts, MSOA_borders_2011[LAD])
    LAD_dic_mor_2011[LAD] = {'OA_level': OA_mor_2011, 'LSOA_level': LSOA_mor_2011, 'MSOA_level': MSOA_mor_2011}

# One record per LAD with '<level>_<group>' columns (all levels of one group first, then the next group).
flat_dict = []
for lad_code, levels in LAD_dic_mor_2011.items():
    record = {'year': 2011, 'LADCD': lad_code}
    for group in ethnic_cols:
        for level in ['OA', 'LSOA', 'MSOA']:
            record[level + '_' + group] = levels[level + '_level'][group]
    flat_dict.append(record)

LAD_moran_2011 = pd.DataFrame(flat_dict)

# Add LAD name, population and group shares from the LAD layer.
lad_profile = (
    LAD_2011
    .groupby(['LADCD', 'LADNM'])[['LAD_pop', 'white_frac', 'asian_frac', 'black_frac', 'other_frac']]
    .sum()
    .reset_index()
)
LAD_moran_2011 = LAD_moran_2011.merge(lad_profile, on='LADCD', how='left')
LAD_moran_2011 = LAD_moran_2011.rename(columns={'LAD_pop': 'LAD_population'})

column_order = [
    'year', 'LADNM', 'LADCD', 'LAD_population',
    'white_frac', 'asian_frac', 'black_frac', 'other_frac',
    'OA_white', 'LSOA_white', 'MSOA_white',
    'OA_asian', 'LSOA_asian', 'MSOA_asian',
    'OA_black', 'LSOA_black', 'MSOA_black',
    'OA_other', 'LSOA_other', 'MSOA_other',
]

LAD_moran_2011 = LAD_moran_2011[column_order]
LAD_moran_2011.head()



In [None]:
# Persisting the LAD-level 2011 Moran table (row index not written).
LAD_moran_2011.to_csv(
    path_or_buf='preprocessed files/2011/LAD_moran_2011.csv',
    index=False,
)

In [None]:
# OA_border_2011 = {}

# for idx, row in shape_2011.set_index(keys='OA11CD').iterrows():
#     borders = []
#     for idx2, row2 in shape_2011.set_index(keys='OA11CD').iterrows():
#         if idx != idx2:
#             if row.OA_geometry.intersects(row2.OA_geometry):
#                 borders.append(idx2)
#     OA_border_2011[idx] = borders

In [None]:
# LSOA_border_2011 = {}

# for idx, row in shape_2011.groupby('LSOA11CD').agg({'LSOA_geometry': 'first'}).iterrows():
#     borders = []
#     for idx2, row2 in shape_2011.groupby('LSOA11CD').agg({'LSOA_geometry': 'first'}).iterrows():
#         if idx != idx2:
#             if row.LSOA_geometry.intersects(row2.LSOA_geometry):
#                 borders.append(idx2)
#     LSOA_border_2011[idx] = borders

In [None]:
# MSOA_border_2011 = {}

# for idx, row in shape_2011.groupby('MSOA11CD').agg({'MSOA_geometry': 'first'}).iterrows():
#     borders = []
#     for idx2, row2 in shape_2011.groupby('MSOA11CD').agg({'MSOA_geometry': 'first'}).iterrows():
#         if idx != idx2:
#             if row.MSOA_geometry.intersects(row2.MSOA_geometry):
#                 borders.append(idx2)
#     MSOA_border_2011[idx] = borders

In [None]:
# Summary statistics of total population per LSOA (2011).
ethnicity_2011.groupby('LSOA11CD')['total_pop'].sum().describe()

In [None]:
# Summary statistics of total population per MSOA (2011).
ethnicity_2011.groupby('MSOA11CD')['total_pop'].sum().describe()

In [None]:
# Summary statistics of total population per LAD (2011).
ethnicity_2011.groupby('LAD11CD')['total_pop'].sum().describe()

In [None]:
# Summary statistics of the number of non-null OA codes per LAD (2011).
ethnicity_2011.groupby('LAD11CD')['OA11CD'].count().describe()

In [None]:
# Summary statistics of the number of distinct LSOAs inside each MSOA (2011).
# ethnicity_2011 appears to hold one row per OA (the Moran cell indexes it by
# OA11CD), so .count() on LSOA11CD would tally OA rows per MSOA rather than
# LSOAs; .nunique() counts each LSOA once.
# NOTE(review): if the OA-row count per MSOA was really intended, revert to .count().
ethnicity_2011.groupby('MSOA11CD')['LSOA11CD'].nunique().describe()

# Cities

## OA

In [30]:
# Loading the preprocessed OA layers for both census years.
OA_2011, OA_2021 = (
    gpd.read_file(f'preprocessed files/{census_year}/OA_{census_year}.shp')
    for census_year in (2011, 2021)
)

In [31]:
# Creating city datasets
birmingham_2011= ethnicity_2011[ethnicity_2011['LAD11NM'].str.contains('Birmingham')]
birmingham_2011['year']= '2011'
birmingham_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
                        'LSOA11NM':'LSOANM', 'MSOA11NM':'MSOANM', 'LAD11NM':'LADNM'}, inplace=True)

birmingham_2011 = pd.merge(birmingham_2011,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
birmingham_2011['geometry'] = birmingham_2011['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(birmingham_2011['geometry'])

cols = list(birmingham_2011.columns)
cols.insert(0, cols.pop(cols.index('year')))
birmingham_2011 = birmingham_2011[cols]
# birmingham_2011 = birmingham_2011.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
birmingham_2011 = birmingham_2011.set_geometry('geometry')

leicester_2011= ethnicity_2011[ethnicity_2011['LAD11NM'].str.contains('Leicester') &
                                ~ethnicity_2011['LAD11NM'].str.contains('North West Leicestershire')]
leicester_2011['year']= '2011'
leicester_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
                        'LSOA11NM':'LSOANM', 'MSOA11NM':'MSOANM', 'LAD11NM':'LADNM'}, inplace=True)

leicester_2011 = pd.merge(leicester_2011,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
leicester_2011['geometry'] = leicester_2011['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(leicester_2011['geometry'])

cols = list(leicester_2011.columns)
cols.insert(0, cols.pop(cols.index('year')))
leicester_2011 = leicester_2011[cols]
# leicester_2011 = leicester_2011.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
leicester_2011 = leicester_2011.set_geometry('geometry')

bradford_2011= ethnicity_2011[ethnicity_2011['LAD11NM'].str.contains('Bradford')]
bradford_2011['year']= '2011'
bradford_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
                        'LSOA11NM':'LSOANM', 'MSOA11NM':'MSOANM', 'LAD11NM':'LADNM'}, inplace=True)

bradford_2011 = pd.merge(bradford_2011,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
bradford_2011['geometry'] = bradford_2011['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(bradford_2011['geometry'])

cols = list(bradford_2011.columns)
cols.insert(0, cols.pop(cols.index('year')))
bradford_2011 = bradford_2011[cols]
# bradford_2011 = bradford_2011.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
bradford_2011 = bradford_2011.set_geometry('geometry')

blackburn_2011= ethnicity_2011[ethnicity_2011['LAD11NM'].str.contains('Blackburn')]
blackburn_2011['year']= '2011'
blackburn_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
                        'LSOA11NM':'LSOANM', 'MSOA11NM':'MSOANM', 'LAD11NM':'LADNM'}, inplace=True)

blackburn_2011 = pd.merge(blackburn_2011,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
blackburn_2011['geometry'] = blackburn_2011['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(blackburn_2011['geometry'])

cols = list(blackburn_2011.columns)
cols.insert(0, cols.pop(cols.index('year')))
blackburn_2011 = blackburn_2011[cols]
# blackburn_2011 = blackburn_2011.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
blackburn_2011 = blackburn_2011.set_geometry('geometry')

oldham_2011= ethnicity_2011[ethnicity_2011['LAD11NM'].str.contains('Oldham')]
oldham_2011['year']= '2011'
oldham_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
                        'LSOA11NM':'LSOANM', 'MSOA11NM':'MSOANM', 'LAD11NM':'LADNM'}, inplace=True)

oldham_2011 = pd.merge(oldham_2011,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
oldham_2011['geometry'] = oldham_2011['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(oldham_2011['geometry'])

cols = list(oldham_2011.columns)
cols.insert(0, cols.pop(cols.index('year')))
oldham_2011 = oldham_2011[cols]
# oldham_2011 = oldham_2011.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
oldham_2011 = oldham_2011.set_geometry('geometry')

pendle_2011= ethnicity_2011[ethnicity_2011['LAD11NM'].str.contains('Pendle')]
pendle_2011['year']= '2011'
pendle_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
                        'LSOA11NM':'LSOANM', 'MSOA11NM':'MSOANM', 'LAD11NM':'LADNM'}, inplace=True)

pendle_2011 = pd.merge(pendle_2011,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
pendle_2011['geometry'] = pendle_2011['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(pendle_2011['geometry'])

cols = list(pendle_2011.columns)
cols.insert(0, cols.pop(cols.index('year')))
pendle_2011 = pendle_2011[cols]
# pendle_2011 = pendle_2011.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
pendle_2011 = pendle_2011.set_geometry('geometry')

london_2011= ethnicity_2011[ethnicity_2011.LAD11CD.str.extract('([a-zA-Z]+)([^a-zA-Z]+)', expand=True)[1].astype(int).between(9000001,9000034)]
london_2011['year']= '2011'
london_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
                        'LSOA11NM':'LSOANM', 'MSOA11NM':'MSOANM', 'LAD11NM':'LADNM'}, inplace=True)

london_2011 = pd.merge(london_2011,OA_2021[['OA21CD','geometry']], left_on='OACD', right_on='OA21CD', how= 'left').drop(['OA21CD'], axis = 1)
london_2011['geometry'] = london_2011['OACD'].map(OA_2011.set_index('OA11CD')['geometry']).fillna(london_2011['geometry'])

cols = list(london_2011.columns)
cols.insert(0, cols.pop(cols.index('year')))
london_2011 = london_2011[cols]
# london_2011 = london_2011.drop(['LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM'], axis= 1)
london_2011 = london_2011.set_geometry('geometry')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  birmingham_2011['year']= '2011'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  birmingham_2011.rename(columns={'OA11CD':'OACD', 'LSOA11CD':'LSOACD', 'MSOA11CD':'MSOACD', 'LAD11CD':'LADCD',
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  leicester_2011['year']= '2011'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the

In [32]:
# Additional Columns
ethnicity_list = ['asian', 'black', 'other', 'white']
col_order = ['year', 'OACD', 'LSOACD', 'MSOACD', 'LADCD', 'LSOANM', 'MSOANM', 'LADNM', 'OA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    birmingham_2011[f'{ethnicity}_fraction'] = round(birmingham_2011[ethnicity]/birmingham_2011['total_pop'],3)
OA_simpson = []
for OA in birmingham_2011['OACD']:
    df = birmingham_2011[birmingham_2011['OACD']== OA]
    birmingham_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(birmingham_OA_simpson)

# Reordering the columns
birmingham_2011['OA_simpson']= OA_simpson
birmingham_2011 = birmingham_2011[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    leicester_2011[f'{ethnicity}_fraction'] = round(leicester_2011[ethnicity]/leicester_2011['total_pop'],3)
OA_simpson = []
for OA in leicester_2011['OACD']:
    df = leicester_2011[leicester_2011['OACD']== OA]
    leicester_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(leicester_OA_simpson)

# Reordering the columns
leicester_2011['OA_simpson']= OA_simpson
leicester_2011 = leicester_2011[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    bradford_2011[f'{ethnicity}_fraction'] = round(bradford_2011[ethnicity]/bradford_2011['total_pop'],3)
OA_simpson = []
for OA in bradford_2011['OACD']:
    df = bradford_2011[bradford_2011['OACD']== OA]
    bradford_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(bradford_OA_simpson)

# Reordering the columns
bradford_2011['OA_simpson']= OA_simpson
bradford_2011 = bradford_2011[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    blackburn_2011[f'{ethnicity}_fraction'] = round(blackburn_2011[ethnicity]/blackburn_2011['total_pop'],3)
OA_simpson = []
for OA in blackburn_2011['OACD']:
    df = blackburn_2011[blackburn_2011['OACD']== OA]
    blackburn_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(blackburn_OA_simpson)

# Reordering the columns
blackburn_2011['OA_simpson']= OA_simpson
blackburn_2011 = blackburn_2011[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    oldham_2011[f'{ethnicity}_fraction'] = round(oldham_2011[ethnicity]/oldham_2011['total_pop'],3)
OA_simpson = []
for OA in oldham_2011['OACD']:
    df = oldham_2011[oldham_2011['OACD']== OA]
    oldham_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(oldham_OA_simpson)

# Reordering the columns
oldham_2011['OA_simpson']= OA_simpson
oldham_2011 = oldham_2011[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    pendle_2011[f'{ethnicity}_fraction'] = round(pendle_2011[ethnicity]/pendle_2011['total_pop'],3)
OA_simpson = []
for OA in pendle_2011['OACD']:
    df = pendle_2011[pendle_2011['OACD']== OA]
    pendle_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(pendle_OA_simpson)

# Reordering the columns
pendle_2011['OA_simpson']= OA_simpson
pendle_2011 = pendle_2011[col_order]

# Creating OA_simpson and fraction columns
for ethnicity in ethnicity_list:
    london_2011[f'{ethnicity}_fraction'] = round(london_2011[ethnicity]/london_2011['total_pop'],3)
OA_simpson = []
for OA in london_2011['OACD']:
    df = london_2011[london_2011['OACD']== OA]
    london_OA_simpson = simpson(df[['white','asian','black','other']].reset_index(drop= True))[0]
    OA_simpson.append(london_OA_simpson)

# Reordering the columns
london_2011['OA_simpson']= OA_simpson
london_2011 = london_2011[col_order]

In [33]:
# Writing every OA-level city dataset to its own CSV (row index not written).
city_oa_frames_2011 = {
    'birmingham': birmingham_2011,
    'leicester': leicester_2011,
    'bradford': bradford_2011,
    'blackburn': blackburn_2011,
    'oldham': oldham_2011,
    'pendle': pendle_2011,
    'london': london_2011,
}
for city_name, city_frame in city_oa_frames_2011.items():
    city_frame.to_csv(f'preprocessed files/2011/{city_name}_2011.csv', index=False)

## LSOAs

In [34]:
# Loading the preprocessed LSOA layers for both census years.
LSOA_2011, LSOA_2021 = (
    gpd.read_file(f'preprocessed files/{census_year}/LSOA_{census_year}.shp')
    for census_year in (2011, 2021)
)

In [35]:
# Creatin city LSOA datasets
birmingham_LSOA_2011 = birmingham_2011.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
birmingham_LSOA_2011['geometry'] = birmingham_LSOA_2011['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(birmingham_LSOA_2011['geometry'])
birmingham_LSOA_2011= birmingham_LSOA_2011.set_geometry('geometry')

leicester_LSOA_2011 = leicester_2011.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
leicester_LSOA_2011['geometry'] = leicester_LSOA_2011['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(leicester_LSOA_2011['geometry'])
leicester_LSOA_2011= leicester_LSOA_2011.set_geometry('geometry')

bradford_LSOA_2011 = bradford_2011.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
bradford_LSOA_2011['geometry'] = bradford_LSOA_2011['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(bradford_LSOA_2011['geometry'])
bradford_LSOA_2011= bradford_LSOA_2011.set_geometry('geometry')

blackburn_LSOA_2011 = blackburn_2011.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
blackburn_LSOA_2011['geometry'] = blackburn_LSOA_2011['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(blackburn_LSOA_2011['geometry'])
blackburn_LSOA_2011= blackburn_LSOA_2011.set_geometry('geometry')

oldham_LSOA_2011 = oldham_2011.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
oldham_LSOA_2011['geometry'] = oldham_LSOA_2011['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(oldham_LSOA_2011['geometry'])
oldham_LSOA_2011= oldham_LSOA_2011.set_geometry('geometry')

pendle_LSOA_2011 = pendle_2011.groupby(['LSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
pendle_LSOA_2011['geometry'] = pendle_LSOA_2011['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(pendle_LSOA_2011['geometry'])
pendle_LSOA_2011= pendle_LSOA_2011.set_geometry('geometry')

london_LSOA_2011 = london_2011.groupby(['LSOACD','LADNM','year'])[['white','asian','black','other']].sum().reset_index().merge(LSOA_2021[['LSOACD','geometry']], on='LSOACD', how= 'left')
london_LSOA_2011['geometry'] = london_LSOA_2011['LSOACD'].map(LSOA_2011.set_index('LSOACD')['geometry']).fillna(london_LSOA_2011['geometry'])
london_LSOA_2011= london_LSOA_2011.set_geometry('geometry')


In [36]:
col_order = ['year', 'LSOACD', 'LSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']


def _add_lsoa_indices_2011(lsoa, columns):
    """Return `lsoa` with total_pop, ethnic fractions and LSOA_simpson added.

    'total_pop' is the sum of the four ethnic-group counts; every group in
    ethnicity_list gets a '<group>_fraction' column (count / total_pop, rounded
    to 3 decimals); 'LSOA_simpson' holds the first element returned by
    simpson() for the rows sharing each LSOA code. The result is restricted to,
    and ordered by, `columns`.
    """
    lsoa = lsoa.copy()
    lsoa['total_pop'] = lsoa['white'] + lsoa['asian'] + lsoa['black'] + lsoa['other']
    for ethnicity in ethnicity_list:
        lsoa[f'{ethnicity}_fraction'] = (lsoa[ethnicity] / lsoa['total_pop']).round(3)

    # One groupby pass instead of re-filtering the whole frame once per LSOA.
    counts = lsoa[['LSOACD', 'white', 'asian', 'black', 'other']]
    simpson_by_lsoa = {
        code: simpson(rows[['white', 'asian', 'black', 'other']].reset_index(drop=True))[0]
        for code, rows in counts.groupby('LSOACD', sort=False)
    }
    lsoa['LSOA_simpson'] = lsoa['LSOACD'].map(simpson_by_lsoa)

    # Reordering the columns
    return lsoa[columns]


birmingham_LSOA_2011 = _add_lsoa_indices_2011(birmingham_LSOA_2011, col_order)
leicester_LSOA_2011 = _add_lsoa_indices_2011(leicester_LSOA_2011, col_order)
bradford_LSOA_2011 = _add_lsoa_indices_2011(bradford_LSOA_2011, col_order)
blackburn_LSOA_2011 = _add_lsoa_indices_2011(blackburn_LSOA_2011, col_order)
oldham_LSOA_2011 = _add_lsoa_indices_2011(oldham_LSOA_2011, col_order)
pendle_LSOA_2011 = _add_lsoa_indices_2011(pendle_LSOA_2011, col_order)

# London keeps the borough name (LADNM) next to the LSOA code.
col_order = ['year', 'LSOACD','LADNM', 'LSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']
london_LSOA_2011 = _add_lsoa_indices_2011(london_LSOA_2011, col_order)


## MSOAs

In [37]:
# Loading the preprocessed MSOA layers for both census years.
MSOA_2011, MSOA_2021 = (
    gpd.read_file(f'preprocessed files/{census_year}/MSOA_{census_year}.shp')
    for census_year in (2011, 2021)
)

In [38]:
# Creatin city MSOA datasets
birmingham_MSOA_2011 = birmingham_2011.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
birmingham_MSOA_2011['geometry'] = birmingham_MSOA_2011['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(birmingham_MSOA_2011['geometry'])
birmingham_MSOA_2011= birmingham_MSOA_2011.set_geometry('geometry')

leicester_MSOA_2011 = leicester_2011.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
leicester_MSOA_2011['geometry'] = leicester_MSOA_2011['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(leicester_MSOA_2011['geometry'])
leicester_MSOA_2011= leicester_MSOA_2011.set_geometry('geometry')

bradford_MSOA_2011 = bradford_2011.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
bradford_MSOA_2011['geometry'] = bradford_MSOA_2011['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(bradford_MSOA_2011['geometry'])
bradford_MSOA_2011= bradford_MSOA_2011.set_geometry('geometry')

blackburn_MSOA_2011 = blackburn_2011.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
blackburn_MSOA_2011['geometry'] = blackburn_MSOA_2011['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(blackburn_MSOA_2011['geometry'])
blackburn_MSOA_2011= blackburn_MSOA_2011.set_geometry('geometry')

oldham_MSOA_2011 = oldham_2011.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
oldham_MSOA_2011['geometry'] = oldham_MSOA_2011['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(oldham_MSOA_2011['geometry'])
oldham_MSOA_2011= oldham_MSOA_2011.set_geometry('geometry')

pendle_MSOA_2011 = pendle_2011.groupby(['MSOACD','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
pendle_MSOA_2011['geometry'] = pendle_MSOA_2011['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(pendle_MSOA_2011['geometry'])
pendle_MSOA_2011= pendle_MSOA_2011.set_geometry('geometry')

london_MSOA_2011 = london_2011.groupby(['MSOACD','LADNM','year'])[['white','asian','black','other']].sum().reset_index().merge(MSOA_2021[['MSOACD','geometry']], on='MSOACD', how= 'left')
london_MSOA_2011['geometry'] = london_MSOA_2011['MSOACD'].map(MSOA_2011.set_index('MSOACD')['geometry']).fillna(london_MSOA_2011['geometry'])
london_MSOA_2011= london_MSOA_2011.set_geometry('geometry')

In [39]:
col_order = ['year', 'MSOACD', 'MSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']


def _add_msoa_indices_2011(msoa, columns):
    """Return `msoa` with total_pop, ethnic fractions and MSOA_simpson added.

    'total_pop' is the sum of the four ethnic-group counts; every group in
    ethnicity_list gets a '<group>_fraction' column (count / total_pop, rounded
    to 3 decimals); 'MSOA_simpson' holds the first element returned by
    simpson() for the rows sharing each MSOA code. The result is restricted to,
    and ordered by, `columns`.
    """
    msoa = msoa.copy()
    msoa['total_pop'] = msoa['white'] + msoa['asian'] + msoa['black'] + msoa['other']
    for ethnicity in ethnicity_list:
        msoa[f'{ethnicity}_fraction'] = (msoa[ethnicity] / msoa['total_pop']).round(3)

    # One groupby pass instead of re-filtering the whole frame once per MSOA.
    counts = msoa[['MSOACD', 'white', 'asian', 'black', 'other']]
    simpson_by_msoa = {
        code: simpson(rows[['white', 'asian', 'black', 'other']].reset_index(drop=True))[0]
        for code, rows in counts.groupby('MSOACD', sort=False)
    }
    msoa['MSOA_simpson'] = msoa['MSOACD'].map(simpson_by_msoa)

    # Reordering the columns
    return msoa[columns]


birmingham_MSOA_2011 = _add_msoa_indices_2011(birmingham_MSOA_2011, col_order)
leicester_MSOA_2011 = _add_msoa_indices_2011(leicester_MSOA_2011, col_order)
bradford_MSOA_2011 = _add_msoa_indices_2011(bradford_MSOA_2011, col_order)
blackburn_MSOA_2011 = _add_msoa_indices_2011(blackburn_MSOA_2011, col_order)
oldham_MSOA_2011 = _add_msoa_indices_2011(oldham_MSOA_2011, col_order)
pendle_MSOA_2011 = _add_msoa_indices_2011(pendle_MSOA_2011, col_order)

# London keeps the borough name (LADNM) next to the MSOA code.
col_order = ['year', 'MSOACD','LADNM', 'MSOA_simpson',
             'white', 'asian', 'black', 'other', 'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction', 'geometry']
london_MSOA_2011 = _add_msoa_indices_2011(london_MSOA_2011, col_order)


## LAD

In [40]:
# Loading the preprocessed LAD shapefiles of both census years
LAD_2011, LAD_2021 = (
    gpd.read_file(shapefile_path)
    for shapefile_path in ('preprocessed files/2011/LAD_2011.shp',
                           'preprocessed files/2021/LAD_2021.shp')
)

In [41]:
# Summing the London ethnicity counts per LAD and attaching the 2021 LAD geometry
lad_counts = (
    london_2011
    .groupby(['LADCD', 'LADNM', 'year'])[['white', 'asian', 'black', 'other']]
    .sum()
    .reset_index()
)
london_LAD_2011 = lad_counts.merge(LAD_2021[['LADCD', 'geometry']], on='LADCD', how='left')

# The 2011 geometry takes precedence; the 2021 geometry is kept only where no 2011 match exists
geometry_2011 = london_LAD_2011['LADCD'].map(LAD_2011.set_index('LADCD')['geometry'])
london_LAD_2011['geometry'] = geometry_2011.fillna(london_LAD_2011['geometry'])
london_LAD_2011 = london_LAD_2011.set_geometry('geometry')

In [42]:

# Creating LAD_simpson, total population and fraction columns- London
london_LAD_2011['total_pop'] = (
    london_LAD_2011['white']
    + london_LAD_2011['asian']
    + london_LAD_2011['black']
    + london_LAD_2011['other']
)
for ethnicity in ethnicity_list:
    ethnicity_share = london_LAD_2011[ethnicity] / london_LAD_2011['total_pop']
    london_LAD_2011[f'{ethnicity}_fraction'] = round(ethnicity_share, 3)

# simpson() is evaluated on the rows sharing each LAD code; only its first element is kept
LAD_simpson = [
    simpson(
        london_LAD_2011
        .loc[london_LAD_2011['LADCD'] == lad_code, ['white', 'asian', 'black', 'other']]
        .reset_index(drop=True)
    )[0]
    for lad_code in london_LAD_2011['LADCD']
]

# Attaching the index and reordering the columns
london_LAD_2011['LAD_simpson'] = LAD_simpson
col_order = [
    'year', 'LADCD', 'LADNM', 'LAD_simpson',
    'white', 'asian', 'black', 'other', 'total_pop',
    'white_fraction', 'asian_fraction', 'black_fraction', 'other_fraction',
    'geometry',
]
london_LAD_2011 = london_LAD_2011[col_order]
# london_LAD_2011


# City Borders

## Birmingham

In [None]:
# # Creating shared border dictionary at OA level- Birmingham
# birmingham_OA_borders_2011 = {}

# for idx1, row1 in birmingham_2011.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in birmingham_2011.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     birmingham_OA_borders_2011[idx1] = borders
    
# with open('preprocessed files/2011/birmingham_OA_borders_2011.pkl', 'wb') as f:
#     pickle.dump(birmingham_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- Birmingham
# Each LSOA code is mapped to the codes of every other LSOA whose geometry intersects it.
# The code-indexed geometry column is built once up front so that set_index()/iterrows()
# are not re-evaluated inside the pairwise comparison.
lsoa_geometries = birmingham_LSOA_2011.set_index('LSOACD')['geometry']
birmingham_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geometry in lsoa_geometries.items()
           if other_code != code and geometry.intersects(other_geometry)]
    for code, geometry in lsoa_geometries.items()
}

# Saving the dictionary to disk
with open('preprocessed files/2011/birmingham_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(birmingham_LSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA_OA level- Birmingham
# For every LSOA code: {OA code: codes of the other OAs of that LSOA whose geometry intersects it}.
# The OA-indexed geometries of an LSOA are extracted once per LSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
birmingham_LSOA_OA_borders_2011 = {}
for lsoa in birmingham_2011['LSOACD'].unique():
    oa_geometries = (
        birmingham_2011.loc[birmingham_2011['LSOACD'] == lsoa]
        .set_index('OACD')['geometry']
    )
    birmingham_LSOA_OA_borders_2011[lsoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/birmingham_LSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(birmingham_LSOA_OA_borders_2011, f)

In [None]:
# Adding moran index to the city LSOA dataset- Birmingham
# moran() is called once per LSOA with that LSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
birmingham_LSOA_OA_moran = {}
for lsoa in birmingham_2011['LSOACD'].unique():
    lsoa_df = birmingham_2011[birmingham_2011['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     birmingham_LSOA_OA_borders_2011[lsoa])
    birmingham_LSOA_OA_moran[lsoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per LSOA
flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'],
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'],
              } for key, value in birmingham_LSOA_OA_moran.items()]

birmingham_LSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Moran columns left over from a previous
# run of this cell are dropped first: a second merge would otherwise create
# white_moran_x/_y columns and the column selection below would raise a KeyError.
moran_cols = ['white_moran', 'asian_moran']
birmingham_LSOA_2011 = (
    birmingham_LSOA_2011
    .drop(columns=moran_cols, errors='ignore')
    .merge(birmingham_LSOA_OA_moran_2011[['LSOACD'] + moran_cols], on='LSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'LSOACD', 'LSOA_simpson', 'white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

birmingham_LSOA_2011 = birmingham_LSOA_2011[col_order]

In [None]:
# Creating shared border dictionary at MSOA_OA level- Birmingham
# For every MSOA code: {OA code: codes of the other OAs of that MSOA whose geometry intersects it}.
# The OA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
birmingham_MSOA_OA_borders_2011 = {}
for msoa in birmingham_2011['MSOACD'].unique():
    oa_geometries = (
        birmingham_2011.loc[birmingham_2011['MSOACD'] == msoa]
        .set_index('OACD')['geometry']
    )
    birmingham_MSOA_OA_borders_2011[msoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/birmingham_MSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(birmingham_MSOA_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_LSOA level- Birmingham
# Each LSOA row is tagged with the first MSOA code found for it in birmingham_2011.
lsoa_to_msoa = birmingham_2011.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index()
birmingham_MSOA_LSOA = pd.merge(birmingham_LSOA_2011, lsoa_to_msoa, on='LSOACD', how='left')

# For every MSOA code: {LSOA code: codes of the other LSOAs of that MSOA whose geometry intersects it}.
# The LSOA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
birmingham_MSOA_LSOA_borders_2011 = {}
for msoa in birmingham_MSOA_LSOA['MSOACD'].unique():
    lsoa_geometries = (
        birmingham_MSOA_LSOA.loc[birmingham_MSOA_LSOA['MSOACD'] == msoa]
        .set_index('LSOACD')['geometry']
    )
    birmingham_MSOA_LSOA_borders_2011[msoa] = {
        lsoa: [other_lsoa
               for other_lsoa, other_geometry in lsoa_geometries.items()
               if other_lsoa != lsoa and geometry.intersects(other_geometry)]
        for lsoa, geometry in lsoa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/birmingham_MSOA_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(birmingham_MSOA_LSOA_borders_2011, f)

In [None]:
# MSOA_OA based moran- Birmingham
# moran() is called once per MSOA with that MSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
birmingham_MSOA_OA_moran = {}
for msoa in birmingham_2011['MSOACD'].unique():
    msoa_df = birmingham_2011[birmingham_2011['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     birmingham_MSOA_OA_borders_2011[msoa])
    birmingham_MSOA_OA_moran[msoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per MSOA
flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'],
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'],
              } for key, value in birmingham_MSOA_OA_moran.items()]

birmingham_MSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Columns left over from a previous run of
# this cell are dropped first: a second merge would otherwise create _x/_y suffixed
# columns and the column selection at the end of the cell would raise a KeyError.
moran_OA_cols = ['white_moran_OA', 'asian_moran_OA']
birmingham_MSOA_2011 = (
    birmingham_MSOA_2011
    .drop(columns=moran_OA_cols, errors='ignore')
    .merge(birmingham_MSOA_OA_moran_2011[['MSOACD'] + moran_OA_cols], on='MSOACD', how='left')
)

# MSOA_LSOA based moran- Birmingham
# Same computation on counts summed per LSOA (groupby already indexes the result by LSOACD).
birmingham_MSOA_LSOA_moran = {}
for msoa in birmingham_2011['MSOACD'].unique():
    msoa_df = birmingham_2011[birmingham_2011['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white', 'asian', 'black', 'other']].sum()
    LSOA_moran = moran(msoa_df, birmingham_MSOA_LSOA_borders_2011[msoa])
    birmingham_MSOA_LSOA_moran[msoa] = {'LSOA_moran': LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'],
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'],
              } for key, value in birmingham_MSOA_LSOA_moran.items()]

birmingham_MSOA_LSOA_moran_2011 = pd.DataFrame(flat_dict)

# Same re-run guard as for the OA based columns
moran_LSOA_cols = ['white_moran_LSOA', 'asian_moran_LSOA']
birmingham_MSOA_2011 = (
    birmingham_MSOA_2011
    .drop(columns=moran_LSOA_cols, errors='ignore')
    .merge(birmingham_MSOA_LSOA_moran_2011[['MSOACD'] + moran_LSOA_cols], on='MSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson', 'white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

birmingham_MSOA_2011 = birmingham_MSOA_2011[col_order]

## Leicester

In [None]:
# # Creating shared border dictionary at OA level- Leicester
# leicester_OA_borders_2011 = {}

# for idx1, row1 in leicester_2011.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in leicester_2011.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     leicester_OA_borders_2011[idx1] = borders
    
# with open('preprocessed files/2011/leicester_OA_borders_2011.pkl', 'wb') as f:
#     pickle.dump(leicester_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- Leicester
# Each LSOA code is mapped to the codes of every other LSOA whose geometry intersects it.
# The code-indexed geometry column is built once up front so that set_index()/iterrows()
# are not re-evaluated inside the pairwise comparison.
lsoa_geometries = leicester_LSOA_2011.set_index('LSOACD')['geometry']
leicester_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geometry in lsoa_geometries.items()
           if other_code != code and geometry.intersects(other_geometry)]
    for code, geometry in lsoa_geometries.items()
}

# Saving the dictionary to disk
with open('preprocessed files/2011/leicester_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(leicester_LSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA_OA level- Leicester
# For every LSOA code: {OA code: codes of the other OAs of that LSOA whose geometry intersects it}.
# The OA-indexed geometries of an LSOA are extracted once per LSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
leicester_LSOA_OA_borders_2011 = {}
for lsoa in leicester_2011['LSOACD'].unique():
    oa_geometries = (
        leicester_2011.loc[leicester_2011['LSOACD'] == lsoa]
        .set_index('OACD')['geometry']
    )
    leicester_LSOA_OA_borders_2011[lsoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/leicester_LSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(leicester_LSOA_OA_borders_2011, f)

In [None]:
# Adding moran index to the city LSOA dataset- Leicester
# moran() is called once per LSOA with that LSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
leicester_LSOA_OA_moran = {}
for lsoa in leicester_2011['LSOACD'].unique():
    lsoa_df = leicester_2011[leicester_2011['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     leicester_LSOA_OA_borders_2011[lsoa])
    leicester_LSOA_OA_moran[lsoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per LSOA
flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'],
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'],
              } for key, value in leicester_LSOA_OA_moran.items()]

leicester_LSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Moran columns left over from a previous
# run of this cell are dropped first: a second merge would otherwise create
# white_moran_x/_y columns and the column selection below would raise a KeyError.
moran_cols = ['white_moran', 'asian_moran']
leicester_LSOA_2011 = (
    leicester_LSOA_2011
    .drop(columns=moran_cols, errors='ignore')
    .merge(leicester_LSOA_OA_moran_2011[['LSOACD'] + moran_cols], on='LSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'LSOACD', 'LSOA_simpson', 'white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

leicester_LSOA_2011 = leicester_LSOA_2011[col_order]

In [None]:
# Creating shared border dictionary at MSOA_OA level- Leicester
# For every MSOA code: {OA code: codes of the other OAs of that MSOA whose geometry intersects it}.
# The OA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
leicester_MSOA_OA_borders_2011 = {}
for msoa in leicester_2011['MSOACD'].unique():
    oa_geometries = (
        leicester_2011.loc[leicester_2011['MSOACD'] == msoa]
        .set_index('OACD')['geometry']
    )
    leicester_MSOA_OA_borders_2011[msoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/leicester_MSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(leicester_MSOA_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_LSOA level- Leicester
# Each LSOA row is tagged with the first MSOA code found for it in leicester_2011.
lsoa_to_msoa = leicester_2011.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index()
leicester_MSOA_LSOA = pd.merge(leicester_LSOA_2011, lsoa_to_msoa, on='LSOACD', how='left')

# For every MSOA code: {LSOA code: codes of the other LSOAs of that MSOA whose geometry intersects it}.
# The LSOA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
leicester_MSOA_LSOA_borders_2011 = {}
for msoa in leicester_MSOA_LSOA['MSOACD'].unique():
    lsoa_geometries = (
        leicester_MSOA_LSOA.loc[leicester_MSOA_LSOA['MSOACD'] == msoa]
        .set_index('LSOACD')['geometry']
    )
    leicester_MSOA_LSOA_borders_2011[msoa] = {
        lsoa: [other_lsoa
               for other_lsoa, other_geometry in lsoa_geometries.items()
               if other_lsoa != lsoa and geometry.intersects(other_geometry)]
        for lsoa, geometry in lsoa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/leicester_MSOA_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(leicester_MSOA_LSOA_borders_2011, f)

In [None]:
# MSOA_OA based moran- Leicester
# moran() is called once per MSOA with that MSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
leicester_MSOA_OA_moran = {}
for msoa in leicester_2011['MSOACD'].unique():
    msoa_df = leicester_2011[leicester_2011['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     leicester_MSOA_OA_borders_2011[msoa])
    leicester_MSOA_OA_moran[msoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per MSOA
flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'],
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'],
              } for key, value in leicester_MSOA_OA_moran.items()]

leicester_MSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Columns left over from a previous run of
# this cell are dropped first: a second merge would otherwise create _x/_y suffixed
# columns and the column selection at the end of the cell would raise a KeyError.
moran_OA_cols = ['white_moran_OA', 'asian_moran_OA']
leicester_MSOA_2011 = (
    leicester_MSOA_2011
    .drop(columns=moran_OA_cols, errors='ignore')
    .merge(leicester_MSOA_OA_moran_2011[['MSOACD'] + moran_OA_cols], on='MSOACD', how='left')
)

# MSOA_LSOA based moran- Leicester
# Same computation on counts summed per LSOA (groupby already indexes the result by LSOACD).
leicester_MSOA_LSOA_moran = {}
for msoa in leicester_2011['MSOACD'].unique():
    msoa_df = leicester_2011[leicester_2011['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white', 'asian', 'black', 'other']].sum()
    LSOA_moran = moran(msoa_df, leicester_MSOA_LSOA_borders_2011[msoa])
    leicester_MSOA_LSOA_moran[msoa] = {'LSOA_moran': LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'],
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'],
              } for key, value in leicester_MSOA_LSOA_moran.items()]

leicester_MSOA_LSOA_moran_2011 = pd.DataFrame(flat_dict)

# Same re-run guard as for the OA based columns
moran_LSOA_cols = ['white_moran_LSOA', 'asian_moran_LSOA']
leicester_MSOA_2011 = (
    leicester_MSOA_2011
    .drop(columns=moran_LSOA_cols, errors='ignore')
    .merge(leicester_MSOA_LSOA_moran_2011[['MSOACD'] + moran_LSOA_cols], on='MSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson', 'white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

leicester_MSOA_2011 = leicester_MSOA_2011[col_order]

## Bradford

In [None]:
# # Creating shared border dictionary at OA level- Bradford
# bradford_OA_borders_2011 = {}

# for idx1, row1 in bradford_2011.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in bradford_2011.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     bradford_OA_borders_2011[idx1] = borders
    
# with open('preprocessed files/2011/bradford_OA_borders_2011.pkl', 'wb') as f:
#     pickle.dump(bradford_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- Bradford
# Each LSOA code is mapped to the codes of every other LSOA whose geometry intersects it.
# The code-indexed geometry column is built once up front so that set_index()/iterrows()
# are not re-evaluated inside the pairwise comparison.
lsoa_geometries = bradford_LSOA_2011.set_index('LSOACD')['geometry']
bradford_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geometry in lsoa_geometries.items()
           if other_code != code and geometry.intersects(other_geometry)]
    for code, geometry in lsoa_geometries.items()
}

# Saving the dictionary to disk
with open('preprocessed files/2011/bradford_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(bradford_LSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA_OA level- Bradford
# For every LSOA code: {OA code: codes of the other OAs of that LSOA whose geometry intersects it}.
# The OA-indexed geometries of an LSOA are extracted once per LSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
bradford_LSOA_OA_borders_2011 = {}
for lsoa in bradford_2011['LSOACD'].unique():
    oa_geometries = (
        bradford_2011.loc[bradford_2011['LSOACD'] == lsoa]
        .set_index('OACD')['geometry']
    )
    bradford_LSOA_OA_borders_2011[lsoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/bradford_LSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(bradford_LSOA_OA_borders_2011, f)

In [None]:
# Adding moran index to the city LSOA dataset- Bradford
# moran() is called once per LSOA with that LSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
bradford_LSOA_OA_moran = {}
for lsoa in bradford_2011['LSOACD'].unique():
    lsoa_df = bradford_2011[bradford_2011['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     bradford_LSOA_OA_borders_2011[lsoa])
    bradford_LSOA_OA_moran[lsoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per LSOA
flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'],
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'],
              } for key, value in bradford_LSOA_OA_moran.items()]

bradford_LSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Moran columns left over from a previous
# run of this cell are dropped first: a second merge would otherwise create
# white_moran_x/_y columns and the column selection below would raise a KeyError.
moran_cols = ['white_moran', 'asian_moran']
bradford_LSOA_2011 = (
    bradford_LSOA_2011
    .drop(columns=moran_cols, errors='ignore')
    .merge(bradford_LSOA_OA_moran_2011[['LSOACD'] + moran_cols], on='LSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'LSOACD', 'LSOA_simpson', 'white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

bradford_LSOA_2011 = bradford_LSOA_2011[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)


In [None]:
# Creating shared border dictionary at MSOA_OA level- Bradford
# For every MSOA code: {OA code: codes of the other OAs of that MSOA whose geometry intersects it}.
# The OA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
bradford_MSOA_OA_borders_2011 = {}
for msoa in bradford_2011['MSOACD'].unique():
    oa_geometries = (
        bradford_2011.loc[bradford_2011['MSOACD'] == msoa]
        .set_index('OACD')['geometry']
    )
    bradford_MSOA_OA_borders_2011[msoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/bradford_MSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(bradford_MSOA_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_LSOA level- Bradford
# Each LSOA row is tagged with the first MSOA code found for it in bradford_2011.
lsoa_to_msoa = bradford_2011.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index()
bradford_MSOA_LSOA = pd.merge(bradford_LSOA_2011, lsoa_to_msoa, on='LSOACD', how='left')

# For every MSOA code: {LSOA code: codes of the other LSOAs of that MSOA whose geometry intersects it}.
# The LSOA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
bradford_MSOA_LSOA_borders_2011 = {}
for msoa in bradford_MSOA_LSOA['MSOACD'].unique():
    lsoa_geometries = (
        bradford_MSOA_LSOA.loc[bradford_MSOA_LSOA['MSOACD'] == msoa]
        .set_index('LSOACD')['geometry']
    )
    bradford_MSOA_LSOA_borders_2011[msoa] = {
        lsoa: [other_lsoa
               for other_lsoa, other_geometry in lsoa_geometries.items()
               if other_lsoa != lsoa and geometry.intersects(other_geometry)]
        for lsoa, geometry in lsoa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/bradford_MSOA_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(bradford_MSOA_LSOA_borders_2011, f)

In [None]:
# MSOA_OA based moran- Bradford
# moran() is called once per MSOA with that MSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
bradford_MSOA_OA_moran = {}
for msoa in bradford_2011['MSOACD'].unique():
    msoa_df = bradford_2011[bradford_2011['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     bradford_MSOA_OA_borders_2011[msoa])
    bradford_MSOA_OA_moran[msoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per MSOA
flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'],
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'],
              } for key, value in bradford_MSOA_OA_moran.items()]

bradford_MSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Columns left over from a previous run of
# this cell are dropped first: a second merge would otherwise create _x/_y suffixed
# columns and the column selection at the end of the cell would raise a KeyError.
moran_OA_cols = ['white_moran_OA', 'asian_moran_OA']
bradford_MSOA_2011 = (
    bradford_MSOA_2011
    .drop(columns=moran_OA_cols, errors='ignore')
    .merge(bradford_MSOA_OA_moran_2011[['MSOACD'] + moran_OA_cols], on='MSOACD', how='left')
)

# MSOA_LSOA based moran- Bradford
# Same computation on counts summed per LSOA (groupby already indexes the result by LSOACD).
bradford_MSOA_LSOA_moran = {}
for msoa in bradford_2011['MSOACD'].unique():
    msoa_df = bradford_2011[bradford_2011['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white', 'asian', 'black', 'other']].sum()
    LSOA_moran = moran(msoa_df, bradford_MSOA_LSOA_borders_2011[msoa])
    bradford_MSOA_LSOA_moran[msoa] = {'LSOA_moran': LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'],
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'],
              } for key, value in bradford_MSOA_LSOA_moran.items()]

bradford_MSOA_LSOA_moran_2011 = pd.DataFrame(flat_dict)

# Same re-run guard as for the OA based columns
moran_LSOA_cols = ['white_moran_LSOA', 'asian_moran_LSOA']
bradford_MSOA_2011 = (
    bradford_MSOA_2011
    .drop(columns=moran_LSOA_cols, errors='ignore')
    .merge(bradford_MSOA_LSOA_moran_2011[['MSOACD'] + moran_LSOA_cols], on='MSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson', 'white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

bradford_MSOA_2011 = bradford_MSOA_2011[col_order]

## Blackburn

In [None]:
# # Creating shared border dictionary at OA level- Blackburn
# blackburn_OA_borders_2011 = {}

# for idx1, row1 in blackburn_2011.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in blackburn_2011.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     blackburn_OA_borders_2011[idx1] = borders
    
# with open('preprocessed files/2011/blackburn_OA_borders_2011.pkl', 'wb') as f:
#     pickle.dump(blackburn_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- Blackburn
# Each LSOA code is mapped to the codes of every other LSOA whose geometry intersects it.
# The code-indexed geometry column is built once up front so that set_index()/iterrows()
# are not re-evaluated inside the pairwise comparison.
lsoa_geometries = blackburn_LSOA_2011.set_index('LSOACD')['geometry']
blackburn_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geometry in lsoa_geometries.items()
           if other_code != code and geometry.intersects(other_geometry)]
    for code, geometry in lsoa_geometries.items()
}

# Saving the dictionary to disk
with open('preprocessed files/2011/blackburn_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(blackburn_LSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- Blackburn
# NOTE(review): this cell recomputes the same dictionary and overwrites the same pickle
# as the preceding Blackburn LSOA borders cell; one of the two can probably be removed.
# Each LSOA code is mapped to the codes of every other LSOA whose geometry intersects it.
# The code-indexed geometry column is built once up front so that set_index()/iterrows()
# are not re-evaluated inside the pairwise comparison.
lsoa_geometries = blackburn_LSOA_2011.set_index('LSOACD')['geometry']
blackburn_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geometry in lsoa_geometries.items()
           if other_code != code and geometry.intersects(other_geometry)]
    for code, geometry in lsoa_geometries.items()
}

# Saving the dictionary to disk
with open('preprocessed files/2011/blackburn_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(blackburn_LSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA_OA level- Blackburn
# For every LSOA code: {OA code: codes of the other OAs of that LSOA whose geometry intersects it}.
# The OA-indexed geometries of an LSOA are extracted once per LSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
blackburn_LSOA_OA_borders_2011 = {}
for lsoa in blackburn_2011['LSOACD'].unique():
    oa_geometries = (
        blackburn_2011.loc[blackburn_2011['LSOACD'] == lsoa]
        .set_index('OACD')['geometry']
    )
    blackburn_LSOA_OA_borders_2011[lsoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/blackburn_LSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(blackburn_LSOA_OA_borders_2011, f)

In [None]:
# Adding moran index to the city LSOA dataset- Blackburn
# moran() is called once per LSOA with that LSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
blackburn_LSOA_OA_moran = {}
for lsoa in blackburn_2011['LSOACD'].unique():
    lsoa_df = blackburn_2011[blackburn_2011['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     blackburn_LSOA_OA_borders_2011[lsoa])
    blackburn_LSOA_OA_moran[lsoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per LSOA
flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'],
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'],
              } for key, value in blackburn_LSOA_OA_moran.items()]

blackburn_LSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Moran columns left over from a previous
# run of this cell are dropped first: a second merge would otherwise create
# white_moran_x/_y columns and the column selection below would raise a KeyError.
moran_cols = ['white_moran', 'asian_moran']
blackburn_LSOA_2011 = (
    blackburn_LSOA_2011
    .drop(columns=moran_cols, errors='ignore')
    .merge(blackburn_LSOA_OA_moran_2011[['LSOACD'] + moran_cols], on='LSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'LSOACD', 'LSOA_simpson', 'white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

blackburn_LSOA_2011 = blackburn_LSOA_2011[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)


In [None]:
# Creating shared border dictionary at MSOA_OA level- Blackburn
# For every MSOA code: {OA code: codes of the other OAs of that MSOA whose geometry intersects it}.
# The OA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
blackburn_MSOA_OA_borders_2011 = {}
for msoa in blackburn_2011['MSOACD'].unique():
    oa_geometries = (
        blackburn_2011.loc[blackburn_2011['MSOACD'] == msoa]
        .set_index('OACD')['geometry']
    )
    blackburn_MSOA_OA_borders_2011[msoa] = {
        oa: [other_oa
             for other_oa, other_geometry in oa_geometries.items()
             if other_oa != oa and geometry.intersects(other_geometry)]
        for oa, geometry in oa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/blackburn_MSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(blackburn_MSOA_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_LSOA level- Blackburn
# Each LSOA row is tagged with the first MSOA code found for it in blackburn_2011.
lsoa_to_msoa = blackburn_2011.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index()
blackburn_MSOA_LSOA = pd.merge(blackburn_LSOA_2011, lsoa_to_msoa, on='LSOACD', how='left')

# For every MSOA code: {LSOA code: codes of the other LSOAs of that MSOA whose geometry intersects it}.
# The LSOA-indexed geometries of an MSOA are extracted once per MSOA so that
# set_index()/iterrows() are not re-evaluated inside the pairwise comparison.
blackburn_MSOA_LSOA_borders_2011 = {}
for msoa in blackburn_MSOA_LSOA['MSOACD'].unique():
    lsoa_geometries = (
        blackburn_MSOA_LSOA.loc[blackburn_MSOA_LSOA['MSOACD'] == msoa]
        .set_index('LSOACD')['geometry']
    )
    blackburn_MSOA_LSOA_borders_2011[msoa] = {
        lsoa: [other_lsoa
               for other_lsoa, other_geometry in lsoa_geometries.items()
               if other_lsoa != lsoa and geometry.intersects(other_geometry)]
        for lsoa, geometry in lsoa_geometries.items()
    }

# Saving the dictionary to disk
with open('preprocessed files/2011/blackburn_MSOA_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(blackburn_MSOA_LSOA_borders_2011, f)

In [None]:
# MSOA_OA based moran- Blackburn
# moran() is called once per MSOA with that MSOA's OA-indexed ethnicity counts and
# the matching OA border dictionary.
blackburn_MSOA_OA_moran = {}
for msoa in blackburn_2011['MSOACD'].unique():
    msoa_df = blackburn_2011[blackburn_2011['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD', 'white', 'asian', 'black', 'other']].set_index('OACD'),
                     blackburn_MSOA_OA_borders_2011[msoa])
    blackburn_MSOA_OA_moran[msoa] = {'OA_moran': OA_moran}

# Flattening the nested results into one record per MSOA
flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'],
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'],
              } for key, value in blackburn_MSOA_OA_moran.items()]

blackburn_MSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white and asian indices are attached. Columns left over from a previous run of
# this cell are dropped first: a second merge would otherwise create _x/_y suffixed
# columns and the column selection at the end of the cell would raise a KeyError.
moran_OA_cols = ['white_moran_OA', 'asian_moran_OA']
blackburn_MSOA_2011 = (
    blackburn_MSOA_2011
    .drop(columns=moran_OA_cols, errors='ignore')
    .merge(blackburn_MSOA_OA_moran_2011[['MSOACD'] + moran_OA_cols], on='MSOACD', how='left')
)

# MSOA_LSOA based moran- Blackburn
# Same computation on counts summed per LSOA (groupby already indexes the result by LSOACD).
blackburn_MSOA_LSOA_moran = {}
for msoa in blackburn_2011['MSOACD'].unique():
    msoa_df = blackburn_2011[blackburn_2011['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white', 'asian', 'black', 'other']].sum()
    LSOA_moran = moran(msoa_df, blackburn_MSOA_LSOA_borders_2011[msoa])
    blackburn_MSOA_LSOA_moran[msoa] = {'LSOA_moran': LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'],
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'],
              } for key, value in blackburn_MSOA_LSOA_moran.items()]

blackburn_MSOA_LSOA_moran_2011 = pd.DataFrame(flat_dict)

# Same re-run guard as for the OA based columns
moran_LSOA_cols = ['white_moran_LSOA', 'asian_moran_LSOA']
blackburn_MSOA_2011 = (
    blackburn_MSOA_2011
    .drop(columns=moran_LSOA_cols, errors='ignore')
    .merge(blackburn_MSOA_LSOA_moran_2011[['MSOACD'] + moran_LSOA_cols], on='MSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson', 'white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
             'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
             'other_fraction', 'geometry']

blackburn_MSOA_2011 = blackburn_MSOA_2011[col_order]

## Oldham

In [None]:
# # Creating shared border dictionary at OA level- Oldham
# oldham_OA_borders_2011 = {}

# for idx1, row1 in oldham_2011.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in oldham_2011.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     oldham_OA_borders_2011[idx1] = borders
    
# with open('preprocessed files/2011/oldham_OA_borders_2011.pkl', 'wb') as f:
#     pickle.dump(oldham_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- Oldham
# Maps every LSOA code to the list of the other LSOA codes whose geometry intersects it.
# The LSOACD-indexed geometry column is built once up front; previously set_index() and
# iterrows() were re-evaluated inside the inner loop for every outer row.
oldham_LSOA_geoms = oldham_LSOA_2011.set_index('LSOACD')['geometry']

oldham_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geom in oldham_LSOA_geoms.items()
           if other_code != code and geom.intersects(other_geom)]
    for code, geom in oldham_LSOA_geoms.items()
}

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/oldham_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(oldham_LSOA_borders_2011, f)


In [None]:
# Creating shared border dictionary at LSOA_OA level- Oldham
# Result shape: {LSOA code: {OA code: [codes of the other OAs in the same LSOA whose
# geometry intersects it]}}.
# The OACD-indexed geometry column is extracted once per LSOA rather than calling
# set_index()/iterrows() again for every OA in the inner loop.
oldham_LSOA_OA_borders_2011 = {}
for lsoa in oldham_2011['LSOACD'].unique():
    oa_geoms = oldham_2011[oldham_2011['LSOACD'] == lsoa].set_index('OACD')['geometry']
    oldham_LSOA_OA_borders_2011[lsoa] = {
        oa: [other_oa
             for other_oa, other_geom in oa_geoms.items()
             if other_oa != oa and geom.intersects(other_geom)]
        for oa, geom in oa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/oldham_LSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(oldham_LSOA_OA_borders_2011, f)

In [None]:
# Adding moran index to the city LSOA dataset- Oldham
# For every LSOA, moran() is evaluated on that LSOA's OA-level counts together with the
# OA adjacency dictionary built for the same LSOA.
oldham_LSOA_OA_moran = {}
for lsoa in oldham_2011['LSOACD'].unique():
    lsoa_df = oldham_2011[oldham_2011['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),
                     oldham_LSOA_OA_borders_2011[lsoa])
    oldham_LSOA_OA_moran[lsoa] = {'OA_moran': OA_moran}

# One flat record per LSOA.
flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'],
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'],
              } for key, value in oldham_LSOA_OA_moran.items()]

oldham_LSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white/asian columns are attached. Copies from a previous run of this cell are
# dropped first so that re-running does not produce *_x/*_y duplicates (which would make
# the column selection below fail with a KeyError).
lsoa_moran_cols = ['white_moran', 'asian_moran']
oldham_LSOA_2011 = (
    oldham_LSOA_2011
    .drop(columns=lsoa_moran_cols, errors='ignore')
    .merge(oldham_LSOA_OA_moran_2011[['LSOACD'] + lsoa_moran_cols], on='LSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

oldham_LSOA_2011 = oldham_LSOA_2011[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)


In [None]:
# Creating shared border dictionary at MSOA_OA level- Oldham
# Result shape: {MSOA code: {OA code: [codes of the other OAs in the same MSOA whose
# geometry intersects it]}}.
# The OACD-indexed geometry column is extracted once per MSOA rather than calling
# set_index()/iterrows() again for every OA in the inner loop.
oldham_MSOA_OA_borders_2011 = {}
for msoa in oldham_2011['MSOACD'].unique():
    oa_geoms = oldham_2011[oldham_2011['MSOACD'] == msoa].set_index('OACD')['geometry']
    oldham_MSOA_OA_borders_2011[msoa] = {
        oa: [other_oa
             for other_oa, other_geom in oa_geoms.items()
             if other_oa != oa and geom.intersects(other_geom)]
        for oa, geom in oa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/oldham_MSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(oldham_MSOA_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_LSOA level- Oldham
# Each LSOA row is first tagged with an MSOA code: the first MSOACD found among the
# OA rows of that LSOA in oldham_2011.
oldham_MSOA_LSOA = pd.merge(
    oldham_LSOA_2011,
    oldham_2011.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(),
    on='LSOACD', how='left')

# Result shape: {MSOA code: {LSOA code: [codes of the other LSOAs in the same MSOA whose
# geometry intersects it]}}. The LSOACD-indexed geometry column is extracted once per
# MSOA rather than calling set_index()/iterrows() again inside the inner loop.
oldham_MSOA_LSOA_borders_2011 = {}
for msoa in oldham_MSOA_LSOA['MSOACD'].unique():
    lsoa_geoms = oldham_MSOA_LSOA[oldham_MSOA_LSOA['MSOACD'] == msoa].set_index('LSOACD')['geometry']
    oldham_MSOA_LSOA_borders_2011[msoa] = {
        lsoa: [other_lsoa
               for other_lsoa, other_geom in lsoa_geoms.items()
               if other_lsoa != lsoa and geom.intersects(other_geom)]
        for lsoa, geom in lsoa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/oldham_MSOA_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(oldham_MSOA_LSOA_borders_2011, f)


In [None]:
# MSOA_OA based moran- Oldham
# For every MSOA, moran() is evaluated on its OA-level counts with the OA adjacency
# dictionary built for that MSOA.
oldham_MSOA_OA_moran = {}
for msoa in oldham_2011['MSOACD'].unique():
    msoa_df = oldham_2011[oldham_2011['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),
                     oldham_MSOA_OA_borders_2011[msoa])
    oldham_MSOA_OA_moran[msoa] = {'OA_moran': OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'],
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'],
              } for key, value in oldham_MSOA_OA_moran.items()]

oldham_MSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Copies of the Moran columns left by an earlier run of this cell are dropped before the
# merge; otherwise a re-run yields *_x/*_y duplicates and the final column selection
# raises a KeyError.
oa_moran_cols = ['white_moran_OA', 'asian_moran_OA']
oldham_MSOA_2011 = (
    oldham_MSOA_2011
    .drop(columns=oa_moran_cols, errors='ignore')
    .merge(oldham_MSOA_OA_moran_2011[['MSOACD'] + oa_moran_cols], on='MSOACD', how='left')
)

# MSOA_LSOA based moran- Oldham
# Same computation with the OA rows summed up to LSOA level first (groupby already
# leaves LSOACD as the index).
oldham_MSOA_LSOA_moran = {}
for msoa in oldham_2011['MSOACD'].unique():
    msoa_df = oldham_2011[oldham_2011['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum()
    LSOA_moran = moran(msoa_df, oldham_MSOA_LSOA_borders_2011[msoa])
    oldham_MSOA_LSOA_moran[msoa] = {'LSOA_moran': LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'],
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'],
              } for key, value in oldham_MSOA_LSOA_moran.items()]

oldham_MSOA_LSOA_moran_2011 = pd.DataFrame(flat_dict)

lsoa_moran_cols = ['white_moran_LSOA', 'asian_moran_LSOA']
oldham_MSOA_2011 = (
    oldham_MSOA_2011
    .drop(columns=lsoa_moran_cols, errors='ignore')
    .merge(oldham_MSOA_LSOA_moran_2011[['MSOACD'] + lsoa_moran_cols], on='MSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

oldham_MSOA_2011 = oldham_MSOA_2011[col_order]

## Pendle

In [None]:
# # Creating shared border dictionary at OA level- Pendle
# pendle_OA_borders_2011 = {}

# for idx1, row1 in pendle_2011.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in pendle_2011.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     pendle_OA_borders_2011[idx1] = borders
    
# with open('preprocessed files/2011/pendle_OA_borders_2011.pkl', 'wb') as f:
#     pickle.dump(pendle_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- Pendle
# Maps every LSOA code to the list of the other LSOA codes whose geometry intersects it.
# The LSOACD-indexed geometry column is built once up front; previously set_index() and
# iterrows() were re-evaluated inside the inner loop for every outer row.
pendle_LSOA_geoms = pendle_LSOA_2011.set_index('LSOACD')['geometry']

pendle_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geom in pendle_LSOA_geoms.items()
           if other_code != code and geom.intersects(other_geom)]
    for code, geom in pendle_LSOA_geoms.items()
}

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/pendle_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(pendle_LSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA_OA level- Pendle
# Result shape: {LSOA code: {OA code: [codes of the other OAs in the same LSOA whose
# geometry intersects it]}}.
# The OACD-indexed geometry column is extracted once per LSOA rather than calling
# set_index()/iterrows() again for every OA in the inner loop.
pendle_LSOA_OA_borders_2011 = {}
for lsoa in pendle_2011['LSOACD'].unique():
    oa_geoms = pendle_2011[pendle_2011['LSOACD'] == lsoa].set_index('OACD')['geometry']
    pendle_LSOA_OA_borders_2011[lsoa] = {
        oa: [other_oa
             for other_oa, other_geom in oa_geoms.items()
             if other_oa != oa and geom.intersects(other_geom)]
        for oa, geom in oa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/pendle_LSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(pendle_LSOA_OA_borders_2011, f)

In [None]:
# Adding moran index to the city LSOA dataset- Pendle
# For every LSOA, moran() is evaluated on that LSOA's OA-level counts together with the
# OA adjacency dictionary built for the same LSOA.
pendle_LSOA_OA_moran = {}
for lsoa in pendle_2011['LSOACD'].unique():
    lsoa_df = pendle_2011[pendle_2011['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),
                     pendle_LSOA_OA_borders_2011[lsoa])
    pendle_LSOA_OA_moran[lsoa] = {'OA_moran': OA_moran}

# One flat record per LSOA.
flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'],
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'],
              } for key, value in pendle_LSOA_OA_moran.items()]

pendle_LSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white/asian columns are attached. Copies from a previous run of this cell are
# dropped first so that re-running does not produce *_x/*_y duplicates (which would make
# the column selection below fail with a KeyError).
lsoa_moran_cols = ['white_moran', 'asian_moran']
pendle_LSOA_2011 = (
    pendle_LSOA_2011
    .drop(columns=lsoa_moran_cols, errors='ignore')
    .merge(pendle_LSOA_OA_moran_2011[['LSOACD'] + lsoa_moran_cols], on='LSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

pendle_LSOA_2011 = pendle_LSOA_2011[col_order]

  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator, 3)
  moran_results[col] = round(moran_numerator/moran_denominator

In [None]:
# Creating shared border dictionary at MSOA_OA level- Pendle
# Result shape: {MSOA code: {OA code: [codes of the other OAs in the same MSOA whose
# geometry intersects it]}}.
# The OACD-indexed geometry column is extracted once per MSOA rather than calling
# set_index()/iterrows() again for every OA in the inner loop.
pendle_MSOA_OA_borders_2011 = {}
for msoa in pendle_2011['MSOACD'].unique():
    oa_geoms = pendle_2011[pendle_2011['MSOACD'] == msoa].set_index('OACD')['geometry']
    pendle_MSOA_OA_borders_2011[msoa] = {
        oa: [other_oa
             for other_oa, other_geom in oa_geoms.items()
             if other_oa != oa and geom.intersects(other_geom)]
        for oa, geom in oa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/pendle_MSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(pendle_MSOA_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_LSOA level- Pendle
# Each LSOA row is first tagged with an MSOA code: the first MSOACD found among the
# OA rows of that LSOA in pendle_2011.
pendle_MSOA_LSOA = pd.merge(
    pendle_LSOA_2011,
    pendle_2011.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(),
    on='LSOACD', how='left')

# Result shape: {MSOA code: {LSOA code: [codes of the other LSOAs in the same MSOA whose
# geometry intersects it]}}. The LSOACD-indexed geometry column is extracted once per
# MSOA rather than calling set_index()/iterrows() again inside the inner loop.
pendle_MSOA_LSOA_borders_2011 = {}
for msoa in pendle_MSOA_LSOA['MSOACD'].unique():
    lsoa_geoms = pendle_MSOA_LSOA[pendle_MSOA_LSOA['MSOACD'] == msoa].set_index('LSOACD')['geometry']
    pendle_MSOA_LSOA_borders_2011[msoa] = {
        lsoa: [other_lsoa
               for other_lsoa, other_geom in lsoa_geoms.items()
               if other_lsoa != lsoa and geom.intersects(other_geom)]
        for lsoa, geom in lsoa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/pendle_MSOA_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(pendle_MSOA_LSOA_borders_2011, f)

In [None]:
# MSOA_OA based moran- Pendle
# For every MSOA, moran() is evaluated on its OA-level counts with the OA adjacency
# dictionary built for that MSOA.
pendle_MSOA_OA_moran = {}
for msoa in pendle_2011['MSOACD'].unique():
    msoa_df = pendle_2011[pendle_2011['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),
                     pendle_MSOA_OA_borders_2011[msoa])
    pendle_MSOA_OA_moran[msoa] = {'OA_moran': OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'],
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'],
              } for key, value in pendle_MSOA_OA_moran.items()]

pendle_MSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Copies of the Moran columns left by an earlier run of this cell are dropped before the
# merge; otherwise a re-run yields *_x/*_y duplicates and the final column selection
# raises a KeyError.
oa_moran_cols = ['white_moran_OA', 'asian_moran_OA']
pendle_MSOA_2011 = (
    pendle_MSOA_2011
    .drop(columns=oa_moran_cols, errors='ignore')
    .merge(pendle_MSOA_OA_moran_2011[['MSOACD'] + oa_moran_cols], on='MSOACD', how='left')
)

# MSOA_LSOA based moran- Pendle
# Same computation with the OA rows summed up to LSOA level first (groupby already
# leaves LSOACD as the index).
pendle_MSOA_LSOA_moran = {}
for msoa in pendle_2011['MSOACD'].unique():
    msoa_df = pendle_2011[pendle_2011['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum()
    LSOA_moran = moran(msoa_df, pendle_MSOA_LSOA_borders_2011[msoa])
    pendle_MSOA_LSOA_moran[msoa] = {'LSOA_moran': LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'],
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'],
              } for key, value in pendle_MSOA_LSOA_moran.items()]

pendle_MSOA_LSOA_moran_2011 = pd.DataFrame(flat_dict)

lsoa_moran_cols = ['white_moran_LSOA', 'asian_moran_LSOA']
pendle_MSOA_2011 = (
    pendle_MSOA_2011
    .drop(columns=lsoa_moran_cols, errors='ignore')
    .merge(pendle_MSOA_LSOA_moran_2011[['MSOACD'] + lsoa_moran_cols], on='MSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

pendle_MSOA_2011 = pendle_MSOA_2011[col_order]

## London

In [None]:
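# # NOTE: this OA-level pass is disabled, but a later cell still loads
# # 'preprocessed files/2011/london_OA_borders_2011.pkl', so that file must already exist on disk.
# # Creating shared border dictionary at OA level- London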
# london_OA_borders_2011 = {}

# for idx1, row1 in london_2011.set_index('OACD').iterrows():
#     borders = []
#     for idx2, row2 in london_2011.set_index('OACD').iterrows():
#         if idx1 != idx2:
#             if row1.geometry.intersects(row2.geometry):
#                 borders.append(idx2)
#     london_OA_borders_2011[idx1] = borders
    
# with open('preprocessed files/2011/london_OA_borders_2011.pkl', 'wb') as f:
#     pickle.dump(london_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA level- London
# Maps every LSOA code to the list of the other LSOA codes whose geometry intersects it.
# The LSOACD-indexed geometry column is built once up front; previously set_index() and
# iterrows() were re-evaluated inside the inner loop for every outer row.
london_LSOA_geoms = london_LSOA_2011.set_index('LSOACD')['geometry']

london_LSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geom in london_LSOA_geoms.items()
           if other_code != code and geom.intersects(other_geom)]
    for code, geom in london_LSOA_geoms.items()
}

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/london_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(london_LSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at LSOA_OA level- London
# Result shape: {LSOA code: {OA code: [codes of the other OAs in the same LSOA whose
# geometry intersects it]}}.
# The OACD-indexed geometry column is extracted once per LSOA rather than calling
# set_index()/iterrows() again for every OA in the inner loop.
london_LSOA_OA_borders_2011 = {}
for lsoa in london_2011['LSOACD'].unique():
    oa_geoms = london_2011[london_2011['LSOACD'] == lsoa].set_index('OACD')['geometry']
    london_LSOA_OA_borders_2011[lsoa] = {
        oa: [other_oa
             for other_oa, other_geom in oa_geoms.items()
             if other_oa != oa and geom.intersects(other_geom)]
        for oa, geom in oa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/london_LSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(london_LSOA_OA_borders_2011, f)

In [None]:
# Adding moran index to the city LSOA dataset- London
# For every LSOA, moran() is evaluated on that LSOA's OA-level counts together with the
# OA adjacency dictionary built for the same LSOA.
london_LSOA_OA_moran = {}
for lsoa in london_2011['LSOACD'].unique():
    lsoa_df = london_2011[london_2011['LSOACD'] == lsoa]
    OA_moran = moran(lsoa_df[['OACD','white','asian','black','other']].set_index('OACD'),
                     london_LSOA_OA_borders_2011[lsoa])
    london_LSOA_OA_moran[lsoa] = {'OA_moran': OA_moran}

# One flat record per LSOA.
flat_dict = [{'LSOACD': key,
              'white_moran': value['OA_moran']['white'],
              'asian_moran': value['OA_moran']['asian'],
              'black_moran': value['OA_moran']['black'],
              'other_moran': value['OA_moran']['other'],
              } for key, value in london_LSOA_OA_moran.items()]

london_LSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Only the white/asian columns are attached. Copies from a previous run of this cell are
# dropped first so that re-running does not produce *_x/*_y duplicates (which would make
# the column selection below fail with a KeyError).
lsoa_moran_cols = ['white_moran', 'asian_moran']
london_LSOA_2011 = (
    london_LSOA_2011
    .drop(columns=lsoa_moran_cols, errors='ignore')
    .merge(london_LSOA_OA_moran_2011[['LSOACD'] + lsoa_moran_cols], on='LSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'LSOACD', 'LSOA_simpson','white_moran', 'asian_moran', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

london_LSOA_2011 = london_LSOA_2011[col_order]

In [44]:
# Creating shared border dictionary at MSOA level- London
# Maps every MSOA code to the list of the other MSOA codes whose geometry intersects it.
# The MSOACD-indexed geometry column is built once up front; previously set_index() and
# iterrows() were re-evaluated inside the inner loop for every outer row.
london_MSOA_geoms = london_MSOA_2011.set_index('MSOACD')['geometry']

london_MSOA_borders_2011 = {
    code: [other_code
           for other_code, other_geom in london_MSOA_geoms.items()
           if other_code != code and geom.intersects(other_geom)]
    for code, geom in london_MSOA_geoms.items()
}

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/london_MSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(london_MSOA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_OA level- London
# Result shape: {MSOA code: {OA code: [codes of the other OAs in the same MSOA whose
# geometry intersects it]}}.
# The OACD-indexed geometry column is extracted once per MSOA rather than calling
# set_index()/iterrows() again for every OA in the inner loop.
london_MSOA_OA_borders_2011 = {}
for msoa in london_2011['MSOACD'].unique():
    oa_geoms = london_2011[london_2011['MSOACD'] == msoa].set_index('OACD')['geometry']
    london_MSOA_OA_borders_2011[msoa] = {
        oa: [other_oa
             for other_oa, other_geom in oa_geoms.items()
             if other_oa != oa and geom.intersects(other_geom)]
        for oa, geom in oa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/london_MSOA_OA_borders_2011.pkl', 'wb') as f:
    pickle.dump(london_MSOA_OA_borders_2011, f)

In [None]:
# Creating shared border dictionary at MSOA_LSOA level- London
# Each LSOA row is first tagged with an MSOA code: the first MSOACD found among the
# OA rows of that LSOA in london_2011.
london_MSOA_LSOA = pd.merge(
    london_LSOA_2011,
    london_2011.groupby('LSOACD').agg({'MSOACD': 'first'}).reset_index(),
    on='LSOACD', how='left')

# Result shape: {MSOA code: {LSOA code: [codes of the other LSOAs in the same MSOA whose
# geometry intersects it]}}. The LSOACD-indexed geometry column is extracted once per
# MSOA rather than calling set_index()/iterrows() again inside the inner loop.
london_MSOA_LSOA_borders_2011 = {}
for msoa in london_MSOA_LSOA['MSOACD'].unique():
    lsoa_geoms = london_MSOA_LSOA[london_MSOA_LSOA['MSOACD'] == msoa].set_index('LSOACD')['geometry']
    london_MSOA_LSOA_borders_2011[msoa] = {
        lsoa: [other_lsoa
               for other_lsoa, other_geom in lsoa_geoms.items()
               if other_lsoa != lsoa and geom.intersects(other_geom)]
        for lsoa, geom in lsoa_geoms.items()
    }

# Persist the adjacency dictionary to disk.
with open('preprocessed files/2011/london_MSOA_LSOA_borders_2011.pkl', 'wb') as f:
    pickle.dump(london_MSOA_LSOA_borders_2011, f)

In [None]:
# MSOA_OA based moran- London
# For every MSOA, moran() is evaluated on its OA-level counts with the OA adjacency
# dictionary built for that MSOA.
london_MSOA_OA_moran = {}
for msoa in london_2011['MSOACD'].unique():
    msoa_df = london_2011[london_2011['MSOACD'] == msoa]
    OA_moran = moran(msoa_df[['OACD','white','asian','black','other']].set_index('OACD'),
                     london_MSOA_OA_borders_2011[msoa])
    london_MSOA_OA_moran[msoa] = {'OA_moran': OA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_OA': value['OA_moran']['white'],
              'asian_moran_OA': value['OA_moran']['asian'],
              'black_moran_OA': value['OA_moran']['black'],
              'other_moran_OA': value['OA_moran']['other'],
              } for key, value in london_MSOA_OA_moran.items()]

london_MSOA_OA_moran_2011 = pd.DataFrame(flat_dict)

# Copies of the Moran columns left by an earlier run of this cell are dropped before the
# merge; otherwise a re-run yields *_x/*_y duplicates and the final column selection
# raises a KeyError.
oa_moran_cols = ['white_moran_OA', 'asian_moran_OA']
london_MSOA_2011 = (
    london_MSOA_2011
    .drop(columns=oa_moran_cols, errors='ignore')
    .merge(london_MSOA_OA_moran_2011[['MSOACD'] + oa_moran_cols], on='MSOACD', how='left')
)

# MSOA_LSOA based moran- London
# Same computation with the OA rows summed up to LSOA level first (groupby already
# leaves LSOACD as the index).
london_MSOA_LSOA_moran = {}
for msoa in london_2011['MSOACD'].unique():
    msoa_df = london_2011[london_2011['MSOACD'] == msoa]
    msoa_df = msoa_df.groupby('LSOACD')[['white','asian','black','other']].sum()
    LSOA_moran = moran(msoa_df, london_MSOA_LSOA_borders_2011[msoa])
    london_MSOA_LSOA_moran[msoa] = {'LSOA_moran': LSOA_moran}

flat_dict = [{'MSOACD': key,
              'white_moran_LSOA': value['LSOA_moran']['white'],
              'asian_moran_LSOA': value['LSOA_moran']['asian'],
              'black_moran_LSOA': value['LSOA_moran']['black'],
              'other_moran_LSOA': value['LSOA_moran']['other'],
              } for key, value in london_MSOA_LSOA_moran.items()]

london_MSOA_LSOA_moran_2011 = pd.DataFrame(flat_dict)

lsoa_moran_cols = ['white_moran_LSOA', 'asian_moran_LSOA']
london_MSOA_2011 = (
    london_MSOA_2011
    .drop(columns=lsoa_moran_cols, errors='ignore')
    .merge(london_MSOA_LSOA_moran_2011[['MSOACD'] + lsoa_moran_cols], on='MSOACD', how='left')
)

# Reordering the columns
col_order = ['year', 'MSOACD', 'MSOA_simpson','white_moran_OA', 'asian_moran_OA', 'white_moran_LSOA', 'asian_moran_LSOA', 'white', 'asian', 'black', 'other',
       'total_pop', 'white_fraction', 'asian_fraction', 'black_fraction',
       'other_fraction', 'geometry']

london_MSOA_2011 = london_MSOA_2011[col_order]

In [51]:
# London Simpson index table for 2011.
# simpson() is applied to the ethnic-group counts at OA level and to the same counts
# summed up to LSOA, MSOA and LAD level.
ethnic_cols = ['white','asian','black','other']

lsoa_counts = london_2011.groupby(['LSOACD'])[ethnic_cols].sum().reset_index(drop=True)
msoa_counts = london_2011.groupby(['MSOACD'])[ethnic_cols].sum().reset_index(drop=True)
lad_counts = london_2011.groupby(['LADCD'])[ethnic_cols].sum().reset_index(drop=True)

OA_simp_london_2011 = simpson(london_2011[ethnic_cols])
LSOA_simp_london_2011 = simpson(lsoa_counts)
MSOA_simp_london_2011 = simpson(msoa_counts)
LAD_simp_london_2011 = simpson(lad_counts)

# Element [0] of each result is the index at that level; element [1] of the LAD-level
# result is stored as the London-wide figure.
country_dic_sim_2011 = {
    'OA_simpson': OA_simp_london_2011[0],
    'LSOA_simpson': LSOA_simp_london_2011[0],
    'MSOA_simpson': MSOA_simp_london_2011[0],
    'LAD_simpson': LAD_simp_london_2011[0],
    'london': LAD_simp_london_2011[1],
}

# Single summary record: totals, population shares (3 d.p.) and the Simpson indexes.
total_population = london_2011['total_pop'].sum()
simpson_record = {'year': 2011, 'total_population': total_population}
for group in ethnic_cols:
    simpson_record[group] = london_2011[group].sum()
for group in ethnic_cols:
    simpson_record[f'{group}_frac'] = round(london_2011[group].sum() / total_population, 3)
simpson_record['OA_simpson'] = OA_simp_london_2011[0]
simpson_record['LSOA_simpson'] = LSOA_simp_london_2011[0]
simpson_record['MSOA_simpson'] = MSOA_simp_london_2011[0]
simpson_record['LAD_simpson'] = LAD_simp_london_2011[0]
simpson_record['London_simpson'] = LAD_simp_london_2011[1]

flat_dict = [simpson_record]

london_simpson_2011 = pd.DataFrame(flat_dict)
london_simpson_2011

Unnamed: 0,year,total_population,white,asian,black,other,white_frac,asian_frac,black_frac,other_frac,OA_simpson,LSOA_simpson,MSOA_simpson,LAD_simpson,London_simpson
0,2011,8173941,4887435,1511546,1088640,686320,0.598,0.185,0.133,0.084,0.502,0.491,0.483,0.449,0.409


In [52]:
# London dissimilarity index table for 2011.
# dissimilarity() is applied to the ethnic-group counts at OA level and to the same
# counts summed up to LSOA, MSOA and LAD level.
ethnic_cols = ['white','asian','black','other']

lsoa_counts = london_2011.groupby(['LSOACD'])[ethnic_cols].sum().reset_index(drop=True)
msoa_counts = london_2011.groupby(['MSOACD'])[ethnic_cols].sum().reset_index(drop=True)
lad_counts = london_2011.groupby(['LADCD'])[ethnic_cols].sum().reset_index(drop=True)

OA_diss_london_2011 = dissimilarity(london_2011[ethnic_cols])
LSOA_diss_london_2011 = dissimilarity(lsoa_counts)
MSOA_diss_london_2011 = dissimilarity(msoa_counts)
LAD_diss_london_2011 = dissimilarity(lad_counts)

london_dic_diss_2011 = {
    'OA_level': OA_diss_london_2011,
    'LSOA_level': LSOA_diss_london_2011,
    'MSOA_level': MSOA_diss_london_2011,
    'LAD_level': LAD_diss_london_2011,
}

# Flatten into one record with keys '<level>_<group>_diss', grouped by ethnic group
# and ordered OA, LSOA, MSOA, LAD within each group.
diss_record = {'year': 2011}
for group in ethnic_cols:
    for level in ('OA', 'LSOA', 'MSOA', 'LAD'):
        diss_record[f'{level}_{group}_diss'] = london_dic_diss_2011[f'{level}_level'][group]

flat_dict = [diss_record]

london_dissimilarity_2011 = pd.DataFrame(flat_dict)
london_dissimilarity_2011

Unnamed: 0,year,OA_white_diss,LSOA_white_diss,MSOA_white_diss,LAD_white_diss,OA_asian_diss,LSOA_asian_diss,MSOA_asian_diss,LAD_asian_diss,OA_black_diss,LSOA_black_diss,MSOA_black_diss,LAD_black_diss,OA_other_diss,LSOA_other_diss,MSOA_other_diss,LAD_other_diss
0,2011,0.382,0.356,0.335,0.227,0.43,0.409,0.395,0.332,0.42,0.386,0.363,0.28,0.228,0.186,0.17,0.145


In [47]:
# Load the London adjacency dictionaries (OA, LSOA, MSOA) from their pickles.
# NOTE(review): pickle.load executes arbitrary code from the file; only use this on
# files produced by this notebook.
border_pickles = [
    'preprocessed files/2011/london_OA_borders_2011.pkl',
    'preprocessed files/2011/london_LSOA_borders_2011.pkl',
    'preprocessed files/2011/london_MSOA_borders_2011.pkl',
]
loaded_borders = []
for pickle_path in border_pickles:
    with open(pickle_path, 'rb') as f:
        loaded_borders.append(pickle.load(f))

london_OA_borders_2011, london_LSOA_borders_2011, london_MSOA_borders_2011 = loaded_borders

In [48]:
# London-wide Moran's I for 2011 at OA, LSOA and MSOA level.
# moran() receives the ethnic-group counts indexed by area code plus the matching
# adjacency dictionary.
ethnic_cols = ['white','asian','black','other']

OA_mor_london_2011 = moran(london_2011[['OACD'] + ethnic_cols].set_index('OACD'),
                           london_OA_borders_2011)
LSOA_mor_london_2011 = moran(london_2011.groupby(['LSOACD'])[ethnic_cols].sum(),
                             london_LSOA_borders_2011)
MSOA_mor_london_2011 = moran(london_2011.groupby(['MSOACD'])[ethnic_cols].sum(),
                             london_MSOA_borders_2011)

dic_mor_london_2011 = {
    'OA': OA_mor_london_2011,
    'LSOA': LSOA_mor_london_2011,
    'MSOA': MSOA_mor_london_2011,
}

# Flatten {level: {ethnicity: value}} into a single '<level>_<ethnicity>_mor' record.
flat_dict = {
    f"{level}_{ethnicity}_mor": value
    for level, values in dic_mor_london_2011.items()
    for ethnicity, value in values.items()
}

london_moran_2011 = pd.DataFrame([flat_dict])
london_moran_2011['year'] = 2011
london_moran_2011

Unnamed: 0,OA_white_mor,OA_asian_mor,OA_black_mor,OA_other_mor,LSOA_white_mor,LSOA_asian_mor,LSOA_black_mor,LSOA_other_mor,MSOA_white_mor,MSOA_asian_mor,MSOA_black_mor,MSOA_other_mor,year
0,0.816,0.854,0.732,0.561,0.846,0.889,0.768,0.737,0.802,0.848,0.728,0.786,2011


In [60]:
# Combine the Simpson, dissimilarity and Moran tables into one London summary row.
# The join key is spelled out ('year' is the only column the tables share) so the merge
# cannot silently switch keys if another shared column is ever added, and
# validate='one_to_one' raises if either side contains duplicate years.
london_indexes_2011 = (
    london_simpson_2011
    .merge(london_dissimilarity_2011, on='year', how='left', validate='one_to_one')
    .merge(london_moran_2011, on='year', how='left', validate='one_to_one')
)
london_indexes_2011

Unnamed: 0,year,total_population,white,asian,black,other,white_frac,asian_frac,black_frac,other_frac,...,OA_black_mor,OA_other_mor,LSOA_white_mor,LSOA_asian_mor,LSOA_black_mor,LSOA_other_mor,MSOA_white_mor,MSOA_asian_mor,MSOA_black_mor,MSOA_other_mor
0,2011,8173941,4887435,1511546,1088640,686320,0.598,0.185,0.133,0.084,...,0.732,0.561,0.846,0.889,0.768,0.737,0.802,0.848,0.728,0.786


In [61]:
# Label the London summary row and put its columns into the final order.
# NOTE(review): 'E00000000' looks like a placeholder code rather than a real LAD code,
# and the LAD_*_diss columns are not part of col_order, so they are dropped here;
# confirm both are intended.
london_indexes_2011 = london_indexes_2011.assign(LADCD='E00000000', LADNM='London')

index_groups = ['white', 'asian', 'black', 'other']
index_levels = ['OA', 'LSOA', 'MSOA']

col_order = (
    ['year', 'LADCD', 'LADNM', *index_groups, 'total_population']
    + [f'{group}_frac' for group in index_groups]
    + ['OA_simpson', 'LSOA_simpson', 'MSOA_simpson', 'LAD_simpson', 'London_simpson']
    # dissimilarity, then Moran: grouped by ethnic group, OA/LSOA/MSOA within each group
    + [f'{level}_{group}_diss' for group in index_groups for level in index_levels]
    + [f'{level}_{group}_mor' for group in index_groups for level in index_levels]
)
london_indexes_2011 = london_indexes_2011[col_order]

In [None]:
# Write every city's LSOA-level 2011 table to CSV (row index omitted).
lsoa_exports = [
    (birmingham_LSOA_2011, 'preprocessed files/2011/birmingham_LSOA_2011.csv'),
    (leicester_LSOA_2011, 'preprocessed files/2011/leicester_LSOA_2011.csv'),
    (bradford_LSOA_2011, 'preprocessed files/2011/bradford_LSOA_2011.csv'),
    (blackburn_LSOA_2011, 'preprocessed files/2011/blackburn_LSOA_2011.csv'),
    (oldham_LSOA_2011, 'preprocessed files/2011/oldham_LSOA_2011.csv'),
    (pendle_LSOA_2011, 'preprocessed files/2011/pendle_LSOA_2011.csv'),
    (london_LSOA_2011, 'preprocessed files/2011/london_LSOA_2011.csv'),
]
for frame, csv_path in lsoa_exports:
    frame.to_csv(csv_path, index=False)

In [None]:
# Write every city's MSOA-level 2011 table, plus the London LAD table and the London
# summary indexes, to CSV (row index omitted).
msoa_exports = [
    (birmingham_MSOA_2011, 'preprocessed files/2011/birmingham_MSOA_2011.csv'),
    (leicester_MSOA_2011, 'preprocessed files/2011/leicester_MSOA_2011.csv'),
    (bradford_MSOA_2011, 'preprocessed files/2011/bradford_MSOA_2011.csv'),
    (blackburn_MSOA_2011, 'preprocessed files/2011/blackburn_MSOA_2011.csv'),
    (oldham_MSOA_2011, 'preprocessed files/2011/oldham_MSOA_2011.csv'),
    (pendle_MSOA_2011, 'preprocessed files/2011/pendle_MSOA_2011.csv'),
    (london_MSOA_2011, 'preprocessed files/2011/london_MSOA_2011.csv'),
    (london_LAD_2011, 'preprocessed files/2011/london_LAD_2011.csv'),
    (london_indexes_2011, 'preprocessed files/2011/london_indexes_2011.csv'),
]
for frame, csv_path in msoa_exports:
    frame.to_csv(csv_path, index=False)