In [71]:
%matplotlib inline
import geopandas as gpd
import pandas as pd
import matplotlib 
import matplotlib.pyplot as plt
from requests import get
import numpy as np
import os
import pysal
import libpysal
from pysal.explore import esda

  from .autonotebook import tqdm as notebook_tqdm


In [56]:
# Country-of-birth estimates per area, fetched as CSV from the project's GitHub repository.
cob_url = 'https://raw.githubusercontent.com/BiuLei0527/CASA0003_Individual-Visualization/refs/heads/main/cob-borough.csv'
country_birth = pd.read_csv(cob_url)

# Borough boundary shapefile, expected relative to the notebook's working directory.
path = os.path.join('london_borough', 'ESRI', 'London_Borough_Excluding_MHW.shp')
borough_raw = gpd.read_file(path)

# Reproject the boundaries to EPSG:3857 for the rest of the notebook.
borough = borough_raw.to_crs('EPSG:3857')

In [57]:
# Coerce Estimate to a numeric dtype. errors='coerce' turns every non-numeric
# entry (including the ":" placeholder the previous code replaced by hand)
# into NaN, so no separate string conversion / replace step is needed.
country_birth['Estimate'] = pd.to_numeric(country_birth['Estimate'], errors='coerce')

# Rows whose Broad_group matches one of these labels are excluded.
# NOTE(review): these look like region/country names, which would normally sit
# in an area column rather than Broad_group — confirm the filter targets the
# intended column.
regions_to_drop = ["East", "London", "South East", "South West", "Wales", "Scotland", "Northern Ireland", "England"]

# .copy() makes cf_cleaned an independent frame. Without it, later assignments
# into cf_cleaned operate on a slice of country_birth and raise
# SettingWithCopyWarning.
cf_cleaned = country_birth[~country_birth["Broad_group"].isin(regions_to_drop)].copy()

# Per-origin subsets, each restricted to one Broad_group / Detailed_group pair.
EU_birth = cf_cleaned[(cf_cleaned['Broad_group'] == "European Union") & (cf_cleaned['Detailed_group'] == 'All')]
# NOTE(review): unlike the other subsets this one selects Detailed_group
# 'Other Europe' rather than 'All' — confirm this is intentional.
NEU_birth = cf_cleaned[(cf_cleaned['Broad_group'] == "Other Europe") & (cf_cleaned['Detailed_group'] == 'Other Europe')]
Asia_birth = cf_cleaned[(cf_cleaned['Broad_group'] == "Asia") & (cf_cleaned['Detailed_group'] == 'All')]
Other_birth = cf_cleaned[(cf_cleaned['Broad_group'] == "Rest of the World") & (cf_cleaned['Detailed_group'] == 'All')]

In [50]:
# Wide table of estimates: one row per (area, broad group), one column per year.
# When several rows share a cell the first value is kept; empty cells become 0.
row_keys = ['Area_Code', 'Broad_group']
pivot_table = cf_cleaned.pivot_table(
    values='Estimate',
    index=row_keys,
    columns='Year',
    aggfunc='first',
    fill_value=0,
)

# Same table with the row keys moved back into ordinary columns.
pivot_reset = pivot_table.reset_index()

In [59]:
# Keep only the borough attributes needed for the join and the exported files.
boroughs = borough[['GSS_CODE', 'NAME', 'geometry']].copy()

# Scale the estimates by 1000 into a NEW frame instead of overwriting
# cf_cleaned['Estimate'] in place: the in-place version multiplied by another
# 1000 every time this cell was re-run and triggered SettingWithCopyWarning.
# (Presumably the source reports estimates in thousands — TODO confirm.)
cb_scaled = cf_cleaned.assign(Estimate=cf_cleaned['Estimate'] * 1000)

years = range(2008, 2019)
for year in years:
    # Rows for this year, reduced to the columns the pivot needs.
    cb_year_sim = cb_scaled.loc[
        cb_scaled['Year'] == year,
        ['Area_Code', 'Area_Name', 'Broad_group', 'Estimate'],
    ]

    # One row per area, one column per Broad_group; first value wins on
    # duplicates and missing combinations are filled with 0.
    pivot_cb = pd.pivot_table(
        cb_year_sim,
        values='Estimate',
        index=['Area_Code', 'Area_Name'],
        columns='Broad_group',
        aggfunc='first',
        fill_value=0,
    ).reset_index()

    # Left join keeps every borough geometry even when it has no estimates.
    # The result is ordered by GSS_CODE with a fresh 0..n-1 index, the join
    # helper columns are dropped, and the geometry is reprojected to EPSG:4326
    # before writing.
    joined_cb = (
        boroughs
        .merge(pivot_cb, left_on='GSS_CODE', right_on='Area_Code', how='left')
        .sort_values(by='GSS_CODE')
        .reset_index(drop=True)
        .drop(columns=['Area_Code', 'Area_Name'])
        .to_crs(epsg=4326)
    )

    # Expose the frame under the notebook-level name pivot_<year>_cb and write
    # it next to the notebook as pivot_<year>.geojson.
    globals()[f'pivot_{year}_cb'] = joined_cb
    joined_cb.to_file(f"pivot_{year}.geojson", driver="GeoJSON")


In [81]:
# Columns carried into the local Moran analysis.
moran_columns = ['GSS_CODE', 'NAME', 'Non-United Kingdom', 'Total', 'geometry']

# Read back the first and last yearly exports.
pivot_2008 = gpd.read_file("pivot_2008.geojson")
pivot_2018 = gpd.read_file("pivot_2018.geojson")

# Analysis frames: selected columns only, with the row labelled 0 removed.
# NOTE(review): row 0 is presumably the first borough by GSS_CODE — confirm
# which borough is excluded and why.
pivot_2008_moran = pivot_2008[moran_columns].drop(index=0).copy()
pivot_2018_moran = pivot_2018[moran_columns].drop(index=0).copy()

In [86]:
# Share of each borough's 'Total' that falls in 'Non-United Kingdom'.
# The previous code called .round(2) on the 'Total' column only (method calls
# bind tighter than '/'), so the ratio itself was never rounded. That misleading
# call is removed and the ratio is kept at full precision.
pivot_2008_moran['ratio'] = pivot_2008_moran['Non-United Kingdom'] / pivot_2008_moran['Total']
pivot_2018_moran['ratio'] = pivot_2018_moran['Non-United Kingdom'] / pivot_2018_moran['Total']
value_1 = pivot_2008_moran['ratio'].values
value_2 = pivot_2018_moran['ratio'].values

# Queen contiguity weights for each year's frame. use_index=False is the
# behaviour the implicit default had; passing it explicitly silences the
# FutureWarning about that default changing.
weight_1 = libpysal.weights.Queen.from_dataframe(pivot_2008_moran, use_index=False)
weight_2 = libpysal.weights.Queen.from_dataframe(pivot_2018_moran, use_index=False)

# Local Moran's I. The pseudo p-values (p_sim) come from random permutations,
# so a fixed seed is passed to make the results reproducible across runs.
moran_loc1 = esda.Moran_Local(value_1, weight_1, seed=42)
moran_loc2 = esda.Moran_Local(value_2, weight_2, seed=42)

# NOTE(review): 0.5 is an unusually permissive cut-off (0.05 is the
# conventional level) — confirm it is not a typo.
significant1 = moran_loc1.p_sim < 0.5

# Cluster label per borough: the Moran quadrant (1-4) where the pseudo p-value
# passes the cut-off, 0 otherwise. Per the esda docs the quadrants are
# 1 = HH, 2 = LH, 3 = LL, 4 = HL.
pivot_2008_moran['cluster'] = np.where(significant1, moran_loc1.q, 0)

  weight_1 = libpysal.weights.Queen.from_dataframe(pivot_2008_moran)
  weight_2 = libpysal.weights.Queen.from_dataframe(pivot_2018_moran)


In [87]:
# Significance mask for 2018. It must come from the 2018 statistic
# (moran_loc2); the previous code reused moran_loc1.p_sim, so the 2018 quadrants
# were filtered by the 2008 p-values.
# NOTE(review): 0.5 is an unusually permissive cut-off (0.05 is the
# conventional level) — confirm it is not a typo.
significant2 = moran_loc2.p_sim < 0.5

# Cluster label per borough: the Moran quadrant (1-4) where the pseudo p-value
# passes the cut-off, 0 otherwise.
pivot_2018_moran['cluster'] = np.where(significant2, moran_loc2.q, 0)
pivot_2018_moran

Unnamed: 0,GSS_CODE,NAME,Non-United Kingdom,Total,geometry,ratio,cluster
1,E09000002,Barking and Dagenham,79000.0,215000.0,"MULTIPOLYGON (((0.07317 51.52937, 0.07316 51.5...",0.367442,4
2,E09000003,Barnet,148000.0,391000.0,"POLYGON ((-0.1999 51.67017, -0.1997 51.66986, ...",0.378517,1
3,E09000004,Bexley,42000.0,249000.0,"POLYGON ((0.12021 51.51144, 0.12159 51.51181, ...",0.168675,3
4,E09000005,Brent,171000.0,328000.0,"POLYGON ((-0.19657 51.52765, -0.19685 51.52773...",0.521341,1
5,E09000006,Bromley,60000.0,332000.0,"POLYGON ((0.01213 51.2996, 0.01196 51.2998, 0....",0.180723,3
6,E09000007,Camden,108000.0,256000.0,"POLYGON ((-0.14242 51.56912, -0.1425 51.56901,...",0.421875,1
7,E09000008,Croydon,124000.0,387000.0,"POLYGON ((-0.06402 51.31864, -0.06408 51.31861...",0.320413,3
8,E09000009,Ealing,143000.0,343000.0,"POLYGON ((-0.41183 51.53408, -0.41188 51.53412...",0.41691,1
9,E09000010,Enfield,124000.0,337000.0,"POLYGON ((-0.1058 51.69187, -0.10557 51.69187,...",0.367953,1
10,E09000011,Greenwich,94000.0,285000.0,"MULTIPOLYGON (((-0.02485 51.48555, -0.02479 51...",0.329825,3


In [89]:
# Write both cluster-labelled frames to GeoJSON next to the notebook.
moran_exports = (
    ("2018_moran.geojson", pivot_2018_moran),
    ("2008_moran.geojson", pivot_2008_moran),
)
for out_name, moran_frame in moran_exports:
    moran_frame.to_file(out_name, driver="GeoJSON")