# Neighbours

Notebook visualising graph construction methods.

## Google Drive

In [1]:
# mount Google Drive (Colab-only: prompts for an authorization code on first run)
from google.colab import drive
drive.mount('/content/drive')
# Base project folder on the mounted drive. Later cells build paths by plain
# string concatenation (root + 'Classes', root + 'MSOAs/MSOAs.shp', ...),
# so the trailing '/' is required.
root = '/content/drive/My Drive/Project/'

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


## Libraries

In [2]:
# standard libraries
import numpy as np
import pandas as pd

In [3]:
%%capture
# geopandas install
import os
!curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz
os.chdir('/content/spatialindex-src-1.8.5')
!./configure
!make
!make install
!ldconfig
!pip install descartes
!pip install rtree
!pip install geopandas

# geopandas import
import geopandas as gpd

In [4]:
# bokeh import
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import column
# allows visualisation in notebook
from bokeh.io import output_notebook
from bokeh.resources import INLINE
output_notebook(INLINE)
# folium
import folium
import branca.colormap as cm

In [5]:
import sys
sys.path.append(root + 'Classes')
from GAM_class import GAM

## Load Data

Load in MSOA data.

In [6]:
# Read the MSOA shapefile from the project folder into a GeoDataFrame.
MSOAs = gpd.read_file(root + 'MSOAs/MSOAs.shp')
# Sanity check on the table size, then preview the first rows.
print('Shape: ',MSOAs.shape)
MSOAs.head()

Shape:  (6790, 6)


Unnamed: 0,msoa11cd,msoa11nm,st_areasha,pop,con_trust,geometry
0,E02000001,City of London 001,2983633.0,6031.0,0,"POLYGON ((-0.09276 51.52139, -0.08813 51.51941..."
1,E02000002,Barking and Dagenham 001,2091907.0,7131.0,0,"POLYGON ((0.14112 51.58054, 0.13788 51.57812, ..."
2,E02000003,Barking and Dagenham 002,2122216.0,10437.0,0,"POLYGON ((0.14838 51.58075, 0.14698 51.57568, ..."
3,E02000004,Barking and Dagenham 003,2569470.0,6393.0,0,"POLYGON ((0.19018 51.55268, 0.18600 51.54753, ..."
4,E02000005,Barking and Dagenham 004,1111109.0,9116.0,0,"POLYGON ((0.15043 51.56561, 0.14998 51.56138, ..."


# Gets Neighbors

Poly neighbors dataframe

In [7]:
# poly neighbors dataframe
# GAM is the project class imported from GAM_class; its internals are not visible here.
# NOTE(review): get_neighs() with no arguments presumably selects the polygon
# (shared-boundary) method -- confirm against GAM_class.
Poly = GAM(MSOAs)
Poly.get_neighs()
# .gdf is the GeoDataFrame held by the GAM object; the preview below shows it
# carries a 'numneigh' column after get_neighs() has run.
poly_df = Poly.gdf
poly_df.head()

Unnamed: 0,msoa11cd,msoa11nm,st_areasha,pop,con_trust,geometry,numneigh
0,E02000001,City of London 001,2983633.0,6031.0,0,"POLYGON ((-0.09276 51.52139, -0.08813 51.51941...",10.0
1,E02000002,Barking and Dagenham 001,2091907.0,7131.0,0,"POLYGON ((0.14112 51.58054, 0.13788 51.57812, ...",6.0
2,E02000003,Barking and Dagenham 002,2122216.0,10437.0,0,"POLYGON ((0.14838 51.58075, 0.14698 51.57568, ...",7.0
3,E02000004,Barking and Dagenham 003,2569470.0,6393.0,0,"POLYGON ((0.19018 51.55268, 0.18600 51.54753, ...",5.0
4,E02000005,Barking and Dagenham 004,1111109.0,9116.0,0,"POLYGON ((0.15043 51.56561, 0.14998 51.56138, ...",5.0


K neighbors dataframe.

In [8]:
# K neighbors dataframe
# NOTE(review): ('k', 6) presumably requests the 6 nearest neighbours of each MSOA;
# the describe() output for k_df shows numneigh == 6 on every row. Confirm in GAM_class.
K = GAM(MSOAs)
K.get_neighs('k',6)
k_df = K.gdf
k_df.head()

Unnamed: 0,msoa11cd,msoa11nm,st_areasha,pop,con_trust,geometry,numneigh
0,E02000001,City of London 001,2983633.0,6031.0,0,"POLYGON ((-0.09276 51.52139, -0.08813 51.51941...",6
1,E02000002,Barking and Dagenham 001,2091907.0,7131.0,0,"POLYGON ((0.14112 51.58054, 0.13788 51.57812, ...",6
2,E02000003,Barking and Dagenham 002,2122216.0,10437.0,0,"POLYGON ((0.14838 51.58075, 0.14698 51.57568, ...",6
3,E02000004,Barking and Dagenham 003,2569470.0,6393.0,0,"POLYGON ((0.19018 51.55268, 0.18600 51.54753, ...",6
4,E02000005,Barking and Dagenham 004,1111109.0,9116.0,0,"POLYGON ((0.15043 51.56561, 0.14998 51.56138, ...",6


$\epsilon$-ball dataframe.

In [9]:
# epsilon-ball neighbors dataframe
# NOTE(review): ('ball', 0.03) presumably links every MSOA to all others within a
# radius of 0.03, measured in the units of the geometry coordinates -- confirm in GAM_class.
# The same radius is hard-coded again in the case-study KDTree query further down.
Ball = GAM(MSOAs)
Ball.get_neighs('ball',0.03)
ball_df = Ball.gdf
ball_df.head()

Unnamed: 0,msoa11cd,msoa11nm,st_areasha,pop,con_trust,geometry,numneigh
0,E02000001,City of London 001,2983633.0,6031.0,0,"POLYGON ((-0.09276 51.52139, -0.08813 51.51941...",27
1,E02000002,Barking and Dagenham 001,2091907.0,7131.0,0,"POLYGON ((0.14112 51.58054, 0.13788 51.57812, ...",8
2,E02000003,Barking and Dagenham 002,2122216.0,10437.0,0,"POLYGON ((0.14838 51.58075, 0.14698 51.57568, ...",10
3,E02000004,Barking and Dagenham 003,2569470.0,6393.0,0,"POLYGON ((0.19018 51.55268, 0.18600 51.54753, ...",9
4,E02000005,Barking and Dagenham 004,1111109.0,9116.0,0,"POLYGON ((0.15043 51.56561, 0.14998 51.56138, ...",12


## Describe datasets.

In [10]:
# Summary statistics for the polygon-method frame; 'numneigh' is the column to compare across methods.
poly_df.describe()

Unnamed: 0,st_areasha,pop,con_trust,numneigh
count,6790.0,6790.0,6790.0,6790.0
mean,19210100.0,7932.776436,0.020619,5.706038
std,47788610.0,1682.414016,0.142114,2.13505
min,269879.8,4950.0,0.0,1.0
25%,1671984.0,6639.0,0.0,4.0
50%,3028275.0,7726.5,0.0,5.0
75%,10564900.0,8904.5,0.0,7.0
max,1128113000.0,16794.0,1.0,22.0


In [11]:
# Summary statistics for the kNN frame; 'numneigh' is the column to compare across methods.
k_df.describe()

Unnamed: 0,st_areasha,pop,con_trust,numneigh
count,6790.0,6790.0,6790.0,6790.0
mean,19210100.0,7932.776436,0.020619,6.0
std,47788610.0,1682.414016,0.142114,0.0
min,269879.8,4950.0,0.0,6.0
25%,1671984.0,6639.0,0.0,6.0
50%,3028275.0,7726.5,0.0,6.0
75%,10564900.0,8904.5,0.0,6.0
max,1128113000.0,16794.0,1.0,6.0


In [12]:
# Summary statistics for the epsilon-ball frame; 'numneigh' is the column to compare across methods.
ball_df.describe()

Unnamed: 0,st_areasha,pop,con_trust,numneigh
count,6790.0,6790.0,6790.0,6790.0
mean,19210100.0,7932.776436,0.020619,5.685125
std,47788610.0,1682.414016,0.142114,6.644507
min,269879.8,4950.0,0.0,0.0
25%,1671984.0,6639.0,0.0,1.0
50%,3028275.0,7726.5,0.0,4.0
75%,10564900.0,8904.5,0.0,8.0
max,1128113000.0,16794.0,1.0,38.0


# Number of neighbors histogram.

In [14]:
"""diff = max(num) - min(num)
step = 2
if diff == 0:
    bins = [min(num)-1, max(num)]
else:
    bins = np.arange(min(num), max(num) + step, step)"""

'diff = max(num) - min(num)\nstep = 2\nif diff == 0:\n    bins = [min(num)-1, max(num)]\nelse:\n    bins = np.arange(min(num), max(num) + step, step)'

In [29]:
def hist_prep(df, min_upper_edge=37):
    """Prepares the ``numneigh`` column of ``df`` as a histogram data source.

    Neighbor counts are binned into unit-width bins starting at 0, so bin
    ``k`` holds the rows with exactly ``k`` neighbors.

    Parameters
    ----------
    df : DataFrame with a numeric ``numneigh`` column.
    min_upper_edge : int, default 37
        Smallest value allowed for the last bin edge. The default reproduces
        the previous fixed edges ``0..37`` whenever the data fit inside them,
        which keeps the x-range comparable between methods.

    Returns
    -------
    ColumnDataSource with columns ``count`` (rows per bin), ``start`` and
    ``end`` (left/right edge of each bin).
    """
    # gets array of col; missing / non-finite entries cannot be binned
    num = np.asarray(df['numneigh'], dtype=float)
    num = num[np.isfinite(num)]

    # create bins: the last edge must be strictly above the largest count.
    # np.histogram closes the final bin on the right, so with a fixed last
    # edge of 37 a count of 38 was dropped and 36 and 37 shared one bin.
    upper = min_upper_edge
    if num.size:
        upper = max(upper, int(np.floor(num.max())) + 1)
    bins = np.arange(0, upper + 1, 1)

    hist, edges = np.histogram(num, bins = bins)

    hist_source = ColumnDataSource(data=dict(
        count=hist,
        start=edges[:-1],
        end=edges[1:]
    ))

    return hist_source

from bokeh.models.widgets import Tabs, Panel


def _neigh_hist_fig(title, source, hover_tool):
    """Builds one 'number of neighbors' histogram figure.

    title: text shown above the plot.
    source: ColumnDataSource with 'count', 'start' and 'end' columns
        (the layout returned by hist_prep).
    hover_tool: tool attached to the figure to show the bin tooltip.
    Returns the configured figure.
    """
    fig = figure(title=title,
                 x_axis_label = 'Number of Neighbors',y_axis_label = 'Count',
                 plot_width=900, plot_height=400,toolbar_location=None)

    # one bar per bin: spans [start, end] horizontally, height = count
    fig.quad(top='count', left='start', right='end',source = source,
             hover_fill_alpha=0.8,line_color='white')

    fig.add_tools(hover_tool)
    fig.title.text_font_size = '16pt'
    fig.axis.axis_label_text_font_size = '12pt'
    fig.axis.major_label_text_font_size = '10pt'
    return fig


# show() below also writes the tabbed layout to this standalone HTML file
output_file(root  + 'Plots/Histogram.html')

# hover tool
hover = HoverTool(tooltips=[("Interval","@start to @end"),("Count","@count")])

# data for histogram, in the order polygon / kNN / epsilon-ball
sources = [hist_prep(frame) for frame in [poly_df, k_df, ball_df]]

# one figure per graph construction method
poly_fig = _neigh_hist_fig('Polygon Method Number of Neighbors Histogram',
                           sources[0], hover)
k_fig = _neigh_hist_fig('kNN Method Number of Neighbors Histogram',
                        sources[1], hover)
ball_fig = _neigh_hist_fig('\u03B5-Ball Method Number of Neighbors Histogram',
                           sources[2], hover)

# Create panels
poly_panel = Panel(child=poly_fig, title='Polygon')
k_panel = Panel(child=k_fig, title='kNN')
ball_panel = Panel(child=ball_fig, title='Ball')

# Assign the panels to Tabs
tabs = Tabs(tabs=[poly_panel,k_panel,ball_panel])

# Show the tabbed layout
show(tabs)

Output hidden; open in https://colab.research.google.com to view.

## Case Studies

Give all nodes label 1.

In [None]:
# every MSOA starts with label 1 in all three neighbor frames
for frame in (poly_df, k_df, ball_df):
    frame['label'] = 1

Give case studies label 0.

In [None]:
# MSOA names used as case studies
cases = ['Cornwall 069','Lambeth 036','Lewes 002','Haringey 005','Eden 007']

# case studies are marked with label 0 in every neighbor frame
for frame in (poly_df, k_df, ball_df):
    is_case = frame['msoa11nm'].isin(cases)
    frame.loc[is_case, 'label'] = 0

Get the list of polygon-method neighbors of the case studies.

In [None]:
# Index labels of every polygon that touches one of the case-study MSOAs.
case_rows = poly_df[poly_df['msoa11nm'].isin(cases)]

neigh_chunks = []
for i, row in case_rows.iterrows():
    # finds neighbors: all polygons whose geometry intersects this case study
    neigh = np.array(poly_df[poly_df.geometry.intersects(
        row['geometry'])].index)
    # removes self intersections
    neigh_chunks.append(neigh[neigh != i])

# Concatenate once at the end: growing the array inside the loop from a `[]`
# seed re-copied it on every pass and turned the index labels into floats.
if neigh_chunks:
    poly_neighs = np.concatenate(neigh_chunks)
else:
    poly_neighs = np.array([], dtype=poly_df.index.dtype)

In [None]:
from scipy import spatial

#finds centroids: one [x, y] pair per row of ball_df, in row order
centroids = ball_df['geometry'].apply(
    lambda g:[g.centroid.x,g.centroid.y]).tolist()

#spatially organising the points on a tree to reduce runtime
kdtree = spatial.KDTree(centroids)

# Row positions of the case studies. KDTree results are row positions too, so
# the self-match is removed by position rather than by index label.
case_mask = ball_df['msoa11nm'].isin(cases).to_numpy()
case_positions = np.flatnonzero(case_mask)

# Centroids of the case studies only. This is kept in its own name so the full
# `centroids` list is not overwritten.
case_centroids = [centroids[pos] for pos in case_positions]

#finds every centroid within the ball radius of each case study
# (0.03 is the same radius passed to Ball.get_neighs('ball', 0.03))
neighs = kdtree.query_ball_point(case_centroids, r=0.03)

# index labels of the case studies
idx = ball_df.index[case_positions]

# Flatten to one list, leaving out each case study's own position.
# NOTE(review): these are row positions; they are later matched against
# ball_df.index, which is only correct while the frame keeps its default
# 0..n-1 index (as in the previews above) -- confirm if the index changes.
ball_neighs = [pos
               for case_pos, case_neigh in zip(case_positions, neighs)
               for pos in case_neigh
               if pos != case_pos]

In [None]:
#calculates the nearest neighbors
# Reuses `kdtree` and `case_centroids` from the previous cell. k is 6 + 1 because
# the query point itself is expected to come back as its own closest hit.
_ , neighs = kdtree.query(case_centroids, k=6 + 1)

# remove self as neighbor
# NOTE(review): dropping column 0 assumes each case study is returned first
# (distance 0) -- holds unless two MSOAs share a centroid.
neighs = neighs[:,1:]
# one flat array of neighbor row positions across all case studies
k_neighs= neighs.flatten()

Label neighbors as 2.

In [None]:
# Rows whose index appears in a method's neighbor list get label 2.
# This runs after the case studies were set to 0, so a case study that is also
# a neighbor of another case study ends up with label 2.
neighbor_sets = (
    (poly_df, poly_neighs),
    (k_df, k_neighs),
    (ball_df, ball_neighs),
)
for frame, neigh_ids in neighbor_sets:
    frame.loc[frame.index.isin(neigh_ids), 'label'] = 2

Color map.

In [None]:
# Step colour map with three bands: np.arange(-0.5, 3) yields the edges
# [-0.5, 0.5, 1.5, 2.5], so the integer labels 0, 1 and 2 each fall inside
# their own band.
colormap = cm.linear.RdBu_03.to_step(index=np.arange(-0.5,3))
colormap

## Neighbour map.

Folium map.

In [None]:
def MSOA_Overlay(df, style_function, highlight_function, name):
    """
    Wraps ``df`` in a folium GeoJson layer with a hover tooltip.

    df: data handed to folium.features.GeoJson; the tooltip reads the
        'msoa11nm', 'pop', 'st_areasha', 'numneigh' and 'label' fields.
    style_function: callable passed through as the layer's style function.
    highlight_function: callable passed through as the layer's highlight function.
    name: layer name.

    Returns the GeoJson layer (not yet attached to any map).
    """
    tooltip_css = "background-color: white; color: #333333; font-family: arial; font-size: 12px; padding: 10px;"

    # tooltip shown on hover: one aliased line per field
    hover_tooltip = folium.features.GeoJsonTooltip(
        fields=['msoa11nm','pop','st_areasha','numneigh','label'],
        aliases=['MSOA Name','Population','Area','Number of Neighbors','label'],
        localize=True,
        style=tooltip_css,
    )

    overlay = folium.features.GeoJson(
        df,
        name=name,
        style_function=style_function,
        highlight_function=highlight_function,
        tooltip=hover_tooltip,
    )
    return overlay

def style_function(feature):
    """Default look of a polygon: thin black outline, fill colour taken from its 'label' property."""
    return {"weight":0.4,
            "color":'black',
            "opacity":1,
            "fillColor":colormap(feature['properties']['label']),
            'fillOpacity':1}


def highlight_function(feature):
    """Look of a polygon while hovered: half-transparent black fill and black outline."""
    return {'fillColor': '#000000',
            'color':'#000000',
            'fillOpacity': 0.50}


# one GeoJson layer per graph construction method, all sharing the same styling
poly_layer = MSOA_Overlay(poly_df, style_function, highlight_function, 'Polygon Method')
k_layer = MSOA_Overlay(k_df, style_function, highlight_function, 'kNN')
ball_layer = MSOA_Overlay(ball_df, style_function, highlight_function, 'Epsilon Ball')

In [None]:
from folium.plugins import FloatImage

# base map with zoom buttons disabled
m = folium.Map([54.5,-3],zoom_start=6.8,min_zoom=6,zoom_control=False,tiles='cartodbpositron')

# title, injected as raw HTML into the page
title_html = '''
             <h3 align="center" style="font-size:16px"><b>{}</b></h3>
             '''.format('MSOA Neighbors Case Studies')   

# one feature group per method; only the polygon method is created without show=False
poly_fg = folium.FeatureGroup(name='Polygon Method')
k_fg = folium.FeatureGroup(name='kNN',show=False)
ball_fg = folium.FeatureGroup(name='Epsilon Ball',show=False)

# register each group on the map, then put its layer inside it
# (same order as before: polygon, kNN, epsilon ball)
group_layers = (
    (poly_fg, poly_layer),
    (k_fg, k_layer),
    (ball_fg, ball_layer),
)
for group, layer in group_layers:
    m.add_child(group)
    layer.add_to(group)

# adds the title to the page
m.get_root().html.add_child(folium.Element(title_html))

# layer control, added after the feature groups, shown expanded
m.add_child(folium.LayerControl(collapsed=False))

# colour key: a static image floated over the map
FloatImage('https://dommc97.github.io/Applied-Maths-MSc-Thesis/figures/Key.PNG',left=10,bottom=80).add_to(m)

# save and display
m.save(root  + 'Plots/neighbors.html')
m