# All Methods

Plots the GAM score minus its expected value (GAM − μ) across Markov time for all three methods (polygon, kNN and ε-ball).

## Google Drive

In [None]:
# Mount Google Drive so the project folder is visible to this Colab runtime.
from google.colab import drive

drive.mount("/content/drive")

# Project base folder; later cells build every path by appending to it,
# so the trailing slash is required.
root = "/content/drive/My Drive/Project/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Libraries

In [None]:
# standard libraries
import numpy as np
import pandas as pd

In [None]:
%%capture
# geopandas install
# %%capture silences the build and pip logs produced by this cell.
import os
# Download the libspatialindex 1.8.5 source tarball and unpack it into the
# current working directory.
# NOTE(review): the chdir below assumes that directory is /content - confirm.
!curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz
# Each `!` line runs as a separate shell command, so the kernel itself has to
# change directory for the build commands below to run inside the source tree.
# The working directory is not restored afterwards.
os.chdir('/content/spatialindex-src-1.8.5')
# configure / make / install the library, then refresh the dynamic linker
# cache so the newly installed shared library can be located.
!./configure
!make
!make install
!ldconfig
# Python packages, installed unpinned (versions depend on when this is run).
# NOTE(review): rtree presumably relies on the library built above - confirm
# whether the source build is still required in the current runtime.
!pip install descartes
!pip install rtree
!pip install geopandas

# geopandas import (done here, after the install above)
import geopandas as gpd

In [None]:
%%capture
# bokeh import
from bokeh.models import HoverTool, ColumnDataSource, LinearAxis, Range1d, LogAxis
from bokeh.plotting import figure, output_file, show
# allows visualisation in notebook
from bokeh.io import output_notebook
from bokeh.resources import INLINE
output_notebook(INLINE)

## Classes

In [None]:
# Make the project's own modules importable: they live in the `Classes`
# folder directly under the Drive project root.
import sys

classes_dir = root + 'Classes'
sys.path.append(classes_dir)

# Project-local helper classes used throughout the rest of the notebook.
from Stability_class import Stability
from GAM_class import GAM
from Benchmark_class import Benchmark

## Data

In [None]:
# Read the MSOA shapefile from the project folder into a GeoDataFrame.
msoa_shapefile = root + 'MSOAs/MSOAs.shp'
MSOAs = gpd.read_file(msoa_shapefile)

# Report the table size, then preview the first rows as the cell output.
print('Shape: ', MSOAs.shape)
MSOAs.head()

Shape:  (6790, 6)


Unnamed: 0,msoa11cd,msoa11nm,st_areasha,pop,con_trust,geometry
0,E02000001,City of London 001,2983633.0,6031.0,0,"POLYGON ((-0.09276 51.52139, -0.08813 51.51941..."
1,E02000002,Barking and Dagenham 001,2091907.0,7131.0,0,"POLYGON ((0.14112 51.58054, 0.13788 51.57812, ..."
2,E02000003,Barking and Dagenham 002,2122216.0,10437.0,0,"POLYGON ((0.14838 51.58075, 0.14698 51.57568, ..."
3,E02000004,Barking and Dagenham 003,2569470.0,6393.0,0,"POLYGON ((0.19018 51.55268, 0.18600 51.54753, ..."
4,E02000005,Barking and Dagenham 004,1111109.0,9116.0,0,"POLYGON ((0.15043 51.56561, 0.14998 51.56138, ..."


In [None]:
# Load the stability results stored in the long-run .mat file.
stability_data = Stability(root + 'Stability Data/longrun.mat')

# Pull out the arrays used by the cells below:
#   C     - N x T array of cluster labels
#   k     - array of number of communities
#   times - array of Markov times
#   VI    - array of Variation of Information
C, k, times, VI = (
    stability_data.C,
    stability_data.k,
    stability_data.t,
    stability_data.VI,
)

## Benchmarks

In [None]:
# One Benchmark per method (polygon, kNN, epsilon-ball), each constructed from
# the same MSOA GeoDataFrame, in the order poly, k, ball.
poly_bench, k_bench, ball_bench = (Benchmark(MSOAs) for _ in range(3))

In [None]:
%%time
# Calculates the expected GAM
poly_bench.get_mu(k)
# Obtains mu
poly_mu = poly_bench.mu

# Calculates the expected GAM
ball_bench.get_mu(k)
# Obtains mu
ball_mu = ball_bench.mu

# Calculates the expected GAM
k_bench.get_mu(k)
# Obtains mu
k_mu = k_bench.mu

CPU times: user 362 µs, sys: 0 ns, total: 362 µs
Wall time: 312 µs


## GAM Scores

In [None]:
%%time
# get stability dataframe
stability_df = stability_data.cluster_df(MSOAs,'all')

# get GAM scores across time
poly_scores = GAM(stability_df).GAM_scores()

# get GAM scores across time
k_scores = GAM(stability_df).GAM_scores()

# get GAM scores across time
ball_scores = GAM(stability_df).GAM_scores()

What method 'poly', 'ball' or 'k'?
poly
What method 'poly', 'ball' or 'k'?
K
Invalid Method


TypeError: ignored

## Plot

In [None]:
# Also write the figure to a standalone HTML file in the project's Plots folder.
output_file(root + 'Plots/MS-Bench.html', mode='inline')

# The plotted quantity for each method is the GAM score minus its expected
# value (mu); compute it once so the data sources and the axis range agree.
poly_gam = poly_scores - poly_mu
k_gam = k_scores - k_mu
ball_gam = ball_scores - ball_mu

# ColumnDataSources expose named columns that the hover tooltips reference.
k_source = ColumnDataSource(data=dict(time=times, k=k))
poly_GAM_source = ColumnDataSource(data=dict(time=times, GAM=poly_gam))
k_GAM_source = ColumnDataSource(data=dict(time=times, GAM=k_gam))
ball_GAM_source = ColumnDataSource(data=dict(time=times, GAM=ball_gam))

# Primary y-range: derive it from the values that are actually drawn (all
# three GAM - mu series), not from the raw polygon scores, so no line falls
# outside the visible area. Only finite, strictly positive values can be
# placed on a log axis, so the limits are taken from those.
plotted_gam = np.concatenate(
    [np.asarray(series, dtype=float).ravel() for series in (poly_gam, k_gam, ball_gam)]
)
drawable_gam = plotted_gam[np.isfinite(plotted_gam) & (plotted_gam > 0)]
if drawable_gam.size:
    gam_y_range = (float(drawable_gam.min()) * 0.98, float(drawable_gam.max()) * 1.02)
else:
    # Nothing can be drawn on the log axis; keep the previous raw-score range
    # rather than failing, so the secondary axis still renders.
    gam_y_range = (np.min(poly_scores) * 0.98, np.max(poly_scores) * 1.02)

# create figure
# NOTE(review): `plot_height` / `plot_width` were renamed `height` / `width`
# in newer Bokeh releases - switch if the runtime's Bokeh rejects them.
plot = figure(title='GAM Score - \u03BC for Clusterings Found Across Markov Time',
              toolbar_location='below',
              x_axis_label='Markov Time',
              x_axis_type='log',
              y_axis_type='log',
              y_axis_label='GAM - \u03BC',
              y_range=gam_y_range,
              plot_height=600,
              plot_width=800)
plot.title.text_font_size = '16pt'
plot.axis.axis_label_text_font_size = '12pt'
plot.axis.major_label_text_font_size = '10pt'
# Set before the right-hand axis is added, so only the existing axes are coloured.
plot.yaxis.axis_label_text_color = 'blue'

# Secondary y-range (number of communities), padded by 2% on each side.
plot.extra_y_ranges = {'clusters': Range1d(np.min(k) * 0.98, np.max(k) * 1.02)}

# Right-hand axis bound to the secondary range.
plot.add_layout(
    LogAxis(y_range_name='clusters',
            axis_label='Number of Communities',
            axis_label_text_color='red'),
    'right',
)

# One line per method on the primary axis.
plot1 = plot.line('time', 'GAM', source=poly_GAM_source, legend_label='Polygon')
plot2 = plot.line('time', 'GAM', source=k_GAM_source, line_color='magenta', legend_label='kNN')
plot3 = plot.line('time', 'GAM', source=ball_GAM_source, line_color='green', legend_label='\u03B5-ball')

# One hover tool per GAM line, added in the same order as the lines.
gam_tooltips = [('Markov time: ', '@time'), ('GAM - \u03BC: ', '@GAM')]
for gam_line in (plot1, plot2, plot3):
    plot.add_tools(HoverTool(renderers=[gam_line], tooltips=list(gam_tooltips), mode='vline'))

# Number of communities on the secondary axis, with its own hover tool.
plot4 = plot.line('time', 'k', line_color='red', y_range_name='clusters', source=k_source)
plot.add_tools(HoverTool(renderers=[plot4],
                         tooltips=[('Markov time: ', '@time'), ('Number of Communities: ', '@k')],
                         mode='vline'))

plot.legend.location = 'top_left'

show(plot)

Output hidden; open in https://colab.research.google.com to view.