In [1]:
# !pip install descartes
# !pip install geopandas
# !pip install pandas_bokeh

In [2]:
#!unzip FinalOutputs.zip

In [3]:
import os
import random # library for random number generation

import geopandas as gpd
from shapely.geometry import Point
import numpy as np # library for vectorized computation
import pandas as pd # library to process data as dataframes

import pandas_bokeh # library for visualization dashboard
import matplotlib.pyplot as plt # plotting library
# backend for rendering plots within the browser
%matplotlib inline 

from sklearn.cluster import KMeans 
try:
    from sklearn.datasets import make_blobs
except ImportError:  # fallback for scikit-learn releases that only expose the legacy module path
    from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler # for standardization
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import output_file, show
from bokeh.plotting import figure
from bokeh.layouts import layout

# Status message confirming that the import cell ran to completion.
print("Libraries imported.")


Libraries imported.


In [4]:
# Direct the Bokeh output of this notebook to a standalone HTML file.
html_report_name = "Classification Visualization.html"
pandas_bokeh.output_file(html_report_name)

In [5]:
# Read the three statistics shapefiles (CT, DB and GRID).
# The folder is named once instead of being repeated in every path. It defaults
# to the original location but can be redirected without editing the notebook
# by setting the REGINA_DATA_DIR environment variable.
DATA_DIR = os.environ.get('REGINA_DATA_DIR', '/Users/lirui/Desktop/ReginaOutputs')
ctstats = gpd.read_file(os.path.join(DATA_DIR, 'CT_Stats.shp'))
dbstats = gpd.read_file(os.path.join(DATA_DIR, 'DB_Stats.shp'))
gridstats = gpd.read_file(os.path.join(DATA_DIR, 'GRID_Stats.shp'))

DriverError: /Users/lirui/Desktop/FeginaOutputs/GRID_Stats.shp: No such file or directory

# 1. Census Tract

In [None]:
# Preview the first five rows of the raw census-tract table.
ctstats.head(n=5)

### We need only the 5 statistics, the Census Tract ID and the geometry of the CT polygons

In [None]:
# Remove the columns that are not needed for clustering, then discard every
# row that still contains a missing value, and preview the result.
df_ct = (
    ctstats
    .drop(columns=['PRNAME', 'CMANAME', 'CMATYPE', 'area', 'BldgArea', 'BldgCount', 'ContCount'])
    .dropna(axis='rows')
)
df_ct.head()

## 1.1 Classification Task on the Census Tract data

### Creating Standardized/normalized data for scale invariance as below:

### Every variable $x$ is standardized as $\frac{x-\mu}{\sigma}$, where $\mu$ is the mean and $\sigma$ is the standard deviation of that variable

In [None]:
# Feature matrix for the census tracts: the five statistics, selected by
# column name rather than by position, so a different column order or an
# extra attribute in the shapefile cannot silently enter the clustering.
X = df_ct[['BCR', 'BD', 'ProxMean', 'AvgSize', 'ContRatio']].to_numpy(dtype=float)
# Standardize every column to zero mean and unit variance.
cluster_dataset = StandardScaler().fit_transform(X)

### k-means clustering on Census Tract

In [None]:
# Cluster the standardized census-tract statistics into 3 groups.
# random_state fixes the k-means++ seeding so the cluster numbering, and with
# it every "Class n" label derived below, is identical on each full re-run.
k_means = KMeans(init="k-means++", n_clusters=3, n_init=12, random_state=0)
# fit the standardized values to the model
k_means.fit(cluster_dataset)
labels = k_means.labels_

In [None]:
# Turn each numeric cluster label into a display string such as "Class 0".
new_lab = ["Class " + str(cluster_id) for cluster_id in labels]

In [None]:
# Convert the list of class names to a NumPy array.
new_lab = np.array(new_lab)

In [None]:
# Attach the class name and the numeric cluster id to every census tract,
# then preview the table.
df_ct = df_ct.assign(Labels=new_lab, LabelNum=labels)
df_ct.head()

In [None]:
# Store the class names with the generic object dtype.
df_ct['Labels'] = df_ct.Labels.astype('object')

### Interpretation of Classes

In [None]:
# Census tracts assigned to "Class 0".
CT_cluster0 = df_ct.loc[df_ct['Labels'] == "Class 0"]

In [None]:
# Census tracts assigned to "Class 1".
CT_cluster1 = df_ct.loc[df_ct['Labels'] == "Class 1"]

In [None]:
# Census tracts assigned to "Class 2".
CT_cluster2 = df_ct.loc[df_ct['Labels'] == "Class 2"]

## 1.2 Visualization for classifications

### BCR visualization

In [None]:
# Median BCR of each census-tract cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
ct_bcr = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Buliding Coverage Ratio': [
        cluster["BCR"].median()
        for cluster in (CT_cluster0, CT_cluster1, CT_cluster2)
    ],
}
df_ct_bcr = pd.DataFrame(ct_bcr).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
ct_bcr_bar = df_ct_bcr.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="blue",
    alpha=0.6)

### BD visualization

In [None]:
# Median BD of each census-tract cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
ct_bd = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Buliding Density': [
        cluster["BD"].median()
        for cluster in (CT_cluster0, CT_cluster1, CT_cluster2)
    ],
}
df_ct_bd = pd.DataFrame(ct_bd).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
ct_bd_bar = df_ct_bd.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="purple",
    alpha=0.6)

### Proximity visualization

In [None]:
# Median ProxMean of each census-tract cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
ct_p = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Proximity': [
        cluster["ProxMean"].median()
        for cluster in (CT_cluster0, CT_cluster1, CT_cluster2)
    ],
}
df_ct_p = pd.DataFrame(ct_p).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
ct_p_bar = df_ct_p.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Green",
    alpha=0.6)

### Size visualization

In [None]:
# Median AvgSize of each census-tract cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
ct_s = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Average Size': [
        cluster["AvgSize"].median()
        for cluster in (CT_cluster0, CT_cluster1, CT_cluster2)
    ],
}
df_ct_s = pd.DataFrame(ct_s).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
ct_s_bar = df_ct_s.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Red",
    alpha=0.6)

### Contiguity Ratio visualization

In [None]:
# Median ContRatio of each census-tract cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
ct_c = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    "Contiguity Ratio": [
        cluster["ContRatio"].median()
        for cluster in (CT_cluster0, CT_cluster1, CT_cluster2)
    ],
}
df_ct_c = pd.DataFrame(ct_c).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
ct_c_bar = df_ct_c.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Grey",
    alpha=0.6)

### Integrated visualization for census tract

In [None]:
# Map of the census-tract geometries coloured by the numeric cluster id
# (LabelNum); the hover tool lists CTUID and Labels. The figure is only built
# here (show_figure=False), not displayed.
Rct_class = df_ct.plot_bokeh(
    title="Classification for Census Tract",
    legend="Census Tract",
    category="LabelNum",
    colormap="Viridis",
    show_colorbar=True,
    hovertool_columns=["CTUID", "Labels"],
    figsize=(900, 600),
    toolbar_location="above",
    show_figure=False,
)

In [None]:
# Census-tract page: the map on top, then two rows holding the five bar charts.
ct = layout(
    children=[
        [Rct_class],
        [ct_bcr_bar, ct_bd_bar],
        [ct_p_bar, ct_s_bar, ct_c_bar],
    ],
    sizing_mode="fixed",
)

# 2. Dissemination Block

In [None]:
# Remove the columns of the dissemination-block table that are not needed for clustering.
df_db = dbstats.drop(
    columns=['PRNAME', 'CMANAME', 'CMATYPE', 'area', 'BldgArea', 'BldgCount', 'ContCount']
)

In [None]:
# Discard every dissemination block that has at least one missing value.
df_db = df_db.dropna(axis="index", how="any")

In [None]:
# Preview the first five rows of the cleaned dissemination-block table.
df_db.head(n=5)

## 2.1 Classification Task on the Dissemination Block data

In [None]:
# Feature matrix for the dissemination blocks: the five statistics, selected
# by column name rather than by position, so a different column order or an
# extra attribute in the shapefile cannot silently enter the clustering.
X = df_db[['BCR', 'BD', 'ProxMean', 'AvgSize', 'ContRatio']].to_numpy(dtype=float)
# Standardize every column to zero mean and unit variance.
cluster_dataset = StandardScaler().fit_transform(X)
cluster_dataset

In [None]:
# Cluster the standardized dissemination-block statistics into 3 groups.
# random_state fixes the k-means++ seeding so the cluster numbering, and with
# it every "Class n" label derived below, is identical on each full re-run.
k_means = KMeans(init="k-means++", n_clusters=3, n_init=12, random_state=0)
# fit the standardized values to the model
k_means.fit(cluster_dataset)
labels = k_means.labels_

In [None]:
# Turn each numeric cluster label into a display string such as "Class 0".
new_lab = ["Class " + str(cluster_id) for cluster_id in labels]

In [None]:
# Convert the list of class names to a NumPy array.
new_lab = np.array(new_lab)

In [None]:
# Attach the class name and the numeric cluster id to every dissemination
# block, then preview the table.
df_db = df_db.assign(Labels=new_lab, LabelNum=labels)
df_db.head()

In [None]:
# Store the class names with the generic object dtype.
df_db['Labels'] = df_db.Labels.astype('object')

### Interpretation of Classes

In [None]:
# One sub-table per class, selected by the class name stored in Labels.
DB_cluster0 = df_db.loc[df_db['Labels'] == "Class 0"]
DB_cluster1 = df_db.loc[df_db['Labels'] == "Class 1"]
DB_cluster2 = df_db.loc[df_db['Labels'] == "Class 2"]

## 2.2 Visualization for classification

### BCR visualization

In [None]:
# Median BCR of each dissemination-block cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
db_bcr = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Buliding Coverage Ratio': [
        cluster["BCR"].median()
        for cluster in (DB_cluster0, DB_cluster1, DB_cluster2)
    ],
}
df_db_bcr = pd.DataFrame(db_bcr).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
db_bcr_bar = df_db_bcr.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="blue",
    alpha=0.6)

### BD visualization

In [None]:
# Median BD of each dissemination-block cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
db_bd = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Buliding Density': [
        cluster["BD"].median()
        for cluster in (DB_cluster0, DB_cluster1, DB_cluster2)
    ],
}
df_db_bd = pd.DataFrame(db_bd).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
db_bd_bar = df_db_bd.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="purple",
    alpha=0.6)

### Proximity visualization

In [None]:
# Median ProxMean of each dissemination-block cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
db_p = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Proximity': [
        cluster["ProxMean"].median()
        for cluster in (DB_cluster0, DB_cluster1, DB_cluster2)
    ],
}
df_db_p = pd.DataFrame(db_p).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
db_p_bar = df_db_p.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Green",
    alpha=0.6)

### Size visualization

In [None]:
# Median AvgSize of each dissemination-block cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
db_s = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Average Size': [
        cluster["AvgSize"].median()
        for cluster in (DB_cluster0, DB_cluster1, DB_cluster2)
    ],
}
df_db_s = pd.DataFrame(db_s).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
db_s_bar = df_db_s.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Red",
    alpha=0.6)

### Contiguity Ratio visualization

In [None]:
# Median ContRatio of each dissemination-block cluster, drawn as a bar chart.
# All three values come from the DB_cluster* frames; the third entry used to
# read the census-tract frame CT_cluster2, which put a value from the wrong
# geography into this chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
db_c = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    "Contiguity Ratio": [
        cluster["ContRatio"].median()
        for cluster in (DB_cluster0, DB_cluster1, DB_cluster2)
    ],
}
df_db_c = pd.DataFrame(db_c).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
db_c_bar = df_db_c.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Grey",
    alpha=0.6)

### Integrated visualization for Dissemination Blocks

In [None]:
# Map of the dissemination-block geometries coloured by the numeric cluster id
# (LabelNum); the hover tool lists DBUID and Labels. The figure is only built
# here (show_figure=False), not displayed.
Rdb_class = df_db.plot_bokeh(
    title="Classification for Dessimination Block",
    legend="Dessimination Block",
    category="LabelNum",
    colormap="Viridis",
    show_colorbar=True,
    hovertool_columns=["DBUID", "Labels"],
    figsize=(900, 600),
    toolbar_location="above",
    show_figure=False,
)

In [None]:
# Dissemination-block page: the map on top, then two rows holding the five bar charts.
db = layout(
    children=[
        [Rdb_class],
        [db_bcr_bar, db_bd_bar],
        [db_p_bar, db_s_bar, db_c_bar],
    ],
    sizing_mode="fixed",
)

# 3. 1 km<sup>2</sup> Grid

In [None]:
# Remove the columns of the grid table that are not needed for clustering.
df_grid = gridstats.drop(columns=['area', 'BldgArea', 'BldgCount', 'ContCount'])

In [None]:
# Discard every grid cell that has at least one missing value.
df_grid = df_grid.dropna(axis="index", how="any")

In [None]:
# Preview the first five rows of the cleaned grid table.
df_grid.head(n=5)

## 3.1 Classification Task on the Grid 1km^2 data

In [None]:
# Feature matrix for the grid cells: the five statistics, selected by column
# name rather than by position, so a different column order or an extra
# attribute in the shapefile cannot silently enter the clustering.
X = df_grid[['BCR', 'BD', 'ProxMean', 'AvgSize', 'ContRatio']].to_numpy(dtype=float)
# Standardize every column to zero mean and unit variance.
cluster_dataset = StandardScaler().fit_transform(X)

In [None]:
# Cluster the standardized grid statistics into 3 groups.
# random_state fixes the k-means++ seeding so the cluster numbering, and with
# it every "Class n" label derived below, is identical on each full re-run.
k_means = KMeans(init="k-means++", n_clusters=3, n_init=12, random_state=0)
# fit the standardized values to the model
k_means.fit(cluster_dataset)
labels = k_means.labels_

In [None]:
# Turn each numeric cluster label into a display string such as "Class 0".
new_lab = ["Class " + str(cluster_id) for cluster_id in labels]

In [None]:
# Convert the list of class names to a NumPy array.
new_lab = np.array(new_lab)

In [None]:
# Attach the class name and the numeric cluster id to every grid cell,
# then preview the table.
df_grid = df_grid.assign(Labels=new_lab, LabelNum=labels)
df_grid.head()

In [None]:
# Store the class names with the generic object dtype.
df_grid['Labels'] = df_grid.Labels.astype('object')

### Interpretation of Classes

In [None]:
# One sub-table per class, selected by the class name stored in Labels.
GRID_cluster0 = df_grid.loc[df_grid['Labels'] == "Class 0"]
GRID_cluster1 = df_grid.loc[df_grid['Labels'] == "Class 1"]
GRID_cluster2 = df_grid.loc[df_grid['Labels'] == "Class 2"]

## 3.2 Visualization for classification

### BCR visualization

In [None]:
# Median BCR of each grid cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
gd_bcr = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Buliding Coverage Ratio': [
        cluster["BCR"].median()
        for cluster in (GRID_cluster0, GRID_cluster1, GRID_cluster2)
    ],
}
df_gd_bcr = pd.DataFrame(gd_bcr).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
gd_bcr_bar = df_gd_bcr.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="blue",
    alpha=0.6)

### BD visualization

In [None]:
# Median BD of each grid cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
gd_bd = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Buliding Density': [
        cluster["BD"].median()
        for cluster in (GRID_cluster0, GRID_cluster1, GRID_cluster2)
    ],
}
df_gd_bd = pd.DataFrame(gd_bd).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
gd_bd_bar = df_gd_bd.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="purple",
    alpha=0.6)

### Proximity visualization

In [None]:
# Median ProxMean of each grid cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
gd_p = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Proximity': [
        cluster["ProxMean"].median()
        for cluster in (GRID_cluster0, GRID_cluster1, GRID_cluster2)
    ],
}
df_gd_p = pd.DataFrame(gd_p).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
gd_p_bar = df_gd_p.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Green",
    alpha=0.6)

### Size visualization

In [None]:
# Median AvgSize of each grid cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
gd_s = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    'Average Size': [
        cluster["AvgSize"].median()
        for cluster in (GRID_cluster0, GRID_cluster1, GRID_cluster2)
    ],
}
df_gd_s = pd.DataFrame(gd_s).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
gd_s_bar = df_gd_s.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Red",
    alpha=0.6)

### Contiguity Ratio visualization

In [None]:
# Median ContRatio of each grid cluster, drawn as a bar chart.
# The column is selected before aggregating: DataFrame.median() over the whole
# frame also tries to aggregate the ID, label and geometry columns, which
# raises on pandas >= 2.0 (numeric_only defaults to False there).
# Rows use the same 0-based "Class n" names as the Labels column, so each bar
# refers to the class of the same name on the map.
gd_c = {
    'Class': ['Class 0', 'Class 1', 'Class 2'],
    "Contiguity Ratio": [
        cluster["ContRatio"].median()
        for cluster in (GRID_cluster0, GRID_cluster1, GRID_cluster2)
    ],
}
df_gd_c = pd.DataFrame(gd_c).set_index("Class")

# Built but not displayed here (show_figure=False); assembled into a layout later.
gd_c_bar = df_gd_c.plot_bokeh.bar(
    show_figure=False,
    figsize=(300, 200),
    ylabel="value",
    title="5 statistics for 3 classes",
    color="Grey",
    alpha=0.6)

### Integrated visualization for Grid 1km^2

In [None]:
# Map of the grid-cell geometries coloured by the numeric cluster id
# (LabelNum); the hover tool lists id and Labels. The figure is only built
# here (show_figure=False), not displayed.
Rgd_class = df_grid.plot_bokeh(
    title="Classification for 1km^2 Grid",
    legend="1km^2 Grid",
    category="LabelNum",
    colormap="Viridis",
    show_colorbar=True,
    hovertool_columns=["id", "Labels"],
    figsize=(900, 600),
    toolbar_location="above",
    show_figure=False,
)

In [None]:
# Grid page: the map on top, then two rows holding the five bar charts.
gd = layout(
    children=[
        [Rgd_class],
        [gd_bcr_bar, gd_bd_bar],
        [gd_p_bar, gd_s_bar, gd_c_bar],
    ],
    sizing_mode="fixed",
)

# 4. Integrated visualization for 3 Geographic Units

In [None]:
# Wrap each of the three page layouts in its own tab and collect them in a
# single Tabs widget.
tab1, tab2, tab3 = (
    Panel(child=page, title=caption)
    for page, caption in (
        (ct, "Census Tract"),
        (db, "Dissimination Classification"),
        (gd, "Grid 1km^2 Classification"),
    )
)
tabs = Tabs(tabs=[tab1, tab2, tab3])

In [None]:
# Second layout built from the same dissemination-block figures as `db`.
# NOTE(review): `p1` is not referenced anywhere else in the visible notebook;
# confirm whether this cell is still needed.
p1 = layout(
    children=[
        [Rdb_class],
        [db_bcr_bar, db_bd_bar],
        [db_p_bar, db_s_bar, db_c_bar],
    ],
    sizing_mode="fixed",
)



In [None]:
# Render the tabbed dashboard (census tract, dissemination block and grid tabs).
show(tabs)