In [33]:
import numpy
import numpy as np
from joblib import dump, load
import scipy.stats
import matplotlib.pyplot as plt

import mpld3
# Let mpld3 take over the display of matplotlib figures in the notebook.
mpld3.enable_notebook()

# Uncomment to hide all matplotlib figures.
#plt.ioff()

# Seed NumPy's global random generator for the cells that follow.
np.random.seed(42)

# Default figure size as (width, height).
plt.rcParams.update({"figure.figsize": (20, 3)})

In [34]:
from utils import load_data
# Load the samples (x) and their labels (y). Later cells compare x[:, 0] with
# latitude-like values (~51.45), x[:, 1] with longitude-like values (~-2.59),
# and y with 1.
x, y = load_data()

In [35]:
# Remove every class-1 sample whose summed absolute coordinate difference from a
# hand-picked point is below 0.01.
# NOTE(review): the notebook does not record why this location is excluded — confirm.
excluded_point = np.array([51.49354491805622, -2.618626674263542])
near_excluded = np.sum(np.abs(x - excluded_point), axis=1) < 0.01
# The label test is parenthesised on purpose: `&` binds tighter than `==`, so the
# previous `mask & y==1` was evaluated as `(mask & y) == 1`, which is only
# equivalent when y holds integer 0/1 labels.
closer_idx = np.where(near_excluded & (y == 1))[0]
print(closer_idx)
x = np.delete(x, closer_idx, axis=0)
y = np.delete(y, closer_idx)

[]


In [36]:
# Sanity check: list any class-1 samples that still lie within 0.01 (summed absolute
# coordinate difference) of the excluded point; an empty array is expected.
near_excluded_point = np.sum(np.abs(x - [51.49354491805622, -2.618626674263542]), axis=1) < 0.01
# `&` binds tighter than `==`, so the label comparison needs its own parentheses;
# `mask & y==1` would be evaluated as `(mask & y) == 1`.
closer_idx = np.where(near_excluded_point & (y == 1))[0]
print(closer_idx)

[]


In [37]:
# Report the shapes of the sample matrix and the label vector, one per line.
print(x.shape, y.shape, sep='\n')

(5509, 2)
(5509,)


In [38]:
from utils import ipyleaflet_heatmap_per_class

# Per-class heat map of all samples, centred on (53, -2).
m = ipyleaflet_heatmap_per_class(
    x,
    y,
    center=(53, -2),
    zoom=5.5,
)
display(m)

Map(center=[53, -2], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_tex…

In [39]:
from utils import ipyleaflet_scatterplot_per_class

# Per-class scatter map of all samples, centred on (53, -2).
# NOTE(review): proportion=0.1 presumably plots only a tenth of the points — confirm in utils.
m = ipyleaflet_scatterplot_per_class(
    x,
    y,
    center=(53, -2),
    zoom=5.5,
    proportion=0.1,
)
display(m)

Map(center=[53, -2], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_tex…

In [40]:
# 500 x 500 evaluation grid over the local window.
lat_local_grid = numpy.linspace(51.3, 51.55, num=500)
lon_local_grid = numpy.linspace(-2.70, -2.45, num=500)

# With meshgrid's default 'xy' indexing both outputs have shape
# (len(lon_local_grid), len(lat_local_grid)): rows follow longitude, columns latitude.
lat_local, lon_local = numpy.meshgrid(lat_local_grid, lon_local_grid)

In [41]:
# Boolean mask of the samples within a Euclidean distance of 0.1 (in raw coordinate
# units) from (51.4545, -2.5879), the same coordinates used as bristol_center below.
lat_offset = x[:, 0] - 51.4545
lon_offset = x[:, 1] + 2.5879
local_idx = numpy.sqrt(lat_offset ** 2 + lon_offset ** 2) <= 0.1

In [42]:
# Map centre for every local view; same coordinates as the centre of the local_idx mask.
bristol_center = (51.4545, -2.5879)

# Scatter map restricted to the local samples.
m = ipyleaflet_scatterplot_per_class(
    x[local_idx],
    y[local_idx],
    center=bristol_center,
    zoom=11,
    proportion=1.0,
)
display(m)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

In [43]:
# Samples and labels that fall inside the local mask.
x_local, y_local = x[local_idx, :], y[local_idx]

# Per-column (min, max) pairs: xlim spans column 0 and ylim spans column 1 of x_local.
# Relies on x_local having exactly two columns.
xlim, ylim = zip(x_local.min(axis=0), x_local.max(axis=0))

In [44]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from utils import KDE

In [45]:
# Alternative classifiers tried during exploration (left disabled):
#mdl = SVC(probability=True)
#mdl = SVC(gamma=1e4, C=1e4, kernel='rbf', probability=True)
#mdl = LogisticRegression(C=1e1000, solver='liblinear', max_iter=100)
#mdl = DecisionTreeClassifier(min_samples_leaf=7)
#mdl = KNeighborsClassifier(n_neighbors=5)
#mdl = RandomForestClassifier()

# Fit the KDE model from utils on the local subset and persist it with joblib.
mdl = KDE(bandwidth=0.01)
mdl.fit(x_local, y_local)
dump(mdl, './local_mdl.joblib')

# Accuracy on the training samples themselves, plus how often each class is predicted.
predicted_class_local = mdl.predict(x_local)
train_accuracy = np.mean(predicted_class_local == y_local)
print('Training accuracy = {:0.3f}'.format(train_accuracy))
print('Count of predicted classes')
class_counts = np.unique(predicted_class_local, return_counts=True)
print(class_counts)

Training accuracy = 0.924
Count of predicted classes
(array([0, 1]), array([54, 12]))


In [46]:
# Probability of class 1 (column 1 of predict_proba) at every node of the local grid.
local_grid_points = numpy.hstack([lat_local.reshape(-1, 1), lon_local.reshape(-1, 1)])
# Reshape to the mesh's own shape. The previous (lat_local.shape[0], lon_local.shape[0])
# used the same axis length twice, so it only worked because the grid is square.
p_local = mdl.predict_proba(local_grid_points)[:, 1].reshape(lat_local.shape)

In [47]:
plt.figure(dpi=128, figsize=(3, 3))
# The grid arrays come from meshgrid(lat, lon), so p_local's rows follow longitude;
# transposing puts latitude on the image rows to match the extent below.
im = plt.imshow(
    p_local.transpose(),
    origin='lower',
    extent=[-2.70, -2.45, 51.3, 51.55],
    cmap='bwr',  # options tried and left disabled: cmap='viridis', vmin=0, vmax=1
)
# Overlay the local samples (longitude on the horizontal axis), coloured by label.
plt.scatter(
    x[local_idx, 1],
    x[local_idx, 0],
    c=y[local_idx],
    edgecolors='k',
    cmap='bwr',
    s=1,
)
plt.colorbar(im)

<matplotlib.colorbar.Colorbar at 0x7f3c70ee9e20>

In [48]:
# Probability levels passed as `isolines` to the contour plots and contour maps below.
isolines = [
    0,
    0.2, 0.4,
    0.5,
    0.6, 0.8,
    1,
]

In [49]:
from utils import mpl_scatter_contourlines

In [50]:
# Extent of the larger evaluation grid (700 nodes per axis).
# The whole-of-Great-Britain extent is kept here for reference only; it used to be
# assigned and then immediately overwritten by the Bristol window:
#   lon_all_grid = numpy.linspace(-6, 2, 700)
#   lat_all_grid = numpy.linspace(50, 58, 700)
# Small area around Bristol. Note that the longitude values run in descending order.
lon_all_grid = numpy.linspace(-2.1, -3.1, 700)
lat_all_grid = numpy.linspace(51.1, 51.7, 700)
# Default 'xy' indexing: both outputs have shape (len(lon_all_grid), len(lat_all_grid)).
lat_all, lon_all = numpy.meshgrid(lat_all_grid, lon_all_grid)

In [51]:
# Class-1 probability on the local grid, followed by a histogram of its values.
# NOTE(review): this cell follows the lat_all/lon_all grid definition but evaluates the
# lat_local/lon_local grid — confirm which grid was intended.
local_grid_points = numpy.hstack([lat_local.reshape(-1, 1), lon_local.reshape(-1, 1)])
# Reshape to the mesh's own shape. The previous (lat_local.shape[0], lon_local.shape[0])
# used the same axis length twice, so it only worked because the grid is square.
probabilities = mdl.predict_proba(local_grid_points)[:, 1].reshape(lat_local.shape)
np.histogram(probabilities)

(array([127560,  26678,  25392,  19014,  19055,  19023,   6061,   4846,
          1797,    574]),
 array([0.00152684, 0.08197856, 0.16243028, 0.242882  , 0.32333372,
        0.40378544, 0.48423716, 0.56468888, 0.6451406 , 0.72559232,
        0.80604404]))

In [52]:
from utils import mpl_scatter_contourf

In [53]:
# TODO Can we incorporate this call into ipyleaflet_contourmap without automatic plotting?
fig, ax, contourmap = mpl_scatter_contourf(
    mdl,
    lat_all,
    lon_all,
    x[local_idx],
    y[local_idx],
    xlim=xlim,
    ylim=ylim,
    isolines=isolines,
)

In [54]:
from utils import reversed_colormap
from utils import ipyleaflet_contourmap
from branca.colormap import linear

# Show the current `contourmap` together with the local samples on an interactive map,
# using a reversed RdBu colour scale.
ipyleaflet_contourmap(
    center=bristol_center,
    datapoints=[x[local_idx], y[local_idx]],
    contourmap=contourmap,
    isolines=isolines,
    colormap=reversed_colormap(linear.RdBu_05),
    legend_title='Prob. Up',
    zoom=11,
    fillopacity=0.7,
)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

In [55]:
from sklearn.mixture import GaussianMixture

# Split the samples into the local subset and everything else ("background").
x_local, y_local = x[local_idx, :], y[local_idx]
x_back, y_back = x[~local_idx, :], y[~local_idx]

# One spherical Gaussian mixture per subset: 4 components locally, 32 for the background.
# No random_state is passed, so the fits depend on NumPy's global random state.
local_gmm = GaussianMixture(n_components=4, covariance_type='spherical').fit(x_local)
back_gmm = GaussianMixture(n_components=32, covariance_type='spherical')
back_gmm.fit(x_back)

GaussianMixture(covariance_type='spherical', n_components=32)

In [56]:
# Mixing weights: fraction of samples inside (index 0) and outside (index 1) the local mask.
pi = numpy.array([numpy.sum(local_idx), numpy.sum(~local_idx)])
pi = pi / numpy.sum(pi)

# Evaluate both mixture densities on the same grid nodes; the point matrix is built once.
# score_samples returns log-densities, hence the exp.
all_grid_points = numpy.hstack([lat_all.reshape(-1, 1), lon_all.reshape(-1, 1)])
local_d = np.exp(local_gmm.score_samples(all_grid_points))
back_d = np.exp(back_gmm.score_samples(all_grid_points))

# Posterior probability of the background component (Bayes' rule with the weights above).
# Reshape to the mesh's own shape: the previous (lat_all.shape[0], lon_all.shape[0])
# used the same axis length twice and only worked because the grid is square.
p_back = (pi[1] * back_d / (pi[1] * back_d + pi[0] * local_d)).reshape(lat_all.shape)
# NOTE: this rebinds p_local, which an earlier cell used for the classifier's class-1
# probability on the lat_local/lon_local grid.
p_local = 1 - p_back

In [57]:
import mpld3

mpld3.enable_notebook()
#isolines = np.linspace(0, 1, 100)

# Filled contours of p_local over the lat_all/lon_all grid, with the local samples on top
# (column 0 of x on the horizontal axis, column 1 on the vertical axis).
fig, ax = plt.subplots(nrows=1, figsize=(12, 9))
contourmap = ax.contourf(lat_all, lon_all, p_local, levels=isolines, alpha=0.8)
ax.scatter(x[local_idx, 0], x[local_idx, 1], c=y[local_idx], cmap='bwr')
fig.colorbar(contourmap, ax=ax)

# Crop the view to the bounding box of the local samples.
ax.set_xlim(xlim)
ax.set_ylim(ylim)

(-2.6789896, -2.4918634)

In [58]:
# Show the foreground-probability contours together with the local samples on a map.
ipyleaflet_contourmap(
    center=bristol_center,
    datapoints=[x[local_idx], y[local_idx]],
    contourmap=contourmap,
    isolines=isolines,
    colormap=linear.viridis,
    legend_title='Prob. Foreground',
)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

# Joint probability of each class and the local (foreground) component

In [59]:
# Class probabilities from the model at every node of the lat_all/lon_all grid.
all_grid_points = numpy.hstack([lat_all.reshape(-1, 1), lon_all.reshape(-1, 1)])
prob_per_class = mdl.predict_proba(all_grid_points)

# Reshape to the mesh's own shape. numpy.meshgrid(lat_all_grid, lon_all_grid) yields
# arrays of shape (len(lon_all_grid), len(lat_all_grid)), so the previous
# (lat_all_grid.shape[0], lon_all_grid.shape[0]) had the axes swapped and only worked
# because both grids have the same number of nodes.
p1 = prob_per_class[:, 1].reshape(lat_all.shape)
p0 = prob_per_class[:, 0].reshape(lat_all.shape)

# Weight each class probability by the probability currently held in p_local.
p1_not_back = p1 * p_local
p0_not_back = p0 * p_local

In [60]:
import mpld3
mpld3.enable_notebook()

# Filled contours of p1_not_back with the local samples drawn on top.
fig, ax = plt.subplots(1, figsize=(12, 9))
contourmap_c1_fg = ax.contourf(lat_all, lon_all, p1_not_back, isolines, alpha=0.8)
ax.scatter(x[local_idx, 0], x[local_idx, 1], c=y[local_idx], cmap='bwr')
# The colour bar must describe the contour set drawn on these axes. It was previously
# built from `contourmap`, a contour set created by an earlier cell on another figure.
fig.colorbar(contourmap_c1_fg, ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

(-2.6789896, -2.4918634)

In [61]:
from utils import colormap_alpha

# Red colour scale passed through colormap_alpha, used for the class-1 layer.
Alpha_Reds_08 = colormap_alpha(linear.Reds_08)

# Show the class-1 & foreground contours together with the local samples on a map.
ipyleaflet_contourmap(
    center=bristol_center,
    datapoints=[x[local_idx], y[local_idx]],
    contourmap=contourmap_c1_fg,
    isolines=isolines,
    colormap=Alpha_Reds_08,
    legend_title='Up & Foreground',
    fillopacity=0.7,
)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

In [62]:
import mpld3
mpld3.enable_notebook()

# Filled contours of p0_not_back with the local samples drawn on top.
fig, ax = plt.subplots(1, figsize=(12, 9))
contourmap_c0_fg = ax.contourf(lat_all, lon_all, p0_not_back, isolines, alpha=0.8)
ax.scatter(x[local_idx, 0], x[local_idx, 1], c=y[local_idx], cmap='bwr')
# The colour bar must describe the contour set drawn on these axes. It was previously
# built from `contourmap`, a contour set created by an earlier cell on another figure.
fig.colorbar(contourmap_c0_fg, ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)

(-2.6789896, -2.4918634)

In [63]:
# Blue colour scale passed through colormap_alpha, used for the class-0 layer.
Alpha_blues_09 = colormap_alpha(linear.Blues_09)

# Show the class-0 & foreground contours together with the local samples on a map.
ipyleaflet_contourmap(
    center=bristol_center,
    datapoints=[x[local_idx], y[local_idx]],
    contourmap=contourmap_c0_fg,
    isolines=isolines,
    colormap=Alpha_blues_09,
    legend_title='Dn & Foreground',
)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

In [64]:
# First call: class-1 & foreground contours without data points; keep the returned object.
Alpha_Reds_08 = colormap_alpha(linear.Reds_08)
m = ipyleaflet_contourmap(
    center=bristol_center,
    datapoints=None,
    contourmap=contourmap_c1_fg,
    isolines=isolines,
    colormap=Alpha_Reds_08,
    legend_title='Up & Foreground',
)

# Second call: class-0 & foreground contours plus the local samples, passing m=m —
# presumably so both layers end up on the same map (confirm in utils).
Alpha_blues_09 = colormap_alpha(linear.Blues_09)
ipyleaflet_contourmap(
    center=bristol_center,
    datapoints=[x[local_idx], y[local_idx]],
    contourmap=contourmap_c0_fg,
    isolines=isolines,
    colormap=Alpha_blues_09,
    legend_title='Dn & Foreground',
    m=m,
)

Map(center=[51.4545, -2.5879], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…