In [None]:
import geopandas as gpd
import pandas as pd
from shapely import geometry
from skimage import measure
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from scipy.spatial import (
    Voronoi,
    voronoi_plot_2d,
    Delaunay,
    delaunay_plot_2d,
    cKDTree
)
from scipy.spatial.distance import cdist
import numpy as np
import math, time, random
import interpolators
import itertools
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

# Notebook-wide Matplotlib defaults: a 30x20 inch canvas with enlarged fonts
# and extra padding between the axes and their title.
plt.rcParams.update(
    {
        "figure.figsize": (30, 20),
        "font.size": 20,
        "axes.titlesize": 50,
        "axes.titlepad": 80,
    }
)

def get_polygons_per_zone_plt(xnew, ynew, interpolated_values, zones):
    """Extract zone geometries from Matplotlib filled contours.

    ``contourf`` is run on a temporary figure and every path of every
    resulting collection is converted into a shapely geometry.

    Parameters
    ----------
    xnew, ynew
        Grid coordinates, forwarded unchanged to ``Axes.contourf``.
    interpolated_values
        Grid values, forwarded unchanged to ``Axes.contourf``.
    zones
        Contour levels, forwarded unchanged to ``Axes.contourf``.

    Returns
    -------
    list of list
        One inner list per entry of ``contour.collections``; each inner list
        holds one geometry per path of that collection.
    """
    # NOTE(review): `ContourSet.collections` is, as far as I know, deprecated
    # in recent Matplotlib releases - confirm against the pinned version.
    _, axes = plt.subplots()
    filled = axes.contourf(xnew, ynew, interpolated_values, zones, cmap="winter_r")
    # Only the contour geometry is needed; close the current figure again.
    plt.close()

    def ring_to_polygon(ring):
        # Build a shapely polygon from an (n, 2) coordinate array.
        return geometry.Polygon(list(zip(ring[:, 0], ring[:, 1])))

    def path_to_geometry(path):
        # The first ring of a path is treated as the outline; every further
        # ring is cut out of it as a hole.
        rings = [np.array(ring) for ring in path.to_polygons()]
        if not rings:
            return None
        shape = ring_to_polygon(rings[0])
        for hole_ring in rings[1:]:
            shape = shape.difference(ring_to_polygon(hole_ring))
        return shape

    polygons_per_zone = []
    for collection in filled.collections:
        shapes = [path_to_geometry(path) for path in collection.get_paths()]
        polygons_per_zone.append([shape for shape in shapes if shape is not None])
    return polygons_per_zone

def get_polygons_per_zone(xnew, ynew, interpolated_values, zones):
    """Extract zone polygons from a value grid using skimage marching squares.

    For every level in ``zones`` the iso-contours of ``interpolated_values``
    are traced with ``measure.find_contours`` and converted to shapely
    polygons in the coordinate system of ``xnew`` / ``ynew``. Contours nested
    inside another contour of the same level, and polygons already found for
    higher levels, are cut out of the enclosing polygon.

    Parameters
    ----------
    xnew, ynew : 1-D array-like
        Coordinates of the grid columns / rows; ``interpolated_values[r, c]``
        is taken to be the value at ``(xnew[c], ynew[r])``.
    interpolated_values : 2-D array
        Grid values passed to ``measure.find_contours``.
    zones : sequence
        Contour levels.

    Returns
    -------
    list of list
        One list of shapely geometries per entry of ``zones``, in the same
        order as ``zones``.
    """
    xnew = np.asarray(xnew)
    ynew = np.asarray(ynew)
    col_index = np.arange(len(xnew))
    row_index = np.arange(len(ynew))

    def contour_to_polygon(contour):
        # find_contours returns fractional (row, col) array indices. Index i
        # is the sample located at xnew[i] / ynew[i], so interpolate between
        # the real grid coordinates instead of assuming pixel centres at
        # (i + 0.5) * span / len, which shrank every polygon by up to half a
        # cell.
        xs = np.interp(contour[:, 1], col_index, xnew)
        ys = np.interp(contour[:, 0], row_index, ynew)
        return geometry.Polygon(list(zip(xs, ys)))

    polygons_per_zone = []

    # Iterate in reverse to go from most inner zones to outer zones
    # Makes it easier for hole detections
    for zone_limit in zones[::-1]:
        contours = measure.find_contours(interpolated_values, zone_limit)
        # A ring needs at least three vertices; shorter (open) contour
        # fragments cannot form a polygon and would make shapely raise.
        contour_polygons = [
            contour_to_polygon(contour) for contour in contours if len(contour) >= 3
        ]

        previous_polygons = list(itertools.chain.from_iterable(polygons_per_zone))
        zone_polygons = []
        holes = []

        for p1 in contour_polygons:
            # Already subtracted from an enclosing contour of this level.
            if p1 in holes:
                continue

            # Check for holes in this current contour
            for p2 in contour_polygons:
                if p1 == p2:
                    continue

                if p1.contains(p2):
                    p1 = p1.difference(p2)
                    holes.append(p2)

            # Check if inner contours are holes in current polygon
            for p2 in previous_polygons:
                if p1.contains(p2):
                    p1 = p1.difference(p2)
                    holes.append(p2)

            zone_polygons.append(p1)
        polygons_per_zone.append(zone_polygons)
    # Reverse again to return polygons in same order as input zones
    return polygons_per_zone[::-1]

def plot_polygons(polygons):
    """Plot an iterable of shapely geometries with GeoPandas.

    The geometries are collected into a single ``GeoDataFrame`` (built once,
    rather than concatenated row by row) and drawn with its ``plot`` method.

    Parameters
    ----------
    polygons : iterable
        Geometries to draw.
    """
    polygon_df = gpd.GeoDataFrame({"geometry": list(polygons)})
    polygon_df.plot()

def get_cmap_colors(cmap_name, n, rgb=True):
    """Return the colours of a Matplotlib colormap resampled to ``n`` entries.

    Parameters
    ----------
    cmap_name : str
        Name of a registered Matplotlib colormap.
    n : int
        Number of lookup-table entries requested from the colormap.
    rgb : bool, default True
        If True, each colour is a comma-separated string of its red, green
        and blue components (e.g. ``"0.0,0.5,1.0"``); otherwise each colour is
        a hex string produced by ``mpl.colors.rgb2hex``.

    Returns
    -------
    list of str
        One entry per colour of the resampled colormap (``cmap.N`` entries).
    """
    # pyplot.get_cmap accepts the same (name, lut) arguments as the
    # deprecated/removed matplotlib.cm.get_cmap used previously.
    cmap = plt.get_cmap(cmap_name, n)

    colors = []
    for i in range(cmap.N):
        rgb_values = cmap(i)[:3]  # cmap(i) is RGBA; drop the alpha channel
        if rgb:
            colors.append(",".join(map(str, rgb_values)))
        else:
            colors.append(mpl.colors.rgb2hex(rgb_values))
    return colors

# Load the district outlines and one measurement snapshot, reproject both to
# `internal_crs`, and define the interpolation grid and zone levels used by the
# rest of the notebook. (`external_crs` is defined but not used in this cell.)
external_crs = "EPSG:4326"
internal_crs = "EPSG:3068"
berlin_districts = gpd.read_file("../shared/berlinDistricts.geojson")
# Alternative measurement snapshots; only the uncommented line below is loaded.
# measurements = gpd.read_file("raw-test/data_2020-02-12T14-00-00.geojson")
# measurements = gpd.read_file("meeting-test/data_2020-03-02T22-00-00.geojson")
# measurements = gpd.read_file("meeting-test/data_2020-03-02T03-00-00.geojson")
measurements = gpd.read_file("backup/data_2020-02-02T21-00-00.geojson")

berlin_districts = berlin_districts.to_crs(internal_crs)
measurements = measurements.to_crs(internal_crs)

# Measurement locations and values as plain arrays; `points` is (n, 2) with
# columns (x, y).
x = np.array(measurements.geometry.x)
y = np.array(measurements.geometry.y)
values = np.array(measurements.value)
points = np.column_stack((x, y))

# Grid covering the bounding box of all districts. linspace includes both end
# points, so the real spacing is span / (count - 1), i.e. slightly more than
# `size`.
xmin, ymin, xmax, ymax = berlin_districts.total_bounds
size = 100  # grid cell size in meters
xnew = np.linspace(xmin, xmax, int((xmax - xmin) / size))
ynew = np.linspace(ymin, ymax, int((ymax - ymin) / size))
# Levels used for contour / zone extraction in the cells below.
zones = [0, 20, 35, 50, 100, 1000]

In [None]:
from sklearn.kernel_ridge import KernelRidge

def kernel_regression(x, y, points, values, grid=True, k=None, kernel="gaussian"):
    """Interpolate ``values`` with scikit-learn kernel ridge regression (RBF).

    A ``KernelRidge(kernel="rbf")`` model is fitted on ``points`` / ``values``
    and evaluated at the requested target locations.

    Parameters
    ----------
    x, y : array-like
        Target coordinates. With ``grid=True`` they are the 1-D axes of a
        regular grid (combined via ``np.meshgrid``); with ``grid=False`` they
        are paired element-wise into individual target points.
    points : array-like
        Sample locations; passed through ``interpolators.regularize_points``
        before fitting.
    values : array-like
        Sample values, one per point.
    grid : bool, default True
        Selects how ``x`` and ``y`` are interpreted (see above).
    k, kernel
        Accepted for backward compatibility only; they currently have no
        effect. They were used solely by code that sat after an unconditional
        ``return`` and therefore never ran.

    Returns
    -------
    numpy.ndarray
        Predictions shaped like the meshgrid (``(len(y), len(x))``) when
        ``grid=True``, otherwise a 1-D array with one prediction per target
        point.
    """
    points = interpolators.regularize_points(points)

    if grid:
        xx, yy = np.meshgrid(x, y)
        point_matrix = np.column_stack((xx.ravel(), yy.ravel()))
    else:
        point_matrix = np.column_stack((x, y))

    model = KernelRidge(kernel="rbf")
    model.fit(points, values)
    prediction = model.predict(point_matrix)

    # Only reshape for grid targets; the previous unconditional reshape
    # referenced the meshgrid and raised NameError when grid=False.
    if grid:
        return prediction.reshape(xx.shape)
    return prediction

# Ad-hoc run of the function defined above on the full grid. The returned array
# is not stored here; the next cell assigns it to `interpolated_values`.
kernel_regression(xnew, ynew, points, values)

In [None]:
# Choose the interpolation method that fills `interpolated_values`, the grid
# consumed by the contour-extraction and plotting cells below. The commented
# lines are alternatives from the `interpolators` module; exactly one
# assignment should be active.
# interpolated_values = interpolators.nearest_neighbor(xnew, ynew, points, values)
# interpolated_values = interpolators.natural_neighbor(xnew, ynew, points, values)
# interpolated_values = interpolators.discrete_natural_neighbor(xnew, ynew, points, values)
# interpolated_values = interpolators.inverse_distance_weighting(xnew, ynew, points, values)
# interpolated_values = interpolators.radial_basis_function(xnew, ynew, points, values, function="linear")
# interpolated_values = interpolators.kriging(xnew, ynew, points, values, krige_type="ordinary", nlags=100)

interpolated_values = kernel_regression(xnew, ynew, points, values)

# Prints the array's text representation.
print(interpolated_values)

In [None]:
# Time the polygon extraction and report how many polygons each zone contains.
start = time.time()
# polygons_per_zone = get_polygons_per_zone_plt(xnew, ynew, interpolated_values, zones)
polygons_per_zone = get_polygons_per_zone(xnew, ynew, interpolated_values, zones)
print(time.time()-start)

# Use a dedicated loop name: `x` is the notebook-global array of measurement
# x-coordinates and must not be overwritten by this loop.
for zone_polygons in polygons_per_zone:
    print(len(zone_polygons))

# print(polygons_per_zone)
# plot_polygons(polygons_per_zone[1])

In [None]:
# Plot Berlin Boundaries with Measurements
# District outlines in black, with the measurement points drawn on top and
# coloured by their "value" column (legend enabled).

fig, ax = plt.subplots()
ax.set_title("Berlin Districts with Measurements")
# ax.get_xaxis().set_visible(False)
# ax.get_yaxis().set_visible(False)
berlinPlot = berlin_districts.boundary.plot(ax=ax, edgecolor="black")
measurements.plot(ax=ax, column="value", legend=True, cmap="winter_r", markersize=100)

In [None]:
# Plot Berlin boundaries with interpolation grid
# Every node of the (xnew, ynew) grid is drawn as a small dot over the district
# outlines to show the grid's extent and density.

fig, ax = plt.subplots()
ax.set_title("Berlin Districts with Interpolation Grid")
berlin_districts.boundary.plot(ax=ax, edgecolor="black")
# Expand the two 1-D axes into per-node coordinate matrices.
xx, yy = np.meshgrid(xnew,ynew)
ax.scatter(xx, yy, s=1)

In [None]:
# Plot Berlin with Voronoi diagram
# Voronoi cells of the measurement locations over the district outlines; the
# figure is zoomed to a fixed window and written to "voronoi.png".

voronoi = Voronoi(points)

fig, ax = plt.subplots()
berlin_districts.boundary.plot(ax=ax, edgecolor="gray")
# Draw only the cell edges; the points themselves come from the measurements
# layer plotted next.
voronoi_plot_2d(voronoi, ax=ax, show_vertices=False, show_points=False, line_colors='black', line_width=1.5)
measurements.plot(ax=ax, column="value", legend=True, cmap="winter_r", markersize=50)

# ax.set_title("Berlin Districts with Voronoi Diagram")
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
# Hard-coded view window in the axes' data coordinates.
ax.set_xlim([15000,21000])
ax.set_ylim([18000,23000])

fig.set_dpi(100)
fig.savefig("voronoi.png", bbox_inches="tight")

In [None]:
# Plot Berlin with Delaunay diagram
# Delaunay triangulation of the measurement locations over the district
# outlines.

# The variable keeps its original spelling so that any other cell referring to
# `delauny` continues to work.
delauny = Delaunay(points)

fig, ax = plt.subplots()
ax.set_title("Berlin Districts with Delaunay Diagram")
berlin_districts.boundary.plot(ax=ax, edgecolor="black")
delaunay_plot_2d(delauny, ax=ax)

In [None]:
from sklearn.preprocessing import scale

# Histogram of the raw measurement values and of their standardised version
# (sklearn `scale`), drawn on the same axes for comparison.
# The figure handle is bound to `fig` (previously misspelled `fix`), so `fig`
# no longer keeps pointing at the previous cell's figure.
fig, ax = plt.subplots()
ax.hist(values)
ax.hist(scale(values))

In [None]:
# Variogram Cloud

from scipy.spatial.distance import pdist, squareform

# Pairwise distances between all measurement locations (condensed form, one
# entry per unordered pair).
p_distances = pdist(points)

# Half the squared difference of the measured values for the same pairs, i.e.
# the per-pair semivariance plotted on the y-axis below.
v_distances = 1/2 * (pdist(values.reshape(-1, 1)) ** 2)
fig, ax = plt.subplots()
ax.scatter(p_distances, v_distances)
ax.set_xlabel("h (lag)", fontsize=20)
ax.set_ylabel(r'$\gamma(h)$', fontsize=20)

In [None]:
# Histogram of value distances
# `v_distances` holds the half squared value differences per point pair
# computed in the variogram-cloud cell above.

fig, ax = plt.subplots()
ax.hist(v_distances)

In [None]:
# Experimental Variogram
# Bin the point-pair distances into equal-width lag bins and plot the mean
# semivariance of each bin against the bin centre.

bins = 10
# Number of point pairs per lag bin.
n, bin_edges = np.histogram(p_distances, bins=bins)
# Sum of the per-pair semivariances per lag bin (same bin edges as above).
summed_distances_per_bin, bin_edges = np.histogram(p_distances, bins=bins, weights=v_distances)
# Mean semivariance per bin; a bin without any pair gives 0/0 and thus NaN.
mean = summed_distances_per_bin / n

fig, ax = plt.subplots()
# x positions are the bin centres.
ax.scatter((bin_edges[1:] + bin_edges[:-1])/2, mean)

In [None]:
# Interpolated grid points in color
# Each grid node is drawn as a dot coloured by its interpolated value; the
# figure is written to "interpolated-grid.png".

# Flatten the grid into an (n_nodes, 2) array of (x, y) rows, in the same order
# as `interpolated_values.ravel()`.
grid = np.meshgrid(xnew, ynew)
new_points = np.reshape(grid, (2, -1)).T

fig, ax = plt.subplots()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
berlinPlot = berlin_districts.boundary.plot(ax=ax, edgecolor="black")
scatter = ax.scatter(new_points[:,0], new_points[:,1], s=1, c=interpolated_values.ravel(), cmap="winter_r")
# Colour for values below the lower colour limit.
# NOTE(review): no lower limit is set in this cell (set_clim is commented out
# below), so this presumably has no visible effect - confirm.
scatter.cmap.set_under("w")
plt.colorbar(scatter)
# fig.set_size_inches(30*2, 20*2)
fig.set_dpi(100)
fig.savefig("interpolated-grid.png", bbox_inches='tight')
# scatter.set_clim(zones[1])

In [None]:
# Colored grid with interpolation as image
# The value grid is shown as an image stretched over the districts' bounding
# box; row 0 is placed at the bottom (origin="lower").

fig, ax = plt.subplots()
img = ax.imshow(interpolated_values, origin="lower", cmap="Reds", extent=[xmin, xmax, ymin, ymax])
# Values below the lower colour limit (set on the next line to zones[1]) are
# drawn in white.
img.cmap.set_under("w")
img.set_clim(zones[1])

In [None]:
# Contours using matplotlib
# Contour lines at every zone level plus filled contours, drawn over the
# district outlines and written to "extracted-zones.png".

fig, ax = plt.subplots()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

berlin_districts.boundary.plot(ax=ax, edgecolor="black")

# Lines use all levels; the filled version omits the last (highest) level.
contour = ax.contour(xnew, ynew, interpolated_values, zones, linewidths=1)
contourf = ax.contourf(xnew, ynew, interpolated_values, zones[0:-1], cmap="winter_r")

# Values below the lower colour limit (zones[1]) are drawn in white.
contourf.cmap.set_under("w")
contourf.set_clim(zones[1])
fig.colorbar(contourf, ax=ax)

fig.set_dpi(100)
fig.savefig("extracted-zones.png", bbox_inches='tight')

In [None]:
# Contours using skimage and marching squares

from skimage import measure

# Value grid as an image with the skimage iso-contours of every zone level
# drawn on top, over the district outlines.
fig, ax = plt.subplots()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)

berlin_districts.boundary.plot(ax=ax, edgecolor="black")

img = ax.imshow(interpolated_values, origin="lower", cmap="winter_r", extent=[xmin, xmax, ymin, ymax])
# Values below the lower colour limit (zones[1]) are drawn in white.
img.cmap.set_under("w")
img.set_clim(zones[1])

# Map fractional array indices returned by find_contours to plot coordinates,
# treating index i as the centre of pixel i within the image extent.
scale_x = lambda x: xmin + (xmax-xmin)/len(xnew)*(x+0.5)
scale_y = lambda y: ymin + (ymax-ymin)/len(ynew)*(y+0.5)

# One colour per zone level, returned as "r,g,b" strings and parsed back to
# float tuples below.
colors = get_cmap_colors("winter_r", len(zones))
for i, zone in enumerate(zones):
    contours = measure.find_contours(interpolated_values, zone)
    color = tuple(map(float, colors[i].split(",")))
    print(color)
    # NOTE(review): the loop names `n` and `contour` rebind notebook globals
    # of the same name set in earlier cells - confirm nothing relies on them.
    for n, contour in enumerate(contours):
        # Contour columns are (row, col): column 1 maps to x, column 0 to y.
        ax.plot(scale_x(contour[:, 1]), scale_y(contour[:, 0]), linewidth=1, color=color)

fig.colorbar(img)
fig.set_dpi(100)
# NOTE(review): same file name as the Matplotlib contour cell, so running both
# overwrites the earlier image - confirm this is intended.
fig.savefig("extracted-zones.png", bbox_inches='tight')

In [None]:
from pykrige.ok import OrdinaryKriging
from pykrige.uk import UniversalKriging


def _build_kriging_interpolator(krige_type, xs, ys, values, **kwargs):
    """Instantiate the PyKrige class selected by ``krige_type``."""
    interpolator_classes = {
        "ordinary": OrdinaryKriging,
        "universal": UniversalKriging,
    }
    if krige_type not in interpolator_classes:
        raise ValueError(
            f"Unknown krige_type {krige_type!r}; expected 'ordinary' or 'universal'"
        )
    return interpolator_classes[krige_type](xs, ys, values, **kwargs)


def kriging(x, y, points, values, nlags=10, cv=False, krige_type="ordinary"):
    """Interpolate ``values`` onto a grid with PyKrige.

    Parameters
    ----------
    x, y : array-like
        Grid axes handed to the interpolator's ``execute('grid', x, y)``.
    points : array-like
        Sample locations; passed through ``interpolators.regularize_points``
        first. Columns 0 and 1 are used as x and y.
    values : array-like
        Sample values, one per point.
    nlags : int, default 10
        Number of variogram lags. Ignored when ``cv`` is true, in which case
        the value is chosen by cross-validation.
    cv : bool, default False
        If true, run a shuffled 10-fold cross-validation for every lag count
        from 2 to 100 and use the one with the lowest average RMSE.
    krige_type : {"ordinary", "universal"}, default "ordinary"
        Selects ``OrdinaryKriging`` or ``UniversalKriging``.

    Returns
    -------
    The first element of the tuple returned by ``execute('grid', x, y)``
    (the interpolated values).

    Raises
    ------
    ValueError
        If ``krige_type`` is neither ``"ordinary"`` nor ``"universal"``.
    """
    points = interpolators.regularize_points(points)

    if cv:
        print("Doing CV to determine best number of lags...")
        folds = 10
        seed = random.randint(0,9999)
        # shuffle / random_state must be given by keyword in current
        # scikit-learn releases.
        kfold = KFold(n_splits=folds, shuffle=True, random_state=seed)
        avg_rmse_per_lag = {}
        for lags in range(2, 101):
            sum_rmse = 0
            for train, test in kfold.split(values):
                train_points = points[train]
                train_values = values[train]
                test_points = points[test]
                test_values = values[test]

                krige_interpolator = _build_kriging_interpolator(
                    krige_type,
                    train_points[:, 0],
                    train_points[:, 1],
                    train_values,
                    nlags=lags,
                )

                result = krige_interpolator.execute('points', test_points[:, 0], test_points[:, 1])
                # mean_squared_error yields the MSE; take the root so the
                # reported and compared quantity really is the RMSE.
                sum_rmse += np.sqrt(mean_squared_error(test_values, result[0]))

            avg_rmse_per_lag[lags] = sum_rmse / folds

        print("Done")
        nlags = min(avg_rmse_per_lag, key=avg_rmse_per_lag.get)
        print(f"Winning lag: {nlags}")
        print(f"Avg RMSE: {avg_rmse_per_lag[nlags]}")

    krige_interpolator = _build_kriging_interpolator(
        krige_type, points[:, 0], points[:, 1], values, nlags=nlags, verbose=True
    )

    krige_interpolator.display_variogram_model()

    result = krige_interpolator.execute('grid', x, y)
    return result[0]

# Run ordinary kriging with the default settings. This rebinds
# `interpolated_values`, so plotting cells re-run afterwards will show the
# kriging result instead of the kernel-regression grid assigned earlier.
interpolated_values = kriging(xnew, ynew, points, values)