See description in [BestAddressAnomalies.md](BestAddressAnomalies.md)

# Main code

In [314]:
import os
import urllib

import geopandas as gpd
import pandas as pd
import numpy as np

import pickle
import contextily as ctx
import matplotlib.pyplot as plt

from matplotlib.backends.backend_pdf import PdfPages

from tqdm import tqdm, trange
tqdm.pandas()

import re
import jellyfish

import difflib
import requests

In [315]:
import shapely

from shapely.geometry import Point, LineString


import warnings
warnings.filterwarnings("ignore", category=shapely.errors.ShapelyDeprecationWarning) 

## Functions

In [316]:
def download_if_nexist(url, filename):
    """
    If the (local) file <filename> does not exists, download it from <url>

    Parameters
    ----------
    url: str
       url to fetch
    filename: str
       local file to save

    Returns
    -------

    None
    """
    if not os.path.isfile(filename):
        #gcontext = ssl.SSLContext()
        with urllib.request.urlopen(url) as response:
            with open(filename, "wb") as f:
                f.write(response.read())

In [317]:
def set_optimal_limits(ax, df):
    """
    Adapt xlim/ylim to a GeoDataFrame point plot to avoid plot to be too wide when 
    points are horizontally aligned, and to narrow when points are vertically aligned

    Usage : 
    
    ax = df.plot()
    set_optimal_limits(ax, df)
    
    Parameters
    ----------
    ax: AxesSubplot
       plot to resize
    df: GeoDataFrame
       data to be plotted

    Returns
    -------
        None
    """
    
    plot_ratio = 1.5 # optimal ratio between "one horizontal degree" and "one vertical degree". It depends of the CRS. 
                     # For "polar" CRS, it may also depend of the place on the globe

    minimal_width=600
   
    margins = 1.1 # Avoid having dots on edges of the plot
    

    # Compute dimension of the data
    xmin, ymin, xmax, ymax = df.total_bounds
    height = (ymax - ymin) 
    width  = (xmax - xmin)
    
    opt_height = max(height, width / plot_ratio, minimal_width / plot_ratio)
    opt_width  = max(width , height*plot_ratio, minimal_width)
    
#     print(xmin, ymin, xmax, ymax)
#     print(width, height, opt_width, opt_height)
    # If plot is too narrow, increase xmin. If plot is too wide, increase ylim

    if opt_height > height :
        ymid = (ymax+ymin)/2
        mid_height = opt_height * margins / 2
        ax.set_ylim(ymid - mid_height, ymid + mid_height)
    if opt_width > width:
        xmid = (xmax+xmin)/2
        mid_width = opt_width* margins/2
        ax.set_xlim(xmid - mid_width, xmid + mid_width)

In [318]:
def add_basemap(ax, zoom='auto'):
    """
    Add a basemap on a plot. Tries first default (Stamen) basemap. If errors, tries OpenStreetMap.Mapnik
    
    Parameters
    ----------
    ax: matplotlib axes
    
    Returns
    -------
        None
    """

    try: 
#         ctx.add_basemap(ax, zoom=zoom)
#     except requests.HTTPError:
#         print("Default basemap doesn't work...")
        ctx.add_basemap(ax, source=basemap_source, zoom=zoom)
    except requests.HTTPError as e:
        print("HTTP error: ", e)
        ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik, zoom=zoom)
    except ValueError as e:
        print("Value error...")
        print(e)
    except NameError as e:
        print("contextily not installed")
        
    ax.axis(False)

In [319]:
def null_jaro(str1, str2):
    if pd.isnull(str1) or pd.isnull(str2):
        return pd.NA
    
    return jellyfish.jaro_winkler_similarity(str1, str2)

In [320]:

def line_sinuosity(geom):
    # Return the ratio between the distance of the geometry, and the straight distance between the start and the end of the geometry
    assert geom.geom_type == "LineString", geom.geom_type
    length = geom.length
    start_pt = geom.interpolate(0)
    end_pt = geom.interpolate(1, normalized=True)
    straight_dist = start_pt.distance(end_pt)
    if straight_dist == 0.0:
        if length == 0.0:
            return 0.0
        return float("inf")
    return length / straight_dist

In [321]:
def sliding_sinuosity(street_side, windows_size=5):
    if street_side.geometry.nunique() <3:
        return pd.NA
    geometry = street_side.geometry
    local_sinuosity = [line_sinuosity(LineString(geometry.iloc[i:i+windows_size].reset_index(drop=True))) for i in range(0, max(1, geometry.shape[0]-windows_size))]
#     print(local_sinuosity)
    
    return np.mean(local_sinuosity)
                                                                                                          
                                      
    
    

In [322]:
# region[region.streetname == "Abbaye de la Cambre"].geometry.iloc[5:10].reset_index(drop=True)

In [323]:
# region[(region.house_number_num.mod(2)==parity)].groupby(["streetname", "postcode"]).geometry.progress_apply(sliding_sinuosity)


In [324]:
def bloc_sinuosity(street_side):
    if street_side.geometry.nunique() <3:
        return pd.NA
    street_side_linestring = LineString(street_side.reset_index().geometry)
    
    return line_sinuosity(street_side_linestring)

In [325]:
def bloc_length(street_side):
    if street_side.geometry.nunique() <3:
        return pd.NA
    street_side_linestring = LineString(street_side.reset_index().geometry)
    
    return street_side_linestring.length

In [326]:
# bloc_length(region[region.streetname == "Abbaye de la Cambre"])

In [327]:
def get_max_delta_ratio(street_bloc):
    if  street_bloc.house_number_num.max() - street_bloc.house_number_num.min() < 20:
        return 0, "-"
    
    b1 = street_bloc.reset_index()[["house_number_num", "geometry", "index"]].assign(mg=1)
    b2 = street_bloc.reset_index()[["house_number_num", "geometry", "index"]].assign(mg=1)
    dot_prod = b1.merge(b2, on="mg")
    dot_prod = dot_prod[dot_prod.index_x < dot_prod.index_y]
    
    dot_prod["delta_num"] = (dot_prod.house_number_num_x - dot_prod.house_number_num_y).abs()
    
    dot_prod = dot_prod[dot_prod["delta_num"]>10]
    
    dot_prod["distance"] = gpd.GeoSeries(dot_prod.geometry_x).distance(gpd.GeoSeries(dot_prod.geometry_y))
    dot_prod["delta_ratio"] = dot_prod.delta_num/dot_prod.distance
    
    dot_prod = dot_prod[dot_prod.delta_ratio<np.inf]
    
    if dot_prod.shape[0]==0:
        return 0, "-"
    
    id_max = dot_prod.delta_ratio.idxmax()
    
    rec_max = dot_prod.loc[id_max]
    return (rec_max.delta_ratio, f"{rec_max.house_number_num_x} -> {rec_max.house_number_num_y}") 
#     display(dot_prod.sort_values("delta_ratio"))
#     return  .delta_ratio.max()
#     return f"{dot_prod.loc[imax].delta_ratio} ({})


In [328]:
def get_street_bloc(region, streetname, postcode, parity):
    street_bloc=region[(region.streetname==streetname) & (region.postcode==postcode) & (region.house_number_num.mod(2)==parity) ].copy()
    
    street_bloc = street_bloc.sort_values(["house_number_num", "house_number"])

    street_bloc = street_bloc.drop_duplicates(["geometry", "house_number"])
    
    return street_bloc

In [329]:
def plot_street_bloc(street_bloc, title=None, ax=None):
    
    ax = street_bloc.plot(column="house_number_num", figsize=(10,10),alpha=0.7, ax=ax, zorder=10)
    
    ax.plot(street_bloc.geometry.x.values, street_bloc.geometry.y.values, alpha=0.5)#, ax=ax)#, ax=ax, kind="line")
#     ax.title = title
    ax.set_title(title)

    for idx, row in street_bloc.iterrows():
    #     print(row["housenumber"])
        an = ax.annotate(text=row["house_number"], 
                         xy=(row["geometry"].x, row["geometry"].y), 
                         fontsize="x-small",
                         xytext=(5, 5), textcoords='offset pixels',)
        
#     ax = street_bloc.plot(column="house_number_num", alpha=1, ax=ax)
    set_optimal_limits(ax, street_bloc)

    add_basemap(ax)


In [330]:
# def plot_street_bloc(street_bloc, title=None, ax=None):
    
# #     if ax:
# #         ax=ax.plot(street_bloc.geometry.x.values, street_bloc.geometry.y.values, alpha=0.5)
# #     else:
# #         ax=plt.plot(street_bloc.geometry.x.values, street_bloc.geometry.y.values, alpha=0.5)[0]#, ax=ax, kind="line")
# #     print(ax)
# #     ax.title = title
#     ax = street_bloc.plot(street_bloc.geometry.x.values, street_bloc.geometry.y.values, alpha=0.5)

#     for idx, row in street_bloc.iterrows():
#     #     print(row["housenumber"])
#         an = ax.annotate(text=row["house_number"], xy=(row["geometry"].x, row["geometry"].y), fontsize="x-small")
#         an.set_zorder(10)
#     set_optimal_limits(ax, street_bloc)

#     ax = street_bloc.plot(column="house_number_num", alpha=0.5, ax=ax, figsize=(10,10))
#     ax.set_title(title)

#     ctx.add_basemap(ax, source=basemap_source)


In [331]:
# plot_street_bloc(street_bloc, "title")

In [332]:
import plotly.graph_objects as go

def plot_street_bloc_plotly(street_bloc):
    street_bloc_osm = street_bloc.to_crs(osm_crs)


    fig = go.Figure(go.Scattermapbox(
        mode = "markers+lines",
    #     width=950, height=800,
        lat = street_bloc_osm.geometry.y,
        lon = street_bloc_osm.geometry.x,

    #     color= ,
        marker=go.scattermapbox.Marker(
                    #line=dict(width = 1),
                    color = street_bloc_osm['house_number_num'],
                    size = 10
                ),

            hovertext=street_bloc_osm[["house_number", "streetname", "postcode", "postname", "municipality"]]
        ))


    fig.update_layout(
    #    margin ={'l':0,'t':0,'b':0,'r':0},
        mapbox = {
            'center': {'lon': street_bloc_osm.geometry.x.median(), 
                       'lat': street_bloc_osm.geometry.y.median()},
            'style': "open-street-map",

    #         'center': {'lon': -20, 'lat': -20},
             'zoom': 10,

        },
    width=950, height=800,
    )


    fig.show()
    return fig
    


In [333]:
def make_table(street_bloc, title=None):
    street_bloc = street_bloc.reset_index()
    
    street_bloc["dist_to_prev"] = street_bloc.distance(street_bloc.shift(1)).fillna(0).astype(int)
    
    if street_bloc.shape[0]>20:
        df1 = street_bloc[["house_number", "address_id", "geometry", "dist_to_prev"]].head(10)
        df2 = street_bloc[["house_number", "address_id", "geometry", "dist_to_prev"]].tail(10)
        df3=pd.DataFrame(columns=df1.columns, 
                         index=["..."],
                         data = [ ["..."]*df1.shape[1]])

        df = pd.concat([df1, df3, df2])
    else:
        df = street_bloc[["house_number",  "address_id", "geometry", "dist_to_prev"]].copy()
    
    df["address_id"] = df["address_id"].apply(";".join)
    for fld in ["address_id"]:
        df[fld]  = df[fld].str[0:30]

    fig, ax = plt.subplots(figsize=(10,.3*df.shape[0]))
    ax.axis('tight')
    ax.axis('off')

    the_table = ax.table(cellText=df.values,
                            rowLabels=df.index,
                            colLabels=df.columns,
                            rowColours=['lightblue']*len(df),
                            colColours=['lightblue']*len(df.columns),
    #                         cellColours=alternating_colors,
                            loc='center')
    plt.title(title)
    return fig


In [334]:
def make_metric_table(rec, metrics, ax=None):
    metrics_table = pd.DataFrame({m: {
        "metric": f"{rec[m]:.2f}",
        "house_number": rec[f"{m}_house_number"] if f"{m}_house_number" in rec else "",
        "ranking" : "" if pd.isnull(rec[f"{m}_ranking"]) else int(rec[f"{m}_ranking"]) ,
    } for m in metrics})

    if ax is None:
        fig, ax = plt.subplots(figsize=(10,1))
    ax.axis('tight')
    ax.axis('off')

    the_table = ax.table(cellText=metrics_table.values,
                            rowLabels=metrics_table.index,
                            colLabels=metrics_table.columns,
                            rowColours=['lightblue']*len(metrics_table),
                            colColours=['lightblue']*len(metrics_table.columns),
    #                         cellColours=alternating_colors,
                            loc='center')
    #return fig

In [335]:
def plot_building_boxes(boxes, full_region, with_explore=False):
    #rec=region_buildings.iloc[[k]]
    r= boxes.reset_index().iloc[0]
    #title = f"{r.streetname}, {r.house_number}, {r.postcode} {r.municipality} ({int(r.ch_perimeter)} m, {int(r.ch_area/10000)} ha)"
    title = f"{r.streetname}, {r.house_number}, {r.postcode}/{r.municipality_id} {r.municipality} ({int(r.mrr_length)} m x {int(r.mrr_width)} m)"

    if with_explore:
        m=boxes.explore()
        return boxes.set_geometry("min_rot_rect").explore(m=m,  color="red")
    else:
        ax = boxes.plot()
        boxes.set_geometry("min_rot_rect").plot(ax=ax, alpha=0.2, color="red")
        ax.set_title(title)

        set_optimal_limits(ax, boxes)
        add_basemap(ax)

        all_boxes = boxes.reset_index()[boxes.index.names].merge(full_region)
    #     display(all_boxes)
        for idx, row in all_boxes.fillna("/").iterrows():
        #     print(row["housenumber"])
            ax.annotate(text=row["box_number"], xy=(row["geometry"].x, row["geometry"].y))

    

In [336]:
def get_min_rot_rect_size(min_rot_rect):
    if isinstance(min_rot_rect, Point):
        return 0,0
    
    if isinstance(min_rot_rect, LineString):
        return min_rot_rect.length, 0
    x, y = min_rot_rect.exterior.coords.xy

    # get length of bounding box edges
    edge_length = (Point(x[0], y[0]).distance(Point(x[1], y[1])), Point(x[1], y[1]).distance(Point(x[2], y[2])))

    # get length of polygon as the longest edge of the bounding box
    length = max(edge_length)

    # get width of polygon as the shortest edge of the bounding box
    width = min(edge_length)

    return length, width

## Parameters

In [337]:
root_output_dir = "output/best_anomalies"

data_dir = "data/best_anomalies"

topn = 50

In [338]:
import xyzservices


In [339]:
# basemap_source, base = "https://cartoweb.wmts.ngi.be/1.0.0/topo/default/3857/{z}/{y}/{x}.png", "topo"
# basemap_source, base = "https://cartoweb.wmts.ngi.be/1.0.0/topo/default_bw/3857/{z}/{y}/{x}.png", "grey"
basemap_source, base = "https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/{z}/{y}/{x}.png", "overlay"

basemap_source = xyzservices.TileProvider(name=f"IGN tiles (base)", url=basemap_source, attribution="(C) CartoWeb.be")


# basemap_source, base = = ctx.providers.OpenStreetMap.Mapnik, "osm"



In [340]:
# os.makedirs(output_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)

In [341]:
# datadir = "data/geocoding/"

In [342]:
region_name = "bru"
# region_name = "vlg"
# region_name = "wal"

In [343]:
case_name = f"{region_name}_{base}"

In [344]:
municipality_id_prefix = None
# municipality_id_prefix = "25"
# municipality_id_prefix = "25112" # Wavre
# municipality_id_prefix = "93088" # Walcourt

In [345]:
if municipality_id_prefix is not None:
    case_name = f"{case_name}_{municipality_id_prefix}"

In [346]:
output_dir = f"{root_output_dir}/{case_name}"
os.makedirs(output_dir, exist_ok=True)

## Data reading

In [347]:
best_fn = f"{data_dir}/openaddress-be{region_name}.zip"
download_if_nexist(f"https://opendata.bosa.be/download/best/openaddress-be{region_name}.zip", best_fn)


In [348]:
full_region = pd.read_csv(best_fn, usecols=["municipality_name_de", "municipality_name_fr", "municipality_name_nl", "municipality_id",
                                            "streetname_de", "streetname_fr", "streetname_nl", "street_id",
                                            "postname_fr", "postname_nl",
                                            "postcode", "house_number", "box_number", "region_code", 
                                            "EPSG:31370_x", "EPSG:31370_y", 
                                            "EPSG:4326_lat", "EPSG:4326_lon", 
                                            "address_id", "status"], dtype=str)
full_region

Unnamed: 0,EPSG:31370_x,EPSG:31370_y,EPSG:4326_lat,EPSG:4326_lon,address_id,box_number,house_number,municipality_id,municipality_name_de,municipality_name_fr,municipality_name_nl,postcode,postname_fr,postname_nl,street_id,streetname_de,streetname_fr,streetname_nl,region_code,status
0,150627.21000,168757.03000,50.82924,4.37766,178958,,32,21009,,Ixelles,Elsene,1050,Ixelles + Bruxelles (Louise-Roosevelt),Elsene + Brussel (Louiza-Roosevelt),4226,,Rue de la Brasserie,Brouwerijstraat,BE-BRU,retired
1,149788.58000,168015.61000,50.82257,4.36575,761644,3+4e,8,21009,,Ixelles,Elsene,1050,Ixelles + Bruxelles (Louise-Roosevelt),Elsene + Brussel (Louiza-Roosevelt),4166,,Rue Buchholtz,Buchholtzstraat,BE-BRU,retired
2,149788.57900,168015.61300,50.82257,4.36575,25059,,8,21009,,Ixelles,Elsene,1050,Ixelles + Bruxelles (Louise-Roosevelt),Elsene + Brussel (Louiza-Roosevelt),4166,,Rue Buchholtz,Buchholtzstraat,BE-BRU,retired
3,150004.37000,168564.34000,50.82751,4.36881,763805,b1,6,21009,,Ixelles,Elsene,1050,Ixelles + Bruxelles (Louise-Roosevelt),Elsene + Brussel (Louiza-Roosevelt),4965,,Rue Dautzenberg,Dautzenbergstraat,BE-BRU,retired
4,149797.49800,168012.51500,50.82255,4.36588,44247,,12,21009,,Ixelles,Elsene,1050,Ixelles + Bruxelles (Louise-Roosevelt),Elsene + Brussel (Louiza-Roosevelt),4166,,Rue Buchholtz,Buchholtzstraat,BE-BRU,retired
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
840343,148284.46000,170281.96000,50.84294,4.34440,656933,b004,96,21004,,Bruxelles,Brussel,1000,Bruxelles (Centre),Brussel (Centrum),1758,,Boulevard Maurice Lemonnier,Maurice Lemonnierlaan,BE-BRU,current
840344,150442.76400,172669.68800,50.86441,4.37504,1432364,ETES,21,21015,,Schaerbeek,Schaarbeek,1030,"Schaerbeek + Bruxelles (Pont Van Praet, Teichm...","Schaarbeek + Brussel (Van Praet, Teichmann Brug)",647,,Rue Jenatzy,Jenatzystraat,BE-BRU,current
840345,150479.79900,172675.90000,50.86447,4.37557,1432352,ETSS,6,21015,,Schaerbeek,Schaarbeek,1030,"Schaerbeek + Bruxelles (Pont Van Praet, Teichm...","Schaarbeek + Brussel (Van Praet, Teichmann Brug)",647,,Rue Jenatzy,Jenatzystraat,BE-BRU,current
840346,150201.86900,173377.22900,50.87077,4.37162,1432344,b003,9,21015,,Schaerbeek,Schaarbeek,1030,"Schaerbeek + Bruxelles (Pont Van Praet, Teichm...","Schaarbeek + Brussel (Van Praet, Teichmann Brug)",2169,,Rue James Watt,James Wattstraat,BE-BRU,current


In [349]:
# full_region[full_region.streetname_fr=="Rue François Michoel"].iloc[0:60]

In [350]:
if municipality_id_prefix is not None:
    full_region = full_region[full_region.municipality_id.str.startswith(municipality_id_prefix)].copy()

In [351]:
# region[(region.streetname_fr=="Rue de Wérister") & (region.house_number=="7")]

In [352]:
print("Without coordinates : ")
full_region[full_region["EPSG:31370_x"]== "0.00000"].sort_values(["postcode", "streetname_fr"])#.iloc[0:60]

Without coordinates : 


Unnamed: 0,EPSG:31370_x,EPSG:31370_y,EPSG:4326_lat,EPSG:4326_lon,address_id,box_number,house_number,municipality_id,municipality_name_de,municipality_name_fr,municipality_name_nl,postcode,postname_fr,postname_nl,street_id,streetname_de,streetname_fr,streetname_nl,region_code,status


In [353]:
full_region = full_region[full_region["EPSG:31370_x"]!= "0.00000"].copy()

In [354]:
full_region.status.value_counts()#/ region.shape[0]

status
current    789861
retired     50487
Name: count, dtype: int64

In [355]:
full_region = full_region[full_region.status=="current"]

In [356]:
full_region["streetname"] =   full_region.streetname_fr.fillna(full_region.streetname_nl).fillna(full_region.streetname_de)
full_region["municipality"] = full_region.municipality_name_fr.fillna(full_region.municipality_name_nl).fillna(full_region.municipality_name_de)
full_region["postname"] =     full_region.postname_fr.fillna(full_region.postname_nl).fillna("[na]")

full_region["house_number_num"]= full_region.house_number.str.extract("^([0-9]*)").astype(int, errors="ignore")


In [357]:
print("No numerical house number:")
print(full_region[full_region.house_number_num== ""].shape[0])
full_region[full_region.house_number_num== ""]

No numerical house number:
0


Unnamed: 0,EPSG:31370_x,EPSG:31370_y,EPSG:4326_lat,EPSG:4326_lon,address_id,box_number,house_number,municipality_id,municipality_name_de,municipality_name_fr,...,street_id,streetname_de,streetname_fr,streetname_nl,region_code,status,streetname,municipality,postname,house_number_num


In [358]:
full_region = full_region[full_region.house_number_num!= ""]
full_region.house_number_num = full_region.house_number_num.astype(int)

In [359]:
# region

In [360]:
crs = "epsg:3857"
full_region["geometry"] = gpd.points_from_xy(full_region["EPSG:31370_x"], full_region["EPSG:31370_y"])
full_region = gpd.GeoDataFrame(full_region)
full_region = full_region.set_crs("epsg:31370").to_crs(crs)
full_region = full_region.drop(["EPSG:31370_x", "EPSG:31370_y"], axis=1)

In [361]:
#Workaround as geometries are not "groupbyable"(/hashable)
full_region_wkb = full_region.assign(geometry_wkb= full_region.geometry.apply(lambda geom: geom.wkb))
                                     
region = full_region_wkb.fillna("[na]").groupby(["streetname", "house_number", "house_number_num", "postcode", "postname", "municipality", "municipality_id", "geometry_wkb"]).address_id.progress_apply(list).reset_index()
region = region.merge(full_region_wkb[["geometry_wkb", "geometry"]].drop_duplicates()).drop("geometry_wkb", axis=1)

region= gpd.GeoDataFrame(region)
del full_region_wkb

100%|██████████| 307774/307774 [00:07<00:00, 43432.86it/s]


In [362]:
region = region.sort_values(["postcode", "streetname", "house_number_num"])

In [363]:
region

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry
55,Abbaye de la Cambre,13,13,1000,Bruxelles (Centre),Bruxelles,21004,[194549],POINT (486956.474 6589242.508)
56,Abbaye de la Cambre,15,15,1000,Bruxelles (Centre),Bruxelles,21004,[194550],POINT (486891.058 6589269.902)
57,Abbaye de la Cambre,16,16,1000,Bruxelles (Centre),Bruxelles,21004,[1754],POINT (486867.212 6589332.361)
58,Abbaye de la Cambre,17,17,1000,Bruxelles (Centre),Bruxelles,21004,[22582],POINT (486870.703 6589353.125)
59,Abbaye de la Cambre,18,18,1000,Bruxelles (Centre),Bruxelles,21004,[29274],POINT (486882.079 6589369.170)
...,...,...,...,...,...,...,...,...,...
305394,Square Victoria Régina,9,9,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,[987485],POINT (485673.791 6595884.966)
305389,Square Victoria Régina,10,10,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[150486, 1496184, 1496188, 1496182, 1496180, 1...",POINT (485660.330 6595889.949)
305390,Square Victoria Régina,10,10,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[865671, 865203, 865202, 865670, 865201]",POINT (485660.337 6595889.943)
305391,Square Victoria Régina,11,11,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[150485, 1473394]",POINT (485652.756 6595893.553)


# Box anomalies

In [364]:
# Several names for the same street id
x = full_region[["street_id", "streetname"]].drop_duplicates()
x[x.street_id.duplicated()]

Unnamed: 0,street_id,streetname


In [365]:
# Street_ids in multiple municipalities
x = full_region[["street_id", "municipality_id"]].drop_duplicates()
x[x.street_id.duplicated(keep=False)].sort_values("street_id").merge(full_region[["street_id", "municipality_id", "streetname", "municipality"]].drop_duplicates())

Unnamed: 0,street_id,municipality_id,streetname,municipality
0,13,21016,Chaussée de Waterloo,Uccle
1,13,21004,Chaussée de Waterloo,Bruxelles
2,2405,21004,Avenue du Derby,Bruxelles
3,2405,21009,Avenue du Derby,Ixelles
4,2727,21001,Avenue des Ménestrels,Anderlecht
5,2727,21012,Avenue des Ménestrels,Molenbeek-Saint-Jean
6,2747,21009,Rue Africaine,Ixelles
7,2747,21013,Rue Africaine,Saint-Gilles
8,3244,21001,Rue de la Pastorale,Anderlecht
9,3244,21012,Rue de la Pastorale,Molenbeek-Saint-Jean


In [366]:
# Several ids for the same street
x = full_region[["street_id", "municipality_id", "streetname", "postcode"]].drop_duplicates()
x[x.duplicated(subset=["streetname", "municipality_id", "postcode"], keep=False)].sort_values("streetname")

Unnamed: 0,street_id,municipality_id,streetname,postcode
2447,862,21012,Avenue des Ménestrels,1080
29801,2727,21012,Avenue des Ménestrels,1080
188,951,21003,Chaussée de Gand,1082
474970,595,21003,Chaussée de Gand,1082
5620,2168,21004,Chaussée de Waterloo,1050
244937,13,21004,Chaussée de Waterloo,1050
20865,6305,21007,Rue Jean d'Osta,1190
596000,6306,21007,Rue Jean d'Osta,1190
56906,7826,21013,Rue de la Bonté,1060
186104,4838,21013,Rue de la Bonté,1060


In [367]:
# Several streets with the same name (and difference street ids) with the same municipality
x = full_region[["street_id", "municipality_id", "streetname"]].drop_duplicates()
x[x.duplicated(subset=["streetname", "municipality_id"], keep=False)].sort_values("streetname")

Unnamed: 0,street_id,municipality_id,streetname
2447,862,21012,Avenue des Ménestrels
29801,2727,21012,Avenue des Ménestrels
188,951,21003,Chaussée de Gand
474970,595,21003,Chaussée de Gand
5620,2168,21004,Chaussée de Waterloo
244937,13,21004,Chaussée de Waterloo
1015,4161,21013,Rue Africaine
406483,2747,21013,Rue Africaine
20865,6305,21007,Rue Jean d'Osta
596000,6306,21007,Rue Jean d'Osta


In [368]:
# full_region.

In [369]:
# Several postcodes for the same building
x = full_region[["street_id", "municipality_id", "house_number", "postcode"]].drop_duplicates()
x[x.duplicated(subset = ["street_id", "municipality_id", "house_number"], keep=False)].sort_values("street_id").merge(full_region[["street_id", "municipality_id","postcode", "streetname", "municipality", "house_number", "box_number", "address_id", "geometry"]].drop_duplicates())

Unnamed: 0,street_id,municipality_id,house_number,postcode,streetname,municipality,box_number,address_id,geometry
0,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,2.0,1014693,POINT (486133.350 6589904.866)
1,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,3.0,1014694,POINT (486133.350 6589904.866)
2,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,4.0,1014695,POINT (486133.350 6589904.866)
3,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,1.0,1014692,POINT (486133.350 6589904.866)
4,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,5.0,1014696,POINT (486133.350 6589904.866)
5,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,6.0,1014697,POINT (486133.350 6589904.866)
6,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,7.0,1014698,POINT (486133.350 6589904.866)
7,2388,21004,176,1050,Chaussée de Vleurgat,Bruxelles,8.0,1014699,POINT (486133.350 6589904.866)
8,2388,21004,176,1000,Chaussée de Vleurgat,Bruxelles,,45466,POINT (486133.350 6589904.866)
9,623,21004,25,1000,Rue Vilain XIIII,Bruxelles,,181882,POINT (486501.354 6590145.190)


In [370]:
region_with_boxes = full_region[full_region.duplicated(subset=["municipality_id",  "street_id", "house_number"], keep=False)]

In [371]:
region_with_boxes

Unnamed: 0,EPSG:4326_lat,EPSG:4326_lon,address_id,box_number,house_number,municipality_id,municipality_name_de,municipality_name_fr,municipality_name_nl,postcode,...,streetname_de,streetname_fr,streetname_nl,region_code,status,streetname,municipality,postname,house_number_num,geometry
125,50.84140,4.35618,37329,,14,21004,,Bruxelles,Brussel,1000,...,,Impasse Saint-Jacques,Sint-Jakobsgang,BE-BRU,current,Impasse Saint-Jacques,Bruxelles,Bruxelles (Centre),14,POINT (484928.138 6593285.989)
135,50.84140,4.35618,656272,b002,14,21004,,Bruxelles,Brussel,1000,...,,Impasse Saint-Jacques,Sint-Jakobsgang,BE-BRU,current,Impasse Saint-Jacques,Bruxelles,Bruxelles (Centre),14,POINT (484928.142 6593285.990)
149,50.84140,4.35618,656271,b001,14,21004,,Bruxelles,Brussel,1000,...,,Impasse Saint-Jacques,Sint-Jakobsgang,BE-BRU,current,Impasse Saint-Jacques,Bruxelles,Bruxelles (Centre),14,POINT (484928.142 6593285.990)
314,50.86223,4.29492,21492,,16,21003,,Berchem-Sainte-Agathe,Sint-Agatha-Berchem,1082,...,,Rue Docteur Charles Leemans,Dokter Charles Leemansstraat,BE-BRU,current,Rue Docteur Charles Leemans,Berchem-Sainte-Agathe,Berchem-Sainte-Agathe,16,POINT (478108.688 6596960.408)
316,50.87246,4.38041,119157,,124,21015,,Schaerbeek,Schaarbeek,1030,...,,Boulevard Lambermont,Lambermontlaan,BE-BRU,current,Boulevard Lambermont,Schaerbeek,"Schaerbeek + Bruxelles (Pont Van Praet, Teichm...",124,POINT (487624.770 6598764.628)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
840343,50.84294,4.34440,656933,b004,96,21004,,Bruxelles,Brussel,1000,...,,Boulevard Maurice Lemonnier,Maurice Lemonnierlaan,BE-BRU,current,Boulevard Maurice Lemonnier,Bruxelles,Bruxelles (Centre),96,POINT (483615.794 6593559.141)
840344,50.86441,4.37504,1432364,ETES,21,21015,,Schaerbeek,Schaarbeek,1030,...,,Rue Jenatzy,Jenatzystraat,BE-BRU,current,Rue Jenatzy,Schaerbeek,"Schaerbeek + Bruxelles (Pont Van Praet, Teichm...",21,POINT (487027.427 6597344.418)
840345,50.86447,4.37557,1432352,ETSS,6,21015,,Schaerbeek,Schaarbeek,1030,...,,Rue Jenatzy,Jenatzystraat,BE-BRU,current,Rue Jenatzy,Schaerbeek,"Schaerbeek + Bruxelles (Pont Van Praet, Teichm...",6,POINT (487085.990 6597354.261)
840346,50.87077,4.37162,1432344,b003,9,21015,,Schaerbeek,Schaarbeek,1030,...,,Rue James Watt,James Wattstraat,BE-BRU,current,Rue James Watt,Schaerbeek,"Schaerbeek + Bruxelles (Pont Van Praet, Teichm...",9,POINT (486646.564 6598466.329)


In [372]:
region_buildings = region_with_boxes[["street_id", "streetname", "municipality_id","municipality", "postcode", "house_number", "geometry"]].dissolve(["street_id", "municipality_id", "postcode", "house_number"])
region_buildings

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,geometry,streetname,municipality
street_id,municipality_id,postcode,house_number,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,21004,1000,6,POINT (485036.445 6595049.642),Rue du Persil,Bruxelles
10,21013,1060,1,"MULTIPOINT ((484566.416 6591457.189), (484566....",Rue de Lausanne,Saint-Gilles
10,21013,1060,10,"MULTIPOINT ((484586.760 6591382.788), (484586....",Rue de Lausanne,Saint-Gilles
10,21013,1060,12,"MULTIPOINT ((484583.695 6591371.459), (484583....",Rue de Lausanne,Saint-Gilles
10,21013,1060,14,POINT (484581.011 6591361.523),Rue de Lausanne,Saint-Gilles
...,...,...,...,...,...,...
999,21012,1080,51,"MULTIPOINT ((480879.228 6595964.622), (480879....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean
999,21012,1080,53,"MULTIPOINT ((480871.239 6595969.119), (480871....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean
999,21012,1080,63,"MULTIPOINT ((480826.349 6595994.986), (480826....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean
999,21012,1080,67,"MULTIPOINT ((480809.572 6596006.313), (480809....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean


In [373]:
region_buildings["convex_hull"] = region_buildings.convex_hull
region_buildings

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,geometry,streetname,municipality,convex_hull
street_id,municipality_id,postcode,house_number,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,21004,1000,6,POINT (485036.445 6595049.642),Rue du Persil,Bruxelles,POINT (485036.445 6595049.642)
10,21013,1060,1,"MULTIPOINT ((484566.416 6591457.189), (484566....",Rue de Lausanne,Saint-Gilles,"LINESTRING (484566.416 6591457.189, 484566.424..."
10,21013,1060,10,"MULTIPOINT ((484586.760 6591382.788), (484586....",Rue de Lausanne,Saint-Gilles,"LINESTRING (484586.760 6591382.788, 484586.760..."
10,21013,1060,12,"MULTIPOINT ((484583.695 6591371.459), (484583....",Rue de Lausanne,Saint-Gilles,"LINESTRING (484583.695 6591371.459, 484583.697..."
10,21013,1060,14,POINT (484581.011 6591361.523),Rue de Lausanne,Saint-Gilles,POINT (484581.011 6591361.523)
...,...,...,...,...,...,...,...
999,21012,1080,51,"MULTIPOINT ((480879.228 6595964.622), (480879....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480879.228 6595964.622, 480879.229..."
999,21012,1080,53,"MULTIPOINT ((480871.239 6595969.119), (480871....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480871.239 6595969.119, 480871.241..."
999,21012,1080,63,"MULTIPOINT ((480826.349 6595994.986), (480826....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480826.349 6595994.986, 480826.355..."
999,21012,1080,67,"MULTIPOINT ((480809.572 6596006.313), (480809....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480809.572 6596006.313, 480809.573..."


In [374]:
region_buildings["min_rot_rect"] = region_buildings.convex_hull.apply(lambda g: g.minimum_rotated_rectangle)
region_buildings

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,geometry,streetname,municipality,convex_hull,min_rot_rect
street_id,municipality_id,postcode,house_number,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,21004,1000,6,POINT (485036.445 6595049.642),Rue du Persil,Bruxelles,POINT (485036.445 6595049.642),POINT (485036.445 6595049.642)
10,21013,1060,1,"MULTIPOINT ((484566.416 6591457.189), (484566....",Rue de Lausanne,Saint-Gilles,"LINESTRING (484566.416 6591457.189, 484566.424...","LINESTRING (484566.416 6591457.189, 484566.424..."
10,21013,1060,10,"MULTIPOINT ((484586.760 6591382.788), (484586....",Rue de Lausanne,Saint-Gilles,"LINESTRING (484586.760 6591382.788, 484586.760...","LINESTRING (484586.760 6591382.788, 484586.760..."
10,21013,1060,12,"MULTIPOINT ((484583.695 6591371.459), (484583....",Rue de Lausanne,Saint-Gilles,"LINESTRING (484583.695 6591371.459, 484583.697...","LINESTRING (484583.695 6591371.459, 484583.697..."
10,21013,1060,14,POINT (484581.011 6591361.523),Rue de Lausanne,Saint-Gilles,POINT (484581.011 6591361.523),POINT (484581.011 6591361.523)
...,...,...,...,...,...,...,...,...
999,21012,1080,51,"MULTIPOINT ((480879.228 6595964.622), (480879....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480879.228 6595964.622, 480879.229...","LINESTRING (480879.228 6595964.622, 480879.229..."
999,21012,1080,53,"MULTIPOINT ((480871.239 6595969.119), (480871....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480871.239 6595969.119, 480871.241...","LINESTRING (480871.239 6595969.119, 480871.241..."
999,21012,1080,63,"MULTIPOINT ((480826.349 6595994.986), (480826....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480826.349 6595994.986, 480826.355...","LINESTRING (480826.349 6595994.986, 480826.355..."
999,21012,1080,67,"MULTIPOINT ((480809.572 6596006.313), (480809....",Rue Jean-Baptiste Janssen,Molenbeek-Saint-Jean,"LINESTRING (480809.572 6596006.313, 480809.573...","LINESTRING (480809.572 6596006.313, 480809.573..."


In [375]:
mrr_size = region_buildings.min_rot_rect.progress_apply(get_min_rot_rect_size)
region_buildings[["mrr_length", "mrr_width"]]=pd.concat([mrr_size.rename("mrr_length").str[0], mrr_size.rename("mrr_width").str[1]], axis=1)

100%|██████████| 102894/102894 [00:00<00:00, 130483.23it/s]


In [376]:
# region_buildings["ch_perimeter"] = region_buildings.convex_hull.length
# region_buildings["ch_area"] = region_buildings.convex_hull.area

In [377]:
region_buildings = region_buildings.sort_values("mrr_length", ascending=False)
region_buildings

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,geometry,streetname,municipality,convex_hull,min_rot_rect,mrr_length,mrr_width
street_id,municipality_id,postcode,house_number,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
5683,21007,1190,339,"MULTIPOINT ((481030.599 6589529.967), (483196....",Rue Saint-Denis,Forest,"LINESTRING (481030.599 6589529.967, 483196.379...","LINESTRING (481030.599 6589529.967, 483196.379...",2175.233505,0.000000
642,21016,1180,54,"MULTIPOINT ((483054.882 6585007.232), (483059....",Dieweg,Uccle,"POLYGON ((483093.667 6584929.926, 483089.377 6...","POLYGON ((483139.854 6584971.065, 483093.667 6...",96.457548,61.851877
3551,21016,1180,269,"MULTIPOINT ((483478.703 6584476.834), (483498....",Chaussée de Saint-Job,Uccle,"LINESTRING (483478.703 6584476.834, 483498.927...","LINESTRING (483478.703 6584476.834, 483498.927...",44.315405,0.000000
4165,21012,1080,25,"MULTIPOINT ((482889.682 6595152.450), (482912....",Rue Fernand Brunfaut,Molenbeek-Saint-Jean,"LINESTRING (482889.682 6595152.450, 482912.799...","LINESTRING (482889.682 6595152.450, 482912.799...",36.007553,0.000000
301,21018,1200,14,"MULTIPOINT ((491246.338 6593812.101), (491276....",Rue du Bois de Linthout,Woluwe-Saint-Lambert,"LINESTRING (491246.338 6593812.101, 491276.689...","LINESTRING (491246.338 6593812.101, 491276.689...",32.632558,0.000000
...,...,...,...,...,...,...,...,...,...,...
3320,21009,1050,65,POINT (485268.050 6589057.510),Rue Jean-Baptiste Colyns,Ixelles,POINT (485268.050 6589057.510),POINT (485268.050 6589057.510),0.000000,0.000000
5303,21015,1030,77,POINT (487026.466 6596683.687),Rue de Robiano,Schaerbeek,POINT (487026.466 6596683.687),POINT (487026.466 6596683.687),0.000000,0.000000
3320,21009,1050,86,POINT (485224.636 6588984.850),Rue Jean-Baptiste Colyns,Ixelles,POINT (485224.636 6588984.850),POINT (485224.636 6588984.850),0.000000,0.000000
3320,21009,1050,86B,POINT (485219.974 6588987.555),Rue Jean-Baptiste Colyns,Ixelles,POINT (485219.974 6588987.555),POINT (485219.974 6588987.555),0.000000,0.000000


In [378]:
# region_buildings.iloc[25:35]

In [379]:
k=0
plot_building_boxes(region_buildings.iloc[[k]], full_region, with_explore=True)

In [380]:
pdf = PdfPages(f"{output_dir}/best_anomalies_{case_name}_box_anomalies.pdf")

region_buildings_sel = region_buildings[(region_buildings.mrr_length > 100) & (region_buildings.mrr_width < 100)]
for k in trange(0, min(2*topn, region_buildings_sel.shape[0])):
    boxes= region_buildings_sel.iloc[[k]]
    
    plot_building_boxes(boxes, full_region)
        
    pdf.savefig(bbox_inches='tight')#, dpi=75)
    plt.close()
    
pdf.close()

100%|██████████| 1/1 [00:02<00:00,  2.22s/it]


In [381]:
# region_buildings_sel


In [382]:
del full_region

# Conflict with bPost

In [383]:
zipcode_boundaries_filename = "data/zipcode_boundaries_shapefile_3812.zip"
download_if_nexist("https://bgu.bpost.be/assets/9738c7c0-5255-11ea-8895-34e12d0f0423_x-shapefile_3812.zip",
                  zipcode_boundaries_filename)

In [384]:
zipcodes_boundaries = gpd.read_file(f"zip://{zipcode_boundaries_filename}/3812")
zipcodes_boundaries["is_special"] = zipcodes_boundaries.CP_speciau ==1
zipcodes_boundaries = zipcodes_boundaries.rename({"nouveau_PO":"zipcode"}, axis=1)[["zipcode", "is_special", "geometry"]]

zipcodes_boundaries = zipcodes_boundaries.dissolve(["zipcode", "is_special"]).reset_index()

zipcodes_boundaries = zipcodes_boundaries.to_crs(crs)
zipcodes_boundaries

Unnamed: 0,zipcode,is_special,geometry
0,1000,False,MULTIPOLYGON Z (((487310.539 6588310.640 0.000...
1,1005,True,"POLYGON Z ((484391.808 6593846.983 0.000, 4844..."
2,1006,True,"POLYGON Z ((484413.037 6593943.815 0.000, 4843..."
3,1007,True,"POLYGON Z ((486232.320 6594255.851 0.000, 4862..."
4,1008,True,"POLYGON Z ((486000.332 6594258.665 0.000, 4858..."
...,...,...,...
1182,9982,False,"POLYGON Z ((398126.149 6670079.581 0.000, 3981..."
1183,9988,False,"POLYGON Z ((402165.046 6674165.098 0.000, 4021..."
1184,9990,False,"POLYGON Z ((383123.261 6664423.574 0.000, 3831..."
1185,9991,False,"POLYGON Z ((389466.378 6660430.021 0.000, 3895..."


In [385]:
zipcodes_boundaries["buffer"] = zipcodes_boundaries.buffer(-50)

In [386]:
region_zipcode = gpd.sjoin(region, zipcodes_boundaries.set_geometry("buffer"))

In [387]:
region_zipcode.shape, region.shape

((285843, 13), (307774, 9))

In [388]:
zip_mismatches = region_zipcode[region_zipcode.postcode !=region_zipcode.zipcode]
zip_mismatches

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry_left,index_right,zipcode,is_special,geometry_right
48822,Avenue de Diane,3,3,1000,Bruxelles (Centre),Bruxelles,21004,[38736],POINT (486522.561 6588328.311),21,1050,False,MULTIPOLYGON Z (((486748.105 6592635.180 0.000...
53108,Avenue de Vilvorde,290,290,1000,Bruxelles (Centre),Bruxelles,21004,[89452],POINT (489705.208 6602023.106),33,1130,False,"POLYGON Z ((492165.130 6604187.117 0.000, 4921..."
53109,Avenue de Vilvorde,292,292,1000,Bruxelles (Centre),Bruxelles,21004,[89455],POINT (489710.302 6602030.197),33,1130,False,"POLYGON Z ((492165.130 6604187.117 0.000, 4921..."
53110,Avenue de Vilvorde,294,294,1000,Bruxelles (Centre),Bruxelles,21004,[91826],POINT (489715.395 6602037.286),33,1130,False,"POLYGON Z ((492165.130 6604187.117 0.000, 4921..."
53111,Avenue de Vilvorde,296,296,1000,Bruxelles (Centre),Bruxelles,21004,[8523],POINT (489876.769 6602263.360),33,1130,False,"POLYGON Z ((492165.130 6604187.117 0.000, 4921..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105298,Chaussée de Vilvorde,1,1,1120,Bruxelles (Neder-Over-Hembeek),Bruxelles,21004,"[975990, 1557276]",POINT (486993.218 6600446.427),8,1020,False,"POLYGON Z ((484519.836 6603848.988 0.000, 4845..."
105342,Chaussée de Vilvorde,3,3,1120,Bruxelles (Neder-Over-Hembeek),Bruxelles,21004,[245554],POINT (487172.057 6600653.730),8,1020,False,"POLYGON Z ((484519.836 6603848.988 0.000, 4845..."
105339,Chaussée de Vilvorde,233,233,1120,Bruxelles (Neder-Over-Hembeek),Bruxelles,21004,[239301],POINT (490622.273 6605058.458),29,1099,True,"POLYGON Z ((491313.400 6605668.540 0.000, 4912..."
258471,Rue de la Fusée,70,70,1130,Bruxelles (Haren),Bruxelles,21004,[72969],POINT (492888.199 6599893.089),31,1110,True,"POLYGON Z ((492944.662 6600452.019 0.000, 4931..."


In [389]:
zip_mismatches.drop(["geometry_left", "geometry_right", "index_right"], axis=1).to_excel(f"{output_dir}/best_anomalies_{case_name}_zip_mismatches.xlsx")

In [390]:
# mismatches.set_geometry("geometry_left").plot()

In [391]:
# mismatches.postcode.value_counts().iloc[0:60]

In [392]:
zip_mismatches.postcode.value_counts()

postcode
1050    27
1120    23
1000     8
1030     4
1020     2
1006     1
1007     1
1010     1
1011     1
1045     1
1047     1
1070     1
1090     1
1105     1
1130     1
1170     1
Name: count, dtype: int64

In [393]:
# zipcode="1010"
# # display(mismatches[mismatches.postcode==zipcode])
# mism=zip_mismatches[zip_mismatches.postcode==zipcode].set_geometry("geometry_left")
# ax=mism.plot("zipcode", figsize=(15,15),alpha=0.8, legend=True)
# # ax=region[region.postcode==zipcode].plot(figsize=(15,15),alpha=0.8, color="green")
# # ax=mismatches[mismatches.postcode==zipcode].set_geometry("geometry_left").plot(ax=ax, color="red")

# ax=zipcodes_boundaries[zipcodes_boundaries.zipcode==zipcode].boundary.plot(ax=ax, color="red")
# set_optimal_limits(ax, zipcodes_boundaries[zipcodes_boundaries.zipcode==zipcode])
# ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik)
# ax=mismatches[mismatches.zipcode=="1301"].set_geometry("geometry_right").plot()


In [394]:
# zipcodes_boundaries.crs

In [395]:
pdf = PdfPages(f"{output_dir}/best_anomalies_{case_name}_zip_mismatches.pdf")

for zipcode in tqdm(zip_mismatches.postcode.value_counts().index):
    
    mism=zip_mismatches[zip_mismatches.postcode==zipcode].set_geometry("geometry_left")
    nis = ";".join(mism.municipality_id.unique()) 
    ax=mism.plot("zipcode", figsize=(10,10),alpha=0.8, legend=True)
    plt.title(f"{zipcode} ({nis})")
    # ax=region[region.postcode==zipcode].plot(figsize=(15,15),alpha=0.8, color="green")
    # ax=mismatches[mismatches.postcode==zipcode].set_geometry("geometry_left").plot(ax=ax, color="red")
    zip_bnd = zipcodes_boundaries[zipcodes_boundaries.zipcode==zipcode]
    if zip_bnd.shape[0]>0:
        ax=zip_bnd.boundary.plot(ax=ax, color="red")
    set_optimal_limits(ax, pd.concat([zip_bnd[["geometry"]], mism[["geometry_left"]].rename(columns={"geometry_left": "geometry"})]))
    add_basemap(ax)

    pdf.savefig(bbox_inches='tight')#, dpi=75)
    plt.close()
    
pdf.close()

  0%|          | 0/16 [00:00<?, ?it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4194.png


 12%|█▎        | 2/16 [00:02<00:15,  1.14s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/12/1373/2096.png


 19%|█▉        | 3/16 [00:03<00:14,  1.13s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2746/4194.png


 25%|██▌       | 4/16 [00:04<00:11,  1.02it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2745/4193.png


 69%|██████▉   | 11/16 [00:10<00:03,  1.26it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2747/4192.png


 88%|████████▊ | 14/16 [00:12<00:01,  1.30it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2745/4195.png


 94%|█████████▍| 15/16 [00:13<00:00,  1.39it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4195.png


100%|██████████| 16/16 [00:13<00:00,  1.16it/s]


# Conflict with NIS code

In [396]:
download_if_nexist("https://statbel.fgov.be/sites/default/files/files/opendata/Statistische%20sectoren/sh_statbel_statistical_sectors_31370_20200101.shp.zip",
                   "data/stat_sectors_2020.zip")
statistical_sectors = gpd.read_file("zip://data/stat_sectors_2020.zip/sh_statbel_statistical_sectors_20200101.shp")
statistical_sectors["CNIS5_2020"] = statistical_sectors["CNIS5_2020"].astype(str)

In [397]:
# Group (with "dissolve") sectors per NIS code
nis_boundaries = statistical_sectors[["CNIS5_2020", "T_MUN_FR", "T_MUN_NL", "geometry"]].dissolve(by="CNIS5_2020").reset_index()
nis_boundaries = nis_boundaries.rename({"CNIS5_2020": "niscode"}, axis=1)
nis_boundaries = nis_boundaries.to_crs(crs)

In [398]:
nis_boundaries["buffer"] = nis_boundaries.buffer(-50)

In [399]:
region_niscode = gpd.sjoin(region, nis_boundaries.set_geometry("buffer"))

In [400]:
# region_niscode[region_niscode.streetname=="Place Albert Ier (MT)"][["municipality_id", "niscode"]].drop_duplicates()

In [401]:
nis_mismatches = region_niscode[region_niscode.municipality_id !=region_niscode.niscode]
nis_mismatches

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry_left,index_right,niscode,geometry_right,T_MUN_FR,T_MUN_NL
83909,Boulevard Industriel,222,222,1070,Anderlecht + Bruxelles (Rue de la Rosée),Anderlecht,21001,[234365],POINT (479869.818 6588803.407),75,21007,"POLYGON Z ((481126.511 6586184.205 0.000, 4811...",Forest,Vorst
283567,Rue du Bois,2,2,1090,Jette,Jette,21010,[14359],POINT (479008.082 6600274.281),76,21008,"POLYGON Z ((480317.425 6597837.031 0.000, 4803...",Ganshoren,Ganshoren
306281,Tenreuken,4,4,1170,Watermael-Boitsfort,Watermael-Boitsfort,21017,[203215],POINT (493226.354 6586647.201),70,21002,"POLYGON Z ((492884.087 6587779.057 0.000, 4928...",Auderghem,Oudergem


In [402]:
nis_mismatches.drop(["geometry_left", "geometry_right", "index_right"], axis=1).to_excel(f"{output_dir}/best_anomalies_{case_name}_nis_mismatches.xlsx")

In [403]:
pdf = PdfPages(f"{output_dir}/best_anomalies_{case_name}_nis_mismatches.pdf")

for niscode in tqdm(nis_mismatches.municipality_id.value_counts().index):
    
    mism=nis_mismatches[nis_mismatches.municipality_id==niscode].set_geometry("geometry_left")
    
    ax=mism.plot("niscode", figsize=(10,10),alpha=0.8, legend=True)
    plt.title(niscode)
    # ax=region[region.postcode==zipcode].plot(figsize=(15,15),alpha=0.8, color="green")
    # ax=mismatches[mismatches.postcode==zipcode].set_geometry("geometry_left").plot(ax=ax, color="red")
    nis_bnd = nis_boundaries[nis_boundaries.niscode==niscode]
    if nis_bnd.shape[0]>0:
        ax=nis_bnd.boundary.plot(ax=ax, color="red")
    set_optimal_limits(ax, pd.concat([nis_bnd[["geometry"]], mism[["geometry_left"]].rename(columns={"geometry_left": "geometry"})]))
    add_basemap(ax)

    pdf.savefig(bbox_inches='tight')#, dpi=75)
    plt.close()
    
pdf.close()

  0%|          | 0/3 [00:00<?, ?it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2747/4192.png


 67%|██████▋   | 2/3 [00:00<00:00,  2.78it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4195.png


100%|██████████| 3/3 [00:01<00:00,  2.72it/s]


# Street names incoherence

In [404]:
# region.groupby(["streetname", "postcode"]).dissolve()

In [405]:
region_streets = region[["streetname", "postcode", "geometry", "municipality_id"]].dissolve(["streetname", "postcode"])
region_streets["buffer"] = region_streets.buffer(100)
region_streets = region_streets.reset_index()
region_streets

Unnamed: 0,streetname,postcode,geometry,municipality_id,buffer
0,'t Hof te Overbeke,1082,"MULTIPOINT ((477059.087 6596586.965), (477065....",21003,"POLYGON ((476987.031 6596656.191, 476988.671 6..."
1,Abbaye de la Cambre,1000,"MULTIPOINT ((486867.212 6589332.361), (486870....",21004,"POLYGON ((486771.010 6589359.359, 486771.185 6..."
2,Abbaye de la Cambre,1050,"MULTIPOINT ((486933.660 6589388.966), (486945....",21009,"POLYGON ((486982.498 6589213.487, 486982.496 6..."
3,Allée Christian de Duve,1200,"MULTIPOINT ((492846.254 6595286.229), (492858....",21018,"POLYGON ((492758.931 6595237.639, 492758.061 6..."
4,Allée Clara Clairbert,1070,"MULTIPOINT ((477224.712 6590295.178), (477230....",21001,"POLYGON ((477200.769 6590198.210, 477195.684 6..."
...,...,...,...,...,...
5123,Venelle du Champ du Moulin,1082,"MULTIPOINT ((478957.857 6597750.281), (478959....",21003,"POLYGON ((478860.496 6597772.658, 478861.509 6..."
5124,Venelle en Boucle,1150,"MULTIPOINT ((494587.900 6591921.986), (494590....",21019,"POLYGON ((494672.922 6591740.930, 494667.436 6..."
5125,Vieille rue du Moulin,1180,"MULTIPOINT ((485363.515 6584141.760), (485364....",21016,"MULTIPOLYGON (((485268.570 6584110.641, 485267..."
5126,Vieux Chemin,1180,"MULTIPOINT ((482161.954 6581712.851), (482167....",21016,"POLYGON ((482079.857 6581769.824, 482078.894 6..."


In [406]:
connected_streets = gpd.sjoin(region_streets, region_streets.set_geometry("buffer"))
connected_streets = connected_streets[connected_streets.streetname_left < connected_streets.streetname_right]

In [407]:
connected_streets

Unnamed: 0,streetname_left,postcode_left,geometry_left,municipality_id_left,buffer,index_right,streetname_right,postcode_right,geometry_right,municipality_id_right
0,'t Hof te Overbeke,1082,"MULTIPOINT ((477059.087 6596586.965), (477065....",21003,"POLYGON ((476987.031 6596656.191, 476988.671 6...",1105,Avenue du Cognassier,1082,"MULTIPOINT ((476864.638 6596371.762), (476870....",21003
0,'t Hof te Overbeke,1082,"MULTIPOINT ((477059.087 6596586.965), (477065....",21003,"POLYGON ((476987.031 6596656.191, 476988.671 6...",4954,Rue du Village Européen,1082,"MULTIPOINT ((477224.486 6596385.016), (477224....",21003
0,'t Hof te Overbeke,1082,"MULTIPOINT ((477059.087 6596586.965), (477065....",21003,"POLYGON ((476987.031 6596656.191, 476988.671 6...",1697,Drève des Maricolles,1082,"MULTIPOINT ((477315.005 6596294.530), (477317....",21003
0,'t Hof te Overbeke,1082,"MULTIPOINT ((477059.087 6596586.965), (477065....",21003,"POLYGON ((476987.031 6596656.191, 476988.671 6...",2248,Rue Auguste Denie,1082,"MULTIPOINT ((477111.748 6596721.606), (477121....",21003
0,'t Hof te Overbeke,1082,"MULTIPOINT ((477059.087 6596586.965), (477065....",21003,"POLYGON ((476987.031 6596656.191, 476988.671 6...",3689,Rue de Dilbeek,1082,"MULTIPOINT ((476525.746 6596559.544), (476531....",21003
...,...,...,...,...,...,...,...,...,...,...
5117,Venelle aux Coins de Terre,1150,"MULTIPOINT ((494611.239 6591755.079), (494619....",21019,"POLYGON ((494579.766 6591660.260, 494572.971 6...",5124,Venelle en Boucle,1150,"MULTIPOINT ((494587.900 6591921.986), (494590....",21019
5117,Venelle aux Coins de Terre,1150,"MULTIPOINT ((494611.239 6591755.079), (494619....",21019,"POLYGON ((494579.766 6591660.260, 494572.971 6...",5119,Venelle aux Quatre Noeuds,1150,"MULTIPOINT ((494609.577 6591761.618), (494611....",21019
5118,Venelle aux Jeux,1150,"MULTIPOINT ((494352.347 6591877.987), (494363....",21019,"POLYGON ((494594.150 6591805.308, 494594.520 6...",5124,Venelle en Boucle,1150,"MULTIPOINT ((494587.900 6591921.986), (494590....",21019
5118,Venelle aux Jeux,1150,"MULTIPOINT ((494352.347 6591877.987), (494363....",21019,"POLYGON ((494594.150 6591805.308, 494594.520 6...",5119,Venelle aux Quatre Noeuds,1150,"MULTIPOINT ((494609.577 6591761.618), (494611....",21019


In [408]:
connected_streets[connected_streets.streetname_right=="Avenue de Tervuren"]

Unnamed: 0,streetname_left,postcode_left,geometry_left,municipality_id_left,buffer,index_right,streetname_right,postcode_right,geometry_right,municipality_id_right
147,Avenue Colonel Daumerie,1150,"MULTIPOINT ((494651.728 6590015.451), (494768....",21019,"POLYGON ((494907.354 6589908.483, 494906.997 6...",694,Avenue de Tervuren,1160,"MULTIPOINT ((494979.465 6589951.318), (494990....",21002
148,Avenue Colonel Daumerie,1160,"MULTIPOINT ((494626.708 6589912.351), (494644....",21002,"POLYGON ((494619.512 6590011.998, 494624.691 6...",694,Avenue de Tervuren,1160,"MULTIPOINT ((494979.465 6589951.318), (494990....",21002
325,Avenue Isidore Gérard,1160,"MULTIPOINT ((494528.649 6589804.939), (494537....",21002,"POLYGON ((494439.387 6589760.060, 494436.261 6...",694,Avenue de Tervuren,1160,"MULTIPOINT ((494979.465 6589951.318), (494990....",21002
692,Avenue de Tervueren,1150,"MULTIPOINT ((490404.050 6592603.008), (490404....",21019,"MULTIPOLYGON (((494381.544 6590931.543, 494388...",694,Avenue de Tervuren,1160,"MULTIPOINT ((494979.465 6589951.318), (494990....",21002
692,Avenue de Tervueren,1150,"MULTIPOINT ((490404.050 6592603.008), (490404....",21019,"MULTIPOLYGON (((494381.544 6590931.543, 494388...",693,Avenue de Tervuren,1150,POINT (495131.623 6589948.395),21019


In [409]:
for side in ["right", "left"]:
    connected_streets[[f"split_{side}_1", f"split_{side}_2"]] = connected_streets[f"streetname_{side}"].str.extract("^(Avenue|Rue|Chaussée|Boulevard|Drève|Clos|Square)(.*)$", flags=re.IGNORECASE)

In [410]:
def single_digit_diff(a, b):
    if pd.isnull(a) or pd.isnull(b) or len(a)!= len(b):
        return False
#     print(a, b)
    diffs = list(difflib.ndiff([a], [b]))
#     print(diffs)
    if len(diffs) != 4:
        return False
    
    diff_pos_a = diffs[1].count(" ")
    diff_pos_b = diffs[3].count(" ")

    return a[diff_pos_a-1].isdigit() and b[diff_pos_b-1].isdigit()

In [411]:
connected_streets["jaro"] =connected_streets.progress_apply(lambda row: jellyfish.jaro_winkler_similarity(row["streetname_left"],
                                                                                                          row["streetname_right"]), axis=1)

connected_streets["levenshtein"] =connected_streets.progress_apply(lambda row: jellyfish.damerau_levenshtein_distance(row["streetname_left"],
                                                                                                          row["streetname_right"]), axis=1)

connected_streets["jaro_split"] =connected_streets.progress_apply(lambda row: null_jaro(row["split_left_2"],
                                                                                        row["split_right_2"]), axis=1)

# single_last_letter : True if the only difference is the last letter, and the penultimate letter is a space
connected_streets["single_last_letter"] = (connected_streets.streetname_left.str.len() == connected_streets.streetname_right.str.len()) & \
                                            (connected_streets.streetname_left.str[:-2] == connected_streets.streetname_right.str[:-2]) & \
                                            (connected_streets.streetname_left.str[-2] == " ")

100%|██████████| 17174/17174 [00:00<00:00, 103315.00it/s]
100%|██████████| 17174/17174 [00:00<00:00, 57315.68it/s]
100%|██████████| 17174/17174 [00:00<00:00, 105414.32it/s]


In [412]:
connected_streets["single_digit_diff"] = connected_streets.progress_apply(lambda row: single_digit_diff(row["streetname_left"],
                                                                                        row["streetname_right"]), axis=1)

100%|██████████| 17174/17174 [00:00<00:00, 96449.31it/s]


In [413]:
connected_streets[connected_streets.single_digit_diff]

Unnamed: 0,streetname_left,postcode_left,geometry_left,municipality_id_left,buffer,index_right,streetname_right,postcode_right,geometry_right,municipality_id_right,split_right_1,split_right_2,split_left_1,split_left_2,jaro,levenshtein,jaro_split,single_last_letter,single_digit_diff


In [414]:
streetname_mismatches = connected_streets[((connected_streets.levenshtein<=1) 
                                          | (connected_streets.jaro >=0.96)
                                          | ((connected_streets.jaro_split >= 0.96 ) & 
                                             (connected_streets.split_left_1.str.lower() == connected_streets.split_right_1.str.lower()))) &
                                         ~connected_streets.single_last_letter &
                                         ~connected_streets.single_digit_diff
                                         ]
                                         
streetname_mismatches

Unnamed: 0,streetname_left,postcode_left,geometry_left,municipality_id_left,buffer,index_right,streetname_right,postcode_right,geometry_right,municipality_id_right,split_right_1,split_right_2,split_left_1,split_left_2,jaro,levenshtein,jaro_split,single_last_letter,single_digit_diff
346,Avenue Jean François Leemans,1160,"MULTIPOINT ((493419.110 6586426.114), (493437....",21002,"POLYGON ((493404.431 6586524.909, 493408.862 6...",366,Avenue Jean-François Leemans,1170,"MULTIPOINT ((493399.419 6586420.198), (493407....",21017,Avenue,Jean-François Leemans,Avenue,Jean François Leemans,0.956085,1,0.943723,False,False
351,Avenue Jean Palfijn,1090,"MULTIPOINT ((481647.140 6602498.016), (481647....",21010,"POLYGON ((481576.430 6602568.728, 481576.431 6...",352,Avenue Jean Palfyn,1020,"MULTIPOINT ((481735.930 6602421.883), (481735....",21004,Avenue,Jean Palfyn,Avenue,Jean Palfijn,0.967836,2,0.952564,False,False
692,Avenue de Tervueren,1150,"MULTIPOINT ((490404.050 6592603.008), (490404....",21019,"MULTIPOLYGON (((494381.544 6590931.543, 494388...",694,Avenue de Tervuren,1160,"MULTIPOINT ((494979.465 6589951.318), (494990....",21002,Avenue,de Tervuren,Avenue,de Tervueren,0.978363,1,0.967949,False,False
692,Avenue de Tervueren,1150,"MULTIPOINT ((490404.050 6592603.008), (490404....",21019,"MULTIPOLYGON (((494381.544 6590931.543, 494388...",693,Avenue de Tervuren,1150,POINT (495131.623 6589948.395),21019,Avenue,de Tervuren,Avenue,de Tervueren,0.978363,1,0.967949,False,False
869,Avenue des Alouettes,1150,"MULTIPOINT ((491391.961 6590183.620), (491400....",21019,"POLYGON ((491305.901 6590234.317, 491305.474 6...",1005,Avenue des Mouettes,1150,"MULTIPOINT ((491496.230 6590281.889), (491516....",21019,Avenue,des Mouettes,Avenue,des Alouettes,0.969474,2,0.956044,False,False
1438,Chemin de Putdael,1160,"MULTIPOINT ((494063.154 6589415.942), (494240....",21002,"MULTIPOLYGON (((494678.030 6589893.914, 494677...",1467,Chemin du Putdael,1150,"MULTIPOINT ((494235.914 6589719.263), (494357....",21019,,,,,0.951471,1,,False,False
2421,Rue De Praetere,1000,"MULTIPOINT ((486507.883 6589011.416), (486513....",21004,"POLYGON ((486445.568 6589089.551, 486449.693 6...",3792,Rue de Praetere,1050,"MULTIPOINT ((486228.448 6588861.035), (486228....",21009,Rue,de Praetere,Rue,De Praetere,0.973333,1,0.95,False,False
2422,Rue De Praetere,1180,"MULTIPOINT ((486181.627 6588851.072), (486181....",21016,"POLYGON ((486110.918 6588921.784, 486110.918 6...",3792,Rue de Praetere,1050,"MULTIPOINT ((486228.448 6588861.035), (486228....",21009,Rue,de Praetere,Rue,De Praetere,0.973333,1,0.95,False,False
2819,Rue Jacobs Fontaine,1090,"MULTIPOINT ((482914.010 6599706.468), (482914....",21010,"POLYGON ((482819.553 6599738.954, 482819.330 6...",2820,Rue Jacobs-Fontaine,1020,"MULTIPOINT ((483064.904 6599714.888), (483064....",21004,Rue,Jacobs-Fontaine,Rue,Jacobs Fontaine,0.978947,1,0.975,False,False
3480,Rue Van Hoegaerde,1081,"MULTIPOINT ((482372.013 6596211.967), (482372....",21011,"POLYGON ((482301.303 6596282.678, 482301.304 6...",3528,Rue Vanhoegaerde,1080,"MULTIPOINT ((482317.528 6596116.238), (482317....",21012,Rue,Vanhoegaerde,Rue,Van Hoegaerde,0.963971,2,0.956044,False,False


In [415]:
# streetname_mismatches.sort_values("jaro")

In [416]:
for (case, str_msmtch) in [("same_nis", streetname_mismatches[streetname_mismatches.municipality_id_left == streetname_mismatches.municipality_id_right]),
                           ("diff_nis", streetname_mismatches[streetname_mismatches.municipality_id_left != streetname_mismatches.municipality_id_right]),
                          ]:
    
    if str_msmtch.shape[0]==0:
        print("No case for", case)
        continue
    pdf = PdfPages(f"{output_dir}/best_anomalies_{case_name}_close_streetnames_{case}.pdf")

    for i, rec in tqdm(str_msmtch.sort_values(["levenshtein", "jaro"], ascending=[True, False]).iterrows(),
                      total=str_msmtch.shape[0]):
    #     print(rec)
    #     rec = streetname_mismatches.sort_values("levenshtein").iloc[[i]]
        r1 = rec[["streetname_left", "postcode_left",  "municipality_id_left", "geometry_left"]].rename({"streetname_left":"streetname", 
                                                                                                         "postcode_left": "postcode", 
                                                                                                         "municipality_id_left": "municipality_id", 
                                                                                                         "geometry_left":"geometry"})
        r2 = rec[["streetname_right", "postcode_right", "municipality_id_right", "geometry_right"]].rename({"streetname_right":"streetname", 
                                                                                                            "postcode_right": "postcode", 
                                                                                                            "municipality_id_right": "municipality_id", 
                                                                                                            "geometry_right":"geometry"})
        
            
            
        r = gpd.GeoDataFrame([r1, r2])

        r["name"] = r["streetname"]+", "+r["postcode"]+"/"+r["municipality_id"]
        ax=r.plot("name", legend=True)
        set_optimal_limits(ax, r)
        add_basemap(ax)

        pdf.savefig(bbox_inches='tight')#, dpi=75)
        plt.close()

    pdf.close()
    

100%|██████████| 2/2 [00:03<00:00,  1.84s/it]
100%|██████████| 11/11 [00:08<00:00,  1.33it/s]


In [417]:
# gpd.GeoDataFrame(region[(region.streetname.str.startswith("Avenue de Tervu")) & (region.postcode=="1150")]).explore("streetname")

# Metrics

In [418]:
# get_max_delta_ratio(street_bloc)

## Sinuosity

In [419]:
sin_par = []
for parity in [0, 1]:
    sin= region[(region.house_number_num.mod(2)==parity)].groupby(["streetname", "postcode"]).progress_apply(bloc_sinuosity)#.sort_values(na_pos="first")
    sin = sin.sort_values(na_position="first").rename("sinuosity").reset_index()
    sin["parity"]=parity
    display(sin)
    sin_par.append(sin)
sinuosity= pd.concat(sin_par)
sinuosity = sinuosity[(sinuosity.sinuosity.notnull()) & (sinuosity.sinuosity<10**10)].sort_values("sinuosity", ascending=False).reset_index(drop=True)
sinuosity

100%|██████████| 4782/4782 [00:10<00:00, 436.01it/s]


Unnamed: 0,streetname,postcode,sinuosity,parity
0,Allée Pierre Levie,1200,,0
1,Allée des Glycines,1070,,0
2,Allée des Perce-Neige,1070,,0
3,Allée du Cloître,1000,,0
4,Avenue Albert Brachet,1090,,0
...,...,...,...,...
4777,Place Liedts,1030,20.277506,0
4778,Clos des Lauriers Roses,1140,22.548361,0
4779,Galerie d'Ixelles,1050,28.585238,0
4780,Rue du Ciel Bleu,1150,31.321679,0


100%|██████████| 4817/4817 [00:10<00:00, 443.65it/s]


Unnamed: 0,streetname,postcode,sinuosity,parity
0,Allée Louise Van den Plas,1200,,1
1,Allée Lucette Decroly,1070,,1
2,Allée Pierre Levie,1200,,1
3,Allée Wauters,1210,,1
4,Allée des Citronniers,1020,,1
...,...,...,...,...
4812,Place Liedts,1030,20.627267,1
4813,Rue du Temps des Cerises,1150,24.546394,1
4814,Galerie d'Ixelles,1050,25.707731,1
4815,Rue du Ciel Bleu,1150,37.296457,1


Unnamed: 0,streetname,postcode,sinuosity,parity
0,Rue Henri Maubel,1190,127.244114,0
1,Cité Modèle,1020,45.755797,1
2,Rue du Ciel Bleu,1150,37.296457,1
3,Rue du Ciel Bleu,1150,31.321679,0
4,Galerie d'Ixelles,1050,28.585238,0
...,...,...,...,...
8912,Rue du Pré,1070,1.0,1
8913,Avenue Simone Veil,1070,1.0,0
8914,Rue Brialmont,1210,1.0,0
8915,Venelle aux Coins de Terre,1150,1.0,0


In [420]:
# sinuosity = sinuosity[sinuosity.sinuosity<10**10]
# region

In [421]:
sw_sin_par = []
for parity in [0, 1]:
    sin= region[(region.house_number_num.mod(2)==parity)].groupby(["streetname", "postcode"]).progress_apply(sliding_sinuosity)#.sort_values(na_pos="first")
    sin = sin.sort_values(na_position="first").rename("sw_sinuosity").reset_index()
    sin["parity"]=parity
    display(sin)
    sw_sin_par.append(sin)
sw_sinuosity= pd.concat(sw_sin_par)
sw_sinuosity = sw_sinuosity[(sw_sinuosity.sw_sinuosity.notnull()) & (sw_sinuosity.sw_sinuosity<10**10)].sort_values("sw_sinuosity", ascending=False)
sw_sinuosity = sw_sinuosity.reset_index(drop=True)
sw_sinuosity

100%|██████████| 4782/4782 [00:44<00:00, 106.42it/s]


Unnamed: 0,streetname,postcode,sw_sinuosity,parity
0,Allée Pierre Levie,1200,,0
1,Allée des Glycines,1070,,0
2,Allée des Perce-Neige,1070,,0
3,Allée du Cloître,1000,,0
4,Avenue Albert Brachet,1090,,0
...,...,...,...,...
4777,Rue du Chien Vert,1080,7.939017,0
4778,Rue du Grand-Serment,1000,9.042477,0
4779,Jardin Martin V,1200,9.104593,0
4780,Rue Heyvaert,1080,12.787389,0


100%|██████████| 4817/4817 [00:45<00:00, 105.31it/s]


Unnamed: 0,streetname,postcode,sw_sinuosity,parity
0,Allée Louise Van den Plas,1200,,1
1,Allée Lucette Decroly,1070,,1
2,Allée Pierre Levie,1200,,1
3,Allée Wauters,1210,,1
4,Allée des Citronniers,1020,,1
...,...,...,...,...
4812,Drève de Bonne Odeur,1170,7.40305,1
4813,Montagne de Sable,1160,15.839709,1
4814,Avenue Valduchesse,1160,17.022565,1
4815,Avenue Guillaume De Greef,1090,inf,1


Unnamed: 0,streetname,postcode,sw_sinuosity,parity
0,Avenue Valduchesse,1160,17.022565,1
1,Montagne de Sable,1160,15.839709,1
2,Rue Heyvaert,1080,12.787389,0
3,Jardin Martin V,1200,9.104593,0
4,Rue du Grand-Serment,1000,9.042477,0
...,...,...,...,...
8909,Rue Pangaert,1083,0.954766,1
8910,Avenue de Vilvorde,1000,0.92543,0
8911,Rue d'Angleterre,1060,0.887795,0
8912,Rue d'Arlon,1040,0.875484,0


## Length

In [422]:
#Add parity
# length = region.groupby(["streetname", "postcode"]).progress_apply(bloc_length)#.sort_values(na_pos="first")
# length = length.sort_values(na_position="first").rename("length").reset_index()
# length

In [423]:
region_lenghts = []
for parity in [0,1]:
    region_par = region[(region.house_number_num.mod(2)==parity)].copy()

    length = region_par.groupby(["streetname", "postcode"]).progress_apply(bloc_length)#.sort_values(na_pos="first")
    length = length.sort_values(na_position="first", ascending=False).rename("length").reset_index()
    
    region_lenghts.append(length.assign(parity=parity))
length = pd.concat(region_lenghts)
length = length.sort_values("length", ascending=False).reset_index(drop=True)
length

100%|██████████| 4782/4782 [00:10<00:00, 447.53it/s]
100%|██████████| 4817/4817 [00:10<00:00, 439.07it/s]


Unnamed: 0,streetname,postcode,length,parity
0,Chaussée de Wavre,1160,12463.543721,1
1,Chaussée de Waterloo,1180,11598.646661,1
2,Chaussée de Mons,1070,10435.691037,1
3,Chaussée d'Alsemberg,1180,10309.421684,1
4,Chaussée de Mons,1070,9008.514842,0
...,...,...,...,...
9594,Square du Rubis,1020,,1
9595,Square du Sacré-Coeur,1160,,1
9596,Val des Perdreaux,1150,,1
9597,Venelle Georges Désir,1200,,1


## Distance to previous

In [424]:
region_pars = []
for parity in [0,1]:
    region_par = region[(region.house_number_num.mod(2)==parity)].copy()

    region_par["dist_to_prev"] = region_par.distance(region_par.shift(1))
    region_par["dist_to_prev2"] = region_par.distance(region_par.shift(2))
    region_par["is_new_bloc"] = (region_par[["streetname", "postcode"]] !=  region_par[["streetname", "postcode"]].shift(1)).any(axis=1)
    region_par.dist_to_prev = region_par.dist_to_prev.where(~region_par.is_new_bloc, pd.NA)
    
    region_par.dist_to_prev2 = region_par.dist_to_prev2.where(~region_par.is_new_bloc, pd.NA)
    region_par.dist_to_prev2 = region_par.dist_to_prev2.where(~region_par.is_new_bloc.shift(1).astype(bool), pd.NA)
    
    
    region_pars.append(region_par.assign(parity=parity))
region_pars = pd.concat(region_pars)

In [425]:
idx_max= region_pars.groupby(["streetname", "postcode", "parity"]).dist_to_prev.idxmax()
dist_to_prev = region_pars.loc[idx_max.dropna().values].sort_values("dist_to_prev", ascending=False)
dist_to_prev = dist_to_prev[["streetname", "postcode", "parity", "dist_to_prev", "house_number", "house_number_num"]].reset_index(drop=True)
dist_to_prev

Unnamed: 0,streetname,postcode,parity,dist_to_prev,house_number,house_number_num
0,Chaussée de Wavre,1160,1,3810.133107,2245A,2245
1,Chaussée de Vilvorde,1120,1,3597.745954,233,233
2,Chaussée d'Alsemberg,1180,1,2733.893721,373,373
3,Chaussée de Ninove,1080,1,2688.683025,975,975
4,Chaussée de Ninove,1080,0,2641.308532,996,996
...,...,...,...,...,...,...
9256,Rue Henri Van Antwerpen,1160,0,0.001584,2,2
9257,Petite rue Sainte-Anne,1090,0,0.001582,172,172
9258,Avenue des Phalènes,1050,0,0.001579,36,36
9259,Rue des Valérianes,1170,0,0.001579,2,2


In [426]:
dist_to_prev.dist_to_prev.describe(percentiles=[0.5, 0.90, .95, .99, .999])

count    9261.000000
mean       91.570971
std       155.058585
min         0.001579
50%        54.789463
90%       178.884645
95%       281.686411
99%       699.746579
99.9%    1924.073893
max      3810.133107
Name: dist_to_prev, dtype: float64

## Delta dist to prev

In [427]:
idx_max= region_pars.groupby(["streetname", "postcode", "parity"]).dist_to_prev.idxmax()
delta_dist_to_prev =  region_pars.loc[idx_max.dropna().values].rename(columns={"dist_to_prev":"max_dist_to_prev"})#.sort_values("dist_to_prev")[["streetname", "postcode", "parity", "dist_to_prev", "house_number"]]
delta_dist_to_prev

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry,max_dist_to_prev,dist_to_prev2,is_new_bloc,parity
10,'t Hof te Overbeke,22,22,1082,Berchem-Sainte-Agathe,Berchem-Sainte-Agathe,21003,[215387],POINT (477123.512 6596425.307),82.671768,98.611794,False,0
36,'t Hof te Overbeke,45,45,1082,Berchem-Sainte-Agathe,Berchem-Sainte-Agathe,21003,[4866],POINT (477065.972 6596594.963),98.704650,59.753805,False,1
59,Abbaye de la Cambre,18,18,1000,Bruxelles (Centre),Bruxelles,21004,[29274],POINT (486882.079 6589369.170),39.697858,,False,0
62,Abbaye de la Cambre,21,21,1000,Bruxelles (Centre),Bruxelles,21004,[25027],POINT (487034.134 6589127.772),278.376788,201.671795,False,1
66,Abbaye de la Cambre,6,6,1050,Ixelles + Bruxelles (Louise-Roosevelt),Ixelles,21009,[17785],POINT (487074.049 6589377.591),93.109887,129.048054,False,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
307730,Vieille rue du Moulin,99,99,1180,Uccle,Uccle,21016,[45026],POINT (485972.970 6584232.035),332.481110,77.608254,False,1
307744,Vieux Chemin,66,66,1180,Uccle,Uccle,21016,[937283],POINT (482206.811 6581870.449),91.358538,91.360057,False,0
307738,Vieux Chemin,5,5,1180,Uccle,Uccle,21016,[40362],POINT (482495.061 6581538.903),38.862762,69.789710,False,1
307754,Windmolenberg,10,10,1200,Woluwe-Saint-Lambert,Woluwe-Saint-Lambert,21018,[200230],POINT (493549.010 6593704.030),19.563378,29.402017,False,0


In [428]:
delta_dist_to_prev = delta_dist_to_prev.merge(region_pars.groupby(["streetname", "postcode", "parity"]).dist_to_prev.median().rename("median_dist_to_prev").reset_index())
delta_dist_to_prev["median_dist_to_prev"] = delta_dist_to_prev["median_dist_to_prev"].where(delta_dist_to_prev.median_dist_to_prev> 5, 0)

delta_dist_to_prev["delta_dist_to_prev"] = delta_dist_to_prev.max_dist_to_prev / delta_dist_to_prev.median_dist_to_prev

delta_dist_to_prev = delta_dist_to_prev[~delta_dist_to_prev.delta_dist_to_prev.isnull() & (delta_dist_to_prev.delta_dist_to_prev< np.inf)].sort_values("delta_dist_to_prev", ascending=False)
delta_dist_to_prev = delta_dist_to_prev[["streetname", "postcode", "parity", "house_number", "delta_dist_to_prev" ]].reset_index(drop=True)
delta_dist_to_prev

Unnamed: 0,streetname,postcode,parity,house_number,delta_dist_to_prev
0,Chaussée de Wavre,1160,1,2245A,438.897388
1,Chaussée d'Alsemberg,1180,1,373,310.923444
2,Rue Saint-Denis,1190,1,339,304.314987
3,Chaussée de Ninove,1080,1,975,287.156663
4,Chaussée de Ninove,1080,0,996,264.216071
...,...,...,...,...,...
8610,Place de la Gare,1082,1,5,1.000000
8611,Chemin des Oiseleurs,1180,0,86,1.000000
8612,Rue Henri-Joseph Genesse,1070,1,11,1.000000
8613,Chemin des Roses,1180,0,4,1.000000


In [429]:
# delta_dist_to_prev[delta_dist_to_prev.delta_dist_to_prev>1000]

## prev_to_prev2_ratio

In [430]:
# region_pars[region_pars.dist_to_prev>region_pars.dist_to_prev2+100]



In [431]:
region_pars["prev_to_prev2_ratio"] = (region_pars.dist_to_prev/region_pars[["dist_to_prev2"]].assign(m=10).max(axis=1,  skipna=False))#.replace(np.inf, pd.NA)
region_pars["prev_to_prev2_ratio"]

57             NaN
59             NaN
61        0.361841
219            NaN
220            NaN
            ...   
304004    1.000022
304005    0.000109
305388         NaN
305394         NaN
305391    0.215949
Name: prev_to_prev2_ratio, Length: 307774, dtype: float64

In [432]:
idx_max= region_pars.groupby(["streetname", "postcode", "parity"]).prev_to_prev2_ratio.idxmax()
prev_to_prev2_ratio = region_pars.loc[idx_max.dropna().values].sort_values("prev_to_prev2_ratio", ascending=False)
prev_to_prev2_ratio = prev_to_prev2_ratio[["streetname", "postcode", "parity", "prev_to_prev2_ratio", "house_number"]].reset_index(drop=True)
prev_to_prev2_ratio

Unnamed: 0,streetname,postcode,parity,prev_to_prev2_ratio,house_number
0,Rue Saint-Denis,1190,1,217.415350,341
1,Allée Verte,1000,0,51.557030,10
2,Boulevard de Waterloo,1000,0,28.874506,4
3,Boulevard Charlemagne,1000,0,26.012943,54
4,Chaussée de Waterloo,1180,0,17.092340,1264
...,...,...,...,...,...
8913,Place Arthur Van Gehuchten,1020,0,0.000027,4
8914,Rue de la Poterie,1070,0,0.000026,20
8915,Drève des Libellules,1170,0,0.000020,10
8916,Rue Hubert Van Eepoel,1090,1,0.000019,3


In [433]:
prev_to_prev2_ratio.iloc[0:60]

Unnamed: 0,streetname,postcode,parity,prev_to_prev2_ratio,house_number
0,Rue Saint-Denis,1190,1,217.41535,341
1,Allée Verte,1000,0,51.55703,10
2,Boulevard de Waterloo,1000,0,28.874506,4
3,Boulevard Charlemagne,1000,0,26.012943,54
4,Chaussée de Waterloo,1180,0,17.09234,1264
5,Boulevard Louis Mettewie,1080,1,15.779784,19
6,Rue du Chien Vert,1080,0,14.064775,8A
7,Chaussée de Neerstalle,1190,1,14.056923,263
8,Avenue des Nymphes,1170,1,11.68147,3
9,Rue Groeselenberg,1180,1,11.12965,85


In [434]:
region_pars.sort_values("prev_to_prev2_ratio", ascending=False)

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry,dist_to_prev,dist_to_prev2,is_new_bloc,parity,prev_to_prev2_ratio
206353,Rue Saint-Denis,341,341,1190,Forest,Forest,21007,[58600],POINT (481031.099 6589536.316),2174.153502,6.368716,False,1,217.415350
192,Allée Verte,10,10,1000,Bruxelles (Centre),Bruxelles,21004,[141130],POINT (484716.126 6597177.266),1082.529344,20.996736,False,0,51.557030
213,Allée Verte,14,14,1000,Bruxelles (Centre),Bruxelles,21004,"[988161, 988162, 988164, 988191, 988142, 98814...",POINT (484731.727 6597207.616),1525.833386,34.125450,False,0,44.712477
87760,Boulevard de Waterloo,4,4,1000,Bruxelles (Centre),Bruxelles,21004,[117796],POINT (485481.896 6592823.763),412.690035,14.292540,False,0,28.874506
81967,Boulevard Charlemagne,54,54,1000,Bruxelles (Centre),Bruxelles,21004,[11900],POINT (487802.824 6593900.231),260.129426,7.624899,False,0,26.012943
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302394,Rue du Vallon,3,3,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,[161478],POINT (486721.728 6594516.671),5.507407,,False,1,
303975,Square Armand Steurs,1,1,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[164403, 1215941, 1473924, 1473926, 1473928, 1...",POINT (487386.826 6595286.061),,,True,1,
304006,Square Armand Steurs,3,3,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[871258, 164401, 871257, 1473936]",POINT (487368.173 6595286.541),18.658738,,False,1,
305388,Square Victoria Régina,1,1,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,[5686],POINT (485717.967 6595810.985),,,True,1,


## Delta ratio

In [435]:
# To avoid to compare all pairs of addresses in all street, we only consider streets with a sinuosity above 1.5 
# (assuming that "straight streets" won't have high delta ratio)

region_sel = region.merge(sinuosity[sinuosity.sinuosity>1.5][["streetname", "postcode"]])
region_sel

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry
0,Abbaye de la Cambre,13,13,1000,Bruxelles (Centre),Bruxelles,21004,[194549],POINT (486956.474 6589242.508)
1,Abbaye de la Cambre,15,15,1000,Bruxelles (Centre),Bruxelles,21004,[194550],POINT (486891.058 6589269.902)
2,Abbaye de la Cambre,16,16,1000,Bruxelles (Centre),Bruxelles,21004,[1754],POINT (486867.212 6589332.361)
3,Abbaye de la Cambre,17,17,1000,Bruxelles (Centre),Bruxelles,21004,[22582],POINT (486870.703 6589353.125)
4,Abbaye de la Cambre,18,18,1000,Bruxelles (Centre),Bruxelles,21004,[29274],POINT (486882.079 6589369.170)
...,...,...,...,...,...,...,...,...,...
51683,Square Armand Steurs,28,28,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[1216826, 1216825, 1216827, 142132, 1474192, 1...",POINT (487447.429 6595343.020)
51684,Square Armand Steurs,29,29,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[142131, 1474206, 1474208]",POINT (487454.476 6595331.529)
51685,Square Armand Steurs,29,29,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[142131, 1474206, 1474208]",POINT (487454.476 6595331.529)
51686,Square Armand Steurs,29,29,1210,Saint-Josse-ten-Noode,Saint-Josse-ten-Noode,21014,"[871761, 871760, 871762]",POINT (487454.479 6595331.530)


In [436]:
delta_par = []
for parity in [0, 1]:
    dlt_par= region_sel[(region_sel.house_number_num.mod(2)==parity)].groupby(["streetname", "postcode"]).progress_apply(get_max_delta_ratio)#.sort_values(na_pos="first")
    dlt_par = dlt_par.apply(pd.Series).rename(columns={0: "delta_ratio", 1: "house_number" })#.reset_index()
    dlt_par = dlt_par.sort_values("delta_ratio", na_position="first", ascending=False)#.reset_index()
    dlt_par["parity"]=parity
    display(dlt_par)
    delta_par.append(dlt_par)
delta_ratio= pd.concat(delta_par)

delta_ratio = delta_ratio.sort_values("delta_ratio", ascending=False).reset_index()
delta_ratio

100%|██████████| 716/716 [00:08<00:00, 86.15it/s] 


Unnamed: 0_level_0,Unnamed: 1_level_0,delta_ratio,house_number,parity
streetname,postcode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Rue du Biplan,1130,70612.403316,16 -> 128,0
Avenue Guillaume De Greef,1090,43747.154072,400 -> 498,0
Parc Jean Monnet,1082,51.861204,6072 -> 7084,0
Rue Henri Maubel,1190,21.193701,2 -> 158,0
Cour Saint-Lazare,1080,12.385736,2 -> 36,0
...,...,...,...,...
Avenue du Chèvrefeuille,1200,0.000000,-,0
Rue Jean Tiebackx,1090,0.000000,-,0
Avenue du Cerf-Volant,1170,0.000000,-,0
Clos des Quatre-Saisons,1200,0.000000,-,0


100%|██████████| 721/721 [00:08<00:00, 85.57it/s] 


Unnamed: 0_level_0,Unnamed: 1_level_0,delta_ratio,house_number,parity
streetname,postcode,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Avenue Guillaume De Greef,1090,43747.154072,401 -> 499,1
Chaussée d'Alsemberg,1180,85.618556,181 -> 883,1
Parc Jean Monnet,1082,51.861204,6071 -> 7083,1
Rue Louis De Gunst,1080,29.462654,9 -> 59,1
Galerie d'Ixelles,1050,11.782070,17 -> 63,1
...,...,...,...,...
Place Houwaert,1210,0.000000,-,1
Place Henri Conscience,1050,0.000000,-,1
Place Guy d'Arezzo,1180,0.000000,-,1
Place François Bossuet,1210,0.000000,-,1


Unnamed: 0,streetname,postcode,delta_ratio,house_number,parity
0,Rue du Biplan,1130,70612.403316,16 -> 128,0
1,Avenue Guillaume De Greef,1090,43747.154072,401 -> 499,1
2,Avenue Guillaume De Greef,1090,43747.154072,400 -> 498,0
3,Chaussée d'Alsemberg,1180,85.618556,181 -> 883,1
4,Parc Jean Monnet,1082,51.861204,6071 -> 7083,1
...,...,...,...,...,...
1432,Drève des Rhododendrons,1170,0.000000,-,0
1433,Avenue du Sippelberg,1080,0.000000,-,0
1434,Rue du Chien Vert,1080,0.000000,-,0
1435,Clos A.J. Slegers,1200,0.000000,-,0


In [437]:
# delta_ratio

## Consolidate

In [438]:
metrics = {
    "dist_to_prev" :      dist_to_prev,
    "delta_dist_to_prev": delta_dist_to_prev,
    "sinuosity":          sinuosity,
    "sw_sinuosity":       sw_sinuosity,
    "length":             length,
    "delta_ratio":        delta_ratio,
    "prev_to_prev2_ratio":   prev_to_prev2_ratio
}

In [439]:
delta_ratio

Unnamed: 0,streetname,postcode,delta_ratio,house_number,parity
0,Rue du Biplan,1130,70612.403316,16 -> 128,0
1,Avenue Guillaume De Greef,1090,43747.154072,401 -> 499,1
2,Avenue Guillaume De Greef,1090,43747.154072,400 -> 498,0
3,Chaussée d'Alsemberg,1180,85.618556,181 -> 883,1
4,Parc Jean Monnet,1082,51.861204,6071 -> 7083,1
...,...,...,...,...,...
1432,Drève des Rhododendrons,1170,0.000000,-,0
1433,Avenue du Sippelberg,1080,0.000000,-,0
1434,Rue du Chien Vert,1080,0.000000,-,0
1435,Clos A.J. Slegers,1200,0.000000,-,0


In [440]:
with open(f"{data_dir}/metrics_{case_name}.pkl", "wb") as pkl:
    pickle.dump(metrics, pkl, pickle.HIGHEST_PROTOCOL)

region.to_pickle(f"{data_dir}/data_{case_name}.pkl")

In [441]:
# Start from here to plot without recomputing all metrics
with open(f"{data_dir}/metrics_{case_name}.pkl", "rb") as pkl:
    metrics = pickle.load(pkl)#, pickle.HIGHEST_PROTOCOL)
region = pd.read_pickle(f"{data_dir}/data_{case_name}.pkl")

In [442]:
# region_name

In [443]:
for m in metrics:
    metrics[m] = metrics[m].reset_index(drop=True).copy()
#     metrics[m]["f"{m}_topn""]= False
#     metrics[m].loc[metrics[m].shape[0]-topn::, f"{m}_topn"]=True
    metrics[m][f"{m}_ranking"] = metrics[m].index+1
    if "house_number" in metrics[m]:
        metrics[m] = metrics[m].rename(columns={"house_number": f"{m}_house_number"})
        
    
#     dist_to_prev

In [444]:
from functools import reduce
glob_metrics = reduce((lambda x, y: x.merge(y, how="outer")), metrics.values())

In [445]:
glob_metrics


Unnamed: 0,streetname,postcode,parity,dist_to_prev,dist_to_prev_house_number,house_number_num,dist_to_prev_ranking,delta_dist_to_prev_house_number,delta_dist_to_prev,delta_dist_to_prev_ranking,...,sw_sinuosity,sw_sinuosity_ranking,length,length_ranking,delta_ratio,delta_ratio_house_number,delta_ratio_ranking,prev_to_prev2_ratio,prev_to_prev2_ratio_house_number,prev_to_prev2_ratio_ranking
0,'t Hof te Overbeke,1082,0,82.671768,22,22.0,2956.0,22,3.925774,4629.0,...,1.465682,502.0,702.69563,1575,0.526344,6 -> 34,263.0,0.906182,44,7187.0
1,'t Hof te Overbeke,1082,1,98.704650,45,45.0,2341.0,45,4.467949,4269.0,...,1.483233,480.0,788.742494,1337,0.454729,5 -> 35,327.0,1.651855,45,746.0
2,Abbaye de la Cambre,1000,0,39.697858,18,18.0,5783.0,18,1.305228,7716.0,...,1.041611,2789.0,60.828992,8324,0.000000,-,1192.0,0.361841,20,8841.0
3,Abbaye de la Cambre,1000,1,278.376788,21,21.0,469.0,21,3.249155,5129.0,...,3.139519,51.0,434.973059,2912,0.000000,-,1042.0,1.380346,21,1006.0
4,Abbaye de la Cambre,1050,0,93.109887,6,6.0,2541.0,6,1.425775,7486.0,...,1.0121,3908.0,130.609525,6964,0.000000,-,1371.0,0.721513,6,8019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9594,Vieille rue du Moulin,1180,1,332.481110,99,99.0,345.0,99,38.043000,169.0,...,1.610427,351.0,3639.214105,67,2.344276,199 -> 223,45.0,4.284095,99,112.0
9595,Vieux Chemin,1180,0,91.358538,66,66.0,2598.0,66,2.928445,5413.0,...,1.43796,538.0,673.091684,1686,0.380454,16 -> 50,400.0,0.999983,66,5926.0
9596,Vieux Chemin,1180,1,38.862762,5,5.0,5860.0,5,1.253532,7811.0,...,1.003019,5337.0,98.152487,7676,0.000000,-,1437.0,0.556855,5,8516.0
9597,Windmolenberg,1200,0,19.563378,10,10.0,7643.0,10,2.032586,6442.0,...,1.015664,3666.0,105.082753,7534,,,,0.665375,10,8222.0


In [446]:
glob_metrics[glob_metrics.streetname=="Chaussée de Wavre"]

Unnamed: 0,streetname,postcode,parity,dist_to_prev,dist_to_prev_house_number,house_number_num,dist_to_prev_ranking,delta_dist_to_prev_house_number,delta_dist_to_prev,delta_dist_to_prev_ranking,...,sw_sinuosity,sw_sinuosity_ranking,length,length_ranking,delta_ratio,delta_ratio_house_number,delta_ratio_ranking,prev_to_prev2_ratio,prev_to_prev2_ratio_house_number,prev_to_prev2_ratio_ranking
2675,Chaussée de Wavre,1040,0,304.43002,950,950.0,406.0,950,41.348127,150.0,...,1.046659,2680.0,3007.026997,101,,,,1.0004,350,2178.0
2676,Chaussée de Wavre,1040,1,439.817253,455,455.0,216.0,455,61.085835,65.0,...,1.064638,2347.0,2390.418122,168,,,,1.015597,637,1670.0
2677,Chaussée de Wavre,1050,0,361.511463,270,270.0,302.0,270,46.498771,117.0,...,1.179988,1265.0,2127.652978,227,,,,1.444294,54,932.0
2678,Chaussée de Wavre,1050,1,100.283701,207,207.0,2290.0,207,12.483006,1224.0,...,1.061053,2417.0,2052.126419,247,,,,1.640642,67A,755.0
2679,Chaussée de Wavre,1160,0,552.557611,1854,1854.0,148.0,1854,66.267011,54.0,...,1.177878,1282.0,6311.361974,12,,,,3.107078,1134,211.0
2680,Chaussée de Wavre,1160,1,3810.133107,2245A,2245.0,1.0,2245A,438.897388,1.0,...,1.140525,1542.0,12463.543721,1,,,,6.166852,1235,43.0


## Show

In [447]:
osm_crs=  'epsg:4326'

In [448]:
# region

In [449]:
k = 0
metric_name = "dist_to_prev"
metric_name = "sinuosity"
metric_name = "delta_ratio"

metric = metrics[metric_name]
print(metric.iloc[k])
street_bloc = get_street_bloc(region, 
                              metric.iloc[k].streetname, 
                              metric.iloc[k].postcode,
                              metric.iloc[k].parity)
    
street_bloc

streetname                  Rue du Biplan
postcode                             1130
delta_ratio                  70612.403316
delta_ratio_house_number        16 -> 128
parity                                  0
delta_ratio_ranking                     1
Name: 0, dtype: object


Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry
283452,Rue du Biplan,16,16,1130,Bruxelles (Haren),Bruxelles,21004,[1006150],POINT (490806.506 6600604.282)
283463,Rue du Biplan,18,18,1130,Bruxelles (Haren),Bruxelles,21004,[1009961],POINT (490919.323 6600973.931)
283467,Rue du Biplan,20,20,1130,Bruxelles (Haren),Bruxelles,21004,[1016075],POINT (490921.537 6600964.133)
283470,Rue du Biplan,28,28,1130,Bruxelles (Haren),Bruxelles,21004,[1013415],POINT (490925.063 6600915.597)
283472,Rue du Biplan,30,30,1130,Bruxelles (Haren),Bruxelles,21004,[1601941],POINT (490925.465 6600907.266)
283473,Rue du Biplan,32,32,1130,Bruxelles (Haren),Bruxelles,21004,[1010100],POINT (490926.161 6600894.446)
283433,Rue du Biplan,102,102,1130,Bruxelles (Haren),Bruxelles,21004,[101244],POINT (490869.779 6600513.526)
283434,Rue du Biplan,126,126,1130,Bruxelles (Haren),Bruxelles,21004,[82071],POINT (490863.416 6600733.013)
283435,Rue du Biplan,128,128,1130,Bruxelles (Haren),Bruxelles,21004,[82552],POINT (490806.506 6600604.281)
283437,Rue du Biplan,130,130,1130,Bruxelles (Haren),Bruxelles,21004,"[83314, 1557728]",POINT (490801.875 6600594.519)


In [450]:
street_bloc = get_street_bloc(region, 
                  "Bredabaan", #"Rue François Michoel", 
                  "2990", #"4845",
                  1)
street_bloc

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry


In [451]:
# plot_street_bloc(street_bloc, "title")

In [452]:
street_bloc

Unnamed: 0,streetname,house_number,house_number_num,postcode,postname,municipality,municipality_id,address_id,geometry


In [453]:
plot_street_bloc_plotly(street_bloc)
None

In [454]:
# street_bloc.explore()

## PDF

In [455]:
# make_table(street_bloc)

In [456]:
metric

Unnamed: 0,streetname,postcode,delta_ratio,delta_ratio_house_number,parity,delta_ratio_ranking
0,Rue du Biplan,1130,70612.403316,16 -> 128,0,1
1,Avenue Guillaume De Greef,1090,43747.154072,401 -> 499,1,2
2,Avenue Guillaume De Greef,1090,43747.154072,400 -> 498,0,3
3,Chaussée d'Alsemberg,1180,85.618556,181 -> 883,1,4
4,Parc Jean Monnet,1082,51.861204,6071 -> 7083,1,5
...,...,...,...,...,...,...
1432,Drève des Rhododendrons,1170,0.000000,-,0,1433
1433,Avenue du Sippelberg,1080,0.000000,-,0,1434
1434,Rue du Chien Vert,1080,0.000000,-,0,1435
1435,Clos A.J. Slegers,1200,0.000000,-,0,1436


In [457]:
thresholds = {
    "dist_to_prev": 1000,
    "length":5000,
    "delta_ratio": 1.0,
    "delta_dist_to_prev":50,
    "sinuosity": 10,
    "sw_sinuosity": 10,
    "prev_to_prev2_ratio": 10
}

In [458]:
metrics.keys()
# region

dict_keys(['dist_to_prev', 'delta_dist_to_prev', 'sinuosity', 'sw_sinuosity', 'length', 'delta_ratio', 'prev_to_prev2_ratio'])

In [459]:
for metric_name in metrics:
# for metric_name in ['sinuosity']:
    metric=metrics[metric_name]
    print(metric_name)
    pdf = PdfPages(f"{output_dir}/best_anomalies_{case_name}_{metric_name}.pdf")
    
    if metric_name in thresholds:
        metric = metric[metric[metric_name]>thresholds[metric_name]]
    for i, metr in tqdm(metric.iloc[0:topn].iterrows(), total=min(topn, metric.shape[0])):# in trange(1, min(51, metric.shape[0])):

        
        street_bloc = get_street_bloc(region, 
                                      metr.streetname, 
                                      metr.postcode,
                                      metr.parity)
        sb = street_bloc.iloc[0]
        title = f"{sb.streetname} - {sb.postcode}/{sb.municipality_id} - {sb.postname} - {sb.municipality} - parity: {metr['parity']} "
        title += f"\n{metric_name}: {metr[metric_name]:.2f}"
#         title = street_bloc.iloc[0]["streetname"]+ " - " + street_bloc.iloc[0]["postcode"] + \
#                 " - " + street_bloc.iloc[0]["postname"] + " - " + street_bloc.iloc[0]["municipality"]+" - parity: "+str(metr["parity"])+"\n"+\
#                 f"{metric_name}: {metr[metric_name]:.2f}"
        if f"{metric_name}_house_number" in metr:
            title += f" (hn: {metr[f'{metric_name}_house_number']})"

        make_table(street_bloc, title)
        pdf.savefig(bbox_inches='tight')
        plt.close()
        plot_street_bloc(street_bloc, title)
        pdf.savefig(bbox_inches='tight')
        plt.close()
    pdf.close()

dist_to_prev


  0%|          | 0/42 [00:00<?, ?it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4195.png


  5%|▍         | 2/42 [00:04<01:15,  1.89s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4193.png


100%|██████████| 42/42 [01:15<00:00,  1.81s/it]


delta_dist_to_prev


  0%|          | 0/50 [00:00<?, ?it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4195.png


  2%|▏         | 1/50 [00:03<02:38,  3.23s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4193.png


 54%|█████▍    | 27/50 [00:44<00:37,  1.63s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4194.png


 70%|███████   | 35/50 [00:58<00:23,  1.58s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4194.png


 86%|████████▌ | 43/50 [01:14<00:11,  1.66s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4193.png


100%|██████████| 50/50 [01:29<00:00,  1.79s/it]


sinuosity


100%|██████████| 50/50 [01:28<00:00,  1.77s/it]


sw_sinuosity


100%|██████████| 3/3 [00:03<00:00,  1.14s/it]


length


  0%|          | 0/24 [00:00<?, ?it/s]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4195.png


  4%|▍         | 1/24 [00:03<01:27,  3.82s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4194.png


  8%|▊         | 2/24 [00:07<01:19,  3.63s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4193.png


 12%|█▎        | 3/24 [00:11<01:21,  3.86s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4193.png


 17%|█▋        | 4/24 [00:15<01:16,  3.83s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4193.png


 25%|██▌       | 6/24 [00:20<00:54,  3.05s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4194.png


 29%|██▉       | 7/24 [00:23<00:50,  2.99s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4193.png


100%|██████████| 24/24 [00:59<00:00,  2.49s/it]


delta_ratio


  6%|▌         | 3/50 [00:10<02:38,  3.37s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4193.png


100%|██████████| 50/50 [01:26<00:00,  1.73s/it]


prev_to_prev2_ratio


 33%|███▎      | 4/12 [00:07<00:14,  1.78s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4194.png


100%|██████████| 12/12 [00:23<00:00,  1.95s/it]


### Consolidated pdf

In [460]:
# glob_metrics[(glob_metrics[[f"{m}_ranking" for m in metrics]]  < topn).fillna(False).any(axis=1)]


In [461]:
glob_metrics_topn = glob_metrics[(glob_metrics[[f"{m}_ranking" for m in metrics]]  < topn).fillna(False).any(axis=1)]

glob_metrics_topn = glob_metrics_topn[pd.concat([glob_metrics_topn[m] > thresholds[m] for m in metrics], axis=1).any(axis=1)]


glob_metrics_topn = glob_metrics_topn.sort_values("dist_to_prev", ascending=False)
glob_metrics_topn

Unnamed: 0,streetname,postcode,parity,dist_to_prev,dist_to_prev_house_number,house_number_num,dist_to_prev_ranking,delta_dist_to_prev_house_number,delta_dist_to_prev,delta_dist_to_prev_ranking,...,sw_sinuosity,sw_sinuosity_ranking,length,length_ranking,delta_ratio,delta_ratio_house_number,delta_ratio_ranking,prev_to_prev2_ratio,prev_to_prev2_ratio_house_number,prev_to_prev2_ratio_ranking
2680,Chaussée de Wavre,1160,1,3810.133107,2245A,2245.0,1.0,2245A,438.897388,1.0,...,1.140525,1542.0,12463.543721,1,,,,6.166852,1235,43.0
2661,Chaussée de Vilvorde,1120,1,3597.745954,233,233.0,2.0,233,16.844137,715.0,...,1.028317,3152.0,6058.600614,14,,,,0.956507,233,6930.0
2568,Chaussée d'Alsemberg,1180,1,2733.893721,373,373.0,3.0,373,310.923444,2.0,...,1.082466,2074.0,10309.421684,4,85.618556,181 -> 883,4.0,2.094894,473,471.0
2644,Chaussée de Ninove,1080,1,2688.683025,975,975.0,4.0,975,287.156663,4.0,...,1.144392,1520.0,6305.956422,13,,,,4.359290,137,108.0
2643,Chaussée de Ninove,1080,0,2641.308532,996,996.0,5.0,996,264.216071,5.0,...,1.100168,1883.0,5775.192821,18,,,,2.088261,1030,476.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2540,Carré Pauwels,1180,1,41.208382,1A,1.0,5643.0,1A,7.418196,2684.0,...,1.361798,662.0,165.219331,6269,2.247335,1 -> 13,48.0,2.179677,3,437.0
7661,Rue de la Fleur d'Oranger,1150,0,35.485300,10,10.0,6121.0,,,,...,1.325938,742.0,340.002503,3741,3.816814,18 -> 36,27.0,1.000489,4,2006.0
2884,Clos de la Carène,1200,1,32.991502,11,11.0,6323.0,11,1.380393,7569.0,...,3.369466,40.0,163.24396,6307,0.000000,-,932.0,1.408350,11,973.0
2883,Clos de la Carène,1200,0,29.058059,12,12.0,6658.0,12,1.248461,7820.0,...,2.890712,70.0,162.039378,6329,0.000000,-,1363.0,0.999744,12,6654.0


In [462]:
glob_metrics_topn["global_ranking"] = glob_metrics_topn[glob_metrics_topn.columns[glob_metrics_topn.columns.str.endswith("_ranking")]].fillna(100).apply(np.log).sum(axis=1)

In [463]:
glob_metrics_topn = glob_metrics_topn.sort_values("global_ranking")
glob_metrics_topn

Unnamed: 0,streetname,postcode,parity,dist_to_prev,dist_to_prev_house_number,house_number_num,dist_to_prev_ranking,delta_dist_to_prev_house_number,delta_dist_to_prev,delta_dist_to_prev_ranking,...,sw_sinuosity_ranking,length,length_ranking,delta_ratio,delta_ratio_house_number,delta_ratio_ranking,prev_to_prev2_ratio,prev_to_prev2_ratio_house_number,prev_to_prev2_ratio_ranking,global_ranking
6288,Rue Saint-Denis,1190,1,2175.233505,339,339.0,8.0,339,304.314987,3.0,...,9.0,6809.701988,9,0.437076,105 -> 117,345.0,217.415350,341,1.0,19.446733
21,Allée Verte,1000,0,1559.894261,12,12.0,20.0,12,86.545267,33.0,...,26.0,8824.051599,6,1.176004,12 -> 116,93.0,51.557030,10,2.0,22.152338
2680,Chaussée de Wavre,1160,1,3810.133107,2245A,2245.0,1.0,2245A,438.897388,1.0,...,1542.0,12463.543721,1,,,,6.166852,1235,43.0,23.361175
2568,Chaussée d'Alsemberg,1180,1,2733.893721,373,373.0,3.0,373,310.923444,2.0,...,2074.0,10309.421684,4,85.618556,181 -> 883,4.0,2.094894,473,471.0,24.431787
2671,Chaussée de Waterloo,1180,0,858.424441,778,778.0,62.0,778,94.816999,28.0,...,767.0,8751.43638,7,,,,17.092340,1264,5.0,29.380360
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3741,Place de la Duchesse de Brabant,1080,0,85.309182,26,26.0,2842.0,26,5.126182,3886.0,...,2320.0,615.159339,1916,0.675572,2 -> 40,199.0,1.000134,14,4042.0,49.014336
3626,Place Rouppe,1000,0,68.497502,26,26.0,3716.0,26,2.457382,5887.0,...,1729.0,504.855884,2449,0.820037,2 -> 32,151.0,1.000020,10,5514.0,49.193161
2883,Clos de la Carène,1200,0,29.058059,12,12.0,6658.0,12,1.248461,7820.0,...,70.0,162.039378,6329,0.000000,-,1363.0,0.999744,12,6654.0,49.428881
3478,Place Communale,1080,1,66.932083,17,17.0,3821.0,17,3.483835,4942.0,...,1461.0,412.568113,3089,0.802292,1 -> 31,158.0,0.999915,25,6350.0,49.766278


In [464]:
pdf = PdfPages(f"{output_dir}/best_anomalies_{case_name}_consolidated.pdf")

for i, metr in tqdm(glob_metrics_topn.iterrows(), total=glob_metrics_topn.shape[0]):# in trange(1, min(51, metric.shape[0])):
    street_bloc = get_street_bloc(region, 
                                  metr.streetname, 
                                  metr.postcode,
                                  metr.parity)
    
    sb = street_bloc.iloc[0]
    title = f"{sb.streetname} - {sb.postcode}/{sb.municipality_id} - {sb.postname} - {sb.municipality} - parity: {metr['parity']} "

    make_table(street_bloc, title)
    pdf.savefig(bbox_inches='tight')
    plt.close()

    fig, ax = plt.subplots(nrows=2, figsize=(10,10), gridspec_kw={'height_ratios':[0.85, 0.15]})
    plot_street_bloc(street_bloc, title, ax=ax[0])
    make_metric_table(metr, metrics, ax=ax[1])

    pdf.savefig(bbox_inches='tight')
    plt.close()
pdf.close()

  1%|          | 2/166 [00:03<05:22,  1.97s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4195.png


  2%|▏         | 3/166 [00:08<07:56,  2.92s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4193.png


  2%|▏         | 4/166 [00:12<09:03,  3.36s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4194.png


  4%|▍         | 7/166 [00:20<07:40,  2.89s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4194.png


  9%|▉         | 15/166 [00:39<05:33,  2.21s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4193.png


 19%|█▊        | 31/166 [01:10<04:27,  1.98s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2748/4193.png


 61%|██████▏   | 102/166 [03:10<01:29,  1.40s/it]

HTTP error:  Tile URL resulted in a 404 error. Double-check your tile url:
https://cartoweb.wmts.ngi.be/1.0.0/overlay/default/3857/13/2749/4193.png


100%|██████████| 166/166 [04:37<00:00,  1.67s/it]
