In [1]:
import os

import urllib

import pandas as pd
import numpy as np

import json

from tqdm.autonotebook import tqdm

#%matplotlib inline

tqdm.pandas()

import dask.dataframe as dd

# from dask.multiprocessing import get
from dask.diagnostics import ProgressBar

from datetime import datetime
import matplotlib.pyplot as plt

from IPython.display import display
import requests

try: 
    import plotly
    pd.options.plotting.backend = "plotly"

except ModuleNotFoundError: 
    print("Plotly, limiting plotting quality")

  from tqdm.autonotebook import tqdm


In [2]:
import sys
pd.__version__
sys.version

'3.12.9 | packaged by conda-forge | (main, Mar  4 2025, 22:48:41) [GCC 13.3.0]'

# Functions

In [3]:
ws_hostname = "172.27.0.64:4001" # bePelias hostname:port
# ws_hostname = "172.27.0.64:8008" # bePelias hostname:port

street_field  =  "streetName"
housenbr_field = "houseNumber"
postcode_field = "postCode"
city_field  =    "postName"

filename = "data/data.csv" # A csv file with as header "streetName,houseNumber,postCode,postName"
# filename = "../../GISAnalytics/data/geocoding/kbo_10000.csv.gz"

from_file = False  # If try, will not call bePelias, but read results from file

In [4]:
folder = filename.split(".", 1)[0]
folder

'data/data'

In [5]:
os.makedirs(folder, exist_ok=True)

In [6]:
def call_ws(addr_data, mode="advanced"):
    t = datetime.now()
    
    if isinstance(addr_data, pd.Series):
        addr_data = addr_data.to_dict()
        
    
    addr_data["mode"]=mode       
    addr_data["withPeliasResult"]=False
    try: 
        r = requests.get(
        f'http://{ws_hostname}/REST/bepelias/v1/geocode',
            params=addr_data,
            # fields = addr_data
            )
        
        # print(r.url)

    except Exception as e:
        print("Exception !")
        print(addr_data)
        print(e)
        raise e
        
    if r.status_code == 204:
        print("No result!")
        print(addr_data)
        print(r.text)
        return
    elif r.status_code == 400:
        print("Argument error")
        print(r.text)
    elif r.status_code == 200:
        try:
            res = json.loads(r.text)
            res["time"] = (datetime.now() - t).total_seconds()
        except ValueError as ve:

            print("Cannot decode result:")
            print(ve)
            print(r.text)
            return r.text
        return res
    else: 
        print(f"Unknown return code: {r.status_code} ")
        print(r.text)



In [7]:
def call_unstruct_ws(address, mode="advanced"):
    t = datetime.now()
    
    addr_data = {"address": address}
    
        
    
    addr_data["mode"]=mode       
    addr_data["withPeliasResult"]=False
    try: 
        r = requests.get(
        f'http://{ws_hostname}/REST/bepelias/v1/geocode/unstructured',
            params=addr_data)
        

    except Exception as e:
        print("Exception !")
        print(addr_data)
        print(e)
        raise e
        
    if r.status_code == 204:
        print("No result!")
        print(addr_data)
        print(r.text)
        return
    elif r.status_code == 400:
        print("Argument error")
        print(r.text)
    elif r.status_code == 200:
        try:
            res = json.loads(r.text)
            res["time"] = (datetime.now() - t).total_seconds()
        except ValueError as ve:

            print("Cannot decode result:")
            print(ve)
            print(r.text)
            return r.text
        return res
    else: 
        print(f"Unknown return code: {r.status_code} ")
        print(r.text)



In [8]:
def call_ws_by_id(best_id): 
    t = datetime.now()
    
    if best_id is None:
        return None
       
    try: 
        url = f'http://{ws_hostname}/REST/bepelias/v1/id/{urllib.parse.quote_plus(urllib.parse.quote_plus(best_id))}'
        r = requests.get(url)
        

    except Exception as e:
        print("Exception !")
        print(best_id)
        print(e)
        raise e
        
    if r.status_code == 204:
        # print("No result!")
        # print(addr_data)
        # print(r.text)
        return
    elif r.status_code == 400:
        print("Argument error")
        print(r.text)
    elif r.status_code == 200:
        try:
            res = json.loads(r.text)
        except ValueError as ve:

            print("Cannot decode result:")
            print(ve)
            print(r.text)
            return r.text
        except AttributeError as ae:
            print(ae)
            print(type(r.text))
            print(r.text)
        return res
    else: 
        print(f"Unknown return code: {r.status_code} ")
        print(r.text)



In [9]:
def call_ws_search_city(postcode=None, postname=None):
    t = datetime.now()
    
    data = {"postCode": postcode,
           "cityName": postname,
           "raw":True
           }
    
    try: 
        r = requests.get(
        f'http://{ws_hostname}/REST/bepelias/v1/searchCity',
            params=data)

    except Exception as e:
        print("Exception !")
        print(e)
        raise e
        
    if r.status_code == 204:
        # print("No result!")
        # print(addr_data)
        # print(r.text)
        return
    elif r.status_code == 400:
        print("Argument error")
        print(r.text)
    elif r.status_code == 200:
        try:
            res = json.loads(r.text)
            # res["time"] = (datetime.now() - t).total_seconds()
        except ValueError as ve:

            print("Cannot decode result:")
            print(ve)
            print(r.text)
            return r.text
        except AttributeError as ae:
            print(ae)
            print(type(r.text))
            print(r.text)
        return res
    else: 
        print(f"Unknown return code: {r.status_code} ")
        print(r.text)



In [10]:
def call_ws_reverse(lat=None, lon=None): #lg = "en,fr,nl"
    t = datetime.now()
    
    data = {"lat": lat,
           "lon": lon,
           "radius":0.01,
           "size":1
           }
    
    try: 
        r = requests.get(
        f'http://{ws_hostname}/REST/bepelias/v1/reverse',
            params=data)

    except Exception as e:
        print("Exception !")
        print(e)
        raise e
        
    if r.status_code == 204:
        # print("No result!")
        # print(addr_data)
        # print(r.text)
        return
    elif r.status_code == 400:
        print("Argument error")
        print(r.text)
    elif r.status_code == 422:
        print(f"Unprocessable entity: {r.text}")
        print(r.text)
    elif r.status_code == 200:
        try:
            res = json.loads(r.text)
        except ValueError as ve:

            print("Cannot decode result:")
            print(ve)
            print(r.text)
            return r.text
        except AttributeError as ae:
            print(ae)
            print(type(r.text))
            print(r.text)
        return res
    else: 
        print(f"Unknown return code: {r.status_code} ")
        print(r.text)



In [11]:
def get(dct, keys):
    for k in keys:
        try: 
            if  dct is None:
                return None
                # print(init_dct)

            dct = dct[k]
        except KeyError :
            return None
        except IndexError:
            return None
    return dct

In [12]:
def get_best_id(json_col):
    return json_col.apply(lambda r: get(r, ["items", 0, "bestId"]) or\
                                    get(r, ["items", 0, "street", "id"]) or\
                                    get(r, ["items", 0, "municipality", "id"]))

def get_best_addr(json_col):
    return json_col.apply(lambda r: f'{get(r, ["items", 0, "street", "name"])}, {get(r, ["items", 0, "housenumber"])}, {get(r, ["items", 0, "postalInfo", "postalCode"])}')

def get_precision(json_col):
    return json_col.apply(lambda r: get(r, ["items", 0, "precision"])).fillna("[no result]")

def get_pelias_call_count(json_col):
    return json_col.apply(lambda r: get(r, ["peliasCallCount"]))

# Calls

## Single address calls

In [13]:
res=call_ws({
        street_field:   "Avenue Fonsny", \
         housenbr_field: "20",\
         city_field:     "Saint-Gilles",\
         postcode_field: "1060"})
res

{'items': [{'bestId': 'https://databrussels.be/id/address/219307/1',
   'street': {'name': {'fr': 'Avenue Fonsny', 'nl': 'Fonsnylaan'},
    'id': 'https://databrussels.be/id/streetname/4921/1'},
   'municipality': {'name': {'fr': 'Saint-Gilles', 'nl': 'Sint-Gillis'},
    'code': '21013',
    'id': 'https://databrussels.be/id/municipality/21013/1'},
   'postalInfo': {'postalCode': '1060'},
   'housenumber': '20',
   'status': 'current',
   'coordinates': {'lat': 50.83583, 'lon': 4.33845},
   'precision': 'address'}],
 'total': 1,
 'callType': 'struct',
 'inAddr': {'address': 'Avenue Fonsny, 20',
  'locality': '',
  'postalcode': '1060'},
 'peliasCallCount': 3,
 'transformers': 'clean;no_city',
 'self': 'http://172.27.0.64:4001/REST/bepelias/v1/geocode?streetName=Avenue+Fonsny&houseNumber=20&postName=Saint-Gilles&postCode=1060&mode=advanced&withPeliasResult=False',
 'time': 1.004385}

## Batch calls (row by row)

In [14]:
addresses = pd.read_csv(filename)
addresses

Unnamed: 0,streetName,houseNumber,postCode,postName
0,Dendermondsesteenweg,200,2830,Willebroek
1,Boulevard du Régent,47,1000,Bruxelles
2,Hauwaart,44,9700,Oudenaarde
3,Bruynebosstraat,40,3511,Hasselt
4,Steenwinkelstraat,583,2627,Schelle
...,...,...,...,...
995,Route de Marche,43,6940,Durbuy
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw
997,Valleilaan,83,3290,Diest
998,Venusstraat,2,2000,Antwerpen


In [15]:
# addresses = addresses.sample(10000)
# import re
# re.match( r'^(https://)?[a-z.]+/id/[a-zA-Z]+/\d{4,8}/(\d{1,3}|[0-9\-T\:\+]+)$', "https://data.vlaanderen.be/id/gemeente/41081/2002-08-13T16:33:18+02:00")

In [16]:
iter_per_sec_stats={}

In [17]:
# call_ws(addresses.iloc[999])


### Simple way

In [18]:
if from_file:
    addresses_seq = pd.read_pickle(f"{folder}/sequential.pkl")   
else:
    addresses_seq = addresses.copy()

    t = datetime.now()
    addresses_seq["json"] = addresses_seq[[street_field, housenbr_field, postcode_field, city_field]].fillna("").progress_apply(call_ws,
                                                                    mode="advanced", axis=1)
    tot_time = (datetime.now() - t).total_seconds()

    ips=addresses_seq.shape[0]/tot_time
    iter_per_sec_stats["sequential"] = ips

    print(f"{tot_time:.2f} seconds, {ips:.2f} it/s, {ips*3600:.0f} it/h")   
    addresses_seq.to_pickle(f"{folder}/sequential.pkl")

addresses_seq

100%|██████████| 1000/1000 [02:03<00:00,  8.07it/s]

123.96 seconds, 8.07 it/s, 29041 it/h





Unnamed: 0,streetName,houseNumber,postCode,postName,json
0,Dendermondsesteenweg,200,2830,Willebroek,{'items': [{'bestId': 'https://data.vlaanderen...
1,Boulevard du Régent,47,1000,Bruxelles,{'items': [{'bestId': 'https://databrussels.be...
2,Hauwaart,44,9700,Oudenaarde,{'items': [{'bestId': 'https://data.vlaanderen...
3,Bruynebosstraat,40,3511,Hasselt,{'items': [{'bestId': 'https://data.vlaanderen...
4,Steenwinkelstraat,583,2627,Schelle,{'items': [{'bestId': 'https://data.vlaanderen...
...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,{'items': [{'bestId': 'geodata.wallonie.be/id/...
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,{'items': [{'bestId': 'https://data.vlaanderen...
997,Valleilaan,83,3290,Diest,{'items': [{'bestId': 'https://data.vlaanderen...
998,Venusstraat,2,2000,Antwerpen,{'items': [{'bestId': 'https://data.vlaanderen...


In [19]:
addresses_seq["best_id"] = get_best_id(addresses_seq.json)

addresses_seq["best_addr"] = get_best_addr(addresses_seq.json)

addresses_seq["precision"] = get_precision(addresses_seq.json)

addresses_seq["pelias_call_count"] = get_pelias_call_count(addresses_seq.json)
addresses_seq

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr,precision,pelias_call_count
0,Dendermondsesteenweg,200,2830,Willebroek,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/391785/202...,"{'nl': 'Dendermondsesteenweg'}, 200, 2830",address,1
1,Boulevard du Régent,47,1000,Bruxelles,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/164117/1,"{'fr': 'Boulevard du Régent', 'nl': 'Regentlaa...",address,2
2,Hauwaart,44,9700,Oudenaarde,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1216986/20...,"{'nl': 'Hauwaart'}, 44, 9700",address,1
3,Bruynebosstraat,40,3511,Hasselt,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/30693367/2...,"{'nl': 'Bruynebosstraat'}, 40, 3511",address,1
4,Steenwinkelstraat,583,2627,Schelle,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1167407/20...,"{'nl': 'Steenwinkelstraat'}, 583, 2627",address,3
...,...,...,...,...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/177216/3,"{'fr': 'Route de Marche'}, 43A, 6940",address,3
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20196254/2...,"{'nl': 'Godfried van Leuvenlaan'}, 8, 1600",address,1
997,Valleilaan,83,3290,Diest,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1994910/20...,"{'nl': 'Valleilaan'}, 83, 3290",address,1
998,Venusstraat,2,2000,Antwerpen,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1834258/20...,"{'nl': 'Venusstraat'}, 2, 2000",address,1


In [20]:
# Results with no address id
addresses_seq[addresses_seq.json.apply(lambda r: get(r, ["items", 0, "bestId"])).isnull()]

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr,precision,pelias_call_count
12,Rue du Panorama - ATHUS,28,6791,Aubange,{'items': [{'street': {'name': {'fr': 'Rue du ...,geodata.wallonie.be/id/Streetname/7742084/2,"{'fr': 'Rue du Panorama'}, None, 6791",street,10
45,Rue Marconi,143,1190,Forest,{'items': [{'street': {'name': {'fr': 'Rue Mar...,https://databrussels.be/id/streetname/113/1,"{'fr': 'Rue Marconi', 'nl': 'Marconistraat'}, ...",street,4
47,Kattenstraat,33,8800,Roeselare,{'items': [{'street': {'name': {'nl': 'Kattens...,https://data.vlaanderen.be/id/straatnaam/58052...,"{'nl': 'Kattenstraat'}, None, 8800",street_interpol,1
48,Kaardeloodstraat,6,9400,Ninove,{'items': [{'street': {'name': {'nl': 'Kaardel...,https://data.vlaanderen.be/id/straatnaam/63526...,"{'nl': 'Kaardeloodstraat'}, None, 9400",street_interpol,1
52,Rue du Tige,250B,4590,Ouffet,{'items': [{'street': {'name': {'fr': 'Rue du ...,geodata.wallonie.be/id/Streetname/7726095/1,"{'fr': 'Rue du Tige'}, None, 4590",street,14
...,...,...,...,...,...,...,...,...,...
948,de Stuersstraat,20,8900,Ieper,{'items': [{'street': {'name': {'nl': 'de Stue...,https://data.vlaanderen.be/id/straatnaam/49443...,"{'nl': 'de Stuersstraat'}, None, 8900",street_interpol,1
961,Chaussée de Bruxelles,467B,1410,Waterloo,{'items': [{'street': {'name': {'fr': 'Chaussé...,geodata.wallonie.be/id/Streetname/7704303/6,"{'fr': 'Chaussée de Bruxelles'}, None, 1410",street_interpol,1
971,Avenue Air Marshal Coningham,2,1050,Ixelles,{'items': [{'street': {'name': {'fr': 'Avenue ...,https://databrussels.be/id/streetname/3827/1,"{'fr': 'Avenue Air Marshal Coningham', 'nl': '...",street_00,10
981,Avenue Alexandre Duchesne,/,4802,Verviers,{'items': [{'street': {'name': {'fr': 'Avenue ...,geodata.wallonie.be/id/Streetname/7738572/2,"{'fr': 'Avenue Alexandre Duchesne'}, None, 4802",street,10


In [21]:
# timing = addresses_seq[["dataset"]].copy()
addresses_seq["time"] =addresses_seq.json.apply(lambda j: j["time"] if j else None) 
addresses_seq["method"] =addresses_seq.json.apply(lambda j: (j["transformers"]+"-"+j["callType"]) if j else None) 

addresses_seq

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr,precision,pelias_call_count,time,method
0,Dendermondsesteenweg,200,2830,Willebroek,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/391785/202...,"{'nl': 'Dendermondsesteenweg'}, 200, 2830",address,1,0.266116,-struct
1,Boulevard du Régent,47,1000,Bruxelles,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/164117/1,"{'fr': 'Boulevard du Régent', 'nl': 'Regentlaa...",address,2,0.262626,-unstruct
2,Hauwaart,44,9700,Oudenaarde,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1216986/20...,"{'nl': 'Hauwaart'}, 44, 9700",address,1,0.131894,-struct
3,Bruynebosstraat,40,3511,Hasselt,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/30693367/2...,"{'nl': 'Bruynebosstraat'}, 40, 3511",address,1,0.136583,-struct
4,Steenwinkelstraat,583,2627,Schelle,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1167407/20...,"{'nl': 'Steenwinkelstraat'}, 583, 2627",address,3,0.205558,clean;no_city-struct
...,...,...,...,...,...,...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/177216/3,"{'fr': 'Route de Marche'}, 43A, 6940",address,3,0.138032,clean;no_city-struct
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20196254/2...,"{'nl': 'Godfried van Leuvenlaan'}, 8, 1600",address,1,0.063240,-struct
997,Valleilaan,83,3290,Diest,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1994910/20...,"{'nl': 'Valleilaan'}, 83, 3290",address,1,0.053354,-struct
998,Venusstraat,2,2000,Antwerpen,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1834258/20...,"{'nl': 'Venusstraat'}, 2, 2000",address,1,0.053359,-struct


In [22]:
# addresses_seq.to_pickle("../data/addresses_struct_shortcut.pkl")

In [23]:
# addresses_seq_shortcut = pd.read_pickle("../data/addresses_struct_shortcut.pkl")
# addresses_seq = pd.read_pickle("../data/addresses_struct.pkl")

In [24]:
# addresses_seq

In [25]:
# mg = addresses_seq[["streetName", "houseNumber", "postCode", "postName", "best_id", "pelias_cnt_call"]].merge(addresses_seq_shortcut[["streetName", "houseNumber", "postCode", "postName", "best_id", "pelias_cnt_call"]], how="outer", indicator=True)
# mg[mg["_merge"] != "both"]

In [26]:
(addresses_seq.groupby("method").time.mean()).plot.bar(title="Avg call duration")

In [27]:
addresses_seq.plot.scatter(x="pelias_call_count", y="time", title="Call time vs Pelias call count")

In [28]:
addresses_seq[["time"]].boxplot()

### Using Dask

In [29]:
addresses_dask = addresses.copy()

In [30]:
t = datetime.now()

if from_file:
    addresses_dask = pd.read_pickle(f"{folder}/dask.pkl")   
else:
        
    dd_addresses = dd.from_pandas(addresses_dask.fillna(""), npartitions=32)

    dask_task = dd_addresses[[street_field, housenbr_field, postcode_field, city_field]].apply(call_ws, meta=('x', 'str'), axis=1)

    with ProgressBar(): 
        addresses_dask["json"] = dask_task.compute()
        

    tot_time = (datetime.now() - t).total_seconds()

    ips=addresses_dask.shape[0]/tot_time
    iter_per_sec_stats["dask"] = ips
    print(f"{tot_time:.2f} seconds, {ips:.2f} it/s, {ips*3600:.0f} it/h")
    addresses_dask.to_pickle(f"{folder}/dask.pkl")

[########################################] | 100% Completed | 27.32 s
27.94 seconds, 35.80 it/s, 128869 it/h


In [31]:
addresses_dask["best_id"] = get_best_id(addresses_dask.json)
addresses_dask["best_addr"] = get_best_addr(addresses_dask.json)
addresses_dask

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr
0,Dendermondsesteenweg,200,2830,Willebroek,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/391785/202...,"{'nl': 'Dendermondsesteenweg'}, 200, 2830"
1,Boulevard du Régent,47,1000,Bruxelles,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/164117/1,"{'fr': 'Boulevard du Régent', 'nl': 'Regentlaa..."
2,Hauwaart,44,9700,Oudenaarde,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1216986/20...,"{'nl': 'Hauwaart'}, 44, 9700"
3,Bruynebosstraat,40,3511,Hasselt,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/30693367/2...,"{'nl': 'Bruynebosstraat'}, 40, 3511"
4,Steenwinkelstraat,583,2627,Schelle,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1167407/20...,"{'nl': 'Steenwinkelstraat'}, 583, 2627"
...,...,...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/177216/3,"{'fr': 'Route de Marche'}, 43A, 6940"
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20196254/2...,"{'nl': 'Godfried van Leuvenlaan'}, 8, 1600"
997,Valleilaan,83,3290,Diest,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1994910/20...,"{'nl': 'Valleilaan'}, 83, 3290"
998,Venusstraat,2,2000,Antwerpen,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1834258/20...,"{'nl': 'Venusstraat'}, 2, 2000"


In [32]:
mg = addresses_dask.drop(columns="json").merge(addresses_seq.drop(columns="json"), how="outer", indicator=True)
if mg.shape[0] == addresses.shape[0]:
    print("Same result in seq and dask run!")
else: 
    print("!!! Not the same result in seq and dask run!")
    

Same result in seq and dask run!


# Test modes

In [33]:
addresses_mode = addresses.copy()

In [34]:
if from_file:
    addresses_mode = pd.read_pickle(f"{folder}/modes.pkl")   
else:
    for mode in ["basic", "simple", "advanced"]:
        print(mode)
        t = datetime.now()
        dd_addresses = dd.from_pandas(addresses_mode.fillna(""), npartitions=32)

        dask_task = dd_addresses[[street_field, housenbr_field, postcode_field, city_field]].apply(call_ws, meta=('x', 'str'), mode=mode, axis=1)

        with ProgressBar(): 
            addresses_mode[f"json_{mode}"] = dask_task.compute()


        tot_time = (datetime.now() - t).total_seconds()

        ips=addresses_dask.shape[0]/tot_time
        iter_per_sec_stats["dask"] = ips

        addresses_mode[f"precision_{mode}"] = get_precision(addresses_mode[f"json_{mode}"])

        addresses_mode[f"time_{mode}"] =addresses_mode[f"json_{mode}"].apply(lambda j: j["time"] if j else None) 


        print(f"{tot_time:.2f} seconds, {ips:.2f} it/s, {ips*3600:.0f} it/h")
        
        addresses_mode.to_pickle(f"{folder}/modes.pkl")

basic
[########################################] | 100% Completed | 6.78 ss
6.87 seconds, 145.52 it/s, 523889 it/h
simple
[########################################] | 100% Completed | 13.06 s
13.26 seconds, 75.43 it/s, 271550 it/h
advanced
[########################################] | 100% Completed | 26.27 s
26.64 seconds, 37.54 it/s, 135145 it/h


## Match rate

In [35]:
precision = addresses_mode[["precision_basic", "precision_simple", "precision_advanced"]].unstack().rename("precision").reset_index()#.groupby(["level_0"])[0].value_counts().unstack().plot.barh(stacked=True)
precision["precision_short"] = precision["precision"].map({
            "address":              "building",
            "address_00":           "country",
            "address_streetcenter": "street",
            "address_interpol":     "building",
            "street_interpol":      "building",
            "street_00":            "country",
            "street":               "street",
            "city":                 "city",
            "country":              "country",
        }).fillna("[no res]")
precision = precision.rename(columns={"level_0":"mode"})
precision["mode"] = precision["mode"].str.split("_").str[1]
precision

Unnamed: 0,mode,level_1,precision,precision_short
0,basic,0,address,building
1,basic,1,city,city
2,basic,2,address,building
3,basic,3,address,building
4,basic,4,city,city
...,...,...,...,...
2995,advanced,995,address,building
2996,advanced,996,address,building
2997,advanced,997,address,building
2998,advanced,998,address,building


In [36]:
vc = precision.groupby(["mode"]).precision_short.value_counts().unstack()
vc = vc.reindex(["building", "street", "city", "[no res]"], axis=1)
if pd.options.plotting.backend == "plotly":
    vc.plot.barh(color_discrete_sequence=["green", "orange", "red", "grey"]).show()
else:
    vc.plot.barh(stacked=True, color=["tab:green", "tab:orange", "tab:red", "tab:grey"])

In [37]:
vc/addresses.shape[0]*100

precision_short,building,street,city,[no res]
mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
advanced,94.6,4.2,0.9,
basic,57.4,2.9,32.9,6.8
simple,64.3,4.1,30.9,0.7


## Timing

In [38]:
addresses_mode[["time_basic", "time_simple", "time_advanced"]].boxplot()

In [39]:
timing = addresses_mode[["time_basic", "time_simple", "time_advanced"]].unstack().rename("time").reset_index()
timing = timing.rename(columns={"level_0":"mode"})
timing["mode"] = timing["mode"].str.split("_").str[1]
timing

Unnamed: 0,mode,level_1,time
0,basic,0,0.072762
1,basic,1,0.043376
2,basic,2,0.066560
3,basic,3,0.051210
4,basic,4,0.025272
...,...,...,...
2995,advanced,995,0.217895
2996,advanced,996,0.039531
2997,advanced,997,0.052768
2998,advanced,998,0.050113


In [40]:
timing.groupby("mode").time.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
mode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
advanced,1000.0,0.197425,0.2349,0.026808,0.065001,0.122457,0.244795,2.284126
basic,1000.0,0.051559,0.025183,0.016919,0.03733,0.044846,0.055594,0.177932
simple,1000.0,0.100788,0.071891,0.027813,0.053398,0.077731,0.116316,0.756526


In [41]:
if pd.options.plotting.backend == "plotly":
    timing.drop(columns=["level_1"]).hist(facet_col="mode", sharex=True, sharey=True).show()
else:
    timing.pivot(index="level_1", columns = "mode", values="time").hist(sharex=True, sharey=True)

In [42]:
addresses_mode[addresses_mode.precision_simple=="[no result]"]

Unnamed: 0,streetName,houseNumber,postCode,postName,json_basic,precision_basic,time_basic,json_simple,precision_simple,time_simple,json_advanced,precision_advanced,time_advanced
30,Oostwinkeldorp(OW),2,9931,Lievegem,"{'items': [], 'total': 0, 'self': 'http://172....",[no result],0.018963,"{'items': [], 'total': 0, 'callType': 'unstruc...",[no result],0.052596,{'items': [{'bestId': 'https://data.vlaanderen...,address,0.117358
273,Avenue de Merode,54,1330,Rixensart,{'items': [{'street': {'name': {'fr': 'Avenue ...,street,0.054177,"{'items': [], 'total': 0, 'callType': 'unstruc...",[no result],0.230841,{'items': [{'street': {'name': {'fr': 'Avenue ...,street_interpol,0.205769
387,Wedekensdriesstraat(KRU),5,9770,Kruisem,"{'items': [], 'total': 0, 'self': 'http://172....",[no result],0.032159,"{'items': [], 'total': 0, 'callType': 'unstruc...",[no result],0.062209,{'items': [{'bestId': 'https://data.vlaanderen...,address,0.366562
488,Quai des Ardennes,135,4020,Liège,{'items': [{'bestId': 'geodata.wallonie.be/id/...,address,0.067973,"{'items': [], 'total': 0, 'callType': 'unstruc...",[no result],0.233828,{'items': [{'street': {'name': {'fr': 'Quai de...,street,0.421624
728,Kaulillerweg(SHL),79,3910,Pelt,"{'items': [], 'total': 0, 'self': 'http://172....",[no result],0.017087,"{'items': [], 'total': 0, 'callType': 'unstruc...",[no result],0.085735,{'items': [{'bestId': 'https://data.vlaanderen...,address,0.531496
776,Krijgsbaan,7,2100,Antwerpen,{'items': [{'bestId': 'https://data.vlaanderen...,address,0.037784,"{'items': [], 'total': 0, 'callType': 'unstruc...",[no result],0.116766,{'items': [{'street': {'name': {'nl': 'Krijgsb...,street,0.575124
971,Avenue Air Marshal Coningham,2,1050,Ixelles,"{'items': [], 'total': 0, 'self': 'http://172....",[no result],0.031453,"{'items': [], 'total': 0, 'callType': 'unstruc...",[no result],0.19593,{'items': [{'street': {'name': {'fr': 'Avenue ...,street_00,0.624755


In [43]:
# addresses_mode.json_basic.apply(lambda x: x["peliasRaw"]["features"][0]["properties"]["source"] if len(x["peliasRaw"]["features"])>0 else "NONE").value_counts()

# res

In [44]:
# addresses_mode.json_simple.apply(lambda x: x["peliasRaw"]["features"][0]["properties"]["source"] if len(x["peliasRaw"]["features"])>0 else "NONE").value_counts()

In [45]:
# addresses_mode.json_advanced.apply(lambda x: x["peliasRaw"]["features"][0]["properties"]["source"] if len(x["peliasRaw"]["features"])>0 else "NONE").value_counts()

# Get By ID

In [47]:
addresses_seq["json_from_id"] = addresses_seq["best_id"].progress_apply(call_ws_by_id)

100%|██████████| 1000/1000 [00:20<00:00, 48.40it/s]


In [48]:
addresses_seq["json_from_id"]

0      {'items': [{'bestId': 'https://data.vlaanderen...
1      {'items': [{'bestId': 'https://databrussels.be...
2      {'items': [{'bestId': 'https://data.vlaanderen...
3      {'items': [{'bestId': 'https://data.vlaanderen...
4      {'items': [{'bestId': 'https://data.vlaanderen...
                             ...                        
995    {'items': [{'bestId': 'geodata.wallonie.be/id/...
996    {'items': [{'bestId': 'https://data.vlaanderen...
997    {'items': [{'bestId': 'https://data.vlaanderen...
998    {'items': [{'bestId': 'https://data.vlaanderen...
999    {'items': [{'bestId': 'geodata.wallonie.be/id/...
Name: json_from_id, Length: 1000, dtype: object

In [49]:
x = get_best_id(addresses_seq.json_from_id)


In [50]:
addresses_seq[addresses_seq.best_id != x]

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr,precision,pelias_call_count,time,method,json_from_id
86,Rue de Beauraing,44,5500,Dinant,"{'items': [{'coordinates': {'lat': 50.249286, ...",,"None, None, None",city,10,0.418696,-struct,
111,Marktplein Res. De Wijngaard,1D,3400,Landen,"{'items': [{'coordinates': {'lat': 50.749135, ...",,"None, None, None",city,20,1.032362,-struct,
135,Lindestraat,20,3840,Borgloon,"{'items': [{'coordinates': {'lat': 50.802445, ...",,"None, None, None",city,10,1.265523,-struct,
643,Bruwaenestraat,4,8750,Wingene,"{'items': [{'coordinates': {'lat': 51.059026, ...",,"None, None, None",city,10,0.323969,-struct,
769,Route de Liège,259/B,4720,Kelmis,"{'items': [{'coordinates': {'lat': 50.720042, ...",,"None, None, None",city,14,0.642322,-struct,
780,Tonny,78,6680,Sainte-Ode,"{'items': [{'coordinates': {'lat': 50.01723, '...",,"None, None, None",city,10,0.301024,-struct,
847,Sint-Lambertusstraat,4,3730,Hoeselt,"{'items': [{'coordinates': {'lat': 50.849789, ...",,"None, None, None",city,10,0.501132,-struct,


# Search city

In [51]:
addresses_seq.postCode.progress_apply(lambda r: call_ws_search_city(postcode=r))

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:19<00:00, 50.67it/s]


0      {'items': [{'municipality': {'name': {'nl': 'W...
1      {'items': [{'municipality': {'name': {'fr': 'B...
2      {'items': [{'municipality': {'name': {'fr': 'A...
3      {'items': [{'municipality': {'name': {'nl': 'H...
4      {'items': [{'municipality': {'name': {'nl': 'S...
                             ...                        
995    {'items': [{'municipality': {'name': {'fr': 'D...
996    {'items': [{'municipality': {'name': {'nl': 'S...
997    {'items': [{'municipality': {'name': {'nl': 'D...
998    {'items': [{'municipality': {'name': {'fr': 'A...
999    {'items': [{'municipality': {'name': {'fr': 'B...
Name: postCode, Length: 1000, dtype: object

In [52]:
addresses_seq.postName.progress_apply(lambda r: call_ws_search_city(postname=r))

100%|██████████| 1000/1000 [00:26<00:00, 37.68it/s]


0      {'items': [{'municipality': {'name': {'nl': 'W...
1      {'items': [{'municipality': {'name': {'fr': 'B...
2      {'items': [{'municipality': {'name': {'fr': 'A...
3      {'items': [{'municipality': {'name': {'nl': 'H...
4      {'items': [{'municipality': {'name': {'nl': 'S...
                             ...                        
995    {'items': [{'municipality': {'name': {'fr': 'D...
996    {'items': [{'municipality': {'name': {'nl': 'S...
997    {'items': [{'municipality': {'name': {'nl': 'D...
998    {'items': [{'municipality': {'name': {'fr': 'A...
999    {'items': [{'municipality': {'name': {'fr': 'B...
Name: postName, Length: 1000, dtype: object

In [53]:
addresses_seq.progress_apply(lambda r: call_ws_search_city(postname=r.postName, postcode=r.postCode), axis=1)

100%|██████████| 1000/1000 [00:22<00:00, 44.50it/s]


0      {'items': [{'municipality': {'name': {'nl': 'W...
1      {'items': [{'municipality': {'name': {'fr': 'B...
2      {'items': [{'municipality': {'name': {'fr': 'A...
3      {'items': [{'municipality': {'name': {'nl': 'H...
4      {'items': [{'municipality': {'name': {'nl': 'S...
                             ...                        
995    {'items': [{'municipality': {'name': {'fr': 'D...
996    {'items': [{'municipality': {'name': {'nl': 'S...
997    {'items': [{'municipality': {'name': {'nl': 'D...
998    {'items': [{'municipality': {'name': {'fr': 'A...
999    {'items': [{'municipality': {'name': {'fr': 'B...
Length: 1000, dtype: object

# Reverse

In [54]:
addresses_seq["json_from_reverse"] = addresses_seq.json.progress_apply(lambda r: call_ws_reverse(**get(r, ["items", 0, "coordinates"])))

 11%|█         | 109/1000 [00:04<00:28, 30.96it/s]

Unprocessable entity: {"detail":[{"type":"greater_than","loc":["query","lat"],"msg":"Input should be greater than 49.49","input":"0","ctx":{"gt":49.49}},{"type":"greater_than","loc":["query","lon"],"msg":"Input should be greater than 2.4","input":"0","ctx":{"gt":2.4}}]}
{"detail":[{"type":"greater_than","loc":["query","lat"],"msg":"Input should be greater than 49.49","input":"0","ctx":{"gt":49.49}},{"type":"greater_than","loc":["query","lon"],"msg":"Input should be greater than 2.4","input":"0","ctx":{"gt":2.4}}]}


 67%|██████▋   | 674/1000 [00:21<00:09, 35.88it/s]

Unprocessable entity: {"detail":[{"type":"greater_than","loc":["query","lat"],"msg":"Input should be greater than 49.49","input":"0","ctx":{"gt":49.49}},{"type":"greater_than","loc":["query","lon"],"msg":"Input should be greater than 2.4","input":"0","ctx":{"gt":2.4}}]}
{"detail":[{"type":"greater_than","loc":["query","lat"],"msg":"Input should be greater than 49.49","input":"0","ctx":{"gt":49.49}},{"type":"greater_than","loc":["query","lon"],"msg":"Input should be greater than 2.4","input":"0","ctx":{"gt":2.4}}]}


 98%|█████████▊| 978/1000 [00:30<00:00, 33.89it/s]

Unprocessable entity: {"detail":[{"type":"greater_than","loc":["query","lat"],"msg":"Input should be greater than 49.49","input":"0","ctx":{"gt":49.49}},{"type":"greater_than","loc":["query","lon"],"msg":"Input should be greater than 2.4","input":"0","ctx":{"gt":2.4}}]}
{"detail":[{"type":"greater_than","loc":["query","lat"],"msg":"Input should be greater than 49.49","input":"0","ctx":{"gt":49.49}},{"type":"greater_than","loc":["query","lon"],"msg":"Input should be greater than 2.4","input":"0","ctx":{"gt":2.4}}]}


100%|██████████| 1000/1000 [00:31<00:00, 31.78it/s]


In [55]:
addresses_seq

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr,precision,pelias_call_count,time,method,json_from_id,json_from_reverse
0,Dendermondsesteenweg,200,2830,Willebroek,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/391785/202...,"{'nl': 'Dendermondsesteenweg'}, 200, 2830",address,1,0.266116,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
1,Boulevard du Régent,47,1000,Bruxelles,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/164117/1,"{'fr': 'Boulevard du Régent', 'nl': 'Regentlaa...",address,2,0.262626,-unstruct,{'items': [{'bestId': 'https://databrussels.be...,{'items': [{'bestId': 'https://databrussels.be...
2,Hauwaart,44,9700,Oudenaarde,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1216986/20...,"{'nl': 'Hauwaart'}, 44, 9700",address,1,0.131894,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
3,Bruynebosstraat,40,3511,Hasselt,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/30693367/2...,"{'nl': 'Bruynebosstraat'}, 40, 3511",address,1,0.136583,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
4,Steenwinkelstraat,583,2627,Schelle,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1167407/20...,"{'nl': 'Steenwinkelstraat'}, 583, 2627",address,3,0.205558,clean;no_city-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/177216/3,"{'fr': 'Route de Marche'}, 43A, 6940",address,3,0.138032,clean;no_city-struct,{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'bestId': 'geodata.wallonie.be/id/...
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20196254/2...,"{'nl': 'Godfried van Leuvenlaan'}, 8, 1600",address,1,0.063240,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
997,Valleilaan,83,3290,Diest,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1994910/20...,"{'nl': 'Valleilaan'}, 83, 3290",address,1,0.053354,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
998,Venusstraat,2,2000,Antwerpen,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1834258/20...,"{'nl': 'Venusstraat'}, 2, 2000",address,1,0.053359,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...


In [56]:
addresses_seq["best_id_from_reverse"] = get_best_id(addresses_seq.json_from_reverse)

addresses_seq

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr,precision,pelias_call_count,time,method,json_from_id,json_from_reverse,best_id_from_reverse
0,Dendermondsesteenweg,200,2830,Willebroek,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/391785/202...,"{'nl': 'Dendermondsesteenweg'}, 200, 2830",address,1,0.266116,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/391785/202...
1,Boulevard du Régent,47,1000,Bruxelles,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/164117/1,"{'fr': 'Boulevard du Régent', 'nl': 'Regentlaa...",address,2,0.262626,-unstruct,{'items': [{'bestId': 'https://databrussels.be...,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/164117/1
2,Hauwaart,44,9700,Oudenaarde,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1216986/20...,"{'nl': 'Hauwaart'}, 44, 9700",address,1,0.131894,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1216986/20...
3,Bruynebosstraat,40,3511,Hasselt,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/30693367/2...,"{'nl': 'Bruynebosstraat'}, 40, 3511",address,1,0.136583,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/30693367/2...
4,Steenwinkelstraat,583,2627,Schelle,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1167407/20...,"{'nl': 'Steenwinkelstraat'}, 583, 2627",address,3,0.205558,clean;no_city-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1167407/20...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/177216/3,"{'fr': 'Route de Marche'}, 43A, 6940",address,3,0.138032,clean;no_city-struct,{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/177216/3
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20196254/2...,"{'nl': 'Godfried van Leuvenlaan'}, 8, 1600",address,1,0.063240,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20196254/2...
997,Valleilaan,83,3290,Diest,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1994910/20...,"{'nl': 'Valleilaan'}, 83, 3290",address,1,0.053354,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1994910/20...
998,Venusstraat,2,2000,Antwerpen,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1834258/20...,"{'nl': 'Venusstraat'}, 2, 2000",address,1,0.053359,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1834258/20...


In [57]:
# Mismatch OK if:
# - primary result (in json) if not "address" level (streetname, city...)
# - coordinates are interpolated (precision='address_streetcenter', 'address_interpol', ...)
# - best_id is null (and by chance we find an Best address in the coordinates given by WhosOnFirst)
# - several addresses with the same coordinates

addresses_seq[addresses_seq.best_id != addresses_seq.best_id_from_reverse]#.iloc[0:60]

Unnamed: 0,streetName,houseNumber,postCode,postName,json,best_id,best_addr,precision,pelias_call_count,time,method,json_from_id,json_from_reverse,best_id_from_reverse
12,Rue du Panorama - ATHUS,28,6791,Aubange,{'items': [{'street': {'name': {'fr': 'Rue du ...,geodata.wallonie.be/id/Streetname/7742084/2,"{'fr': 'Rue du Panorama'}, None, 6791",street,10,1.575110,clean;no_city-unstruct,{'items': [{'street': {'name': {'fr': 'Rue du ...,"{'items': [], 'total': 0, 'self': 'http://172....",
16,Groeningenweg,8,3590,Diepenbeek,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20072199/2...,"{'nl': 'Groeningenweg'}, 23_8, 3590",address,1,0.092939,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20072203/2...
39,Theo Verellenlaan,9,2990,Wuustwezel,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/960649/202...,"{'nl': 'Theo Verellenlaan'}, 9, 2990",address,1,0.077213,-struct,{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1784075/20...
45,Rue Marconi,143,1190,Forest,{'items': [{'street': {'name': {'fr': 'Rue Mar...,https://databrussels.be/id/streetname/113/1,"{'fr': 'Rue Marconi', 'nl': 'Marconistraat'}, ...",street,4,0.270575,clean;no_city-struct,{'items': [{'street': {'name': {'fr': 'Rue Mar...,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/70221/1
47,Kattenstraat,33,8800,Roeselare,{'items': [{'street': {'name': {'nl': 'Kattens...,https://data.vlaanderen.be/id/straatnaam/58052...,"{'nl': 'Kattenstraat'}, None, 8800",street_interpol,1,0.098652,-struct,{'items': [{'street': {'name': {'nl': 'Kattens...,"{'items': [], 'total': 0, 'self': 'http://172....",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
971,Avenue Air Marshal Coningham,2,1050,Ixelles,{'items': [{'street': {'name': {'fr': 'Avenue ...,https://databrussels.be/id/streetname/3827/1,"{'fr': 'Avenue Air Marshal Coningham', 'nl': '...",street_00,10,0.260533,no_hn-unstruct,{'items': [{'street': {'name': {'fr': 'Avenue ...,,
981,Avenue Alexandre Duchesne,/,4802,Verviers,{'items': [{'street': {'name': {'fr': 'Avenue ...,geodata.wallonie.be/id/Streetname/7738572/2,"{'fr': 'Avenue Alexandre Duchesne'}, None, 4802",street,10,0.599377,-struct,{'items': [{'street': {'name': {'fr': 'Avenue ...,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/1312090/5
989,Rue Combattants(Haul),20,7120,Estinnes,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/811361/1,"{'fr': 'Rue des Combattants'}, 20, 7120",address_streetcenter,2,0.463828,-unstruct,{'items': [{'bestId': 'geodata.wallonie.be/id/...,"{'items': [], 'total': 0, 'self': 'http://172....",
990,Wirtzfeld,116,4761,Büllingen,{'items': [{'municipality': {'name': {'fr': 'B...,geodata.wallonie.be/id/Municipality/63012/7,"None, None, 4761",city,10,0.499711,no_city;no_hn-unstruct,{'items': [{'municipality': {'name': {'fr': 'B...,"{'items': [], 'total': 0, 'self': 'http://172....",


# Unstructured mode

In [58]:
if from_file:
    addresses_unstruct = pd.read_pickle(f"{folder}/unstruct.pkl")   
else:
    addresses_unstruct = addresses.copy()

    addresses_unstruct["address"] = addresses_unstruct.streetName+", " +addresses_unstruct.houseNumber + ", "+addresses_unstruct.postCode.astype(str)+" "+addresses_unstruct.postName
    addresses_unstruct["address_nozip"] = addresses_unstruct.streetName+", " +addresses_unstruct.houseNumber + ", "+addresses_unstruct.postName

    t = datetime.now()
    addresses_unstruct["json"] = addresses_unstruct[["address"]].fillna("").progress_apply(call_unstruct_ws,
                                                                    mode="advanced", axis=1)
    tot_time = (datetime.now() - t).total_seconds()

    ips=addresses_unstruct.shape[0]/tot_time
    iter_per_sec_stats["unstruct"] = ips
    print(f"{tot_time:.2f} seconds, {ips:.2f} it/s, {ips*3600:.0f} it/h")

    t = datetime.now()
    addresses_unstruct["json_nozip"] = addresses_unstruct[["address_nozip"]].fillna("").progress_apply(call_unstruct_ws,
                                                                    mode="advanced", axis=1)
    tot_time = (datetime.now() - t).total_seconds()

    ips=addresses_unstruct.shape[0]/tot_time
    iter_per_sec_stats["unstruct_nozip"] = ips
    print(f"{tot_time:.2f} seconds, {ips:.2f} it/s, {ips*3600:.0f} it/h")
    
    addresses_unstruct.to_pickle(f"{folder}/unstruct.pkl")
    
addresses_unstruct

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [01:47<00:00,  9.30it/s]


107.50 seconds, 9.30 it/s, 33487 it/h


100%|██████████| 1000/1000 [02:28<00:00,  6.71it/s]


148.94 seconds, 6.71 it/s, 24170 it/h


Unnamed: 0,streetName,houseNumber,postCode,postName,address,address_nozip,json,json_nozip
0,Dendermondsesteenweg,200,2830,Willebroek,"Dendermondsesteenweg, 200, 2830 Willebroek","Dendermondsesteenweg, 200, Willebroek",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
1,Boulevard du Régent,47,1000,Bruxelles,"Boulevard du Régent, 47, 1000 Bruxelles","Boulevard du Régent, 47, Bruxelles",{'items': [{'bestId': 'https://databrussels.be...,{'items': [{'bestId': 'https://databrussels.be...
2,Hauwaart,44,9700,Oudenaarde,"Hauwaart, 44, 9700 Oudenaarde","Hauwaart, 44, Oudenaarde",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
3,Bruynebosstraat,40,3511,Hasselt,"Bruynebosstraat, 40, 3511 Hasselt","Bruynebosstraat, 40, Hasselt",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
4,Steenwinkelstraat,583,2627,Schelle,"Steenwinkelstraat, 583, 2627 Schelle","Steenwinkelstraat, 583, Schelle",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
...,...,...,...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,"Route de Marche, 43, 6940 Durbuy","Route de Marche, 43, Durbuy",{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'bestId': 'geodata.wallonie.be/id/...
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,"Godfried van Leuvenlaan, 8, 1600 Sint-Pieters-...","Godfried van Leuvenlaan, 8, Sint-Pieters-Leeuw",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
997,Valleilaan,83,3290,Diest,"Valleilaan, 83, 3290 Diest","Valleilaan, 83, Diest",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...
998,Venusstraat,2,2000,Antwerpen,"Venusstraat, 2, 2000 Antwerpen","Venusstraat, 2, Antwerpen",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...


In [59]:
for suffix in ["", "_nozip"]:
    addresses_unstruct[f"best_id{suffix}"] = get_best_id(addresses_unstruct[f"json{suffix}"])
    
    addresses_unstruct[f"best_addr{suffix}"] = get_best_addr(addresses_unstruct[f"json{suffix}"])
    addresses_unstruct[f"precision{suffix}"] = get_precision(addresses_unstruct[f"json{suffix}"])
    
    addresses_unstruct[f"pelias_call_cnt{suffix}"] = get_pelias_call_count(addresses_unstruct[f"json{suffix}"])

    addresses_unstruct[f"callType{suffix}"] = addresses_unstruct[f"json{suffix}"].apply(lambda r: get(r, ["callType"]))
    addresses_unstruct[f"transformers{suffix}"] = addresses_unstruct[f"json{suffix}"].apply(lambda r: get(r, ["transformers"]))
    addresses_unstruct[f"time{suffix}"] =addresses_unstruct[f"json{suffix}"].apply(lambda j: j["time"] if j else None) 

    

addresses_unstruct

Unnamed: 0,streetName,houseNumber,postCode,postName,address,address_nozip,json,json_nozip,best_id,best_addr,...,callType,transformers,time,best_id_nozip,best_addr_nozip,precision_nozip,pelias_call_cnt_nozip,callType_nozip,transformers_nozip,time_nozip
0,Dendermondsesteenweg,200,2830,Willebroek,"Dendermondsesteenweg, 200, 2830 Willebroek","Dendermondsesteenweg, 200, Willebroek",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/391785/202...,"{'nl': 'Dendermondsesteenweg'}, 200, 2830",...,unstruct,,0.082524,https://data.vlaanderen.be/id/adres/391785/202...,"{'nl': 'Dendermondsesteenweg'}, 200, 2830",address,1,unstruct,,0.047253
1,Boulevard du Régent,47,1000,Bruxelles,"Boulevard du Régent, 47, 1000 Bruxelles","Boulevard du Régent, 47, Bruxelles",{'items': [{'bestId': 'https://databrussels.be...,{'items': [{'bestId': 'https://databrussels.be...,https://databrussels.be/id/address/164117/1,"{'fr': 'Boulevard du Régent', 'nl': 'Regentlaa...",...,unstruct,,0.059941,https://databrussels.be/id/address/164117/1,"{'fr': 'Boulevard du Régent', 'nl': 'Regentlaa...",address,1,unstruct,,0.052008
2,Hauwaart,44,9700,Oudenaarde,"Hauwaart, 44, 9700 Oudenaarde","Hauwaart, 44, Oudenaarde",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1216986/20...,"{'nl': 'Hauwaart'}, 44, 9700",...,unstruct,,0.051207,https://data.vlaanderen.be/id/adres/1216986/20...,"{'nl': 'Hauwaart'}, 44, 9700",address,1,unstruct,,0.048175
3,Bruynebosstraat,40,3511,Hasselt,"Bruynebosstraat, 40, 3511 Hasselt","Bruynebosstraat, 40, Hasselt",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/30693367/2...,"{'nl': 'Bruynebosstraat'}, 40, 3511",...,unstruct,,0.051681,https://data.vlaanderen.be/id/adres/30693367/2...,"{'nl': 'Bruynebosstraat'}, 40, 3511",address,1,unstruct,,0.046662
4,Steenwinkelstraat,583,2627,Schelle,"Steenwinkelstraat, 583, 2627 Schelle","Steenwinkelstraat, 583, Schelle",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1167407/20...,"{'nl': 'Steenwinkelstraat'}, 583, 2627",...,struct,parsed(postcode=2627);no_city,0.078363,https://data.vlaanderen.be/id/adres/1167407/20...,"{'nl': 'Steenwinkelstraat'}, 583, 2627",address,2,struct,parsed(postcode=2627);no_city,0.078672
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Route de Marche,43,6940,Durbuy,"Route de Marche, 43, 6940 Durbuy","Route de Marche, 43, Durbuy",{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/177216/3,"{'fr': 'Route de Marche'}, 43A, 6940",...,struct,parsed(postcode=6940);no_city,0.092281,geodata.wallonie.be/id/Address/177216/3,"{'fr': 'Route de Marche'}, 43A, 6940",address,2,struct,parsed(postcode=6940);no_city,0.118912
996,Godfried van Leuvenlaan,8,1600,Sint-Pieters-Leeuw,"Godfried van Leuvenlaan, 8, 1600 Sint-Pieters-...","Godfried van Leuvenlaan, 8, Sint-Pieters-Leeuw",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/20196254/2...,"{'nl': 'Godfried van Leuvenlaan'}, 8, 1600",...,unstruct,,0.052666,https://data.vlaanderen.be/id/adres/20196254/2...,"{'nl': 'Godfried van Leuvenlaan'}, 8, 1600",address,1,unstruct,,0.053854
997,Valleilaan,83,3290,Diest,"Valleilaan, 83, 3290 Diest","Valleilaan, 83, Diest",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1994910/20...,"{'nl': 'Valleilaan'}, 83, 3290",...,unstruct,,0.044936,https://data.vlaanderen.be/id/adres/1994910/20...,"{'nl': 'Valleilaan'}, 83, 3290",address,1,unstruct,,0.051458
998,Venusstraat,2,2000,Antwerpen,"Venusstraat, 2, 2000 Antwerpen","Venusstraat, 2, Antwerpen",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/1834258/20...,"{'nl': 'Venusstraat'}, 2, 2000",...,unstruct,,0.061502,https://data.vlaanderen.be/id/adres/2195427/20...,"{'nl': 'Venusstraat'}, 2, 2960",address,1,unstruct,,0.065911


In [60]:
addresses_unstruct.plot.scatter(x="pelias_call_cnt_nozip", y="time_nozip", title="Call time vs Pelias call count")

In [61]:
# addresses_unstruct_shortcut = pd.read_pickle("../data/addresses_unstruct_shortcut.pkl")
# addresses_unstruct= pd.read_pickle("../data/addresses_unstruct.pkl")


In [62]:
# mg = addresses_unstruct[["address", "address_nozip",  "best_id", "best_id_nozip", "best_cnt_call", "best_cnt_call_nozip"]].merge(
#      addresses_unstruct_shortcut[["address", "address_nozip", "best_id", "best_id_nozip", "best_cnt_call", "best_cnt_call_nozip"]], how="outer", indicator=True)
# mg[mg._merge != "both"]

In [63]:
addresses_unstruct.precision_nozip.value_counts()

precision_nozip
address                 891
street                   53
street_interpol          40
city                      7
address_interpol          4
address_streetcenter      3
street_00                 2
Name: count, dtype: int64

In [64]:
addresses_unstruct[["time", "time_nozip"]].boxplot()

In [65]:
cnt = pd.merge(addresses_unstruct["pelias_call_cnt"].value_counts(), 
         addresses_unstruct["pelias_call_cnt_nozip"].value_counts(), left_index=True, right_index=True, how="outer", suffixes=("_withzip", "_nozip"))

cnt.plot.bar(title="Pelias call count with and without zip code").show()
cnt

Unnamed: 0,count_withzip,count_nozip
1,645.0,661.0
2,249.0,229.0
3,41.0,38.0
4,12.0,9.0
5,2.0,
6,,1.0
7,4.0,1.0
8,2.0,3.0
9,6.0,3.0
10,3.0,


In [66]:
addresses_unstruct["pelias_call_cnt"].mean(), addresses_unstruct["pelias_call_cnt_nozip"].mean()
                                    

(np.float64(1.907), np.float64(2.645))

In [67]:
# addresses_unstruct[["time_nozip"]].hist()                                   

In [68]:
# addresses_unstruct[["time"]].hist()

In [69]:
common = ["streetName", "houseNumber", "postCode", "postName", "best_id", "best_addr", "precision"]
mg = addresses_unstruct[common +["address"]].merge(addresses_seq[common], on=common,
                                                             how="outer", indicator=True, suffixes=("_unstruct", "_struct"))

print(f"With zip: Agree on {mg[mg._merge=='both'].shape[0]}/{addresses_unstruct.shape[0]}")

With zip: Agree on 980/1000


In [70]:
mg[mg._merge!='both']

Unnamed: 0,streetName,houseNumber,postCode,postName,best_id,best_addr,precision,address,_merge
3,Aarschotsesteenweg,163,3010,Leuven,https://data.vlaanderen.be/id/straatnaam/34606...,"{'nl': 'Aarschotsesteenweg'}, None, 3010",street_interpol,,right_only
4,Aarschotsesteenweg,163,3010,Leuven,https://data.vlaanderen.be/id/straatnaam/34606...,"{'nl': 'Aarschotsesteenweg'}, None, 3012",street_interpol,"Aarschotsesteenweg, 163, 3010 Leuven",left_only
23,Aux Quatre Bonniers,7,4052,Chaudfontaine,geodata.wallonie.be/id/Address/1122799/2,"{'fr': 'Aux Quatre Bonniers'}, 7, 4052",address,,right_only
24,Aux Quatre Bonniers,7,4052,Chaudfontaine,geodata.wallonie.be/id/Streetname/7727968/2,"{'fr': 'Aux Quatre Bonniers'}, None, 4052",street,"Aux Quatre Bonniers, 7, 4052 Chaudfontaine",left_only
30,Avenue Alexandre Duchesne,/,4802,Verviers,geodata.wallonie.be/id/Streetname/7738571/1,"{'fr': 'Avenue Alexandre Deru'}, None, 4800",street,"Avenue Alexandre Duchesne, /, 4802 Verviers",left_only
31,Avenue Alexandre Duchesne,/,4802,Verviers,geodata.wallonie.be/id/Streetname/7738572/2,"{'fr': 'Avenue Alexandre Duchesne'}, None, 4802",street,,right_only
72,"Avenue du Centenaire,H.-M.",12,1320,Beauvechain,geodata.wallonie.be/id/Address/10759/1,"{'fr': 'Avenue du Centenaire'}, 12, 1320",address,,right_only
73,"Avenue du Centenaire,H.-M.",12,1320,Beauvechain,geodata.wallonie.be/id/Streetname/7700006/1,"{'fr': 'Avenue du Centenaire'}, None, 1320",street,"Avenue du Centenaire,H.-M., 12, 1320 Beauvechain",left_only
160,"Chaussée d'Arlon,NEU",141,6840,Neufchâteau,geodata.wallonie.be/id/Address/210725/1,"{'fr': ""Chaussée d'Arlon""}, 141, 6840",address,,right_only
161,"Chaussée d'Arlon,NEU",141,6840,Neufchâteau,geodata.wallonie.be/id/Streetname/7745792/3,"{'fr': ""Chaussée d'Arlon""}, None, 6840",street,"Chaussée d'Arlon,NEU, 141, 6840 Neufchâteau",left_only


In [71]:
mg[mg._merge!='both'][["streetName", "houseNumber", "postCode", "postName", "address", "best_id", "best_addr" ]].to_excel("data/diff_struct_vs_unstruct.xlsx")

In [72]:
common = ["streetName", "houseNumber", "postCode", "postName", "best_id", "best_addr", "precision"]
mg = addresses_unstruct.drop(columns=["json", "best_id", "best_addr", "precision"]).rename(columns={"best_id_nozip": "best_id", 
                                                                                       "best_addr_nozip": "best_addr",
                                                                                        "precision_nozip": "precision"})[common +["address"]].merge(addresses_seq[common], on=common,
                                                             how="outer", indicator=True, suffixes=("_unstruct", "_struct"))

print(f"No zip: Agree on {mg[mg._merge=='both'].shape[0]}/{addresses_unstruct.shape[0]}")

No zip: Agree on 955/1000


In [73]:
mg[mg._merge!='both']

Unnamed: 0,streetName,houseNumber,postCode,postName,best_id,best_addr,precision,address,_merge
3,Aarschotsesteenweg,163,3010,Leuven,https://data.vlaanderen.be/id/straatnaam/34606...,"{'nl': 'Aarschotsesteenweg'}, None, 3010",street_interpol,,right_only
4,Aarschotsesteenweg,163,3010,Leuven,https://data.vlaanderen.be/id/straatnaam/34606...,"{'nl': 'Aarschotsesteenweg'}, None, 3012",street_interpol,"Aarschotsesteenweg, 163, 3010 Leuven",left_only
23,Aux Quatre Bonniers,7,4052,Chaudfontaine,geodata.wallonie.be/id/Address/1122799/2,"{'fr': 'Aux Quatre Bonniers'}, 7, 4052",address,,right_only
24,Aux Quatre Bonniers,7,4052,Chaudfontaine,geodata.wallonie.be/id/Streetname/7727968/2,"{'fr': 'Aux Quatre Bonniers'}, None, 4052",street,"Aux Quatre Bonniers, 7, 4052 Chaudfontaine",left_only
29,Avenue Air Marshal Coningham,2,1050,Ixelles,https://databrussels.be/id/address/30161/1,"{'fr': 'Avenue Air Marshal Coningham', 'nl': '...",address,"Avenue Air Marshal Coningham, 2, 1050 Ixelles",left_only
...,...,...,...,...,...,...,...,...,...
868,"Ruelle Procès, St-Denis",4,5081,La Bruyère,geodata.wallonie.be/id/Streetname/7756100/2,"{'fr': 'Ruelle Procès'}, None, 5081",street,"Ruelle Procès, St-Denis, 4, 5081 La Bruyère",left_only
912,Statiestraat,19-21,2600,Antwerpen,https://data.vlaanderen.be/id/straatnaam/11372...,"{'nl': 'Statiestraat'}, None, 2560",street_interpol,"Statiestraat, 19-21, 2600 Antwerpen",left_only
913,Statiestraat,19-21,2600,Antwerpen,https://data.vlaanderen.be/id/straatnaam/2724/...,"{'nl': 'Statiestraat'}, None, 2600",street_interpol,,right_only
973,Venusstraat,2,2000,Antwerpen,https://data.vlaanderen.be/id/adres/1834258/20...,"{'nl': 'Venusstraat'}, 2, 2000",address,,right_only


In [74]:
print("Differences in best_id with and without zip code:")
addresses_unstruct[addresses_unstruct.best_id.fillna("none") != addresses_unstruct.best_id_nozip.fillna("none")]

Differences in best_id with and without zip code:


Unnamed: 0,streetName,houseNumber,postCode,postName,address,address_nozip,json,json_nozip,best_id,best_addr,...,callType,transformers,time,best_id_nozip,best_addr_nozip,precision_nozip,pelias_call_cnt_nozip,callType_nozip,transformers_nozip,time_nozip
40,Avenue Beau Vallon,14,1410,Waterloo,"Avenue Beau Vallon, 14, 1410 Waterloo","Avenue Beau Vallon, 14, Waterloo",{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'street': {'name': {'fr': 'Avenue ...,geodata.wallonie.be/id/Address/94128/2,"{'fr': 'Avenue Beau Vallon'}, 14, 1410",...,unstruct,,0.051537,geodata.wallonie.be/id/Streetname/7704134/3,"{'fr': 'Avenue Blücher'}, None, 1410",street,1,unstruct,,0.410291
45,Rue Marconi,143,1190,Forest,"Rue Marconi, 143, 1190 Forest","Rue Marconi, 143, Forest",{'items': [{'street': {'name': {'fr': 'Rue Mar...,{'items': [{'bestId': 'geodata.wallonie.be/id/...,https://databrussels.be/id/streetname/113/1,"{'fr': 'Rue Marconi', 'nl': 'Marconistraat'}, ...",...,struct,parsed(postcode=1190);no_city,0.143763,geodata.wallonie.be/id/Address/1349551/1,"{'fr': 'Rue Marconi'}, 1, 4100",address,1,unstruct,,0.24069
90,Statiestraat,19-21,2600,Antwerpen,"Statiestraat, 19-21, 2600 Antwerpen","Statiestraat, 19-21, Antwerpen",{'items': [{'street': {'name': {'nl': 'Staties...,{'items': [{'street': {'name': {'nl': 'Staties...,https://data.vlaanderen.be/id/straatnaam/2724/...,"{'nl': 'Statiestraat'}, None, 2600",...,unstruct,,0.116486,https://data.vlaanderen.be/id/straatnaam/11372...,"{'nl': 'Statiestraat'}, None, 2560",street_interpol,1,unstruct,,0.128086
96,Rue Emile Vandervelde,69,6031,Charleroi,"Rue Emile Vandervelde, 69, 6031 Charleroi","Rue Emile Vandervelde, 69, Charleroi",{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Address/914626/1,"{'fr': 'Rue Emile Vandervelde (MS)'}, 69, 6031",...,unstruct,,0.077031,geodata.wallonie.be/id/Address/494450/1,"{'fr': 'Rue Emile Vandervelde'}, 69, 6240",address,1,unstruct,,0.067978
172,Avenue Emile Verhaeren,20 A,1030,Schaerbeek,"Avenue Emile Verhaeren, 20 A, 1030 Schaerbeek","Avenue Emile Verhaeren, 20 A, Schaerbeek",{'items': [{'bestId': 'https://databrussels.be...,{'items': [{'bestId': 'geodata.wallonie.be/id/...,https://databrussels.be/id/address/117118/1,"{'fr': 'Avenue Emile Verhaeren', 'nl': 'Emile ...",...,unstruct,,0.053521,geodata.wallonie.be/id/Address/130365/1,"{'fr': 'avenue Emile Verhaeren'}, 20, 1348",address,1,unstruct,,0.048631
219,Rue de Pont(TOU),3/11,7500,Tournai,"Rue de Pont(TOU), 3/11, 7500 Tournai","Rue de Pont(TOU), 3/11, Tournai",{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'street': {'name': {'fr': 'Rue de ...,geodata.wallonie.be/id/Address/1442499/1,"{'fr': 'Rue de Pont'}, 11, 7500",...,unstruct,clean,0.090414,geodata.wallonie.be/id/Streetname/7723448/2,"{'fr': 'Rue de Pont'}, None, 7500",street_interpol,2,unstruct,clean,0.086623
233,Rue de Bois-de-Breux,270,4030,CHENEE (CHAUDFONTAINE),"Rue de Bois-de-Breux, 270, 4030 CHENEE (CHAUDF...","Rue de Bois-de-Breux, 270, CHENEE (CHAUDFONTAINE)",{'items': [{'street': {'name': {'fr': 'Rue de ...,{'items': [{'bestId': 'geodata.wallonie.be/id/...,geodata.wallonie.be/id/Streetname/7730299/1,"{'fr': 'Rue de Bois-de-Breux'}, None, 4030",...,struct,parsed(postcode=4030);no_city;no_hn,0.334686,geodata.wallonie.be/id/Address/1109020/1,"{'fr': 'Rue de Bois-de-Breux'}, 270, 4020",address,2,unstruct,clean,0.355081
247,Oude Pastorijstraat,12,3930,Hamont-Achel,"Oude Pastorijstraat, 12, 3930 Hamont-Achel","Oude Pastorijstraat, 12, Hamont-Achel",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'bestId': 'https://data.vlaanderen...,https://data.vlaanderen.be/id/adres/666835/202...,"{'nl': 'Oude Pastorijstraat'}, 12, 3930",...,unstruct,,0.057276,https://data.vlaanderen.be/id/adres/20367672/2...,"{'nl': 'Oude Pastorijstraat'}, 12, 3910",address,1,unstruct,,0.053682
373,Rue de Liège,149/153,4800,Verviers,"Rue de Liège, 149/153, 4800 Verviers","Rue de Liège, 149/153, Verviers",{'items': [{'bestId': 'geodata.wallonie.be/id/...,{'items': [{'street': {'name': {'fr': 'Rue de ...,geodata.wallonie.be/id/Address/1870520/2,"{'fr': 'Rue de Liège'}, 153, 4800",...,unstruct,,0.05562,geodata.wallonie.be/id/Streetname/7738831/2,"{'fr': 'Rue de Liège'}, None, 4800",street_interpol,1,unstruct,,0.053393
391,Jodenstraat,12 b 1,2000,Antwerpen,"Jodenstraat, 12 b 1, 2000 Antwerpen","Jodenstraat, 12 b 1, Antwerpen",{'items': [{'bestId': 'https://data.vlaanderen...,{'items': [{'street': {'name': {'nl': 'Jodenst...,https://data.vlaanderen.be/id/adres/438745/202...,"{'nl': 'Jodenstraat'}, 12, 2000",...,unstruct,,0.077197,https://data.vlaanderen.be/id/straatnaam/10637...,"{'nl': 'Jodenstraat'}, None, 2800",street_interpol,1,unstruct,,0.065285


In [75]:
addresses_unstruct[addresses_unstruct.best_id.fillna("none") != addresses_unstruct.best_id_nozip.fillna("none")][["address", "address_nozip", "best_id", "best_id_nozip", "best_addr", "best_addr_nozip" ]].to_excel("data/diff_nozip.xlsx")


# Tests