In [2]:
import pandas as pd
import numpy as np
import requests

# from tqdm.auto import tqdm, trange
from tqdm import tqdm, trange
# tqdm.pandas()


from credentials import bosa_mapping_url

from zipfile import ZipFile
import json
import random
import time
import os, urllib

import glob


In [3]:
def download_if_nexist(url, filename):
    """
    If the (local) file <filename> does not exists, download it from <url>

    Parameters
    ----------
    url: str
       url to fetch
    filename: str
       local file to save

    Returns
    -------

    None
    """
    if not os.path.isfile(filename):
        #gcontext = ssl.SSLContext()
        with urllib.request.urlopen(url) as response:
            with open(filename, "wb") as f:
                f.write(response.read())

In [4]:
datadir = "data/geocoding/"
os.makedirs(datadir, exist_ok=True)
os.makedirs(f"{datadir}/full", exist_ok=True)


In [5]:
sample_size=10000

seed=314

In [11]:
datasets = ["kbo", "rep", "best", "rrn", "resto"]
# datasets = ["best"]

# Best

In [7]:
if "best" in datasets:
    best_vlg_fn = f"{datadir}/full/openaddress-bevlg.zip"
    download_if_nexist("https://opendata.bosa.be/download/best/openaddress-bevlg.zip", best_vlg_fn)

    best_wal_fn = f"{datadir}/full/openaddress-bewal.zip"
    download_if_nexist("https://opendata.bosa.be/download/best/openaddress-bewal.zip", best_wal_fn)

    best_bru_fn = f"{datadir}/full/openaddress-bebru.zip"
    download_if_nexist("https://opendata.bosa.be/download/best/openaddress-bebru.zip", best_bru_fn)

In [8]:
if "best" in datasets:
    best_full = pd.concat([pd.read_csv(fn, usecols=["municipality_name_de", "municipality_name_fr", "municipality_name_nl", 
                                                    "streetname_de", "streetname_fr", "streetname_nl",
                                                    "postcode", "house_number", "region_code"], dtype=str) for fn in [best_vlg_fn, best_wal_fn, best_bru_fn] ])

In [9]:
if "best" in datasets:
    best_full["street"] = np.where(best_full.region_code == "BE-VLG", best_full.streetname_nl.fillna(best_full.streetname_fr).fillna(best_full.streetname_de),
                          np.where(best_full.region_code == "BE-WAL", best_full.streetname_fr.fillna(best_full.streetname_de).fillna(best_full.streetname_nl),
                          np.where(best_full.region_code == "BE-BRU", best_full.streetname_fr.fillna(best_full.streetname_nl).fillna(best_full.streetname_de), None)))

    best_full["city"] =   np.where(best_full.region_code == "BE-VLG", best_full.municipality_name_nl.fillna(best_full.municipality_name_fr).fillna(best_full.municipality_name_de),
                          np.where(best_full.region_code == "BE-WAL", best_full.municipality_name_fr.fillna(best_full.municipality_name_de).fillna(best_full.municipality_name_nl),
                          np.where(best_full.region_code == "BE-BRU", best_full.municipality_name_fr.fillna(best_full.municipality_name_nl).fillna(best_full.municipality_name_de), None)))

In [10]:
if "best" in datasets:
    best_sample =  best_full[["street", "house_number", "postcode", "city"]].rename(columns={"house_number": "housenumber"})
    best_sample = best_sample.drop_duplicates().sample(sample_size, random_state=seed).reset_index(drop=True)
    best_sample.to_csv(f"{datadir}/best_{sample_size}.csv.gz", index=False)

# KBO


In [12]:
kbo_fn = f"{datadir}/full/kbo_full.zip"
# To be downloaded from https://kbopub.economie.fgov.be/kbo-open-data/

#download_if_nexist("https://kbopub.economie.fgov.be/kbo-open-data/affiliation/xml/files/KboOpenData_0111_2023_05_Full.zip", kbo_fn)


In [110]:
if "kbo" in datasets:
    with ZipFile(kbo_fn) as z:
       # open the csv file in the dataset
        with z.open("address.csv") as f:

            kbo_full = pd.read_csv(f, #usecols=["CountryFR", "Zipcode", 
#                                                "MunicipalityNL","MunicipalityFR", 
#                                                "StreetNL", "StreetFR", "HouseNumber"],
                                dtype=str)
    

In [112]:
if "kbo" in datasets:

    kbo_full = kbo_full[kbo_full.CountryFR.isnull()].copy()
    
    kbo_sample = kbo_full.drop_duplicates(subset=["Zipcode","StreetNL", "StreetFR", "HouseNumber"]).sample(sample_size, random_state=seed)

In [72]:
kbo_full[kbo_full.Zipcode == "4154"]

Unnamed: 0,EntityNumber,TypeOfAddress,CountryNL,CountryFR,Zipcode,MunicipalityNL,MunicipalityFR,StreetNL,StreetFR,HouseNumber,Box,ExtraAddressInfo,DateStrikingOff
52653,0411.902.679,REGO,,,4154,Tinlot,Tinlot,Rue du Village,Rue du Village,2,,,


In [114]:
kbo_full[kbo_full.Zipcode.isin(["4154", "5301", "5082", "8478", "9821", "4383", "5053", "6268","5665", "3820", "6848",
                                "1641", "8190","4070", "4622", "5411", "4131", "4341", "3260", "3668", "7583", "5430"])]#"3590" "4140" "6960"  "6941"

Unnamed: 0,EntityNumber,TypeOfAddress,CountryNL,CountryFR,Zipcode,MunicipalityNL,MunicipalityFR,StreetNL,StreetFR,HouseNumber,Box,ExtraAddressInfo,DateStrikingOff
9706,0400.928.813,REGO,,,3260,Assent,Assent,Dorpstraat 44A,Dorpstraat 44A,,,,
52653,0411.902.679,REGO,,,4154,Tinlot,Tinlot,Rue du Village,Rue du Village,2.0,,,
103449,0423.008.981,REGO,,,3668,Niel-bij-As,Niel-bij-As,Grotstraat,Grotstraat,7.0,,,


In [15]:
if "kbo" in datasets:
    kbo_sample["lg"] = np.where(kbo_sample.Zipcode.str[0].isin(["2", "3", "8", "9"]), "VL",
                       np.where(kbo_sample.Zipcode.str[0].isin(["4", "5", "6", "7"]), "FR",
                       np.where(kbo_sample.Zipcode.str[0:2].between("10", "14"), "FR", "VL")))
    
    

In [16]:
if "kbo" in datasets:
    kbo_sample["street"] = np.where(kbo_sample.lg == "FR", 
                                    kbo_sample.StreetFR.fillna(kbo_sample.StreetNL),
                                    kbo_sample.StreetNL.fillna(kbo_sample.StreetFR))

    kbo_sample["city"] = np.where(kbo_sample.lg == "FR", 
                                  kbo_sample.MunicipalityFR.fillna(kbo_sample.MunicipalityNL),
                                  kbo_sample.MunicipalityNL.fillna(kbo_sample.MunicipalityFR))


In [17]:
if "kbo" in datasets:
    kbo_sample = kbo_sample.rename(columns={"Zipcode": "postcode", "HouseNumber": "housenumber"})[["street", "housenumber", "postcode", "city"]]
    
    kbo_sample.to_csv(f"{datadir}/kbo_{sample_size}.csv.gz", index=False)

In [18]:
# kbo_sample

# RRN

In [19]:
if "rrn" in datasets:
    best_RN_mapping_fn = f"{datadir}/full/3_RRN_2023Q1.zip"
    download_if_nexist(f"{bosa_mapping_url}/3_RRN_2023Q1.zip", best_RN_mapping_fn)

In [20]:
if "rrn" in datasets:
    with ZipFile(best_RN_mapping_fn, 'r') as zipObj:
        recs = []
        for f in ["STEP131_RR_B_Result.txt", "STEP131_RR_F_Result.txt", "STEP131_RR_W_Result.txt"]:
            print(f)
            for row in tqdm(zipObj.open(f) ):
                recs.append(row)

        print(f"Got {len(recs)} records, take a sample")
        random.seed(seed)
        recs= random.sample(recs, int(sample_size*1.2))

        rrn_sample = pd.DataFrame([json.loads(r) for r in recs])

STEP131_RR_B_Result.txt


487317it [00:01, 284650.73it/s]


STEP131_RR_F_Result.txt


2882103it [00:13, 214664.71it/s]


STEP131_RR_W_Result.txt


1597585it [00:06, 237999.26it/s]


Got 4967005 records, take a sample


In [21]:
if "rrn" in datasets:
    rrn_sample = rrn_sample.drop_duplicates(subset=["Snl", "Sfr", "Sde", "P", "hs"])
    rrn_sample = rrn_sample.sample(sample_size, random_state=seed)

In [22]:
if "rrn" in datasets:
    url_all_cities = "https://services.socialsecurity.be/REST/referenceData/geography/v1/countries/150/cities?pageSize=0"

    download_if_nexist(url_all_cities, f"{datadir}/full/referencedata_cities.json")

    with open(f"{datadir}/full/referencedata_cities.json", encoding='utf-8') as f:
        refdata_cities = pd.DataFrame(json.load(f)["items"])

        refdata_cities["city"]  = np.where(refdata_cities.officialLanguage=="N", refdata_cities["cityName"].apply(lambda x: x["nl"]), 
                          np.where(refdata_cities.officialLanguage=="DF", refdata_cities["cityName"].apply(lambda x: x["de"]),
                                   refdata_cities["cityName"].apply(lambda x: x["fr"])))
        refdata_cities = refdata_cities[["cityNisCode", "city"]].dropna().drop_duplicates()
        refdata_cities["cityNisCode"] = refdata_cities.cityNisCode.astype(pd.Int64Dtype()).astype(str)


In [23]:
# rrn_sample[rrn_sample.street ==""].replace("", pd.NA).Sfr.fillna(rrn_sample.Sde)

In [24]:
if "rrn" in datasets:
    rrn_sample = rrn_sample.merge(refdata_cities.rename(columns={"cityNisCode":"idM_SRC" }))
    rrn_sample["street"] = np.where(rrn_sample.R == "F", rrn_sample.Snl, rrn_sample.replace("", pd.NA).Sfr.fillna(rrn_sample.Sde).fillna(rrn_sample.Snl))

    rrn_sample = rrn_sample.rename(columns={
                        "hs":      "housenumber",
                        "POri":    "postcode", 
                        })


In [25]:
if "rrn" in datasets:
    rrn_sample[["street", "housenumber", "postcode", "city"]].to_csv(f"{datadir}/rrn_{sample_size}.csv.gz", index=False)

# Repertoire

In [26]:
# Get a sample of enterprise number from KBO open data

# 
# datasets=["rep"]

In [27]:
if "rep" in datasets:
    with ZipFile(kbo_fn) as z:
        
       # open the csv file in the dataset
        with z.open("enterprise.csv") as f:

            cbe_list_full = pd.read_csv(f,
                                        usecols  = ["EnterpriseNumber", "TypeOfEnterprise", "JuridicalForm"], 
                                        dtype=str)
    cbe_list_full

In [28]:
# sample_size=100

In [43]:
if "rep" in datasets:
    # Get a sample of CBE/KBO numbers
    cbe_list_presample = cbe_list_full[(cbe_list_full.TypeOfEnterprise == "2") & 
                                       (cbe_list_full.JuridicalForm.isin(["015", "014"]))].sample(5*sample_size, random_state=seed)
    

In [32]:
import time
def call_repertorium(cbe_number):
    url = f"https://services.socialsecurity.be/REST/employer/identification/v6/employers/search"
    
    r = requests.get(url,
        params= { "enterpriseNumber": str(cbe_number).replace(".", "")})
    
    return json.loads(r.text)

def get_repertorium_address(cbe_number):
    r = call_repertorium(cbe_number)
    
    time.sleep(0.05)
    if "identity" in r and "address" in r["identity"]:
        return r["identity"]["address"]
    else:
        return None
# get_repertorium_address(864279008)

In [45]:
# cbe_list_presample = cbe_list_presample.iloc[10000:]

In [46]:
# Call repertorium web service based on CBE list, until we find "sample_size*1.2" (to be sure to find sample_size unique addresses) distinct addresses
if "rep" in datasets:
    rec_with_addr=[]
    with tqdm(total=sample_size) as pbar:
        for i, rec in cbe_list_presample.reset_index(drop=True).iterrows():
            addr = get_repertorium_address(rec.EnterpriseNumber)
            if addr is not None:
                addr ={k: addr[k] for k in ['streetName','houseNumber', 'postCode', 'municipalityName'] if k in addr}
                if not addr in rec_with_addr :
                    #rec_with_addr.append( (rec.EnterpriseNumber, addr))
                    rec_with_addr.append(  addr)
                    pbar.update()
                else: 
                    print("skipping duplicated address")
            if len(rec_with_addr) >= sample_size:
                break

        rep_sample = pd.DataFrame(rec_with_addr)
        rep_sample

  0%|          | 48/10000 [00:23<1:08:57,  2.41it/s]

skipping duplicated address


  1%|          | 80/10000 [00:42<51:56,  3.18it/s]  

skipping duplicated address


  1%|          | 92/10000 [00:48<1:51:10,  1.49it/s]

skipping duplicated address


  1%|▏         | 128/10000 [01:04<57:55,  2.84it/s]  

skipping duplicated address


  2%|▏         | 166/10000 [01:22<1:09:41,  2.35it/s]

skipping duplicated address


  2%|▏         | 176/10000 [01:26<1:03:36,  2.57it/s]

skipping duplicated address


  2%|▏         | 241/10000 [02:00<1:43:41,  1.57it/s]

skipping duplicated address


  3%|▎         | 274/10000 [02:18<1:10:47,  2.29it/s]

skipping duplicated address


  3%|▎         | 287/10000 [02:26<1:45:55,  1.53it/s]

skipping duplicated address


  3%|▎         | 296/10000 [02:34<1:46:57,  1.51it/s]

skipping duplicated address


  3%|▎         | 299/10000 [02:36<1:42:15,  1.58it/s]

skipping duplicated address


  4%|▎         | 350/10000 [03:04<1:15:20,  2.13it/s]

skipping duplicated address


  4%|▍         | 377/10000 [03:14<51:06,  3.14it/s]  

skipping duplicated address


  4%|▍         | 444/10000 [03:47<1:01:53,  2.57it/s]

skipping duplicated address


  5%|▍         | 479/10000 [04:05<1:16:04,  2.09it/s]

skipping duplicated address


  5%|▍         | 487/10000 [04:09<1:12:52,  2.18it/s]

skipping duplicated address


  5%|▌         | 533/10000 [04:29<44:17,  3.56it/s]  

skipping duplicated address


  5%|▌         | 538/10000 [04:33<1:10:51,  2.23it/s]

skipping duplicated address


  6%|▌         | 589/10000 [04:53<57:23,  2.73it/s]  

skipping duplicated address


  6%|▌         | 619/10000 [05:09<1:43:53,  1.50it/s]

skipping duplicated address


  6%|▋         | 635/10000 [05:18<1:21:48,  1.91it/s]

skipping duplicated address


  6%|▋         | 636/10000 [05:19<1:09:16,  2.25it/s]

skipping duplicated address


  6%|▋         | 638/10000 [05:19<1:04:53,  2.40it/s]

skipping duplicated address


  7%|▋         | 660/10000 [05:30<1:30:49,  1.71it/s]

skipping duplicated address


  7%|▋         | 666/10000 [05:34<1:21:04,  1.92it/s]

skipping duplicated address


  7%|▋         | 679/10000 [05:42<1:43:35,  1.50it/s]

skipping duplicated address


  7%|▋         | 691/10000 [05:46<50:28,  3.07it/s]  

skipping duplicated address


  7%|▋         | 710/10000 [05:57<1:18:08,  1.98it/s]

skipping duplicated address


  7%|▋         | 730/10000 [06:13<2:27:08,  1.05it/s]

skipping duplicated address


  9%|▉         | 878/10000 [07:17<54:49,  2.77it/s]  

skipping duplicated address


  9%|▉         | 940/10000 [07:41<1:03:11,  2.39it/s]

skipping duplicated address


 10%|▉         | 974/10000 [07:54<58:13,  2.58it/s]  

skipping duplicated address


 10%|█         | 1008/10000 [08:08<1:13:18,  2.04it/s]

skipping duplicated address


 10%|█         | 1015/10000 [08:11<1:05:08,  2.30it/s]

skipping duplicated address


 11%|█         | 1055/10000 [08:29<1:08:33,  2.17it/s]

skipping duplicated address


 11%|█▏        | 1131/10000 [08:58<57:36,  2.57it/s]  

skipping duplicated address


 11%|█▏        | 1139/10000 [09:03<1:14:21,  1.99it/s]

skipping duplicated address


 12%|█▏        | 1178/10000 [09:22<1:05:50,  2.23it/s]

skipping duplicated address


 12%|█▏        | 1187/10000 [09:24<40:56,  3.59it/s]  

skipping duplicated address
skipping duplicated address


 12%|█▏        | 1188/10000 [09:27<2:26:00,  1.01it/s]

skipping duplicated address


 12%|█▏        | 1205/10000 [09:35<41:56,  3.50it/s]  

skipping duplicated address


 13%|█▎        | 1252/10000 [09:57<1:37:04,  1.50it/s]

skipping duplicated address


 13%|█▎        | 1255/10000 [10:00<2:00:03,  1.21it/s]

skipping duplicated address


 13%|█▎        | 1274/10000 [10:07<59:38,  2.44it/s]  

skipping duplicated address


 13%|█▎        | 1307/10000 [10:18<32:50,  4.41it/s]  

skipping duplicated address


 13%|█▎        | 1331/10000 [10:29<55:53,  2.59it/s]  

skipping duplicated address


 14%|█▎        | 1369/10000 [10:44<48:46,  2.95it/s]  

skipping duplicated address


 14%|█▍        | 1380/10000 [10:48<40:57,  3.51it/s]  

skipping duplicated address


 14%|█▍        | 1423/10000 [11:08<45:33,  3.14it/s]  

skipping duplicated address


 14%|█▍        | 1436/10000 [11:16<1:41:57,  1.40it/s]

skipping duplicated address


 14%|█▍        | 1440/10000 [11:18<1:03:54,  2.23it/s]

skipping duplicated address


 15%|█▌        | 1516/10000 [11:49<41:29,  3.41it/s]  

skipping duplicated address


 15%|█▌        | 1526/10000 [11:56<1:47:50,  1.31it/s]

skipping duplicated address


 15%|█▌        | 1543/10000 [12:02<47:22,  2.98it/s]  

skipping duplicated address


 16%|█▌        | 1556/10000 [12:07<36:27,  3.86it/s]  

skipping duplicated address


 16%|█▌        | 1578/10000 [12:16<49:31,  2.83it/s]  

skipping duplicated address


 16%|█▌        | 1585/10000 [12:19<1:02:15,  2.25it/s]

skipping duplicated address


 16%|█▌        | 1594/10000 [12:25<54:43,  2.56it/s]  

skipping duplicated address


 16%|█▌        | 1598/10000 [12:27<52:47,  2.65it/s]  

skipping duplicated address


 16%|█▌        | 1602/10000 [12:29<1:00:40,  2.31it/s]

skipping duplicated address


 17%|█▋        | 1690/10000 [13:08<2:23:44,  1.04s/it]

skipping duplicated address


 17%|█▋        | 1693/10000 [13:11<2:06:31,  1.09it/s]

skipping duplicated address


 17%|█▋        | 1717/10000 [13:22<58:59,  2.34it/s]  

skipping duplicated address


 17%|█▋        | 1739/10000 [13:31<44:04,  3.12it/s]  

skipping duplicated address


 17%|█▋        | 1748/10000 [13:35<1:02:44,  2.19it/s]

skipping duplicated address


 18%|█▊        | 1750/10000 [13:36<1:14:04,  1.86it/s]

skipping duplicated address


 18%|█▊        | 1766/10000 [13:44<48:16,  2.84it/s]  

skipping duplicated address


 18%|█▊        | 1804/10000 [14:01<1:17:22,  1.77it/s]

skipping duplicated address


 18%|█▊        | 1819/10000 [14:06<43:24,  3.14it/s]  

skipping duplicated address


 18%|█▊        | 1828/10000 [14:11<39:36,  3.44it/s]  

skipping duplicated address


 18%|█▊        | 1831/10000 [14:12<46:16,  2.94it/s]  

skipping duplicated address


 18%|█▊        | 1834/10000 [14:13<56:59,  2.39it/s]  

skipping duplicated address


 18%|█▊        | 1848/10000 [14:19<44:44,  3.04it/s]  

skipping duplicated address


 19%|█▉        | 1905/10000 [14:39<58:39,  2.30it/s]  

skipping duplicated address


 19%|█▉        | 1931/10000 [14:51<43:04,  3.12it/s]  

skipping duplicated address


 19%|█▉        | 1937/10000 [14:53<37:50,  3.55it/s]

skipping duplicated address


 19%|█▉        | 1944/10000 [14:57<58:28,  2.30it/s]  

skipping duplicated address


 19%|█▉        | 1946/10000 [14:58<53:04,  2.53it/s]  

skipping duplicated address


 20%|█▉        | 1975/10000 [15:09<44:00,  3.04it/s]  

skipping duplicated address


 20%|█▉        | 1976/10000 [15:10<1:03:13,  2.12it/s]

skipping duplicated address
skipping duplicated address


 20%|█▉        | 1990/10000 [15:16<52:31,  2.54it/s]  

skipping duplicated address


 20%|██        | 2011/10000 [15:28<1:28:32,  1.50it/s]

skipping duplicated address


 20%|██        | 2018/10000 [15:31<56:59,  2.33it/s]  

skipping duplicated address


 20%|██        | 2022/10000 [15:33<47:37,  2.79it/s]  

skipping duplicated address


 20%|██        | 2029/10000 [15:37<1:07:45,  1.96it/s]

skipping duplicated address


 21%|██        | 2051/10000 [15:47<49:33,  2.67it/s]  

skipping duplicated address


 21%|██        | 2052/10000 [15:48<1:07:52,  1.95it/s]

skipping duplicated address


 21%|██        | 2080/10000 [15:59<1:07:28,  1.96it/s]

skipping duplicated address


 21%|██        | 2121/10000 [16:14<33:36,  3.91it/s]  

skipping duplicated address


 21%|██▏       | 2127/10000 [16:16<57:00,  2.30it/s]

skipping duplicated address


 21%|██▏       | 2134/10000 [16:19<36:39,  3.58it/s]  

skipping duplicated address


 22%|██▏       | 2158/10000 [16:27<46:40,  2.80it/s]  

skipping duplicated address


 22%|██▏       | 2181/10000 [16:38<52:15,  2.49it/s]  

skipping duplicated address


 22%|██▏       | 2213/10000 [16:52<42:26,  3.06it/s]  

skipping duplicated address


 22%|██▏       | 2229/10000 [17:00<1:08:17,  1.90it/s]

skipping duplicated address


 22%|██▏       | 2246/10000 [17:09<51:33,  2.51it/s]  

skipping duplicated address


 22%|██▏       | 2247/10000 [17:09<56:24,  2.29it/s]

skipping duplicated address
skipping duplicated address


 22%|██▏       | 2249/10000 [17:11<1:06:26,  1.94it/s]

skipping duplicated address


 23%|██▎       | 2256/10000 [17:13<40:01,  3.23it/s]  

skipping duplicated address


 23%|██▎       | 2263/10000 [17:18<1:13:27,  1.76it/s]

skipping duplicated address


 23%|██▎       | 2271/10000 [17:22<51:55,  2.48it/s]  

skipping duplicated address


 23%|██▎       | 2299/10000 [17:34<44:08,  2.91it/s]  

skipping duplicated address


 23%|██▎       | 2320/10000 [17:42<36:19,  3.52it/s]  

skipping duplicated address


 23%|██▎       | 2322/10000 [17:43<45:25,  2.82it/s]

skipping duplicated address


 24%|██▎       | 2355/10000 [17:56<53:16,  2.39it/s]  

skipping duplicated address


 24%|██▎       | 2374/10000 [18:05<57:21,  2.22it/s]  

skipping duplicated address


 24%|██▍       | 2386/10000 [18:10<41:50,  3.03it/s]  

skipping duplicated address


 24%|██▍       | 2392/10000 [18:12<51:35,  2.46it/s]  

skipping duplicated address


 24%|██▍       | 2406/10000 [18:17<33:49,  3.74it/s]  

skipping duplicated address


 24%|██▍       | 2408/10000 [18:18<50:54,  2.49it/s]

skipping duplicated address


 24%|██▍       | 2432/10000 [18:28<40:31,  3.11it/s]  

skipping duplicated address


 25%|██▍       | 2460/10000 [18:41<1:12:31,  1.73it/s]

skipping duplicated address


 25%|██▍       | 2466/10000 [18:44<53:40,  2.34it/s]  

skipping duplicated address


 25%|██▌       | 2501/10000 [19:03<54:57,  2.27it/s]  

skipping duplicated address


 25%|██▌       | 2509/10000 [19:05<36:55,  3.38it/s]

skipping duplicated address


 25%|██▌       | 2539/10000 [19:18<54:09,  2.30it/s]  

skipping duplicated address


 25%|██▌       | 2548/10000 [19:21<39:16,  3.16it/s]  

skipping duplicated address
skipping duplicated address


 26%|██▌       | 2585/10000 [19:41<1:00:33,  2.04it/s]

skipping duplicated address


 26%|██▋       | 2640/10000 [20:04<45:31,  2.69it/s]  

skipping duplicated address


 26%|██▋       | 2645/10000 [20:06<47:14,  2.60it/s]

skipping duplicated address


 26%|██▋       | 2646/10000 [20:07<1:15:15,  1.63it/s]

skipping duplicated address


 27%|██▋       | 2668/10000 [20:18<52:24,  2.33it/s]  

skipping duplicated address


 27%|██▋       | 2685/10000 [20:27<1:14:13,  1.64it/s]

skipping duplicated address


 27%|██▋       | 2701/10000 [20:32<40:25,  3.01it/s]  

skipping duplicated address


 27%|██▋       | 2717/10000 [20:38<37:38,  3.23it/s]  

skipping duplicated address


 28%|██▊       | 2777/10000 [21:05<48:57,  2.46it/s]  

skipping duplicated address


 28%|██▊       | 2782/10000 [21:07<46:15,  2.60it/s]

skipping duplicated address


 28%|██▊       | 2792/10000 [21:11<36:30,  3.29it/s]  

skipping duplicated address


 28%|██▊       | 2813/10000 [21:19<52:54,  2.26it/s]  

skipping duplicated address


 28%|██▊       | 2826/10000 [21:26<1:39:51,  1.20it/s]

skipping duplicated address


 29%|██▊       | 2871/10000 [21:47<35:54,  3.31it/s]  

skipping duplicated address


 29%|██▉       | 2881/10000 [21:53<1:21:13,  1.46it/s]

skipping duplicated address


 29%|██▉       | 2901/10000 [22:02<38:48,  3.05it/s]  

skipping duplicated address


 29%|██▉       | 2918/10000 [22:11<57:49,  2.04it/s]  

skipping duplicated address


 29%|██▉       | 2927/10000 [22:14<54:27,  2.16it/s]  

skipping duplicated address


 30%|██▉       | 2962/10000 [22:29<35:25,  3.31it/s]  

skipping duplicated address


 30%|██▉       | 2988/10000 [22:40<39:35,  2.95it/s]  

skipping duplicated address


 30%|███       | 3014/10000 [22:52<1:04:12,  1.81it/s]

skipping duplicated address


 30%|███       | 3020/10000 [22:55<42:27,  2.74it/s]  

skipping duplicated address


 30%|███       | 3035/10000 [23:02<35:52,  3.24it/s]  

skipping duplicated address


 30%|███       | 3040/10000 [23:04<54:46,  2.12it/s]  

skipping duplicated address


 31%|███       | 3062/10000 [23:12<28:41,  4.03it/s]  

skipping duplicated address


 31%|███       | 3114/10000 [23:35<49:38,  2.31it/s]  

skipping duplicated address
skipping duplicated address


 31%|███▏      | 3126/10000 [23:39<38:23,  2.98it/s]

skipping duplicated address


 31%|███▏      | 3134/10000 [23:42<40:54,  2.80it/s]

skipping duplicated address


 32%|███▏      | 3160/10000 [23:53<47:50,  2.38it/s]  

skipping duplicated address


 32%|███▏      | 3180/10000 [24:03<1:01:42,  1.84it/s]

skipping duplicated address


 32%|███▏      | 3197/10000 [24:10<47:08,  2.41it/s]  

skipping duplicated address
skipping duplicated address


 32%|███▏      | 3243/10000 [24:28<1:04:06,  1.76it/s]

skipping duplicated address


 33%|███▎      | 3251/10000 [24:32<54:35,  2.06it/s]  

skipping duplicated address


 33%|███▎      | 3258/10000 [24:36<1:01:44,  1.82it/s]

skipping duplicated address


 33%|███▎      | 3264/10000 [24:38<42:32,  2.64it/s]  

skipping duplicated address


 33%|███▎      | 3282/10000 [24:48<50:41,  2.21it/s]  

skipping duplicated address


 33%|███▎      | 3305/10000 [24:59<49:22,  2.26it/s]  

skipping duplicated address


 33%|███▎      | 3307/10000 [25:00<48:33,  2.30it/s]

skipping duplicated address


 34%|███▎      | 3352/10000 [25:17<52:02,  2.13it/s]  

skipping duplicated address


 34%|███▍      | 3387/10000 [25:32<40:11,  2.74it/s]  

skipping duplicated address


 34%|███▍      | 3423/10000 [25:47<32:39,  3.36it/s]  

skipping duplicated address


 34%|███▍      | 3433/10000 [25:52<42:46,  2.56it/s]  

skipping duplicated address


 35%|███▍      | 3462/10000 [26:04<36:52,  2.96it/s]  

skipping duplicated address


 35%|███▍      | 3473/10000 [26:10<53:31,  2.03it/s]  

skipping duplicated address


 35%|███▍      | 3490/10000 [26:18<54:31,  1.99it/s]  

skipping duplicated address


 35%|███▌      | 3502/10000 [26:25<53:50,  2.01it/s]  

skipping duplicated address


 35%|███▌      | 3509/10000 [26:29<1:17:04,  1.40it/s]

skipping duplicated address


 36%|███▌      | 3550/10000 [26:46<24:42,  4.35it/s]  

skipping duplicated address


 36%|███▌      | 3565/10000 [26:52<37:20,  2.87it/s]

skipping duplicated address


 36%|███▌      | 3615/10000 [27:13<27:13,  3.91it/s]  

skipping duplicated address


 36%|███▋      | 3638/10000 [27:23<50:18,  2.11it/s]  

skipping duplicated address


 36%|███▋      | 3644/10000 [27:26<43:29,  2.44it/s]  

skipping duplicated address


 37%|███▋      | 3660/10000 [27:32<50:44,  2.08it/s]

skipping duplicated address


 37%|███▋      | 3664/10000 [27:35<58:51,  1.79it/s]  

skipping duplicated address


 37%|███▋      | 3671/10000 [27:38<46:04,  2.29it/s]  

skipping duplicated address


 37%|███▋      | 3673/10000 [27:39<1:01:48,  1.71it/s]

skipping duplicated address


 37%|███▋      | 3678/10000 [27:41<58:53,  1.79it/s]  

skipping duplicated address


 37%|███▋      | 3693/10000 [27:48<34:08,  3.08it/s]

skipping duplicated address


 37%|███▋      | 3696/10000 [27:50<1:11:20,  1.47it/s]

skipping duplicated address


 37%|███▋      | 3710/10000 [27:55<35:17,  2.97it/s]  

skipping duplicated address


 38%|███▊      | 3790/10000 [28:31<1:06:34,  1.55it/s]

skipping duplicated address


 38%|███▊      | 3808/10000 [28:40<46:23,  2.22it/s]  

skipping duplicated address
skipping duplicated address


 38%|███▊      | 3840/10000 [28:54<34:46,  2.95it/s]  

skipping duplicated address


 39%|███▊      | 3866/10000 [29:03<40:16,  2.54it/s]

skipping duplicated address


 40%|███▉      | 3952/10000 [29:39<49:33,  2.03it/s]  

skipping duplicated address


 40%|████      | 4032/10000 [30:12<31:43,  3.14it/s]  

skipping duplicated address


 40%|████      | 4038/10000 [30:15<43:07,  2.30it/s]

skipping duplicated address


 41%|████      | 4075/10000 [30:36<55:22,  1.78it/s]  

skipping duplicated address


 41%|████      | 4086/10000 [30:41<1:12:23,  1.36it/s]

skipping duplicated address


 41%|████      | 4123/10000 [30:58<30:02,  3.26it/s]  

skipping duplicated address


 41%|████      | 4124/10000 [30:59<45:17,  2.16it/s]

skipping duplicated address


 41%|████▏     | 4137/10000 [31:04<37:06,  2.63it/s]  

skipping duplicated address


 42%|████▏     | 4158/10000 [31:14<34:59,  2.78it/s]  

skipping duplicated address


 42%|████▏     | 4163/10000 [31:18<49:14,  1.98it/s]  

skipping duplicated address


 42%|████▏     | 4164/10000 [31:20<1:10:33,  1.38it/s]

skipping duplicated address


 42%|████▏     | 4183/10000 [31:31<44:59,  2.15it/s]  

skipping duplicated address


 42%|████▏     | 4187/10000 [31:33<43:32,  2.23it/s]  

skipping duplicated address


 42%|████▏     | 4209/10000 [31:43<23:25,  4.12it/s]  

skipping duplicated address


 42%|████▏     | 4212/10000 [31:44<35:12,  2.74it/s]

skipping duplicated address


 42%|████▏     | 4216/10000 [31:47<47:05,  2.05it/s]  

skipping duplicated address


 42%|████▏     | 4235/10000 [31:55<32:41,  2.94it/s]  

skipping duplicated address


 43%|████▎     | 4281/10000 [32:14<37:40,  2.53it/s]  

skipping duplicated address


 43%|████▎     | 4311/10000 [32:26<26:27,  3.58it/s]  

skipping duplicated address


 43%|████▎     | 4312/10000 [32:27<34:13,  2.77it/s]

skipping duplicated address


 43%|████▎     | 4332/10000 [32:38<33:56,  2.78it/s]  

skipping duplicated address


 43%|████▎     | 4334/10000 [32:39<54:17,  1.74it/s]

skipping duplicated address
skipping duplicated address


 44%|████▎     | 4355/10000 [32:48<36:25,  2.58it/s]

skipping duplicated address


 44%|████▎     | 4365/10000 [32:54<51:41,  1.82it/s]  

skipping duplicated address


 44%|████▎     | 4367/10000 [32:54<43:40,  2.15it/s]

skipping duplicated address


 44%|████▎     | 4374/10000 [32:59<41:26,  2.26it/s]  

skipping duplicated address


 44%|████▍     | 4375/10000 [32:59<47:19,  1.98it/s]

skipping duplicated address


 44%|████▍     | 4440/10000 [33:28<50:13,  1.84it/s]  

skipping duplicated address


 44%|████▍     | 4444/10000 [33:31<57:21,  1.61it/s]  

skipping duplicated address


 44%|████▍     | 4446/10000 [33:32<1:00:15,  1.54it/s]

skipping duplicated address


 45%|████▍     | 4457/10000 [33:36<30:29,  3.03it/s]  

skipping duplicated address


 45%|████▍     | 4462/10000 [33:39<41:55,  2.20it/s]

skipping duplicated address


 45%|████▍     | 4470/10000 [33:46<1:15:16,  1.22it/s]

skipping duplicated address


 45%|████▍     | 4494/10000 [33:59<37:02,  2.48it/s]  

skipping duplicated address


 45%|████▌     | 4517/10000 [34:11<38:44,  2.36it/s]  

skipping duplicated address


 45%|████▌     | 4520/10000 [34:12<44:10,  2.07it/s]

skipping duplicated address


 45%|████▌     | 4546/10000 [34:22<36:57,  2.46it/s]

skipping duplicated address


 46%|████▌     | 4606/10000 [34:48<47:00,  1.91it/s]  

skipping duplicated address


 46%|████▌     | 4615/10000 [34:53<54:53,  1.63it/s]  

skipping duplicated address


 46%|████▋     | 4635/10000 [35:03<45:37,  1.96it/s]  

skipping duplicated address


 46%|████▋     | 4639/10000 [35:07<49:47,  1.79it/s]  

skipping duplicated address


 47%|████▋     | 4679/10000 [35:27<35:08,  2.52it/s]  

skipping duplicated address


 47%|████▋     | 4682/10000 [35:28<33:00,  2.69it/s]

skipping duplicated address


 47%|████▋     | 4685/10000 [35:29<28:20,  3.12it/s]

skipping duplicated address


 47%|████▋     | 4720/10000 [35:44<30:57,  2.84it/s]

skipping duplicated address


 48%|████▊     | 4762/10000 [36:02<26:16,  3.32it/s]  

skipping duplicated address


 48%|████▊     | 4765/10000 [36:04<39:58,  2.18it/s]

skipping duplicated address


 48%|████▊     | 4799/10000 [36:22<29:16,  2.96it/s]  

skipping duplicated address


 48%|████▊     | 4828/10000 [36:35<53:40,  1.61it/s]

skipping duplicated address


 48%|████▊     | 4832/10000 [36:38<53:12,  1.62it/s]  

skipping duplicated address


 49%|████▊     | 4853/10000 [36:48<30:04,  2.85it/s]  

skipping duplicated address


 49%|████▊     | 4867/10000 [36:56<48:48,  1.75it/s]  

skipping duplicated address


 49%|████▉     | 4878/10000 [37:02<30:59,  2.75it/s]  

skipping duplicated address


 49%|████▉     | 4905/10000 [37:15<27:36,  3.08it/s]  

skipping duplicated address


 49%|████▉     | 4913/10000 [37:18<25:39,  3.30it/s]

skipping duplicated address


 50%|████▉     | 4955/10000 [37:39<53:52,  1.56it/s]  

skipping duplicated address


 50%|████▉     | 4964/10000 [37:43<32:36,  2.57it/s]

skipping duplicated address


 50%|████▉     | 4980/10000 [37:51<27:11,  3.08it/s]

skipping duplicated address


 50%|████▉     | 4983/10000 [37:52<34:23,  2.43it/s]

skipping duplicated address
skipping duplicated address


 51%|█████     | 5057/10000 [38:28<33:57,  2.43it/s]  

skipping duplicated address


 51%|█████     | 5063/10000 [38:30<28:42,  2.87it/s]

skipping duplicated address
skipping duplicated address


 51%|█████     | 5074/10000 [38:36<39:07,  2.10it/s]

skipping duplicated address


 51%|█████     | 5082/10000 [38:39<29:28,  2.78it/s]

skipping duplicated address
skipping duplicated address


 51%|█████     | 5103/10000 [38:49<49:40,  1.64it/s]

skipping duplicated address


 51%|█████     | 5108/10000 [38:51<33:32,  2.43it/s]

skipping duplicated address


 51%|█████     | 5111/10000 [38:54<50:48,  1.60it/s]  

skipping duplicated address


 51%|█████▏    | 5136/10000 [39:09<46:10,  1.76it/s]  

skipping duplicated address


 51%|█████▏    | 5139/10000 [39:11<37:56,  2.14it/s]

skipping duplicated address


 52%|█████▏    | 5151/10000 [39:16<29:26,  2.74it/s]

skipping duplicated address


 52%|█████▏    | 5162/10000 [39:23<1:30:46,  1.13s/it]

skipping duplicated address


 52%|█████▏    | 5167/10000 [39:26<54:03,  1.49it/s]  

skipping duplicated address
skipping duplicated address


 52%|█████▏    | 5184/10000 [39:36<41:44,  1.92it/s]  

skipping duplicated address


 52%|█████▏    | 5194/10000 [39:42<31:57,  2.51it/s]  

skipping duplicated address


 52%|█████▏    | 5199/10000 [39:44<28:55,  2.77it/s]

skipping duplicated address


 52%|█████▏    | 5200/10000 [39:45<45:47,  1.75it/s]

skipping duplicated address


 52%|█████▏    | 5223/10000 [39:55<42:02,  1.89it/s]

skipping duplicated address


 52%|█████▏    | 5248/10000 [40:07<24:32,  3.23it/s]  

skipping duplicated address


 53%|█████▎    | 5253/10000 [40:09<30:57,  2.56it/s]

skipping duplicated address


 53%|█████▎    | 5259/10000 [40:12<27:22,  2.89it/s]

skipping duplicated address


 53%|█████▎    | 5318/10000 [40:37<29:53,  2.61it/s]

skipping duplicated address


 53%|█████▎    | 5320/10000 [40:37<27:01,  2.89it/s]

skipping duplicated address


 53%|█████▎    | 5321/10000 [40:38<45:21,  1.72it/s]

skipping duplicated address


 53%|█████▎    | 5337/10000 [40:45<26:45,  2.90it/s]  

skipping duplicated address


 54%|█████▍    | 5379/10000 [41:04<37:21,  2.06it/s]  

skipping duplicated address


 54%|█████▍    | 5383/10000 [41:05<30:56,  2.49it/s]

skipping duplicated address
skipping duplicated address


 54%|█████▍    | 5413/10000 [41:19<28:22,  2.69it/s]  

skipping duplicated address


 54%|█████▍    | 5437/10000 [41:31<51:28,  1.48it/s]  

skipping duplicated address


 55%|█████▍    | 5457/10000 [41:42<39:13,  1.93it/s]

skipping duplicated address


 55%|█████▌    | 5504/10000 [42:08<35:40,  2.10it/s]  

skipping duplicated address


 55%|█████▌    | 5548/10000 [42:29<28:32,  2.60it/s]

skipping duplicated address


 55%|█████▌    | 5549/10000 [42:31<54:00,  1.37it/s]

skipping duplicated address


 56%|█████▌    | 5570/10000 [42:38<24:33,  3.01it/s]

skipping duplicated address


 56%|█████▌    | 5584/10000 [42:44<28:23,  2.59it/s]

skipping duplicated address


 56%|█████▌    | 5592/10000 [42:49<28:26,  2.58it/s]

skipping duplicated address


 56%|█████▌    | 5599/10000 [42:52<26:46,  2.74it/s]

skipping duplicated address


 56%|█████▋    | 5639/10000 [43:11<43:32,  1.67it/s]

skipping duplicated address


 56%|█████▋    | 5648/10000 [43:14<33:18,  2.18it/s]


In [48]:

# rep_sample

In [49]:
# rep_sample = pd.DataFrame(rec_with_addr)
# rep_sample
# rec_with_addr

In [50]:
def get_lg_pref(item, lg_order):
    for lg in lg_order:
        if lg in item:
            return item[lg]
    return None

if "rep" in datasets:
    rep_sample = rep_sample.rename(columns={"postCode": "postcode", 
                                            "houseNumber": "housenumber",
                                           })

    rep_sample["lg"] = np.where(rep_sample.postcode.str[0].isin(["2", "3", "8", "9"]), "VL",
                       np.where(rep_sample.postcode.str[0].isin(["4", "5", "6", "7"]), "FR",
                       np.where(rep_sample.postcode.str[0:2].between("10", "14"), "FR", "VL")))

    rep_sample["street"] = np.where(rep_sample.lg == "FR", 
                                    rep_sample.streetName.apply(lambda r : get_lg_pref(r, ["fr", "nl", "de"])),
                                    rep_sample.streetName.apply(lambda r : get_lg_pref(r, ["nl", "fr", "de"])))# )


    rep_sample["city"] = np.where(rep_sample.lg == "FR", 
                                  rep_sample.municipalityName.apply(lambda r : get_lg_pref(r, ["fr", "nl", "de"])),
                                  rep_sample.municipalityName.apply(lambda r : get_lg_pref(r, ["nl", "fr", "de"])))



In [51]:
if "rep" in datasets:
    rep_sample[["street", "housenumber", "postcode", "city"]].to_csv(f"{datadir}/rep_{sample_size}.csv.gz", index=False)

# Resto

In [None]:
if "resto" in datasets:
    resto_datadir = f"{datadir}/full/resto"

    resto_full = pd.concat([pd.read_csv(f) for f in glob.glob(f"{resto_datadir}/*.csv.gz")])
    resto_full

In [None]:
if "resto" in datasets:
    resto_sample = resto_full.drop("name", axis=1).drop_duplicates().sample(sample_size, random_state=seed)[["street", "housenumber", "postcode", "city"]]
                               
    resto_sample.to_csv(f"{datadir}/resto_{sample_size}.csv.gz", index=False)

In [1]:
# resto_sample

# NGI 

In [17]:
import geopandas as gpd

In [38]:
ngi_full = pd.read_csv(f"{datadir}/ign_geocoder.csv", sep=';', encoding='latin1')

In [39]:
#.input_request.nunique()
ngi_full = ngi_full.drop_duplicates(subset=["input_request", "ouput_street(ref,cont_id,street,score)", "geometry"])

In [67]:
import shapely
p = shapely.geometry.Point( 647305.1605065995, 670243.7006171012, 0)
gpd.GeoSeries([p]).set_crs("epsg:3812").to_crs("epsg:4326")

0    POINT Z (4.33050 50.84260 0.00000)
dtype: geometry

In [68]:
ngi_full = gpd.GeoDataFrame(ngi_full)
ngi_full["geom"] = gpd.GeoSeries.from_wkt(ngi_full.geometry.fillna(""), crs="epsg:3812")
ngi_full = ngi_full.set_geometry("geom")
ngi_full = ngi_full.to_crs("epsg:4326")

In [69]:
ngi_full[ngi_full["ouput_street(ref,cont_id,street,score)"].fillna("").str.contains("BXL")]#.plot()

Unnamed: 0,input_request,"ouput_city(ref,cont_id,city,score)","ouput_street(ref,cont_id,street,score)",search_time(sec),geometry,geom
1343,rue Ropsy Chaudron 7 1070 Anderlecht,,"[('BXLSTR---1370---2', 1, 'RUEROPSYCHAUDRON', ...",0.028455,POINT Z (647305.1605065995 670243.7006171012 0...,POINT Z (4.33050 50.84260 0.00000)
1348,rue Eloy 114 1070 Anderlecht,,"[('BXLSTR---2152---2', 1, 'RUEELOY', 1.0)]",0.040785,POINT Z (646894.220324351 669500.8136294986 0),POINT Z (4.32467 50.83592 0.00000)
1352,rue Georges Moreau 107 1070 Anderlecht,,"[('BXLSTR---4578---2', 1, 'RUEGEORGESMOREAU', ...",0.030206,POINT Z (646857.098786811 669348.3704329199 0),POINT Z (4.32414 50.83454 0.00000)
1354,rue de Douvres 80 1070 Anderlecht,,"[('BXLSTR---265---2', 1, 'RUEDEDOUVRES', 1.0)]",0.055286,POINT Z (646070.3675027142 669663.9241506008 0...,POINT Z (4.31297 50.83738 0.00000)
1360,rue Van Soust 78 1070 Anderlecht,,"[('BXLSTR---3772---2', 1, 'RUEVANSOUST', 1.0)]",0.023925,POINT Z (645894.6817438435 670731.1856306997 0),POINT Z (4.31046 50.84697 0.00000)
...,...,...,...,...,...,...
2057,rue Jef Devos - Jef Devostraat 55 1190 Forest,,"[('BXLSTR---2582---2', 1, 'RUEJEFDEVOS', 0.55)]",0.051182,POINT Z (647190.9663381542 668012.0429690753 0...,POINT Z (4.32889 50.82253 0.00000)
2060,avenue Reine Marie Henriette - Koninging Maria...,,"[('BXLSTR---4793---2', 1, 'AVENUEREINEMARIEHEN...",0.018543,POINT Z (647529.4125021889 667929.1129648751 0...,POINT Z (4.33369 50.82179 0.00000)
2061,avenue Reine Marie Henriette - Koninging Maria...,,"[('BXLSTR---4793---2', 1, 'AVENUEREINEMARIEHEN...",0.019352,POINT Z (647529.4125021889 667929.1129648751 0...,POINT Z (4.33369 50.82179 0.00000)
2062,rue de Fierlant - de Fierlantstraat 35 1190 Fo...,,"[('BXLSTR---3966---2', 1, 'DEFIERLANTSTRAAT', ...",0.021289,POINT Z (647345.5062939598 668505.537412102 0)...,POINT Z (4.33108 50.82697 0.00000)


In [42]:
ngi_full.loc[8].geometry

'POINT Z (652875.8414014473 712078.2757578157 0) POINT Z (652875.8414014473 712078.2757578157 0) POINT Z (652875.8414014473 712078.2757578157 0) '

In [44]:
print(ngi_full.loc[8].geom)

POINT Z (652875.8414014473 712078.2757578157 0)


In [35]:
ngi_full[ngi_full["ouput_city(ref,cont_id,city,score)"].notnull()]

Unnamed: 0,input_request,"ouput_city(ref,cont_id,city,score)","ouput_street(ref,cont_id,street,score)",search_time(sec),geometry


In [28]:
ngi_full["ouput_street(ref,cont_id,street,score)"].iloc[0]

"[('VLDSTR---287---2015-05-28T18:40:37.400', 1, 'BEGIJNENVEST', 1.0)]"

In [71]:
from geopy.location import Location

In [None]:
ngi_full["locationLocation(placename, (latitude, longitude), feature)

In [81]:
ngi_full["location"] = ngi_full.apply(lambda row: Location(row.input_request, (row.geom.y, row.geom.x) if row.geom else (0,0), row), axis=1)

In [83]:
ngi_full.iloc[0]

input_request                                                Begijnenvest 35 2000 Antwerpen
ouput_city(ref,cont_id,city,score)                                                      NaN
ouput_street(ref,cont_id,street,score)    [('VLDSTR---287---2015-05-28T18:40:37.400', 1,...
search_time(sec)                                                                   0.060004
geometry                                   POINT Z (652411.6826476178 711175.3977379295 0) 
geom                                         POINT Z (4.403343987223315 51.2105286322297 0)
location                                  (Begijnenvest 35 2000 Antwerpen, (51.210528632...
Name: 0, dtype: object