## DEMO: Match Natural Persons

In [None]:
%pip install pandas faker

In [1]:
import random

import pandas as pd
from faker import Faker

from fuzzy_matching.match_multi import MultiMatcher
from fuzzy_matching.encryption import AESGCM4Encryptor

In [2]:
# Set up Faker
faker = Faker(locale="nl-NL")
faker.seed_instance(42)

In [3]:
def generate_ids(n):
    """Generate ID-like strings."""
    country_code = "NLD"

    ids = []
    for _ in range(n):
        gender = random.choice(["M", "F"])
        first = f"{random.randint(0, 9999999):07d}"
        second = f"{random.randint(0, 9999999999999999):016d}"
        ids.append(f"{country_code}{first}{gender}{second}")
    return ids


def generate_names(n):
    """Generate dummy names."""
    return pd.Series([faker.name() for _ in range(n)])


def generate_birthdates(n):
    """Generate dummy birthdates."""
    return pd.Series(
        [
            faker.date_of_birth(minimum_age=18, maximum_age=100).strftime("%d-%m-%Y")
            for _ in range(n)
        ]
    )


In [4]:
n = 5_000
data = pd.DataFrame({
    "name": generate_names(n),
    "birthdate": generate_birthdates(n),
    "national_id": generate_ids(n),
})
data.sample(5)

Unnamed: 0,name,birthdate,national_id
2578,Thijmen van den Bergh,09-07-1986,NLD8837681M1396167356249476
3095,Victor Coret,24-04-1927,NLD8346121F1700384145575625
2199,Aron Uit de Willigen,13-12-1938,NLD2418531M4134711299175494
1493,Alex Martens,02-07-2001,NLD5993351F8560675366192919
4807,Ravi Bastiaanse,23-06-1947,NLD6728214M9078530084067626


In [5]:
data.nunique()

name           4988
birthdate      4593
national_id    5000
dtype: int64

In [6]:
encryption_key = AESGCM4Encryptor.generate_key()
encryption_key = b"\x0e\x84\xa1\x01\xd0\xed\x932\xb5\x1dt\x11\x05\xe5j\xf8"

In [None]:
config = {
    "name": {
        "algoritm": "vector",
        "weight": 0.2,
    },
    "birthdate": {
        "algoritm": "distance",
        "weight": 0.2,
    },
    "national_id": {
        "algoritm": "distance",
        "weight": 0.6
    },
}

In [8]:
matcher = MultiMatcher(10, config, encryption_key, "storage")



In [9]:
matcher.create(data)

In [12]:
result = matcher.get({
    "name": "Weijters Hanna",
    "birthdate": "16-11-2000",
    "national_id": "nld3041429m2520313357516153"
})
result

Unnamed: 0,name,birthdate,national_id,similarity
d3aa2cfdeb2547349f4f7a2ef39f5da0,hanna weijters,19 11 2000,nld3041429m2520313357516153,0.38
74848563f95f4037b702901f24bd9e03,wessel mulders,13 11 2000,nld1439370f4465776501724447,0.226154
1eea231df2b4406bb2a8d70267a41bb9,hannah van rijthoven,13 12 2002,nld9592911f4131053169607880,0.205372
fee8ed96d87b44258fe5236c14053155,brian weijters,17 06 1980,nld6628449f1589607425396994,0.203077
bb96d45d110646d7aed9c749e8563a49,luc weijters,15 06 1993,nld2202546f4689351815483858,0.193799
b8e70117d9724de6896aef5993d1c2f7,hanna lagerweij,18 12 1934,nld4512790m0236642628202766,0.183775
8d8b28221a5540ffb651a2c37c3275a0,lot buijs pieters,27 11 2003,nld5088399m2115227403458975,0.182967
4ea1a045f4484facb668587225171fac,lisanne sitters,19 11 1990,nld7219880f2183630948251834,0.1793
073b08d4311144a1a027ca4a5f4421ad,hannah dachgeldt,14 09 2005,nld6239342m0698755126793061,0.177289
df3a4aaa3b7d407c8723a7d8831de2ff,valerie van westfalen,13 12 2000,nld3234526f4478501624590154,0.171566
