## DEMO: Match Natural Persons

In [None]:
%pip install pandas faker

In [None]:
import random

import pandas as pd
from faker import Faker

from fuzzy_matching.match_person import PersonMatcher
from fuzzy_matching.encryption import AESGCM4Encryptor

In [None]:
# Set up Faker with the Dutch locale and a fixed seed so its random draws are
# repeatable between runs.
faker = Faker(locale="nl-NL")
faker.seed_instance(42)
# generate_ids() draws from the global `random` module, which seed_instance()
# does not affect; seed it as well so the generated IDs are repeatable too.
random.seed(42)

In [None]:
def generate_ids(n):
    """Build a list of ``n`` random ID-like strings.

    Each string is the fixed prefix ``NLD``, a zero-padded 7-digit number,
    a gender letter (``M`` or ``F``) and a zero-padded 16-digit number.
    All random values come from the global ``random`` module.
    """
    prefix = "NLD"

    def _one_id():
        # Keep the draw order (letter, 7-digit part, 16-digit part) so a
        # seeded run produces the same sequence of IDs.
        letter = random.choice(("M", "F"))
        short_part = str(random.randint(0, 9999999)).zfill(7)
        long_part = str(random.randint(0, 9999999999999999)).zfill(16)
        return prefix + short_part + letter + long_part

    return [_one_id() for _ in range(n)]


def generate_names(n):
    """Return a pandas Series holding ``n`` names from the module-level ``faker``."""
    names = []
    for _ in range(n):
        names.append(faker.name())
    return pd.Series(names)


def generate_birthdates(n):
    """Return a pandas Series of ``n`` fake birthdates as ``DD-MM-YYYY`` strings.

    Each date is requested from the module-level ``faker`` instance with a
    minimum age of 18 and a maximum age of 100.
    """
    formatted = []
    for _ in range(n):
        born = faker.date_of_birth(minimum_age=18, maximum_age=100)
        formatted.append(born.strftime("%d-%m-%Y"))
    return pd.Series(formatted)


In [None]:
# Number of synthetic person records to generate.
n = 5_000
# One row per person; the columns are filled by the generator helpers above.
# Column order here is also the order in which the generators are called.
data = pd.DataFrame({
    "name": generate_names(n),
    "birthdate": generate_birthdates(n),
    "national_id": generate_ids(n),
})
# Show five randomly chosen rows as the cell output.
data.sample(5)

In [None]:
# Generate a new encryption key for this session; it is passed to the matcher below.
encryption_key = AESGCM4Encryptor.generate_key()
# The bare expression displays the key as cell output.
# NOTE(review): fine for a demo, but avoid printing real keys in shared notebooks.
encryption_key

In [None]:
# Per-field matching settings, keyed by the DataFrame column names.
# Each field names an algorithm ("vector" or "distance") and a weight;
# the three weights add up to 1.0.
# NOTE(review): the key is spelled "algoritm" (not "algorithm") — presumably
# the spelling PersonMatcher expects; confirm against the library before changing.
config = {
    "name": {
        "algoritm": "vector",
        "weight": 0.2,
    },
    "birthdate": {
        "algoritm": "distance",
        "weight": 0.2,
    },
    "national_id": {
        "algoritm": "distance",
        "weight": 0.6
    },
}

In [None]:
# Build the matcher from the field config and the encryption key defined above.
# NOTE(review): the meaning of the positional arguments 10 and "storage" is not
# visible in this notebook — check PersonMatcher's signature.
matcher = PersonMatcher(10, config, encryption_key, "storage")

In [None]:
# Hand the generated DataFrame to the matcher.
# NOTE(review): presumably this builds/persists the match index — confirm in PersonMatcher.create.
matcher.create(data)

In [None]:
# Look up a single person using the same three fields as the config.
# The birthdate uses the DD-MM-YYYY format produced by generate_birthdates().
# The national_id is written in lowercase here, whereas generate_ids() emits
# uppercase letters, so this query is not an exact string match on that field.
result = matcher.get({
    "name": "Weijters, Hanna",
    "birthdate": "16-11-2000",
    "national_id": "nld9622792f4887410313628145"
})
# Display the lookup result as the cell output.
result