In [None]:
import os
import sys
import sqlite3
from pathlib import Path

from t_res.geoparser import pipeline, ranking, linking

In [None]:
# --------------------------------------
# Candidate ranker configuration.
# The two parameter groups are spelled out first so that the constructor
# call below stays short and each group can be inspected on its own.

# Parameters to create the string pair dataset:
ranker_strvar_params = {
    "ocr_threshold": 60,
    "top_threshold": 85,
    "min_len": 5,
    "max_len": 15,
    # Path.resolve() turns the relative location into an absolute path.
    "w2v_ocr_path": str(Path("../resources/models/w2v/").resolve()),
    "w2v_ocr_model": "w2v_*_news",
    "overwrite_dataset": False,
}

ranker_deezy_params = {
    # Paths and filenames of DeezyMatch models and data:
    "dm_path": str(Path("../resources/deezymatch/").resolve()),
    "dm_cands": "wkdtalts",
    "dm_model": "w2v_ocr",
    "dm_output": "deezymatch_on_the_fly",
    # Ranking measures:
    "ranking_metric": "faiss",
    "selection_threshold": 50,
    "num_candidates": 1,
    "verbose": False,
    # DeezyMatch training:
    "overwrite_training": False,
    "do_test": False,
}

# Instantiate the ranker:
myranker = ranking.Ranker(
    method="deezymatch",
    resources_path="../resources/wikidata/",
    strvar_parameters=ranker_strvar_params,
    deezy_parameters=ranker_deezy_params,
)

In [None]:
# Instantiate the linker, backed by the embeddings database.
# NOTE: sqlite3's connection context manager only commits (or rolls back) the
# pending transaction when the block exits -- it does NOT close the
# connection. The cursor stored in the linker parameters therefore stays
# usable after this cell has finished.
with sqlite3.connect("../resources/rel_db/embeddings_database.db") as conn:
    cursor = conn.cursor()

    # Settings passed to the linker as `rel_params`; the database cursor is
    # handed over under the "db_embeddings" key.
    linker_rel_params = {
        "model_path": "../resources/models/disambiguation/",
        "data_path": "../experiments/outputs/data/lwm/",
        "training_split": "originalsplit",
        "db_embeddings": cursor,
        "with_publication": True,
        "without_microtoponyms": True,
        "do_test": False,
        "default_publname": "United Kingdom",
        "default_publwqid": "Q145",
    }

    mylinker = linking.Linker(
        method="reldisamb",
        resources_path="../resources/",
        rel_params=linker_rel_params,
        overwrite_training=False,
    )

In [None]:
# Build the pipeline object from the ranker and the linker instantiated in the cells above.
geoparser = pipeline.Pipeline(myranker=myranker, mylinker=mylinker)

## Run pipeline in a modular way

In [None]:
# First step of the modular run: text recognition on a sample passage.
# The passage is reproduced verbatim, including the spelling "Shefiield"
# (presumably deliberate OCR-style noise -- do not "correct" it).
sample_passage = "A remarkable case of rattening has just occurred in the building trade next to the Market-street of Shefiield, but also in Lancaster. Not in Nottingham though. Not in Ashton either, nor in Salop! Maybe in Lancaster actually."

# Place name and its Wikidata ID, passed through as keyword arguments
# (presumably the place of publication -- confirm against the pipeline docs).
place_kwargs = {"place": "Manchester", "place_wqid": "Q18125"}

output = geoparser.run_text_recognition(sample_passage, **place_kwargs)

In [None]:
# Second step: run candidate selection on the text-recognition output.
cands = geoparser.run_candidate_selection(output)

In [None]:
# Final step: disambiguation takes both the recognition output and the selected candidates.
output_disamb = geoparser.run_disambiguation(output, cands)

In [None]:
# Bare expression on the last line of the cell, so the notebook renders the result.
output_disamb