# Toponym Resolution with T-Res

TODO: 
 - briefly explain T-Res.
 - explain what an HTTP API is (and that it only works while the server is running).

In [144]:
import requests
import operator
from dataclasses import dataclass
from dacite import from_dict

Helper functions for calling the T-Res API and parsing the response:

In [145]:
# Address of the machine hosting the T-Res server. The API calls in this
# notebook can only succeed while that server is up and reachable.
HOST = "20.0.184.45"
# Base URL shared by all requests (HTTP, port 8000).
# NOTE(review): the path presumably selects the API version and the T-Res
# pipeline configuration -- confirm against the server deployment.
API_URL = f"http://{HOST}:8000/v2/t-res_deezy_reldisamb-wpubl-wmtops"

@dataclass
class Toponym:
    """One toponym entry from a T-Res API response.

    The field names match the keys of a single item in the JSON list returned
    by the API, so an instance can be built directly from such an item.
    Printing an instance gives a short, human-readable summary.
    """

    # Text of the mention as it appears in the input.
    mention: str
    sentence: str
    # NOTE(review): presumably character offsets of the mention -- confirm.
    pos: int
    end_pos: int
    # Predicted identifier; shown as the Wikidata ID and used as the key into
    # `cross_cand_score`.
    prediction: str
    # Maps candidate identifiers to their linking score.
    cross_cand_score: dict
    # Shown as the coordinates of the prediction.
    latlon: list
    wkdt_class: str
    # Maps candidate strings to a sequence whose first element is the
    # string-match score.
    string_match_score: dict

    def __str__(self):
        """Return a multi-line summary of the toponym.

        The "Mention" line is only included when the best string match
        differs from the raw mention. If `cross_cand_score` has no entry for
        the prediction (e.g. nothing was linked), the linking score is shown
        as ``n/a`` instead of raising `KeyError`.
        """
        toponym = self.toponym()
        lines = [f"Toponym:\t{toponym}"]
        if self.mention != toponym:
            lines.append(f"Mention:\t{self.mention}")
        lines.append(f"Wikidata ID:\t{self.prediction}")
        lines.append(f"Coordinates:\t{self.latlon}")
        score = self.cross_cand_score.get(self.prediction, "n/a")
        lines.append(f"Linking score:\t{score}")
        return "\n".join(lines)

    def __repr__(self):
        """Use the same summary as `__str__` so notebook output is readable."""
        return self.__str__()

    def toponym(self):
        """Return the candidate string with the highest string-match score.

        Falls back to the raw mention when there are no string-match
        candidates, so that displaying such a result does not fail.
        """
        if not self.string_match_score:
            return self.mention
        # The score is the first element of each candidate's value.
        scores = {name: match[0] for name, match in self.string_match_score.items()}
        # On ties, `max` keeps the first candidate in dict order.
        return max(scores, key=scores.get)

class Toponyms:
    """A collection of `Toponym` objects built from a list of raw dicts."""

    toponyms: list

    def __init__(self, data):
        """Convert each dict in `data` into a `Toponym`.

        Raises:
            ValueError: if `data` is not a list.
        """
        if isinstance(data, list):
            self.toponyms = [from_dict(data_class=Toponym, data=item) for item in data]
        else:
            raise ValueError("Toponyms data must be a list.")

    def __str__(self):
        """Return the toponym summaries separated by blank lines."""
        if self.toponyms:
            return '\n\n'.join(str(entry) for entry in self.toponyms)
        return "Empty list of toponyms."

    def __repr__(self):
        """Use the same text as `__str__` so notebook output is readable."""
        return str(self)

def validate_query(query):
    """Check that `query` has the `text` item required by the T-Res API.

    Returns None when the query is acceptable.

    Raises:
        ValueError: if `query` has no `text` key.
    """
    if "text" in query.keys():
        return None
    raise ValueError("T-Res API query must contain an item named `text`")

def call_api(query, parse=True, timeout=60):
    """Send a toponym-resolution query to the T-Res API.

    Args:
        query: dict sent as the JSON body of the request; must contain a
            `text` item (checked by `validate_query`).
        parse: when true (the default), return the parsed result from
            `parse_api_response`; otherwise return the raw
            `requests.Response`.
        timeout: seconds to wait for the server before giving up, passed
            straight to `requests.get`. Without a timeout an unreachable
            server would block the notebook indefinitely. Pass `None` to
            wait forever.

    Raises:
        ValueError: if `query` has no `text` item.
        requests.exceptions.RequestException: if the request fails or times
            out.
    """
    validate_query(query)
    response = requests.get(
        f'{API_URL}/toponym_resolution',
        json=query,
        timeout=timeout,
    )
    if not parse:
        return response
    return parse_api_response(response)

def parse_api_response(response):
    """Turn a T-Res API response into `Toponym` objects.

    Args:
        response: the `requests.Response` returned by the API call.

    Returns:
        A single `Toponym` when the response holds exactly one toponym,
        otherwise a `Toponyms` collection (possibly empty).

    Raises:
        ValueError: if the HTTP status is not 200, or if the response body is
            not a JSON list.
    """
    if response.status_code != 200:
        print(f"HTTP error code: {response.status_code}")
        print(f"Reason: {response.reason}")
        # Stop here: an error body is not a list of toponyms, and parsing it
        # would only produce an unrelated, confusing error.
        raise ValueError(
            f"T-Res API request failed with HTTP status "
            f"{response.status_code} ({response.reason})"
        )
    result = Toponyms(response.json())
    # Unwrap a lone toponym so it displays directly in the notebook.
    if len(result.toponyms) == 1:
        return result.toponyms[0]
    return result

### Simple example of toponym resolution from text

In [None]:
# Minimal query: only the required `text` item is supplied.
query = {"text": "A remarkable case of rattening has just occurred in the building trade at Newtown."}
result = call_api(query)
# Bare expression so the notebook displays the parsed result.
result

### Example with place of publication information

In [None]:
# Same sentence as the simple example, with the place of publication added.
# NOTE(review): `place_wqid` looks like the Wikidata ID of `place` -- confirm
# against the T-Res API documentation.
query = {
        "text": "A remarkable case of rattening has just occurred in the building trade at Newtown.",
        "place": "Powys",
        "place_wqid": "Q156150"
        }
result = call_api(query)
# Bare expression so the notebook displays the parsed result.
result

### Example with OCR error

In [None]:
# "Shefiield" is misspelled on purpose to mimic an OCR error in the source text.
query = {"text": "A remarkable case of rattening has just occurred in the building trade at Shefiield."}
result = call_api(query)
# Bare expression so the notebook displays the parsed result.
result

### Example with multiple toponyms

In [None]:
# Text with several place names. `call_api` returns a `Toponyms` collection
# unless exactly one toponym comes back, in which case it returns that single
# `Toponym`.
query = {"text": "A remarkable case of rattening has just occurred in the building trade at Shefiield, but also in Leeds. Not in London though."}
result = call_api(query)
# Bare expression so the notebook displays the parsed result.
result