# Building-related local APIs for use with OpenRefine and similar software
The APIs are described in Jupyter cells, one API/URL per cell, and are served with Jupyter Kernel Gateway, see https://jupyter-kernel-gateway.readthedocs.io/en/latest/getting-started.html.<br>
Tested with Jupyterlab 3.6.3, Python 3.9.13
The APIs of one notebook are available until the process is stopped with `kill`. The port can be changed if necessary. Replace the `KernelGatewayApp.seed_uri` with your notebook path.

The usage policy of the Nominatim OSM APIs requires a user-agent for queries and a delay of at least 1 second between queries. If you use a custom Nominatim service, you can comment out all `sleep(1)` calls.

**libraries used:**
- requests
- geopy
- jupyter_kernel_gateway
- haversine
- pandas

**running kernelgateway**
- `jupyter kernelgateway --KernelGatewayApp.api=kernel_gateway.notebook_http --KernelGatewayApp.seed_uri=/Users/admin/Documents/Jupyter/API/Building_APIs.ipynb --port=10100`

**killing kernelgateway**
- `lsof -i :10100` to find the process PID (look for "Python") 
- `kill [PID]`



## dec2gms service
run from OpenRefine with <br>`"http://127.0.0.1:10100/dec2gms?lat=" + cells['lat'].value + "&lon=" + value`<br> in column lon if there is a column lon and a column lat. Output is encoded in UTF-8.
Test with http://127.0.0.1:10100/dec2gms?lat=50.928780&lon=11.589900, check in https://d-nb.info/gnd/4028557-1

In [None]:
# GET /dec2gms

from geopy.geocoders import Nominatim
from geopy.point import Point
import json

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)

try:
    latitude = float(req['args']['lat'][0])
    longitude = float(req['args']['lon'][0])
except (KeyError, IndexError, TypeError, ValueError):
    # Stop here instead of printing and carrying on: the kernel keeps
    # module-level names alive between requests, so continuing would convert
    # coordinates left over from an earlier call (or fail on undefined names).
    raise ValueError("No coordinates provided.") from None

def decimallon_to_dms(decimal):
    """Format a decimal longitude as ``E|W DDD°MM'SS.ss''``.

    Args:
        decimal: Longitude in decimal degrees; negative values are west.

    Returns:
        A string such as ``E 011°35'23.64''``: hemisphere letter, degrees
        zero-padded to three digits, minutes to two digits, and seconds with
        two decimals (not zero-padded).
    """
    # Round once, on the whole value expressed in hundredths of an arcsecond,
    # so that a carry reaches minutes and degrees. Rounding only the formatted
    # seconds turned e.g. 10.9999999 into 010°59'60.00''.
    hundredths = round(abs(decimal) * 360000)
    degrees, remainder = divmod(hundredths, 360000)
    minutes, hundredths = divmod(remainder, 6000)
    seconds = hundredths / 100

    direction = "E" if decimal >= 0 else "W"

    return f"{direction} {degrees:03d}°{minutes:02d}'{seconds:.2f}''"
    
def decimallat_to_dms(decimal):
    """Format a decimal latitude as ``N|S DDD°MM'SS.ss''``.

    Args:
        decimal: Latitude in decimal degrees; negative values are south.

    Returns:
        A string such as ``N 050°55'43.61''``: hemisphere letter, degrees
        zero-padded to three digits, minutes to two digits, and seconds with
        two decimals (not zero-padded).
    """
    # Round once, on the whole value expressed in hundredths of an arcsecond,
    # so that a carry reaches minutes and degrees. Rounding only the formatted
    # seconds turned e.g. 50.9999999 into 050°59'60.00''.
    hundredths = round(abs(decimal) * 360000)
    degrees, remainder = divmod(hundredths, 360000)
    minutes, hundredths = divmod(remainder, 6000)
    seconds = hundredths / 100

    direction = "N" if decimal >= 0 else "S"

    return f"{direction} {degrees:03d}°{minutes:02d}'{seconds:.2f}''"

# Response body: longitude first, then latitude, separated by " / ".
longitude_dms = decimallon_to_dms(longitude)
latitude_dms = decimallat_to_dms(latitude)

formatted_coordinates = " / ".join((longitude_dms, latitude_dms))
result = formatted_coordinates

# Whatever the cell prints becomes the body of the HTTP response.
print(result)

In [None]:
# ResponseInfo GET /dec2gms
# Response metadata for /dec2gms: UTF-8 plain text, and 200 OK because a
# successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))

## gms2dec service
run from OpenRefine with <br>`"http://127.0.0.1:10100/gms2dec?lat=" + cells['lat'].value + "&lon=" + value`<br> in column lon if there is a column lon and a column lat. Output is encoded in UTF-8.
Test with http://127.0.0.1:10100/gms2dec?lat=50°56'5.824\"N&lon=10°59'24.112\"E, check in https://d-nb.info/gnd/4028557-1

In [None]:
# GET /gms2dec

from geopy.geocoders import Nominatim
import json

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)

try:
    gms_latitude = req['args']['lat'][0]
    gms_longitude = req['args']['lon'][0]
except (KeyError, IndexError, TypeError):
    # Stop here instead of printing and carrying on: the kernel keeps
    # module-level names alive between requests, so continuing would convert
    # coordinates left over from an earlier call (or fail on undefined names).
    raise ValueError("No coordinates provided.") from None

def convert_gms_to_decimal(gms_latitude, gms_longitude):
    """Convert a latitude/longitude pair from degrees-minutes-seconds to decimal degrees.

    Common notations are converted locally, e.g. ``50°56'5.824"N``,
    ``N 050°55'43.61''`` or plain decimals such as ``-33.5``. Only when one of
    the two values is not understood is the pair handed to the Nominatim
    geocoder, as before.

    Args:
        gms_latitude: Latitude text; hemisphere letter N or S, optional.
        gms_longitude: Longitude text; hemisphere letter E or W, optional.

    Returns:
        Tuple ``(latitude, longitude)`` of floats in decimal degrees.

    Raises:
        ValueError: If the values cannot be parsed and the geocoder finds
            nothing for them.
    """
    import re  # function-scope import: this cell does not import re itself

    component = re.compile(
        r"""
        (?P<front>[NSEW])?\s*                  # hemisphere written before the value
        (?P<sign>[+-])?\s*
        (?P<deg>\d+(?:\.\d+)?)
        (?:\s*[\u00b0\u00ba]\s*                # degree sign; minutes/seconds only after it
            (?:(?P<min>\d+(?:\.\d+)?)\s*[\x27\u2032\u2019]\s*)?           # ' or prime
            (?:(?P<sec>\d+(?:\.\d+)?)\s*(?:\x27\x27|[\x22\u2033\u201d])\s*)?  # '' or " or double prime
        )?
        \s*(?P<back>[NSEW])?                   # hemisphere written after the value
        """,
        re.VERBOSE | re.IGNORECASE,
    )

    def to_decimal(text, positive, negative, limit):
        """Return *text* as signed decimal degrees, or None if it is not understood."""
        match = component.fullmatch(text.strip())
        if match is None:
            return None
        front, back, sign = match["front"], match["back"], match["sign"]
        # Two hemisphere letters, or a sign combined with a letter, are ambiguous.
        if (front and back) or (sign and (front or back)):
            return None
        hemisphere = (front or back or "").upper()
        if hemisphere and hemisphere not in (positive, negative):
            return None
        minutes = float(match["min"] or 0)
        seconds = float(match["sec"] or 0)
        if minutes >= 60 or seconds >= 60:
            return None
        value = float(match["deg"]) + minutes / 60 + seconds / 3600
        if value > limit:
            return None
        return -value if (hemisphere == negative or sign == "-") else value

    latitude = to_decimal(gms_latitude, "N", "S", 90)
    longitude = to_decimal(gms_longitude, "E", "W", 180)
    if latitude is not None and longitude is not None:
        return latitude, longitude

    # Fallback for notations the local parser does not cover.
    # NOTE(review): a geocoder may report the position of the object it
    # matched rather than the exact input coordinates - confirm before relying
    # on this path for precise values.
    geolocator = Nominatim(user_agent="jpytr_bldg_apis")
    location = geolocator.geocode(f"{gms_latitude}, {gms_longitude}")
    if location is None:
        raise ValueError(
            f"Could not interpret coordinates: {gms_latitude!r}, {gms_longitude!r}"
        )
    return location.latitude, location.longitude

# Test URLs written with \" leave literal backslashes in the values; remove
# them before converting.
gms_latitude, gms_longitude = (
    raw.replace("\\", "") for raw in (gms_latitude, gms_longitude)
)

decimal_latitude, decimal_longitude = convert_gms_to_decimal(gms_latitude, gms_longitude)

# Response body: "<latitude> / <longitude>" in decimal degrees.
print(" / ".join(str(coordinate) for coordinate in (decimal_latitude, decimal_longitude)))



In [None]:
# ResponseInfo GET /gms2dec
# Response metadata for /gms2dec: UTF-8 plain text, and 200 OK because a
# successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))

## distance between 2 points service
run from OpenRefine with <br>`"http://127.0.0.1:10100/distbetween2?set1=" + cells['set1'].value + "&set2=" + value`<br> in column set2 if there are two columns with point coordinates to compare formatted as ```latxlon```. Output is encoded in UTF-8, unit is km.
Test with http://127.0.0.1:10100/distbetween2?set1=50.928780x11.589900&set2=50.9286x11.589463, check in https://d-nb.info/gnd/4028557-1

In [None]:
# GET /distbetween2

import json
import math
from haversine import haversine, Unit

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)

try:
    raw_set1 = req['args']['set1'][0]
    raw_set2 = req['args']['set2'][0]
except (KeyError, IndexError, TypeError):
    # Stop here instead of printing and carrying on: the kernel keeps
    # module-level names alive between requests, so continuing would measure
    # points left over from an earlier call (or fail on undefined names).
    raise ValueError("No coordinates provided.") from None


def _parse_point(text):
    """Turn a 'latxlon' string such as '50.92x11.5899' into a (lat, lon) tuple of floats."""
    lat, lon = text.split('x')
    return float(lat), float(lon)


set1 = _parse_point(raw_set1)
set2 = _parse_point(raw_set2)

# Great-circle distance in kilometres, rounded to two decimals.
distance = round(haversine(set1, set2, unit=Unit.KILOMETERS), 2)

print(distance)

In [None]:
# ResponseInfo GET /distbetween2
# Response metadata for /distbetween2: UTF-8 plain text, and 200 OK because a
# successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))

## Get Redaktionslevel of an entity or its PPN from its GND-ID via OGND-SRU
Run from OpenRefine with <br>`"http://127.0.0.1:10100/gndid?level=" + cells['gndid'].value`<br> in column gndid or similar for Redaktionslevel and with <br>`"http://127.0.0.1:10100/gndid?ppn=" + cells['gndid'].value`<br> for PPN. Output is encoded in UTF-8.
Test with http://127.0.0.1:10100/gndid?level=5053491-9 and http://127.0.0.1:10100/gndid?ppn=5053491-9.

In [None]:
# GET /gndid

import requests
import xml.etree.ElementTree as ET
import re
import time
import json

from urllib.parse import quote

value = ""
req_type = 0  # 1 = Redaktionslevel requested, 2 = PPN requested

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)
args = req.get('args') or {}

# 'level' wins over 'ppn' when both are supplied; empty values are skipped.
for candidate_type, parameter in ((1, 'level'), (2, 'ppn')):
    supplied = args.get(parameter)
    if supplied and str(supplied[0]) != "":
        value = str(supplied[0])
        req_type = candidate_type
        break

if value == "":
    # Stop instead of querying the SRU service with an empty identifier.
    raise ValueError("no GND-ID provided")

# picaxml is requested because, per the original notes, the marcxmlk10os
# schema lacks field 042 (Redaktionslevel).
url = (
    'https://sru.bsz-bw.de/ognd!rec=2?version=1.1&query=pica.nid+%3D'
    + quote(value, safe='')
    + '&recordSchema=picaxml&version=1.1&maximumRecords=1'
)

response = requests.get(url, timeout=30)
response.raise_for_status()
xml_content = response.text

root = ET.fromstring(xml_content)

namespace = {'pica': 'info:srw/schema/5/picaXML-v1.0'}

# Subfield $0 of 002@ is read for the level request, of 003@ for the PPN request.
pica_tag = {1: '002@', 2: '003@'}[req_type]
datafield_element = root.find(f".//pica:datafield[@tag='{pica_tag}']", namespace)
subfield_element = None
if datafield_element is not None:
    subfield_element = datafield_element.find("./pica:subfield[@code='0']", namespace)
if subfield_element is None:
    raise LookupError(f"no field {pica_tag} $0 found for GND-ID {value}")

value = subfield_element.text
print(value)

# If OpenRefine skips rows, adding a short pause here (e.g. time.sleep(0.5)) may help.

In [None]:
# ResponseInfo GET /gndid
# Response metadata for /gndid: UTF-8 plain text, and 200 OK because a
# successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))

## Find out if subject term is part of subject category 31.3 based on GND-IDs provided
Run from OpenRefine with <br>`"http://127.0.0.1:10100/ifisarchitecture?gnd_id=" + value`<br> in column subjectterm_gndid or similar. Output is encoded in UTF-8.
Test with http://127.0.0.1:10100/ifisarchitecture?gnd_id=4073436-5.

In [None]:
# GET /ifisarchitecture

import requests
import json

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)

try:
    gnd_id = req['args']['gnd_id'][0]
except (KeyError, IndexError, TypeError):
    # Stop here instead of printing and carrying on: the kernel keeps
    # module-level names alive between requests, so continuing would check
    # the GND-ID of an earlier call (or fail on an undefined name).
    raise ValueError("No GND-ID provided.") from None

url = 'https://lobid.org/gnd/' + gnd_id + '.json'
response = requests.get(url, timeout=30)
response.raise_for_status()

gnd_data = response.json()

# Records without any subject category simply produce an empty response.
categories = gnd_data.get('gndSubjectCategory', [])

# Print the marker once, even if several categories contain "31.3".
if any('31.3' in cat['id'] for cat in categories):
    print('isArchitecture')

In [None]:
# ResponseInfo GET /ifisarchitecture
# Response metadata for /ifisarchitecture: UTF-8 plain text, and 200 OK because
# a successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))

## Get latest succeeding place according to the GND for the provided GND-ID of a place
Run from OpenRefine with <br>`"http://127.0.0.1:10100/latestplace?gnd_id=" + value`<br> in column place_gndid or similar. Output is encoded in UTF-8.
Test with http://127.0.0.1:10100/latestplace?gnd_id=4322223-7.

In [None]:
# GET /latestplace

import requests
import json

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)

try:
    gnd_id = req['args']['gnd_id'][0]
except (KeyError, IndexError, TypeError):
    # Stop here instead of printing and carrying on: the kernel keeps
    # module-level names alive between requests, so continuing would resolve
    # the GND-ID of an earlier call (or fail on an undefined name).
    raise ValueError("No GND-ID provided.") from None

def findsucceedingplace(gnd_id, fetch_record=None):
    """Follow 'succeedingPlaceOrGeographicName' links to the last place in the chain.

    Starting at *gnd_id*, each record's first succeeding place is followed
    until a record names no successor, the successor entry is malformed, or
    the chain loops back to a place already visited.

    Args:
        gnd_id: GND-ID of the place to start from.
        fetch_record: Optional callable taking a GND-ID and returning the
            record as a dict. Defaults to fetching ``<id>.json`` from
            lobid.org.

    Returns:
        The GND-ID of the last place reached (the start ID if it has no
        successor). For existing callers the same value is also stored on
        ``findsucceedingplace.gnd_link``.
    """
    if fetch_record is None:
        def fetch_record(current_id):
            response = requests.get('https://lobid.org/gnd/' + current_id + '.json', timeout=30)
            return json.loads(response.text)

    # One plain loop: each record is fetched exactly once. The previous mix of
    # recursion and looping re-fetched the rest of the chain at every level.
    visited = {gnd_id}
    while True:
        gnd_data = fetch_record(gnd_id)
        try:
            successor_link = gnd_data['succeedingPlaceOrGeographicName'][0]['id']
        except (KeyError, IndexError, TypeError):
            break  # no (usable) successor: gnd_id is the latest place
        successor_id = successor_link.replace('https://d-nb.info/gnd/', '')
        if successor_id in visited:
            break  # circular reference in the data: stop instead of looping forever
        visited.add(successor_id)
        gnd_id = successor_id

    findsucceedingplace.gnd_link = gnd_id
    return gnd_id

# Resolve the chain; the function publishes its result on its own `gnd_link`
# attribute, which is printed as the response body.
findsucceedingplace(gnd_id)
latest_place = findsucceedingplace.gnd_link
print(latest_place)

In [None]:
# ResponseInfo GET /latestplace
# Response metadata for /latestplace: UTF-8 plain text, and 200 OK because a
# successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))

## Find places and parts of places in the GND for given coordinates
Run from OpenRefine with <br>`"http://127.0.0.1:10100/plcsbldgisin?lat=" + cells['lat'].value + "&lon=" + value`<br> in column lon. Results are separated by "|" if there are several matches, each place is provided with its Wikidata- and GND-ID.
Test with http://127.0.0.1:10100/plcsbldgisin?lat=52.0578042&lon=13.7219432

In [None]:
# GET /plcsbldgisin

import requests
import json
from time import sleep

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)

# Sent as a real HTTP header below. requests.get's second positional
# parameter is `params`, so passing this dict positionally only appended it
# to the query string and no identifying User-Agent header was sent.
headers = {'User-Agent': 'jpytr_bldg_apis'}
nominatim_url = "https://nominatim.openstreetmap.org"
no_gnd_id = "noGndIdForThisPlace"

try:
    lat = req['args']['lat'][0]
    lon = req['args']['lon'][0]
except (KeyError, IndexError, TypeError):
    # Stop here instead of printing and carrying on: the kernel keeps
    # module-level names alive between requests, so continuing would look up
    # coordinates left over from an earlier call (or fail on undefined names).
    raise ValueError("No coordinates provided.") from None

# Step 1: reverse-geocode the coordinates to one OSM object.
response = requests.get(
    nominatim_url + "/reverse.php",
    params={'lat': lat, 'lon': lon, 'zoom': 18, 'format': 'jsonv2'},
    headers=headers,
    timeout=30,
)
response.raise_for_status()
data = response.json()
osm_id = data['osm_id']
osm_type = data['osm_type'].replace('way', 'W').replace('node', 'N').replace('relation', 'R')

# Step 2: fetch that object's address hierarchy and keep the entries with
# address rank 16 or 18.
sleep(1)  # pause between Nominatim queries, as its usage policy requires
response = requests.get(
    nominatim_url + "/details.php",
    params={
        'osmtype': osm_type,
        'osmid': osm_id,
        'addressdetails': 1,
        'hierarchy': 0,
        'group_hierarchy': 1,
        'format': 'json',
    },
    headers=headers,
    timeout=30,
)
response.raise_for_status()
data = response.json()

places = [
    (entry['osm_type'], entry['osm_id'])
    for entry in data['address']
    if entry.get('rank_address') in (16, 18) and entry.get('osm_id') is not None
]

# Step 3: per place, read its Wikidata ID from OSM and its GND-ID (P227) from
# Wikidata. Each place yields exactly one (Q_id, GND_id) pair, so the two
# columns can never drift out of step.
matches = []
for osm_type, osm_id in places:
    sleep(1)
    response = requests.get(
        nominatim_url + "/details.php",
        params={'osmtype': osm_type, 'osmid': osm_id, 'format': 'json'},
        headers=headers,
        timeout=30,
    )
    if response.status_code != 200:
        continue

    try:
        Q_id = response.json()['extratags']['wikidata']
    except (KeyError, TypeError):
        # No Wikidata link on this object: report that without sending the
        # placeholder text to the Wikidata API.
        matches.append(("No Wikidata-ID for OSM-object " + osm_type + ":" + str(osm_id), no_gnd_id))
        continue

    GND_id = no_gnd_id
    response = requests.get(
        'https://www.wikidata.org/w/api.php',
        params={'action': 'wbgetentities', 'ids': Q_id, 'format': 'json', 'language': 'en', 'type': 'item'},
        headers=headers,
        timeout=30,
    )
    if response.status_code == 200:
        try:
            GND_id = response.json()['entities'][Q_id]['claims']['P227'][0]['mainsnak']['datavalue']['value']
        except (KeyError, IndexError, TypeError):
            pass  # entity has no GND-ID claim: keep the placeholder
    matches.append((Q_id, GND_id))

# Always (re)built, so a failed lookup can never print an earlier request's result.
result = "|".join(f'{Q_id},{GND_id}' for Q_id, GND_id in matches)

print(result)

In [None]:
# ResponseInfo GET /plcsbldgisin
# Response metadata for /plcsbldgisin: UTF-8 plain text, and 200 OK because a
# successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))

## Find closest building in OSM with and without Wikidata ID for given coordinates or address
Run from OpenRefine with <br>`"http://127.0.0.1:10100/closestbuilding?lat=" + cells['lat'].value + "&lon=" + value + "&dist=50"`<br> in column lon or <br>`"http://127.0.0.1:10100/closestbuilding?address=" + value + "&dist=50"`<br> in a column like address or nameOftheBuilding. Always provide a place name as part of the address. 
For distance (in meters) choose a small value for dense cities and a large value like `dist=1000` for the countryside. Closest building with its coordinates and closest building with a Wikidata ID are separated by "|".
Test with http://127.0.0.1:10100/closestbuilding?lat=50.934951&lon=10.990031&dist=50 and http://127.0.0.1:10100/closestbuilding?address=Am%20Planetarium%206%20jena&dist=50

In [None]:
# GET /closestbuilding

import requests
import json
from time import sleep
from haversine import haversine, Unit
import pandas as pd

# REQUEST is the JSON string the kernel gateway injects for each HTTP call;
# query parameters are read from it as lists under 'args'.
req = json.loads(REQUEST)

# Sent as a real HTTP header below. requests.get's second positional
# parameter is `params`, so passing this dict positionally only appended it
# to the query string and no identifying User-Agent header was sent.
headers = {'User-Agent': 'jpytr_bldg_apis'}

# Reset per request: the kernel keeps module-level names alive between
# requests, and a leftover `address` from an earlier call used to be geocoded
# again and overwrite the coordinates of a later lat/lon request.
latitude = None
longitude = None
address = None

try:
    if 'lat' in req['args']:
        latitude = req['args']['lat'][0]
        longitude = req['args']['lon'][0]
    elif 'address' in req['args']:
        address = req['args']['address'][0]
    else:
        print("No coordinates or address provided.")
except (KeyError, IndexError, TypeError):
    latitude = longitude = address = None
    print("No data sent.")

# Search radius in metres; defaults to 25 when no 'dist' parameter is given.
try:
    maxdist = req['args']['dist'][0]
except (KeyError, IndexError, TypeError):
    maxdist = '25'

# An address is geocoded with Nominatim and the first hit's position is used.
if address is not None:
    try:
        response = requests.get(
            "https://nominatim.openstreetmap.org/search",
            params={'q': address, 'format': 'jsonv2'},
            headers=headers,
            timeout=30,
        )
        if response.status_code == 200:
            data = response.json()
            latitude = data[0]['lat']
            longitude = data[0]['lon']
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Best effort, as before: without coordinates the search that follows
        # reports its own failure.
        latitude = longitude = None

def point_dist(x):
    """Return the rounded distance from the request's reference point to *x*.

    *x* is a "lat, lon" string. The reference point is read from the
    module-level ``latitude`` and ``longitude`` names. The haversine result
    (kilometres with the library's default unit) is multiplied by 1000, so the
    return value is in metres.
    """
    origin = (float(latitude), float(longitude))
    target_lat, target_lon = (float(part) for part in x.split(','))
    metres = haversine(origin, (target_lat, target_lon)) * 1000
    return round(metres)

from urllib.parse import quote


def _center_text(element):
    """Return an Overpass element's position as the "lat, lon" text point_dist() expects."""
    center = element.get('center')
    if isinstance(center, dict) and center:
        return f"{center['lat']}, {center['lon']}"
    # Elements without a 'center' entry carry their own lat/lon.
    return f"{element['lat']}, {element['lon']}"


def _nearest(elements):
    """Return the element closest to the query point (the first one on ties)."""
    return min(elements, key=lambda element: point_dist(_center_text(element)))


try:
    # Reject missing or non-numeric input before it is placed in the query.
    for number in (maxdist, latitude, longitude):
        float(number)

    # Overpass QL: nodes, ways and relations tagged "building" within maxdist
    # metres of the point; "out center" adds a centre point to ways/relations.
    around = f"(around:{maxdist},{latitude},{longitude})"
    query = (
        "[out:json][timeout:25];\n"
        f'(node["building"]{around};\n'
        f'way["building"]{around};\n'
        f'relation["building"]{around};\n'
        ");\n"
        "out center;\n"
        ">;\n"
        "out skel qt;"
    )
    overpass_url = "https://overpass-api.de/api/interpreter?data=" + quote(query, safe="")
    response = requests.get(overpass_url, timeout=60)

    if response.status_code != 200:
        # Stop here; carrying on would process data left over from an earlier step.
        print("Error: Request to Overpass API failed.")
    else:
        # The skeleton output of ">" contains untagged nodes; only tagged
        # elements are buildings.
        buildings = [element for element in response.json()['elements'] if 'tags' in element]

        if not buildings:
            print("No building in range.")
        else:
            closest = _nearest(buildings)
            print1 = closest['type'] + "," + str(closest['id']) + ",(" + _center_text(closest) + ")" + "|"

            linked = [element for element in buildings if element['tags'].get('wikidata')]
            if linked:
                closest_linked = _nearest(linked)
                wikidata_value = closest_linked['tags']['wikidata']

                # Set for every request, so a failed lookup can never reuse
                # the GND-ID of an earlier request.
                gndIdentifier = "noGndIdForThisPlace"
                try:
                    url = 'https://www.wikidata.org/w/api.php?action=wbgetentities&ids=' + wikidata_value + '&format=json&language=en&type=item'
                    response = requests.get(url, timeout=30)
                    if response.status_code == 200:
                        gndIdentifier = response.json()['entities'][wikidata_value]['claims']['P227'][0]['mainsnak']['datavalue']['value']
                except (requests.RequestException, ValueError, LookupError, TypeError):
                    pass  # lookup failed or no GND-ID claim (P227): keep the placeholder

                print2 = closest_linked['type'] + "," + str(closest_linked['id']) + "," + wikidata_value + "," + gndIdentifier
            else:
                print2 = "No OSM-linked wikidata object in range."

            print(print1 + print2)

except Exception:
    # Top-level boundary of the endpoint: answer with a short message rather
    # than a traceback.
    print("Something went wrong.")

In [None]:
# ResponseInfo GET /closestbuilding
# Response metadata for /closestbuilding: UTF-8 plain text, and 200 OK because
# a successful GET only reads data (201 means "Created").
print(json.dumps({
    "headers": {
        "Content-Type": "text/plain; charset=utf-8"
    },
    "status": 200
}))