Copyright (c) 2022 Christian Oechler

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

In [None]:
!pip install beautifulsoup4
!pip install lxml
!pip install wikibaseintegrator==0.12

In [None]:
import json

# Imports for the WikibaseIntegrator
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator import wbi_login as wbi_login
from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator import wbi_helpers as wbi_helpers
from wikibaseintegrator.datatypes import Item, String, Time, URL
from wikibaseintegrator.wbi_enums import WikibaseDatePrecision

In [None]:
def get_login_credentials():
    """Return the login credentials stored in ``config.json``.

    The file is looked up relative to the current working directory and must
    contain a JSON object with the keys ``login_name`` and ``login_password``.

    Returns:
        A tuple ``(login_name, login_password)``.

    Raises:
        FileNotFoundError: If ``config.json`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If one of the two keys is missing.
    """
    # The context manager closes the file handle even if parsing fails
    with open("config.json", encoding="utf-8") as file_pointer:
        data = json.load(file_pointer)

    return data["login_name"], data["login_password"]

In [None]:
def get_server_address_config():
    """Return the server addresses stored in ``config.json``.

    The file is looked up relative to the current working directory and must
    contain a JSON object with the keys ``mediawiki_api``,
    ``sparql_endpoint_url`` and ``wikibase_url``.

    Returns:
        A tuple ``(mediawiki_api, sparql_endpoint_url, wikibase_url)``.

    Raises:
        FileNotFoundError: If ``config.json`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If one of the three keys is missing.
    """
    # The context manager closes the file handle even if parsing fails
    with open("config.json", encoding="utf-8") as file_pointer:
        data = json.load(file_pointer)

    return data["mediawiki_api"], data["sparql_endpoint_url"], data["wikibase_url"]

In [None]:
# Read the Wikibase account and the server addresses from the config file
login, password = get_login_credentials()
mediawiki_api, sparql_endpoint_url, wikibase_url = get_server_address_config()

# Point the WikibaseIntegrator configuration at the configured server
wbi_config.update({
    'MEDIAWIKI_API_URL': mediawiki_api,
    'SPARQL_ENDPOINT_URL': sparql_endpoint_url,
    'WIKIBASE_URL': wikibase_url,
})

# Log in with the account and create the WikibaseIntegrator instance
login_instance = wbi_login.Clientlogin(user=login, password=password)
wikiBaseIntegrator = WikibaseIntegrator(login=login_instance)

## Schritt 1: Anlage der Kategorie für Straßen

In [None]:
# German label of the category item that all streets are assigned to
category_label = "Straße in Gießen"

# Stays None until the lookup below has produced an id
category_id = None

# Ask the MediaWiki API whether an item for the category already exists
matches = wbi_helpers.search_entities(search_string=category_label, search_type='item', language='de')

if not matches:
    # No hit: create the category item with a German label and write it
    # into the Wikibase database
    category_item = wikiBaseIntegrator.item.new()
    category_item.labels.set(language='de', value=category_label)
    category_item.write()

    # The id is read from the JSON representation of the written item
    category_id = category_item.get_json()["id"]
else:
    # Reuse the first search hit
    category_id = matches[0]

print("INFO: The item id for the category is: {}".format(category_id))

## Schritt 2: Import der Straßen in die Wikibase-Datenbank

In [None]:
# Maps the property names used in this notebook to their property ids
property_ids = dict(
    instance_of="P15",
    name="P13",
)

In [None]:
# Load the streets to import. The context manager closes the file handle as
# soon as the JSON has been parsed; UTF-8 is requested explicitly because the
# street names contain non-ASCII characters.
with open("streets.json", encoding="utf-8") as file_pointer:
    data = json.load(file_pointer)

# Number of streets, only used for the progress messages
total = len(data)

# Labels and search strings are cut to this many characters
# NOTE(review): presumably the label length limit of the Wikibase - confirm.
max_label_length = 250

for counter, street in enumerate(data, start=1):
    # Label of the item (the full name is still stored as a property below)
    label = street["name"][0:max_label_length]

    # Sends a search request to the mediawiki api to check if the street already exists
    # NOTE(review): any search hit counts as "exists"; if the search also
    # returns items whose label only starts with this name, such streets are
    # skipped although they are missing - verify against the API behaviour.
    result = wbi_helpers.search_entities(search_string=label, search_type='item', language='de')

    # Skips the adding of the street as a new item if it already exists
    if result:
        print('Importing {} of {}: {} SKIPPED'.format(counter, total, street["name"]))
        continue

    try:
        # Creates a new item for the wikibase database
        item = wikiBaseIntegrator.item.new()

        # Set a german label of the street
        item.labels.set(language='de', value=label)

        # Properties of the item: the category "Straße in Gießen" and the
        # name of the street
        properties = [
            Item(value=category_id, prop_nr=property_ids["instance_of"]),
            String(value=street["name"], prop_nr=property_ids["name"]),
        ]
        item.claims.add(properties)

        # Writing the item into wikibase database
        item.write()

        # Get the id of the new created item of the street
        item_id = item.get_json()["id"]

        # Display the progress message
        print('Importing {} of {}: {} ({})'.format(counter, total, street["name"], item_id))

    except Exception as error:
        # Best effort: report the failed street together with the reason and
        # continue with the next one. Exception (instead of BaseException)
        # keeps KeyboardInterrupt/SystemExit working, so the import can still
        # be aborted by the user.
        print('Importing {} of {}: {} FAILED ({})'.format(counter, total, street["name"], error))