In [None]:
!pip install beautifulsoup4
!pip install lxml
!pip install wikibaseintegrator==0.12

In [None]:
import urllib.request
import json
import time
import random
import re
import subprocess

from bs4 import BeautifulSoup
from datetime import datetime
from time import mktime

# Imports for the WikibaseIntegrator
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator import wbi_login as wbi_login
from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator.datatypes import Item, String, Time, URL
from wikibaseintegrator.wbi_enums import WikibaseDatePrecision

In [None]:
def getSessions(year, month):
    """Collect all sessions listed on the calendar page of a single month.

    The calendar page of the Gießen parliament information system is
    downloaded, an Internet Archive snapshot is requested for it (best effort)
    and every table row that contains a session name is turned into a
    dictionary.

    Args:
        year: Year of the calendar page; inserted into the URL via ``str()``.
        month: Month of the calendar page; inserted into the URL via ``str()``.

    Returns:
        A list of dictionaries with the keys ``session_date``,
        ``session_name``, ``reference`` and ``reference_access_date`` and,
        when available, ``session_starttime``, ``session_endtime``,
        ``reference_archive_link`` and ``link``.

    Raises:
        urllib.error.URLError: If the calendar page can not be downloaded.
    """
    # Status message: which month is currently being processed
    print("Getting sessions of the month " + str(month) + " from year " + str(year) + " ...")

    url_string = 'https://parlamentsinfo.giessen.de/si0040.php?__cjahr=' + str(year) + '&__cmonat=' + str(month) + '&__canz=1&__cselect=0'

    # The context manager closes the HTTP response as soon as the page is read
    with urllib.request.urlopen(url_string) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')

    # Tries to archive the url to the internet archive and save the link.
    # Only ordinary errors are swallowed so that Ctrl-C still stops the run.
    archive_link = None
    try:
        archive_link = archive_url(url_string)
    except Exception:
        print("Could not get an archive link for: " + url_string)

    reference_access_date = datetime.now().strftime("%d.%m.%Y")

    # Text of the most recent "weekday" span; rows without their own span
    # inherit it from the preceding rows.
    current_day = None

    # Saves all session elements
    list_of_session_elements = []

    for tr_element in soup.find_all('tr'):
        weekday_span = tr_element.find('span', {"class": "weekday"})
        if weekday_span is not None:
            current_day = weekday_span.getText()

        # Only rows with a session name describe a session
        name_div = tr_element.find('div', {"class": "smc-el-h"})
        if name_div is None:
            continue

        name_string = name_div.getText()

        # Without a day no date can be built; skip this row instead of
        # failing with a TypeError and losing the whole month.
        if current_day is None:
            print("Skipping session without a day: " + name_string)
            continue

        session_information = {
            "session_date": current_day + "." + str(month) + "." + str(year)
        }

        # One time in the list item is the start, two are start and end
        time_item = tr_element.find('li', {"class": "list-inline-item"})
        if time_item is not None:
            times = re.findall('[0-9][0-9]:[0-9][0-9]', time_item.getText())
            if len(times) in (1, 2):
                session_information["session_starttime"] = times[0]
            if len(times) == 2:
                session_information["session_endtime"] = times[1]

        session_information["session_name"] = name_string
        session_information["reference"] = url_string
        session_information["reference_access_date"] = reference_access_date

        if archive_link is not None:
            session_information["reference_archive_link"] = archive_link

        # The link to the detail page of the session is optional
        link_element = name_div.find('a', {"class": "smce-a-u smc-link-normal smc_doc smc_datatype_si"})
        if link_element is not None:
            session_information["link"] = "https://parlamentsinfo.giessen.de/" + link_element['href']

        list_of_session_elements.append(session_information)

    # Return a list with all sessions of the month
    return list_of_session_elements

In [None]:
def get_session_details(url_to_session):
    """Read the details and the agenda of one session from its detail page.

    Args:
        url_to_session: URL of the session page in the parliament
            information system.

    Returns:
        A dictionary with the keys ``session_id``, ``session_name``,
        ``session_date``, ``reference``, ``reference_access_date`` and
        ``agenda`` and, when available, ``reference_archive_link``,
        ``session_starttime`` and ``session_endtime``. ``agenda`` is a list of
        dictionaries with ``public_status`` (True for "Ö", False for "N"),
        ``url_to_proposal`` and, when the badge contains a number, ``order``.

    Raises:
        urllib.error.URLError: If the page can not be downloaded.
        ValueError: If one of the four header cells (id, name, date, time) is
            missing on the page.
    """
    # The context manager closes the HTTP response as soon as the page is read
    with urllib.request.urlopen(url_to_session) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')

    def cell_text(css_class):
        # Returns the text of a mandatory header cell and names the missing
        # cell instead of failing with an AttributeError on None.
        cell = soup.find('div', {"class": css_class})
        if cell is None:
            raise ValueError("Missing cell '" + css_class + "' on: " + url_to_session)
        return cell.getText()

    session_id = cell_text("smc-dg-td-2 smc-table-cell siname")
    session_name = cell_text("smc-dg-td-2 smc-table-cell sigrname")
    session_date = cell_text("smc-dg-td-2 smc-table-cell sidat")
    session_start_end = cell_text("smc-dg-td-2 smc-table-cell yytime")

    session_information = {"session_id": session_id,
                           "session_name": session_name,
                           "session_date": session_date,
                           "reference": url_to_session,
                           "reference_access_date": datetime.now().strftime("%d.%m.%Y")
                          }

    # Tries to archive the url to the internet archive and save the link.
    # Only ordinary errors are swallowed so that Ctrl-C still stops the run.
    try:
        session_information["reference_archive_link"] = archive_url(url_to_session)
    except Exception:
        print("Could not get an archive link for: " + url_to_session)

    # One time in the cell is the start, two are start and end
    times = re.findall('[0-9][0-9]:[0-9][0-9]', session_start_end)
    if len(times) in (1, 2):
        session_information["session_starttime"] = times[0]
    if len(times) == 2:
        session_information["session_endtime"] = times[1]

    agenda = []

    for tr_element in soup.find_all('tr', {"class": "smc-t-r-l"}):
        badge = tr_element.find('span', {"class": "badge"})
        if badge is None:
            continue

        badge_text = badge.getText()

        # Saves the agenda item with a proposal
        agenda_item = {}

        order_match = re.search('[0-9]+', badge_text)
        if order_match is not None:
            agenda_item["order"] = int(order_match.group(0))

        # "Ö" marks a public, "N" a non-public item. A badge with neither
        # marker is skipped; indexing an empty match list used to raise an
        # IndexError here and discard the whole session.
        status_match = re.search('[ÖN]', badge_text)
        if status_match is None:
            continue
        agenda_item["public_status"] = status_match.group(0) == "Ö"

        # Only agenda items that link to a proposal are kept
        link_to_proposal = tr_element.find('a', {"class": "smce-a-u smc-link-procedure smc_doc smc_field_voname smcnowrap smc_datatype_vo"})
        if link_to_proposal is None:
            continue

        agenda_item["url_to_proposal"] = "https://parlamentsinfo.giessen.de/" + link_to_proposal['href']
        agenda.append(agenda_item)

    session_information["agenda"] = agenda

    return session_information

In [None]:
def get_proposal_details(url_to_proposal):
    """Read the details of one proposal from its page in the information system.

    The subject cell is split at "-": the last piece is ignored, the piece
    before it is searched for the proposal type and date, and everything in
    front of that is the subject.

    Args:
        url_to_proposal: URL of the proposal page.

    Returns:
        A dictionary with the keys ``session_net_link``, ``reference_url`` and
        ``reference_access_date`` and, when they could be found,
        ``proposal_subject``, ``proposal_type`` ("Antrag" or "Anfrage"),
        ``proposal_date`` (dd.mm.yyyy), ``proposal_number``,
        ``proposal_filenumber`` and ``reference_archive_link``.

    Raises:
        urllib.error.URLError: If the page can not be downloaded.
    """
    print("Fetching information of " + url_to_proposal + " ...")

    # The context manager closes the HTTP response as soon as the page is read
    with urllib.request.urlopen(url_to_proposal) as response:
        html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')

    proposal_subject = soup.find('div', {"class": "smc-dg-td-2 smc-table-cell vobetr"})
    proposal_number = soup.find('div', {"class": "smc-dg-td-2 smc-table-cell voname"})
    proposal_filenumber = soup.find('div', {"class": "smc-dg-td-2 smc-table-cell voakz"})

    # Saves the details of the proposal, starting with the link to the page
    proposal_information = {"session_net_link": url_to_proposal}

    # Process the subject, proposal type and the proposal date
    if proposal_subject is not None:
        subject_pieces = str(proposal_subject.getText()).split("-")

        # Checks that all pieces are existing
        if len(subject_pieces) >= 3:
            # Re-join with "-" so that hyphens inside the subject survive the
            # split (joining with "" turned "A-B" into "AB").
            proposal_information["proposal_subject"] = "-".join(subject_pieces[0:-2]).strip()

            # The second to last piece carries the type and the date
            authors = subject_pieces[-2]

            type_match = re.search("Antrag|Anfrage", authors)
            if type_match is not None:
                proposal_information["proposal_type"] = type_match.group(0)

            date_match = re.search(r"[0-9][0-9]\.[0-9][0-9]\.[0-9][0-9][0-9][0-9]", authors)
            if date_match is not None:
                proposal_information["proposal_date"] = date_match.group(0)

    # Process the proposal number
    if proposal_number is not None:
        proposal_information["proposal_number"] = proposal_number.getText()

    # Process the proposal filenumber
    if proposal_filenumber is not None:
        proposal_information["proposal_filenumber"] = proposal_filenumber.getText()

    # Tries to archive the url to the internet archive and save the link.
    # Only ordinary errors are swallowed so that Ctrl-C still stops the run.
    try:
        proposal_information["reference_archive_link"] = archive_url(url_to_proposal)
    except Exception:
        print("Could not get an archive link for: " + url_to_proposal)

    # The page itself, together with the access date, serves as the reference
    proposal_information["reference_url"] = url_to_proposal
    proposal_information["reference_access_date"] = datetime.now().strftime("%d.%m.%Y")

    return proposal_information

In [None]:
def archive_url(url):
    """Archive a URL in the Internet Archive and return the snapshot URL.

    ``wget --spider`` is pointed at the "save" endpoint of web.archive.org and
    its log output (stderr) is searched for the address of the snapshot. Up to
    five attempts are made, waiting 0, 30, 60, 90 and 120 seconds before them.

    Args:
        url: The URL that should be archived.

    Returns:
        The snapshot prefix found in the wget output
        (``https://web.archive.org/web/<14 digits>/``) followed by ``url``.

    Raises:
        ValueError: If wget can not be started or no snapshot address shows up
            in any of the five attempts.
    """
    # An argument list (no shell) hands the URL to wget verbatim, so quotes or
    # other shell metacharacters in the URL can neither break nor extend the
    # command.
    command = ["wget", "--spider", "https://web.archive.org/save/" + url]

    # Raw string: "\." is a regex escape, not a (deprecated) string escape
    snapshot_pattern = re.compile(r"https://web\.archive\.org/web/[0-9]{14}/")

    for attempt in range(5):
        # Increase the waiting time to avoid too many attempts in a short time
        time.sleep(2 * attempt * 15)

        try:
            completed = subprocess.run(command,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE,
                                       text=True)
        except OSError as error:
            # wget is missing or not executable; retrying can not help
            raise ValueError("No archive url was generated.") from error

        # A single snapshot address in the log is sufficient
        snapshot = snapshot_pattern.search(completed.stderr)
        if snapshot is not None:
            return snapshot.group(0) + url

    # Raise an exception if the webpage can not be archived
    raise ValueError("No archive url was generated.")

In [None]:
def get_reference_link_by_session_link(session_link):
    """Return the calendar page that links to the given session page.

    The lookup reads ``sessions_list.json`` from the working directory.

    Args:
        session_link: URL of the session page, as stored under ``link``.

    Returns:
        The value stored under ``reference`` for the first entry whose
        ``link`` equals ``session_link``.

    Raises:
        ValueError: If no entry links to ``session_link``.
    """
    # The file is written as UTF-8 by the notebook, so it is read as UTF-8;
    # the context manager closes it again.
    with open("sessions_list.json", encoding="utf-8") as file_pointer:
        data = json.load(file_pointer)

    for session in data:
        # Entries without a detail page carry no "link" key at all
        if "link" in session and session["link"] == session_link:
            return session["reference"]

    # Raises an exception if no reference link could be found
    raise ValueError("Could not find a linking reference for: " + session_link)

In [None]:
def get_reference_link_by_proposal_link(propopsal_link):
    """Return the session page that links to the given proposal.

    The lookup reads ``sessions_details_list.json`` from the working
    directory and searches the agenda of every session.

    Args:
        propopsal_link: URL of the proposal page, as stored under
            ``url_to_proposal`` in an agenda item.

    Returns:
        A tuple ``(reference, reference_archive_link, reference_access_date)``
        of the first session whose agenda contains the proposal.

    Raises:
        ValueError: If no session links to ``propopsal_link``.
        KeyError: If the matching session has no ``reference_archive_link``.
    """
    # The file is written as UTF-8 by the notebook, so it is read as UTF-8;
    # the context manager closes it again.
    with open("sessions_details_list.json", encoding="utf-8") as file_pointer:
        data = json.load(file_pointer)

    for session in data:
        # Sessions without a detail page have no agenda
        for agenda_item in session.get("agenda", []):
            # Returns the session link with further information as a reference link for the proposal
            if agenda_item["url_to_proposal"] == propopsal_link:
                return session["reference"], session["reference_archive_link"], session["reference_access_date"]

    # Raises an exception if no reference link could be found
    raise ValueError("Could not find a linking reference for: " + propopsal_link)

In [None]:
def get_login_credentials():
    """Return the login credentials from ``config.json``.

    Returns:
        A tuple ``(login_name, login_password)`` taken from the keys of the
        same name in the config file.

    Raises:
        KeyError: If one of the two keys is missing in the config file.
    """
    # The context manager closes the config file again after reading
    with open("config.json", encoding="utf-8") as file_pointer:
        data = json.load(file_pointer)

    return data["login_name"], data["login_password"]

In [None]:
def get_server_address_config():
    """Return the server addresses from ``config.json``.

    Returns:
        A tuple ``(mediawiki_api, sparql_endpoint_url, wikibase_url)`` taken
        from the keys of the same name in the config file.

    Raises:
        KeyError: If one of the three keys is missing in the config file.
    """
    # The context manager closes the config file again after reading
    with open("config.json", encoding="utf-8") as file_pointer:
        data = json.load(file_pointer)

    return data["mediawiki_api"], data["sparql_endpoint_url"], data["wikibase_url"]

In [None]:
def get_reference_parlamentsinfosystem(reference_url, archive_url, retrieved_at):
    """Build the reference block that cites the parliament information system.

    Args:
        reference_url: URL of the page the statement was taken from.
        archive_url: URL of the archived copy of that page.
        retrieved_at: Access date as a string in the format ``dd.mm.yyyy``.

    Returns:
        A list with four claims: the source name (P25), the reference URL
        (P12), the archive URL (P14) and the retrieval date (P16, day
        precision).

    Raises:
        ValueError: If ``retrieved_at`` does not match ``dd.mm.yyyy``.
    """
    # Midnight of the access day in the timestamp notation handed to Time()
    retrieved_date = datetime.strptime(retrieved_at, "%d.%m.%Y").strftime("+%Y-%m-%dT%H:%M:%SZ")

    return [
        # Adds the source of the reference
        String(value="Parlamentsinformationssystem der Stadt Gießen", prop_nr="P25"),
        # Adds the url of the reference
        String(value=reference_url, prop_nr="P12"),
        # Adds the archive url of the reference
        String(value=archive_url, prop_nr="P14"),
        # Adds the retrieved date of the reference. No "references" argument:
        # that name is not defined inside this function, so passing it either
        # failed with a NameError or picked up a stale notebook global.
        Time(time=retrieved_date, prop_nr="P16", precision=WikibaseDatePrecision.DAY),
    ]

## Schritt 1: Ermittelt alle Gremiensitzungen

In [None]:
# Collects the sessions of every month from 2000 up to and including 2022
list_of_all_sessions = []

for year in range(2000,2023):
    for month in range(1,13):
        # Tries to fetch all sessions from the month. Only ordinary errors are
        # swallowed, so the long-running loop can still be stopped with
        # Ctrl-C / "interrupt kernel".
        try:
            list_of_all_sessions.extend(getSessions(year, month))
        except Exception:
            print("Could not fetch sessions from month " + str(month) + " of year " + str(year))

        # Waits a random time so that the DDOS protection is not triggered
        waiting_time = random.randint(10,60)
        time.sleep(waiting_time)

# Saves all sessions in a JSON file
with open('sessions_list.json', 'w', encoding='utf-8') as f:
    json.dump(list_of_all_sessions, f, ensure_ascii=False, indent=4)

## Schritt 2: Ermittelt die Details zu den Gremiensitzungen und zugehörige Anträge

In [None]:
# Reads the JSON-file with the sessions (written as UTF-8 in step 1)
with open("sessions_list.json", encoding="utf-8") as file_pointer:
    data = json.load(file_pointer)

list_of_all_sessions = []

# Iterates through the sessions
for session in data:

    print("Get information from " + session["session_name"] + " (" + session["session_date"] + ") ...")

    # Checks, if the session has a link
    if 'link' in session:
        # Only ordinary errors are swallowed, so the loop can still be
        # stopped with Ctrl-C / "interrupt kernel".
        try:
            list_of_all_sessions.append(get_session_details(session['link']))
        except Exception:
            print("Could not get the session details of: " + session['link'])

        # Waits a random time so that the DDOS protection is not triggered
        waiting_time = random.randint(0, 60)
        time.sleep(waiting_time)
    else:
        # Without a detail page the calendar entry is kept as it is
        list_of_all_sessions.append(session)


# Unique list of proposal links. The set remembers the URLs that are already
# in the list; comparing a URL against the list of dictionaries never matched,
# so every proposal was added once per agenda it appears on.
list_of_proposal_links = []
seen_proposal_urls = set()

for session in list_of_all_sessions:
    for agenda_item in session.get("agenda", []):
        proposal_url = agenda_item["url_to_proposal"]
        if proposal_url not in seen_proposal_urls:
            seen_proposal_urls.add(proposal_url)
            list_of_proposal_links.append({"url_to_proposal": proposal_url})


# Saves the sessions with the details in a JSON-file
with open('sessions_details_list.json', 'w', encoding='utf-8') as file_pointer:
    json.dump(list_of_all_sessions, file_pointer, ensure_ascii=False, indent=4)

# Saves the links of the proposal in a JSON-file
with open('proposal_link_list.json', 'w', encoding='utf-8') as file_pointer:
    json.dump(list_of_proposal_links, file_pointer, ensure_ascii=False, indent=4)

## Schritt 3: Ermittelt alle Details zu den einzelnen Einträgen

In [None]:
# Reads the JSON-file with the proposal links (written as UTF-8 in step 2)
with open("proposal_link_list.json", encoding="utf-8") as file_pointer:
    data = json.load(file_pointer)

list_of_all_proposals_with_details = []

for proposal in data:
    # Only ordinary errors are swallowed, so the loop can still be stopped
    # with Ctrl-C / "interrupt kernel".
    try:
        proposal_with_details = get_proposal_details(proposal["url_to_proposal"])
        list_of_all_proposals_with_details.append(proposal_with_details)
    except Exception:
        print("Error: Could not get details of the proposal: " + proposal["url_to_proposal"])

    # Waits a random time so that the DDOS protection is not triggered
    waiting_time = random.randint(0, 30)
    time.sleep(waiting_time)

# Saves the proposals with their details in a JSON-file
with open('proposals_with_details.json', 'w', encoding='utf-8') as file_pointer:
    json.dump(list_of_all_proposals_with_details, file_pointer, ensure_ascii=False, indent=4)

## Schritt 4: Erstellung einer JSON-Datei für den Antrags-Import

In [None]:
# Reads the JSON-file with the proposal details (written as UTF-8 in step 3)
with open("proposals_with_details.json", encoding="utf-8") as file_pointer:
    data = json.load(file_pointer)


# Saves the processed proposals for the import
processed_proposals_for_import = []

# Fields that are copied together with the reference of the proposal page:
# (key in the import file, key in the proposal record). In order:
# "Betreff", "Antragsdatum", "Typ", "Vorlagennummer", "Aktenzeichen".
referenced_proposal_fields = (
    ("subject", "proposal_subject"),
    ("date", "proposal_date"),
    ("type", "proposal_type"),
    ("number", "proposal_number"),
    ("filenumber", "proposal_filenumber"),
)


# Iterates through the saved proposals
for proposal in data:
    # The subject is the name of the item; proposals without one are skipped
    if 'proposal_subject' not in proposal:
        continue

    processed_proposal = {
        # Generates the name of the item
        "name_of_item": proposal["proposal_subject"].strip().replace("\t"," "),
        # Added the category as "Instanz von" property
        "instance_of": {
            "value": "Vorlage für städtisches Gremium"
        },
    }

    for import_key, proposal_key in referenced_proposal_fields:
        if proposal_key in proposal:
            # NOTE: "reference_archive_link" is only present when archiving
            # succeeded in step 3; a proposal without it raises a KeyError here.
            processed_proposal[import_key] = {
                "value": proposal[proposal_key].strip().replace("\t"," "),
                "reference_url": proposal["reference_url"].strip(),
                "archive_url": proposal["reference_archive_link"].strip(),
                "retrieved_at": proposal["reference_access_date"].strip()
            }

    # Adds the session.net link as "Session.net Link" property
    if 'session_net_link' in proposal:
        # Gets the reference information for the session.net link. The result
        # is deliberately not unpacked into a global called "archive_url":
        # that would overwrite the archive_url() function for all later cells.
        link_reference_url, link_archive_url, link_retrieved_at = get_reference_link_by_proposal_link(proposal["session_net_link"])
        processed_proposal["session.net_link"] = {
            "value": proposal["session_net_link"].strip(),
            "reference_url": link_reference_url.strip(),
            "archive_url": link_archive_url.strip(),
            "retrieved_at": link_retrieved_at.strip()
        }

    # Adds the processed proposal to the list of processed proposals
    processed_proposals_for_import.append(processed_proposal)


with open('proposals_import.json', 'w', encoding='utf-8') as file_pointer:
    json.dump(processed_proposals_for_import, file_pointer, ensure_ascii=False, indent=4)

## Schritt 5: Erstellung einer JSON-Datei für den Gremiensitzungs-Import

In [None]:
# Reads the JSON-file with the session details (written as UTF-8 in step 2)
with open("sessions_details_list.json", encoding="utf-8") as file_pointer:
    data = json.load(file_pointer)


# Saves the processed sessions for the import
processed_sessions_for_import = []

# Optional fields that are copied together with the reference of the session:
# (key in the import file, key in the session record). In order:
# session number, "Datum", "Anfangszeit", "Endzeit".
referenced_session_fields = (
    ("session_number", "session_id"),
    ("date", "session_date"),
    ("starttime", "session_starttime"),
    ("endtime", "session_endtime"),
)


# Iterates through the saved sessions
for session in data:
    processed_session = {}

    # Generates the name of the item
    processed_session["name_of_item"] = session["session_name"] + " (" + session["session_date"] + ")"

    # Added the category as "instance of" property
    processed_session["instance_of"] = {
                                        "value": "Gremiensitzung der Stadt"
                                       }

    # Reference that is attached to every value taken from the session record.
    # NOTE: "reference_archive_link" is only present when archiving succeeded;
    # a session without it raises a KeyError here.
    session_reference = {
        "reference_url": session["reference"],
        "archive_url": session["reference_archive_link"],
        "retrieved_at": session["reference_access_date"]
    }

    for import_key, session_key in referenced_session_fields:
        if session_key in session:
            processed_session[import_key] = {"value": session[session_key].strip(), **session_reference}

    # Added the session link as "Session.Net Link" property
    if 'link' in session:
        # NOTE(review): get_reference_linkinfo_by_session_link is not defined in
        # the visible part of the notebook (only get_reference_link_by_session_link
        # is, and it returns a single value), and the session records written in
        # step 2 have a "reference" key but no "reference_url" key - confirm the
        # intended lookup. The result is deliberately not unpacked into a global
        # called "archive_url": that would overwrite the archive_url() function.
        link_value, link_reference_url, link_archive_url, link_retrieved_at = get_reference_linkinfo_by_session_link(session["reference_url"])
        processed_session["link"] = {
                                          "value": session["link"].strip(),
                                          "reference_url": link_reference_url,
                                          "archive_url": link_archive_url,
                                          "retrieved_at": link_retrieved_at
                                         }

    # Added the agenda items as "Tagesordnungspunkte" property
    if "agenda" in session and len(session["agenda"]) > 0:
        processed_session["agenda"] = []

        for item in session["agenda"]:
            processed_item = {
                "public_status": item["public_status"],
                "url": item["url_to_proposal"],
                **session_reference
            }

            # The position on the agenda is only known for some items
            if 'order' in item:
                processed_item["order"] = item["order"]

            processed_session["agenda"].append(processed_item)

    processed_sessions_for_import.append(processed_session)


with open('session_import.json', 'w', encoding='utf-8') as file_pointer:
    json.dump(processed_sessions_for_import, file_pointer, ensure_ascii=False, indent=4)

## Schritt 6: Import der Daten in die Wikibase-Datenbank

In [None]:
# Gets the login and the password for the wikibase database
login, password = get_login_credentials()

# Gets the config vailes für the wikibase server
mediawiki_api, sparql_endpoint_url, wikibase_url = get_server_address_config()

wbi_config['MEDIAWIKI_API_URL'] = mediawiki_api
wbi_config['SPARQL_ENDPOINT_URL'] = sparql_endpoint_url
wbi_config['WIKIBASE_URL'] = wikibase_url

# Creates wikibase integrator instance
login_instance = wbi_login.Clientlogin(user=login, password=password)
wikiBaseIntegrator = WikibaseIntegrator(login=login_instance)

In [None]:
# Saves the item ids by given item names
item_ids = {
            "proposal_category": "Q48",
            "session_category": "Q1066"
            }


# Saves the property ids by given property names
property_ids = {
                 "instance_of": "P15",
                 "propsal_date": "P20",
                 "propsal_type": "P24",
                 "propsal_number": "P22",
                 "filenumber": "P21",
                 "session_net_link": "P23",
                 "propsal_subject": "P26",
    
                 "session_number": "P28",
                 "session_date": "P27",
                 "session_starttime": "P29",
                 "session_endtime": "P30",
    
                 "agenda_item": "P31",
                 "public_state": "P32",
                 "order": "P33"
}

### Import der Anträge in die Wikibase-Datenbank

In [None]:
# Opens the JSON-file with the sessions
file_pointer = open("proposals_import.json")

# Reads the JSON-file with the sessions
data = json.load(file_pointer)


# Proposal counter
counter = 0

# Saves the mapping of proposal link and item id 
map_between_proposal_and_item_id = {}


# Iterates through the proposals
for proposal in data:
    counter = counter + 1
    
    # Display the progress message
    print('Importing {} of {} ...'.format(str(counter), str(len(data))))
    
    # Creates a new item for the wikibase database
    item = wikiBaseIntegrator.item.new()
    
    # Saves the properties of the item
    properties = []
    
    print(proposal["name_of_item"])
    
    # Set a german label of the session or abort the processing
    if 'name_of_item' in proposal.keys():
        item.labels.set(language='de', value=proposal["name_of_item"][0:250])
    else:
        print("Item can not created because session has no name")
        continue
    
    
    # Add the category "Gremiensitzung der Stadt Gießen" as property
    instance_of_property = Item(value=item_ids["proposal_category"], prop_nr=property_ids["instance_of"])
    properties.append(instance_of_property)
    
    
    # Proposal fields that are stored as plain string claims, mapped to the key under
    # which their property id is looked up in property_ids (spelling as used there)
    proposal_string_properties = {
        "subject": "propsal_subject",
        "type": "propsal_type",
        "number": "propsal_number",
        "filenumber": "filenumber",
    }

    for key in proposal.keys():
        is_string_field = key in proposal_string_properties

        # Keys without a matching property are not turned into claims
        if not is_string_field and key not in ("date", "session.net_link"):
            continue

        field = proposal[key]

        # Build reference from the parlamentsinfo system (identical for every field type)
        references = [get_reference_parlamentsinfosystem(field["reference_url"], field["archive_url"], field["retrieved_at"])]

        if key == "date":
            # Parse the German date format directly; no detour over a local-time
            # timestamp, so the time part always stays at midnight
            proposal_date = datetime.strptime(field["value"], "%d.%m.%Y").strftime("+%Y-%m-%dT%H:%M:%SZ")
            property_for_proposal = Time(time=proposal_date, prop_nr=property_ids["propsal_date"], precision=WikibaseDatePrecision.DAY, references=references)

        elif key == "session.net_link":
            property_for_proposal = URL(value=field["value"], prop_nr=property_ids["session_net_link"], references=references)

        else:
            value = field["value"]

            if key == "subject":
                # Subjects are cut to 400 characters
                # NOTE(review): presumably the Wikibase string length limit - confirm
                value = value[0:400]

            property_for_proposal = String(value=value, prop_nr=property_ids[proposal_string_properties[key]], references=references)

        # Add the property to the property list
        properties.append(property_for_proposal)
        
    
    # Add the properties of the property list to the proposal item
    item.claims.add(properties)
    
    # Writing the item into wikibase database
    item.write()
    
    # Saves the Item ID from the write result
    result = item.get_json()
    qid = result["id"]
    
    # The mapping is keyed by the proposal link, so the lookup has to use the link
    # as well (checking the item id against the link keys could never match)
    url = proposal["session.net_link"]["value"]
    
    # Keep the first item id that was stored for a proposal link
    if url not in map_between_proposal_and_item_id:
        map_between_proposal_and_item_id[url] = qid
        
    
# Store the mapping of proposal link and item id as indented UTF-8 JSON
with open('mapping_proposal_item.json', 'w', encoding='utf-8') as file_pointer:
    file_pointer.write(
        json.dumps(map_between_proposal_and_item_id, ensure_ascii=False, indent=4)
    )

### Import der Gremiensitzungen in die Wikibase-Datenbank

In [None]:
# Reads the JSON-file with the sessions; the with-block closes the file again after loading
# NOTE(review): assumes the file is UTF-8 encoded like the mapping file below - confirm
with open("session_import.json", encoding='utf-8') as file_pointer:
    data = json.load(file_pointer)

# Gets the mapping of proposal link and item id
# (the mapping file is written as UTF-8 with non-ASCII characters kept, so it is read as UTF-8)
with open("mapping_proposal_item.json", encoding='utf-8') as file_pointer:
    map_between_proposal_and_item_id = json.load(file_pointer)

# Session fields that are stored as plain string claims, mapped to the key under
# which their property id is looked up in property_ids
session_string_properties = {
    "session_number": "session_number",
    "starttime": "session_starttime",
    "endtime": "session_endtime",
}

# Session counter (counts every session of the file, skipped ones included)
counter = 0

for counter, session in enumerate(data, start=1):
    # A session without a name can not get a label and is skipped. This is checked
    # first because the progress message below needs the name as well.
    if 'name_of_item' not in session:
        print("Item can not created because session has no name")
        continue

    # Display the progress message
    print('Importing {} of {}: {}'.format(str(counter), str(len(data)), session["name_of_item"]))

    # Creates a new item for the wikibase database
    item = wikiBaseIntegrator.item.new()

    # Set a german label of the session
    item.labels.set(language='de', value=session["name_of_item"])

    # Saves the properties of the item
    properties = []

    # Add the session category ("Gremiensitzung der Stadt Gießen") as property
    instance_of_property = Item(value=item_ids["session_category"], prop_nr=property_ids["instance_of"])
    properties.append(instance_of_property)

    for key in session.keys():
        if key == "agenda":
            # Every agenda entry becomes its own claim that points to the proposal item
            for agenda_item in session[key]:
                # Build reference from the parlamentsinfo system
                references = [get_reference_parlamentsinfosystem(agenda_item["reference_url"], agenda_item["archive_url"], agenda_item["retrieved_at"])]

                # Build the qualifiers for the agenda topics
                if agenda_item["public_status"] == True:
                    public_state = "öffentlich"
                else:
                    public_state = "nicht öffentlich"

                qualifiers = [
                    String(value=str(agenda_item["order"]), prop_nr=property_ids["order"]),
                    String(value=public_state, prop_nr=property_ids["public_state"])
                ]

                url = agenda_item["url"]

                # Without an imported proposal item there is nothing the claim could point to
                if url not in map_between_proposal_and_item_id:
                    print("Error: Could not get proposal as item!")
                    continue

                qid = map_between_proposal_and_item_id[url]
                property_for_session = Item(value=qid, prop_nr=property_ids["agenda_item"], references=references, qualifiers=qualifiers)
                properties.append(property_for_session)

            continue

        is_string_field = key in session_string_properties

        # Keys without a matching property are not turned into claims
        if not is_string_field and key not in ("date", "session.net_link"):
            continue

        field = session[key]

        # Build reference from the parlamentsinfo system (identical for every field type)
        references = [get_reference_parlamentsinfosystem(field["reference_url"], field["archive_url"], field["retrieved_at"])]

        if key == "date":
            # Parse the German date format directly; no detour over a local-time
            # timestamp, so the time part always stays at midnight
            session_date = datetime.strptime(field["value"], "%d.%m.%Y").strftime("+%Y-%m-%dT%H:%M:%SZ")
            property_for_session = Time(time=session_date, prop_nr=property_ids["session_date"], precision=WikibaseDatePrecision.DAY, references=references)

        elif key == "session.net_link":
            # Store the link itself as value of the claim (an empty string is not a link)
            property_for_session = URL(value=field["value"], prop_nr=property_ids["session_net_link"], references=references)

        else:
            property_for_session = String(value=field["value"], prop_nr=property_ids[session_string_properties[key]], references=references)

        # Add the property to the property list
        properties.append(property_for_session)

    # Add the properties of the property list to the session item
    item.claims.add(properties)

    # Writing the item into wikibase database
    item.write()