# In [None]:
import csv
from os import path
# NOTE: os.startfile is only available on Windows.
from os import startfile
from urllib.parse import quote, unquote

import requests

"""
Created by Kristina Savickaja for Enriching Audiovisual Encyclopedias project (Digital Humanities in Practice).
Code extracts WikiData item information via SPARQL query.

Info:
1. File will be created in directory where script is run.
2. Header (first line) is for properties. Following lines are for values. 
3. Sometimes value displays as 'null' when opened in MS Excel. If you import it, it will disappear because 'null' is an empty cell.
4. Output is a csv-file in UTF-8.
5. Uses comma (,) to separate values with different properties and semicolon (;) for values with same properties.
6. Wraps every value in double quotes ("") so that commas inside a value are not read as column separators.
7. Asks for user input.
"""

# Name of the output CSV. The path is relative, so the file is created in /
# read from the current working directory. Used by newFile(), addToFile()
# and the existence check at the bottom of the script.
file = "WD_actors.csv"

def newFile(query_resp):
    """Create the CSV file with a header row and the first data row.

    The header is "naam" followed by every distinct property label in
    first-seen order. The data row is the entered name (module-level ``a``)
    followed by one cell per property; multiple values of the same property
    are joined with ';'.

    Every field is quoted and embedded double quotes are doubled, so labels
    or values containing '"' or ',' cannot corrupt the file. Rows do not end
    with a trailing comma, so header and data row have the same field count.

    Args:
        query_resp: Iterable of SPARQL result bindings. Each binding must
            provide ``['propLabel']['value']`` and ``['valueLabel']['value']``.

    Side effects:
        Appends two lines to the module-level ``file`` path (UTF-8) and
        prints a confirmation message.
    """
    ### Collect values
    # Group all values per property in a single pass. A dict keeps insertion
    # order, so properties stay in the order they first appear in the query.
    valuesByProp = {}
    for item in query_resp:
        propName = item['propLabel']['value']
        valuesByProp.setdefault(propName, []).append(item['valueLabel']['value'])

    ### Build rows
    headerRow = ["naam"] + list(valuesByProp)
    # 1 prop can have multiple values: they share one cell, separated by ;
    valueRow = [a] + [';'.join(values) for values in valuesByProp.values()]

    ### Write file
    # Text mode with "\n" as terminator keeps the platform's usual line
    # ending, while csv.writer takes care of quoting and quote escaping.
    with open(file, "a", encoding="utf-8") as outF:
        writer = csv.writer(outF, quoting=csv.QUOTE_ALL, lineterminator="\n")
        writer.writerow(headerRow)
        writer.writerow(valueRow)

    print("Query written.")

def addToFile(query_resp):
    """Append one data row to the existing CSV file, extending the header.

    The existing file is read as UTF-8. Property labels that are not yet in
    the header are appended to it, the previously stored data lines are
    written back unchanged, and a new row for the entered name (module-level
    ``a``) is added. Properties the new item has no value for get an empty
    cell; multiple values of the same property are joined with ';'.

    The file is written as UTF-8, so it is also read as UTF-8 instead of
    being read in the locale encoding and re-encoded afterwards; that
    round-trip failed on non-ASCII text whenever the locale was not
    windows-1252. The header is parsed with the csv module so that labels
    containing commas stay in one column.

    Args:
        query_resp: Iterable of SPARQL result bindings. Each binding must
            provide ``['propLabel']['value']`` and ``['valueLabel']['value']``.

    Side effects:
        Rewrites the module-level ``file`` path and prints a confirmation
        message.
    """
    ### Read from file
    with open(file, encoding="utf-8") as f:
        lines = f.readlines()

    # First line is the header; an empty file simply yields no properties
    oldHeader = next(csv.reader(lines[:1]), [])

    # Drop the first column (name) so only property labels remain
    propNames = oldHeader[1:]

    # Everything after the header is data and is written back verbatim
    dataLines = lines[1:]
    if dataLines and not dataLines[-1].endswith("\n"):
        # Make sure the new row starts on its own line
        dataLines[-1] += "\n"
    ### End of read from file

    ### Collect values
    # Group all values per property in a single pass (insertion-ordered)
    valuesByProp = {}
    for item in query_resp:
        propName = item['propLabel']['value']
        valuesByProp.setdefault(propName, []).append(item['valueLabel']['value'])

    # Properties that are new to the file become extra columns at the end
    for propName in valuesByProp:
        if propName not in propNames:
            propNames.append(propName)

    # One cell per header column; missing properties give an empty cell
    newRow = [a] + [';'.join(valuesByProp.get(propName, [])) for propName in propNames]
    ### End of collect values

    ### Write file
    with open(file, "w", encoding="utf-8") as outF:
        writer = csv.writer(outF, quoting=csv.QUOTE_ALL, lineterminator="\n")
        writer.writerow(["naam"] + propNames)
        outF.writelines(dataLines)
        writer.writerow(newRow)

    print("Query written.")
    ### End of write file


### SPARQL, can be modified for a more accurate search (i.e. specify "singer", ask for a URI)
# `a` stays a module-level name: newFile() and addToFile() read it to write
# the name column.
a = input("Enter name (e.g 'Annie_M.G._Schmidt'): ")

# Build the page title as it appears in the sitelink IRI: spaces become
# underscores and everything outside the characters MediaWiki leaves
# unescaped is percent-encoded. unquote() first, so input that is already
# percent-encoded is not encoded twice. Encoding also keeps characters such
# as '>' or '"' from breaking out of the <...> IRI in the query.
pageTitle = quote(unquote(a.strip()).replace(" ", "_"), safe=";@$!*(),/~:")

sparql1 = """SELECT DISTINCT ?property ?propLabel ?valueLabel WHERE {<https://nl.wikipedia.org/wiki/"""
sparql2 = pageTitle
sparql3 = """> schema:about ?item .?item ?property ?value .hint:Query hint:optimizer "None" .?prop wikibase:directClaim ?property .SERVICE wikibase:label { bd:serviceParam wikibase:language "nl" }}"""

# Combines all strings into request
sparqlQ = sparql1 + sparql2 + sparql3

# Specifies URL to query, and queries. The timeout prevents the script from
# hanging forever when the service does not answer.
url = 'https://query.wikidata.org/sparql'
try:
    r = requests.get(url, params={'format': 'json', 'query': sparqlQ}, timeout=60)
    r.raise_for_status()
except requests.RequestException as e:
    print("Request to the query service failed. Error follows. Exiting.")
    print(e)
    raise SystemExit(1)

# Dictionary return from SPARQL request in dictionary request_dict
try:
    request_dict = r.json()
except ValueError as e:
    # JSON decoding errors raised by requests are ValueError subclasses
    print("JSON error, check if input is correct. Errow follows. Exiting.")
    print(e)
    raise SystemExit(1)

# Browse; a response without results/bindings is treated like an empty result
query_resp = request_dict.get('results', {}).get('bindings', [])

# Checks if list is empty
if not query_resp:
    print("Query returned empty list variable 'query_resp'. Please check if page exists. Exiting.")
    raise SystemExit(1)

### End of SPARQL

# First run creates the file with a header; later runs extend the existing file
if path.exists(file):
    addToFile(query_resp)
else:
    newFile(query_resp)