In [None]:
import rdflib
from rdflib import URIRef
import tqdm

# An empty local graph is enough here: it is only used as the entry point for
# running the query, and the SERVICE clause below sends the actual pattern
# matching to the remote endpoint at http://ld.iospress.nl/sparql.
ldQuery = rdflib.Graph()

# Fetch bibliographic metadata for papers typed iospress:Chapter or
# iospress:Article whose containing issue/book is (via one intermediate
# volume/series) part of <http://ld.iospress.nl/rdf/artifact/sw>.
#
# Things to keep in mind when reading the result:
#   * Every triple pattern is mandatory (there is no OPTIONAL), so a paper that
#     lacks any one of the listed properties is not returned at all.
#   * ?keywords is bound once per matching triple, so a paper with several
#     keywords produces several rows; the cells that consume ldResult de-dupe.
#   * LIMIT 50 caps the number of rows (not the number of distinct papers).
ldResult = ldQuery.query("""
   PREFIX  rdf:  <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX  iospress-dt: <http://ld.iospress.nl/rdf/datatype/>
    PREFIX  xsd:  <http://www.w3.org/2001/XMLSchema#>
    PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX  iospress: <http://ld.iospress.nl/rdf/ontology/>
    PREFIX  iospress-geocode: <http://ld.iospress.nl/rdf/geocode/>

SELECT ?title ?pubDate ?type ?issueOrBook ?abstract ?paperID ?doi ?doiURL ?authors ?editors ?preprintDate ?pageStart ?pageEnd ?articleIssueNumber ?keywords ?sourceFile
      WHERE {
      SERVICE <http://ld.iospress.nl/sparql> {
      {
        { ?paper  rdf:type  iospress:Chapter }
        UNION
        { ?paper  rdf:type  iospress:Article }
        ?paper rdfs:label ?title.
        ?paper iospress:publicationDate  ?pubDate.
        ?paper rdf:type ?type.
        ?paper iospress:partOf ?issueOrBook.
        ?paper iospress:publicationAbstract ?abstract.
        ?paper iospress:id ?paperID.
        ?paper iospress:publicationDoi ?doi.
        ?paper iospress:publicationDoiUrl ?doiURL.
        ?paper iospress:publicationAuthorList ?authors.
        ?paper iospress:publicationEditorList ?editors.
        ?paper iospress:publicationPreprintDate ?preprintDate.
        ?paper iospress:publicationPageStart ?pageStart.
        ?paper iospress:publicationPageEnd ?pageEnd.
        ?paper iospress:articleInIssue ?articleIssueNumber.
        ?paper iospress:publicationIncludesKeyword ?keywords.
        ?paper iospress:sourceFile ?sourceFile.

               ?issueOrBook iospress:partOf ?volumeOrSerie.
            ?volumeOrSerie iospress:partOf <http://ld.iospress.nl/rdf/artifact/sw>

      }}}
      LIMIT 50
""")


In [None]:
# USED TO CREATE EXPORT.TXT
#
# Writes every distinct keyword value found in ldResult to export.txt (one per
# line, in first-seen order) and echoes it to the cell output.
# The file is opened in append mode, so re-running this cell adds the same
# keywords to the end of an existing export.txt again.

# Keyword values already written during this run.
exportList = []

# The `with` block guarantees the file is flushed and closed when the loop ends
# (or fails); previously the handle was left open and buffered lines could be lost.
with open(file='export.txt',mode='a') as export:
	for row in ldResult:
		# The same keyword can occur in many result rows; emit it only once.
		if row.keywords in exportList:
			continue
		exportList.append(row.keywords)
		export.write(f"{row.keywords}\n")
		print(row.keywords)

# Total number of result rows (not the number of distinct keywords).
print(f"Number of Results: {len(ldResult)}")

In [None]:
# RECOMMEND YOU LIMIT QUERY RESULTS FROM LDCONNECT to 50 BEFORE RUNNING

from wikibaseintegrator import wbi_login, WikibaseIntegrator, wbi_helpers
from wikibaseintegrator.datatypes import ExternalID, String, Quantity, Time, MonolingualText, GlobeCoordinate, Item
from wikibaseintegrator.wbi_config import config as wbi_config
from wikibaseintegrator.wbi_enums import ActionIfExists
import getpass

#import logging
#logging.basicConfig(level=logging.DEBUG)

#for progress bars
from tqdm.notebook import trange, tqdm

# User-Agent string that WikibaseIntegrator sends with its requests.
# TODO: decide whether this needs updating before a production run.
wbi_config['USER_AGENT'] = 'AndrewEells (https://www.wikidata.org/wiki/User:AndrewEells)'

# Point WikibaseIntegrator at the test instance (test.wikidata.org).
# Delete these two lines to target PRODUCTION Wikidata instead.
wbi_config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'
wbi_config['WIKIBASE_URL'] = 'https://test.wikidata.org'


# Credentials are prompted for at run time so they are never stored in the
# notebook; getpass keeps the password from being echoed.
user = input("Your Wikidata UserName: ")
password = getpass.getpass(prompt="Your Wikidata password: ")

# Log in with the credentials entered above; the resulting session is handed
# to the WikibaseIntegrator client below.
login_instance = wbi_login.Clientlogin(user=user, password=password)

# For each "PRODUCTION" comment in this notebook: uncomment that line and
# comment out the test-instance line directly beneath it when switching to
# production (the property/item IDs differ between the two wikis).

#PRODUCTION - wd_publishedIn = Item(value='Q15817015',prop_nr='P1433')
# Shared item-valued statement; the import loop reuses it both as a reference
# on other claims and as a claim of its own.
wd_publishedIn = Item(value='Q213541',prop_nr='P149')


# Client object used to create, fetch and write items.
wbi = WikibaseIntegrator(login=login_instance)


# Used to de-dupe results from LDConnect: the same paperID can appear in more
# than one row of ldResult, and each paper must only be written once.
paperIDs = []

# Predicates used to read a contributor's name from the per-author RDF document.
# Built once here because they do not change between rows or authors.
firstNamePredicate = URIRef('http://ld.iospress.nl/rdf/ontology/contributorFirstName')
lastNamePredicate = URIRef('http://ld.iospress.nl/rdf/ontology/contributorLastName')


# Loop through the query results, creating or updating one wiki item per paper.
# Each iteration performs network I/O: a title search, one fetch for the author
# list, one fetch per author, and finally the write of the item.
for row in tqdm(ldResult, desc='overall progress'):
	#print(row.paperID)

	# Skip rows that belong to a paper already handled in this run.
	if row.paperID in paperIDs:
		continue
	paperIDs.append(row.paperID)

	# Search the wiki for entities matching the paper title.
	existingEntries = wbi_helpers.search_entities(f'{row.title}')

	if len(existingEntries) > 1:
		# Ambiguous match: record it for manual merging and do not touch the wiki.
		# The message is printed BEFORE `continue` (it used to be unreachable),
		# the log file is closed after each write, and every record ends with a
		# newline so entries no longer run together on a single line.
		print(f"MULTIPLES --- {row.paperID} --- {row.title}")
		with open('entriesToCombine.txt','a') as multipleEntries:
			multipleEntries.write(f'{row.paperID}, {row.title}, {existingEntries}\n')
		continue

	elif len(existingEntries) == 0:
		item = wbi.item.new()
		print(f"NEW ITEM --- {row.paperID} --- {row.title}")

	else:
		# Exactly one match: update that entity in place.
		item = wbi.item.get(existingEntries[0])
		print(f"UPDATED EXISTING --- {row.paperID} --- {row.title}")


	################################################################
	# TITLE
	################################################################

	#PRODUCTION - wd_title = MonolingualText(f"{row.title}",'en', prop_nr='P1476')
	wd_title = MonolingualText(f"{row.title}",'en', prop_nr='P77107')

	item.claims.add(claims=wd_title)


	################################################################
	# PAGES STRING
	################################################################

	pagesString = f"{row.pageStart}-{row.pageEnd}"

	#PRODUCTION - wd_Pages = String(pagesString, prop_nr='P304',references=[wd_publishedIn])
	wd_Pages = String(pagesString, prop_nr='P619',references=[wd_publishedIn])

	item.claims.add(wd_Pages,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


	################################################################
	# NUMPAGES
	################################################################

	# The page range is inclusive at both ends, hence the +1
	# (pages 1-10 are 10 pages; a paper starting and ending on page 5 has 1).
	# NOTE(review): int() raises ValueError for non-numeric page labels
	# (e.g. roman numerals) — confirm the endpoint never returns those.
	numPages = int(row.pageEnd) - int(row.pageStart) + 1

	#PRODUCTION - wd_numPages = Quantity(numPages, prop_nr='P1104', references=[wd_publishedIn])
	wd_numPages = Quantity(numPages, prop_nr='P69', references=[wd_publishedIn])

	item.claims.add(wd_numPages,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


	################################################################
	# PUB DATE
	################################################################

	# NOTE(review): assumes row.pubDate is a plain YYYY-MM-DD date so that the
	# concatenation yields a valid timestamp — confirm against the endpoint.
	#PRODUCTION - wd_pubDate = Time(f"+{row.pubDate}T00:00:00Z", prop_nr="P577", references=[wd_publishedIn])
	wd_pubDate = Time(f"+{row.pubDate}T00:00:00Z", prop_nr="P761", references=[wd_publishedIn])

	item.claims.add(wd_pubDate,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


	################################################################
	# KEYWORDS
	################################################################

	# Currently disabled: no keyword claim is written.
	#PRODUCTION - wd_keyWords = String(value=row.keywords, prop_nr="P921", references=[wd_publishedIn])
	#wd_keyWords = String(value=row.keywords, prop_nr="P96524", references=[wd_publishedIn])

	#item.claims.add(wd_keyWords,action_if_exists=ActionIfExists.REPLACE_ALL)


	################################################################
	# DOI
	################################################################

	#PRODUCTION - wd_doi = ExternalID(f"{row.doi}", prop_nr="P356", references=[wd_publishedIn])
	wd_doi = ExternalID(f"{row.doi}", prop_nr="P97017", references=[wd_publishedIn])

	item.claims.add(wd_doi,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


	################################################################
	# PUBLISHED IN
	################################################################

	item.claims.add(wd_publishedIn,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


	################################################################
	# ISSUE NUMBER / VOLUME NUMBER
	################################################################

	# Both numbers are taken from the articleInIssue value: the last
	# '-'-separated segment is used for the issue and the one before it for the
	# volume, each with its first character dropped.
	# NOTE(review): presumably that first character is a letter prefix and the
	# value always has at least two '-'-separated segments (otherwise [-2]
	# raises IndexError) — confirm against real data.
	issueSegments = f"{row.articleIssueNumber}".split('-')

	articleIssueNumber = issueSegments[-1]

	#PRODUCTION - wd_issueNumber = String(value=articleIssueNumber, prop_nr="P433")
	wd_issueNumber = String(value=articleIssueNumber[1::], prop_nr="P618",references=[wd_publishedIn])

	item.claims.add(wd_issueNumber,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)

	articleVolumeNumber = issueSegments[-2]

	#PRODUCTION - wd_volumeNumber = String(value=articleVolumeNumber, prop_nr="P478")
	wd_volumeNumber = String(value=articleVolumeNumber[1::], prop_nr="P617")

	item.claims.add(wd_volumeNumber,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


	################################################################
	# AUTHORS
	################################################################

	# row.authors is the location of an RDF document describing the author
	# list; it is fetched and parsed here.
	authors = rdflib.Graph()
	authors.parse(f"{row.authors}")

	# NOTE(review): this visits EVERY triple in the author-list document. If it
	# also contains non-member triples (e.g. an rdf:type statement), their
	# object is fetched too; such entries yield no name and are skipped below.
	for s, p, o in authors:
		# The text after the last '_' of the predicate is used as the
		# author's position in the list.
		authorOrder = f"{p}".split('_')[-1]
		#print(f"author number: {authorOrder}")

		#PRODUCTION - wd_seriesOrdinal = String(value=f"{authorOrder}", prop_nr="P1545")
		wd_seriesOrdinal = String(value=f"{authorOrder}", prop_nr="P551")

		# Fetch the document describing this contributor and read the name parts.
		authorInfo = rdflib.Graph()
		authorInfo.parse(o)

		authorFirstName = authorInfo.value(URIRef(o), firstNamePredicate, None)
		authorLastName = authorInfo.value(URIRef(o), lastNamePredicate, None)

		# Graph.value returns None when no matching triple exists. Formatting
		# that straight into the string would store the literal text "None" as
		# (part of) an author name, so only the parts that exist are joined and
		# an entry without any name is not written at all.
		nameParts = [str(part) for part in (authorFirstName, authorLastName) if part is not None]
		if not nameParts:
			continue
		authorName = " ".join(nameParts)

		#PRODUCTION - wd_authorString = String(value=authorName, prop_nr="P2093", references=[wd_publishedIn], qualifiers=[wd_seriesOrdinal])
		wd_authorString = String(value=authorName, prop_nr="P80807", references=[wd_publishedIn], qualifiers=[wd_seriesOrdinal])

		item.claims.add(wd_authorString,action_if_exists=ActionIfExists.APPEND_OR_REPLACE)


	# Label and description, then push the item to the wiki.
	# The description uses the keyword of whichever row was seen first for
	# this paper.
	item.labels.set(language='en', value=f'{row.title}')
	item.descriptions.set(language='en',value=f"A journal article published in the Semantic Web Journal, primarily concerned with {row.keywords.lower()}.",action_if_exists=ActionIfExists.REPLACE_ALL)
	item.write()
	#print(item)