In [3]:
# (c) 2021 Vanderbilt University. 
# This program is released under a GNU General Public License v3.0 http://www.gnu.org/licenses/gpl-3.0
# Author: Steve Baskauf
# 2021-01-17

import requests
import json
import csv
import sys # Read CLI arguments
import datetime
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

#import os
#from time import sleep
#from pathlib import Path

# Use the following code for a stand-alone script if you want to pass in a value (e.g. file path) when running
# the script from the command line. If no arguments are passed, the "else" value will be used.

# Exactly one command-line argument (sys.argv[0] is the script name) is taken as the file path;
# with any other argument count the default file name is used.
file_path = sys.argv[1] if len(sys.argv) == 2 else 'file.csv'

# ----------------
# File IO
# ----------------

# Many functions operate on a list of dictionaries, where each item in the list represents a spreadsheet row
# and each column is identified by a dictionary item whose key is the column header in the spreadsheet.
# The first two functions read a CSV file into this data structure and write the data structure back to a CSV file.

# Read from a CSV file into a list of dictionaries
def read_dicts_from_csv(filename):
    """Load a UTF-8 CSV file as a list of row dictionaries.

    The first line of the file supplies the column headers, which become the keys
    of every row dictionary. Rows are returned in file order.
    """
    with open(filename, 'r', newline='', encoding='utf-8') as csv_file:
        # Consume the reader while the file is still open.
        return list(csv.DictReader(csv_file))

# Write a list of dictionaries to a CSV file
# The fieldnames object is a list of strings whose items are the keys in the row dictionaries that are chosen
# to be the columns in the output spreadsheet. The order in the list determines the order of the columns.
def write_dicts_to_csv(table, filename, fieldnames):
    """Write a list of row dictionaries to a UTF-8 CSV file.

    table: iterable of dictionaries, one per output row.
    filename: path of the file to create or overwrite.
    fieldnames: list of dictionary keys to use as columns, in output order.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        csv_writer.writeheader()
        csv_writer.writerows(table)


# Extracts the local name part of an IRI, e.g. a qNumber from a Wikidata IRI
def extract_local_name(iri):
    """Return the part of an IRI string that follows its last slash.

    Example: http://www.wikidata.org/entity/Q6386232 gives Q6386232.
    A string without any slash is returned unchanged.
    """
    # Split once from the right; the final piece is the local name.
    return iri.rsplit('/', 1)[-1]

# To sort a list of dictionaries by a particular dictionary key's values, define the following function
# then invoke the sort using the code that follows

# function to use in sort
def sort_funct(row):
    """Sort key for a row dictionary: the value stored under its 'filename' key."""
    sort_value = row['filename']
    return sort_value

# Example invocation, kept as a comment because it is not meant to run here:
# output_list.sort(key = sort_funct) # sort by the filename field

# Emit a single blank line as this cell's only output.
print()




In [12]:
# Match each publication title against the English labels of the known works. When a label is
# similar enough, record that work's Q ID in the publication row. The updated rows are then
# written back over the publications CSV.
PUBS_FILENAME = 'Presentations_Publications_2014-2019.csv'
MATCH_THRESHOLD = 90 # minimum fuzz.ratio score for a title and a label to count as a match

works = read_dicts_from_csv('works.csv')
pubs = read_dicts_from_csv(PUBS_FILENAME)

for pub in pubs:
    # Make sure every row carries the pub_qid key, so all rows share the same columns when
    # written. Otherwise csv.DictWriter raises ValueError for a matched row whose key is missing
    # from the fieldnames taken from the first row. A value already present is left untouched.
    pub.setdefault('pub_qid', '')
    title = pub['Title']
    for work in works:
        label = work['label_en']
        # Only the plain ratio decides whether the pair is a match.
        if fuzz.ratio(title, label) < MATCH_THRESHOLD:
            continue
        # The remaining scores are only reported, so they are computed for matches only.
        set_ratio = fuzz.token_set_ratio(title, label)
        partial_ratio = fuzz.partial_ratio(title, label)
        sort_ratio = fuzz.token_sort_ratio(title, label)
        # The token set ratio is printed first and last to keep the report format unchanged.
        print(set_ratio, partial_ratio, sort_ratio, set_ratio)
        print(label)
        print(title)
        print()
        pub['pub_qid'] = work['qid']
        break # keep the first work that reaches the threshold

# Column order follows the first row; with no rows there is nothing to write, and the
# existing file is left as it is rather than failing on pubs[0].
fieldnames = list(pubs[0].keys()) if pubs else []
if pubs:
    write_dicts_to_csv(pubs, PUBS_FILENAME, fieldnames)
print('done')

100 91 100 100
Text mining in business libraries
Text Mining in Business Libraries

100 91 100 100
Data-first manifesto: shifting priorities in scholarly communications
Data-First Manifesto: Shifting Priorities in Scholarly Communications

100 100 100 100
Liberalism versus Democracy? Abraham Kuyper and Carl Schmitt as Critics of Liberalism
Liberalism versus Democracy? Abraham Kuyper and Carl Schmitt as Critics of Liberalism.

100 100 100 100
On Teaching XQuery to Digital Humanists
On Teaching XQuery to Digital Humanists

100 100 100 100
Introduction to Wikidata: The Wikipedia of Facts
Introduction to Wikidata: The Wikipedia of Facts

100 100 100 100
Data Curation 101 for Theological Librarians
Data Curation 101 for Theological Librarians.

100 100 100 100
Building Institutional Repositories in Theological Libraries
Building Institutional Repositories in Theological Libraries

100 100 100 100
Doing Digital Humanities in Theological Libraries
Doing Digital Humanities in Theological Libra