In [1]:
import json, codecs, csv

In [2]:
# Open the CSV containing the content. The `with` block guarantees the file
# handle is closed once the rows have been read, even if parsing fails.
with codecs.open('../csv/item-records-export.csv','r','utf-8') as infile:
    item_record_csv = csv.reader(infile,delimiter=',')

    # Remove the header row. The None default keeps an empty file from raising
    # instead of failing on a missing first row.
    next(item_record_csv, None)

    # Create an iterable for the script. The rows must be materialized here,
    # while the file is still open, because csv.reader reads lazily.
    item_records = [row for row in item_record_csv]

In [3]:
# Establish the data model.
#
# `data` is the JSON-LD document: '@context' maps the short keys used on each
# item to vocabulary terms, and '@graph' collects one object per item record.
data = {
    '@context': {
        '@base': 'https://catalog.digitallatin.org',

        # Namespace prefixes. 'dcterms' must point at the Dublin Core terms
        # namespace; it previously duplicated the 'rdf' namespace, which made
        # every 'dcterms:*' term below expand to a non-existent IRI.
        'dcterms': 'http://purl.org/dc/terms/',
        'frbr': 'http://vocab.org/frbr/core#',
        'madsrdf': 'http://www.loc.gov/mads/rdf/v1#',
        'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'schema': 'http://schema.org/',

        # Term definitions for the keys that appear on each item.
        'Title': 'dcterms:title',
        'Author': 'dcterms:creator',
        'Editor': 'dcterms:contributor',
        'Publisher': 'dcterms:publisher',
        'Place': 'schema:City',
        'Date': 'dcterms:date',
        'SourceURI': 'dcterms:URI',
        'Repository': 'schema:Library',
        'Rights': 'dcterms:rights',
        'Type': 'dcterms:type',
        'Format': 'dcterms:format',
        'DLLid': 'dcterms:identifier',

        # 'References' is declared with '@type': '@id'.
        'References': {
            '@id': 'dcterms:references',
            '@type': '@id',
        },
        'DLL Author': 'frbr:Creator',
        'DLL Work': 'frbr:exemplarOf',
        'DLL Editor': 'dcterms:contributor',
    },
    # Filled in below, one entry per item record.
    '@graph': [],
}

# Iterate over each item and process it.
def _split_multi(value):
    """Normalize a CSV cell that may hold several ';'-separated values.

    Returns '' when the cell is empty, a list of the parts when the cell
    contains ';', and the cell itself otherwise. The parts are returned
    exactly as split; surrounding whitespace is not stripped.
    """
    if not value:
        return ''
    if ';' in value:
        return value.split(';')
    return value


for row in item_records:
    # csv.reader yields an empty list for a blank line; such a row carries no
    # item and would otherwise raise IndexError on the first column access.
    if not row:
        continue

    dll_link = row[0]
    dll_id = row[1]
    record_title = row[2]

    # Author(s), editor(s) and DLL work reference(s) may each hold more than
    # one value, so they go through the shared multi-value helper.
    author = _split_multi(row[3])
    editor = _split_multi(row[4])
    dll_work = _split_multi(row[5])

    publisher = row[6]
    place = row[7]
    date = row[8]
    source_uri = row[9]
    repository = row[10]
    rights = row[11]
    item_type = row[12]
    item_format = row[13]

    # Make the JSON object.
    # NOTE(review): the context also declares 'DLL Editor', but no such key is
    # emitted under 'References' here - confirm whether that is intentional.
    item = {'@id': dll_link,
            'Title': record_title,
            'Author': author,
            'Editor': editor,
            'Publisher': publisher,
            'Place': place,
            'Date': date,
            'SourceURI': source_uri,
            'Repository': repository,
            'Rights': rights,
            'Type': item_type,
            'Format': item_format,
            'DLLid': dll_id,
            'References': {
                'DLL Author': author,
                'DLL Work': dll_work}}

    # Add the item to the graph.
    data['@graph'].append(item)

In [4]:
# Write the results to a file for uploading to the git repo.
# The file is opened in 'w' mode so each run replaces the previous output;
# appending would concatenate a second JSON document onto the first and leave
# a file that no longer parses as JSON.
with codecs.open('../json-ld/item-records.json','w',encoding='utf-8') as outfile:
    # ensure_ascii=False writes non-ASCII characters as-is rather than as
    # \u escapes; sort_keys and indent control key order and layout.
    json.dump(data,outfile,sort_keys = True, indent = 4, ensure_ascii = False)