In [1]:
TTL_HEADER = """                                                                                                             
@base <https://github.com/Coleridge-Initiative/adrf-onto/wiki/Vocabulary> .                                                  
@prefix cito:   <http://purl.org/spar/cito/> .                                                                               
@prefix dct:    <http://purl.org/dc/terms/> .                                                                                
@prefix foaf:   <http://xmlns.com/foaf/0.1/> .                                                                               
@prefix rdf:    <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .                                                              
@prefix xsd:    <http://www.w3.org/2001/XMLSchema#> .                                                                        
"""

# PUB_TEMPLATE = """                                                                                                           
# :{}                                                                                                                          
#   rdf:type :ResearchPublication ;                                                                                            
#   dct:title "{}"@en ;                                                                                                        
#   dct:identifier "{}" ;                                                                                                      
#   dct:language "en" ;                                                                                                        
#   foaf:page "{}"^^xsd:anyURI ;                                                                                               
#   .                                                                                                                          
# """
PUB_TEMPLATE = """                                                                                                           
:{}                                                                                                                          
  rdf:type :ResearchPublication ;                                                                                            
  dct:title "{}"@en ;                                                                                                        
  dct:subject "{}" ;                                                                                                        
  dct:identifier "{}" ;                                                                                                      
  dct:language "en" ;                                                                                                        
  foaf:page "{}"^^xsd:anyURI ;                                                                                               
  .                                                                                                                          
"""

In [5]:
# http://purl.org/dc/terms/subject

In [2]:
from rdflib import Graph, plugin
from rdflib.plugin import register, Parser, Serializer
import pyld
import json

# format as JSON_LD                                                                                                      
with open("vocab.json", "r") as f:
    CONTEXT = json.load(f)

frags = [TTL_HEADER]


In [15]:
partition_path = "metadata_final/20191021_pub_md.json"

with open(partition_path) as json_file:
    usda_md = json.load(json_file)

In [18]:
import hashlib

def get_hash (strings, prefix=None, digest_size=10):
    """
    construct a unique identifier from a collection of strings
    """
    m = hashlib.blake2b(digest_size=digest_size)
    
    for elem in sorted(map(lambda x: x.encode("utf-8").lower().strip(), strings)):
        m.update(elem)

    if prefix:
        id = prefix + m.hexdigest()
    else:
        id = m.hexdigest()

    return id

In [21]:
# usda_md_with_hash =
for p in usda_md:
    hash_val = 'publication-{}'.format(get_hash(p['title']))
    p.update({'pub_id':hash_val})

In [22]:
usda_md[0]

{'title': 'Measuring Food Expenditure Poverty in SNAP Populations: Some Extensions with an Application to the American Recovery and Reinvestment Act',
 'datasets': ['dataset-033'],
 'pdf': 'https://academic.oup.com/aepp/article-pdf/41/1/133/27762442/ppy004.pdf',
 'doi': '10.1093/aepp/ppy004',
 'new_keywords': 'act,population',
 'journal_title': 'Applied Economic Perspectives and Policy',
 'authors': ['Yang, Yanliang. Department of Agricultural and Applied Economics, Virginia Tech',
  'Davis, George C. Department of Agricultural and Applied Economics, Virginia Tech',
  'You, Wen. Department of Agricultural and Applied Economics, Virginia Tech'],
 'pub_id': 'publication-9d21370465e28c315dd9'}

In [None]:
[
    "10.1111/agec.12444",
    "publication-e84ab84d25278051773e",
    "http://purl.umn.edu/205424",
    "Agricultural Economics",
    "Does a nutritious diet cost more in food deserts?"
  ]

In [None]:
for entry in results:
    frags.append(PUB_TEMPLATE.format(pub_id, title, doi, pub_url))


In [None]:

for doi, pub_id, pub_url, journal, title in results:
    frags.append(PUB_TEMPLATE.format(pub_id, title, doi, pub_url))

g = Graph()
g.parse(data="\n".join(frags), format="n3")

jsonld = json.loads(g.serialize(format="json-ld", context=CONTEXT))
jsonld = pyld.jsonld.compact(jsonld, CONTEXT)

print(json.dumps(jsonld, indent=2))