# Testing extruct

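_[extruct](https://github.com/scrapinghub/extruct)_ is a library for extracting embedded metadata from HTML markup. It also has a built-in HTTP server to test its output as JSON. _extruct_ supports embedded JSON-LD (among other syntaxes such as Microdata, RDFa, Microformats and Open Graph); this notebook uses it to pull the JSON-LD out of a few aragon.es pages and then query it with rdflib.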


In [1]:
# Install extruct with the same interpreter that runs this kernel
# (sys.executable), so the package lands in the kernel's environment
# rather than in whichever `pip` happens to be first on PATH.
import sys
!{sys.executable} -m pip install extruct



## Function Wrapper

In [2]:
import extruct
import requests
from w3lib.html import get_base_url

def json_ld_from(url, timeout=30):
  """Download a web page and return the JSON-LD metadata embedded in it.

  Args:
    url: Address of the HTML page to fetch.
    timeout: Seconds to wait for the server before giving up; passed
      straight to ``requests.get``.

  Returns:
    The value extruct stores under the ``'json-ld'`` key of its result
    for the downloaded markup.

  Raises:
    requests.exceptions.RequestException: If the request fails, times out
      or the server answers with an HTTP error status.
  """
  # Without an explicit timeout requests can block forever on a silent server.
  response = requests.get(url, timeout=timeout)
  # Fail loudly on 4xx/5xx instead of parsing an error page as if it were content.
  response.raise_for_status()
  # Base URL comes from the markup's <base> tag if present, else the final response URL.
  base_url = get_base_url(response.text, response.url)
  # Only the JSON-LD result is used, so skip the other extractors.
  data = extruct.extract(response.text, base_url=base_url, syntaxes=['json-ld'])
  return data['json-ld']

## Test extraction

In [3]:
import pprint
# Shared pretty-printer; later cells reuse `pp` as well.
pp = pprint.PrettyPrinter(indent=2)

# Pages whose embedded JSON-LD is dumped below, in this order.
sample_pages = (
    'https://www.aragon.es/inicio',
    'https://www.aragon.es/actualidad',
    'https://www.aragon.es/actualidad/mujer-emprendedora-y-formacion-objetivos-de-la-nueva-edicion-del-programa-de-iniciativas-empresariales-en-aragon',
    'https://www.aragon.es/politica-de-cookies',
)
for page in sample_pages:
    pp.pprint(json_ld_from(page))



[ { '@context': { 'dcmi': 'http://purl.org/dc/dcmitype/',
                  'dct': 'http://purl.org/dc/terms/',
                  'ei2a': 'http://opendata.aragon.es/def/e2ia#',
                  'schema': 'http://schema.org/'},
    '@type': ['ei2a:Document', 'dcmi:InteractiveResource'],
    'dct:coverage': { '@id': 'http://opendata.aragon.es/recurso/territorio/ComunidadAutonoma/Arag%C3%B3n',
                      '@type': 'schema:Place'},
    'dct:creator': { '@id': 'http://opendata.aragon.es/recurso/sectorpublico/organization/gobierno-aragon',
                     '@type': 'schema:GovernmentOrganization',
                     'schema:name': 'Gobierno de Aragón'},
    'dct:date': '2019-01-30',
    'dct:description': '',
    'dct:format': 'text/html',
    'dct:language': 'es',
    'dct:publisher': { '@id': 'http://opendata.aragon.es/recurso/sectorpublico/organization/gobierno-aragon',
                       '@type': 'schema:GovernmentOrganization',
                       'schema:name': 

[ { '@context': { 'dcmi': 'http://purl.org/dc/dcmitype/',
                  'dct': 'http://purl.org/dc/terms/',
                  'ei2a': 'http://opendata.aragon.es/def/e2ia#',
                  'schema': 'http://schema.org/'},
    '@type': ['ei2a:Document', 'dcmi:InteractiveResource'],
    'dct:coverage': { '@id': 'http://opendata.aragon.es/recurso/territorio/ComunidadAutonoma/Arag%C3%B3n',
                      '@type': 'schema:Place'},
    'dct:creator': { '@id': 'http://opendata.aragon.es/recurso/sectorpublico/organization/gobierno-aragon',
                     '@type': 'schema:GovernmentOrganization',
                     'schema:name': 'Gobierno de Aragón'},
    'dct:date': '2019-01-11',
    'dct:description': '',
    'dct:format': 'text/html',
    'dct:language': 'es',
    'dct:publisher': { '@id': 'http://opendata.aragon.es/recurso/sectorpublico/organization/gobierno-aragon',
                       '@type': 'schema:GovernmentOrganization',
                       'schema:name': 

## Test query

In [4]:
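# Optional: format='json-ld' in rdflib relies on the rdflib-jsonld plugin on
# rdflib versions that do not bundle JSON-LD support (rdflib 6+ includes it).
# Uncomment the two lines below if the parser/serializer is reported missing.
# import sys
# !{sys.executable} -m pip install rdflib-jsonld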

In [5]:
from rdflib import Graph, plugin
from rdflib.serializer import Serializer
import json
# Page whose JSON-LD is loaded into an RDF graph; also used as the graph's publicID.
page_url = 'https://www.aragon.es/politica-de-cookies'

# rdflib parses from text here, so the extracted structure is serialised to a JSON string first.
s = json.dumps(json_ld_from(page_url))

g = Graph()
g.parse(data=s, format='json-ld', publicID=page_url)

# Round-trip: serialise the graph back to JSON-LD and decode it for display.
r = json.loads(g.serialize(format='json-ld'))
pp.pprint(r)

[ { '@id': 'http://opendata.aragon.es/recurso/territorio/ComunidadAutonoma/Arag%C3%B3n',
    '@type': ['http://schema.org/Place']},
  { '@id': '_:N0d6096378e444a468556955b588e2005',
    '@type': [ 'http://purl.org/dc/dcmitype/InteractiveResource',
               'http://opendata.aragon.es/def/e2ia#Document'],
    'http://purl.org/dc/terms/coverage': [ { '@id': 'http://opendata.aragon.es/recurso/territorio/ComunidadAutonoma/Arag%C3%B3n'}],
    'http://purl.org/dc/terms/creator': [ { '@id': 'http://opendata.aragon.es/recurso/sectorpublico/organization/gobierno-aragon'}],
    'http://purl.org/dc/terms/date': [{'@value': '2019-01-11'}],
    'http://purl.org/dc/terms/description': [{'@value': ''}],
    'http://purl.org/dc/terms/format': [{'@value': 'text/html'}],
    'http://purl.org/dc/terms/language': [{'@value': 'es'}],
    'http://purl.org/dc/terms/publisher': [ { '@id': 'http://opendata.aragon.es/recurso/sectorpublico/organization/gobierno-aragon'}],
    'http://purl.org/dc/terms/right

In [6]:
# SPARQL: every distinct (?a, ?b) pair linked through rdf:type in the graph.
type_query = """SELECT DISTINCT ?a ?b
       WHERE {
          ?a rdf:type ?b 
       }"""
qres = g.query(type_query)

# Each result row holds the two selected variables in SELECT order (?a, ?b).
for subject, rdf_class in qres:
    print("%s type of %s" % (subject, rdf_class))


N0d6096378e444a468556955b588e2005 type of http://purl.org/dc/dcmitype/InteractiveResource
N78d637c555fd444a99067e86c3996806 type of http://schema.org/WebSite
N68b281d162d54c3daac7f3ea223fcddf type of http://schema.org/ContactPoint
N78d637c555fd444a99067e86c3996806 type of http://schema.org/WebPage
N0d6096378e444a468556955b588e2005 type of http://opendata.aragon.es/def/e2ia#Document
N333595ed70484dcabb1c6c89c0b18dbf type of http://schema.org/ImageObject
http://opendata.aragon.es/recurso/territorio/ComunidadAutonoma/Arag%C3%B3n type of http://schema.org/Place
http://opendata.aragon.es/recurso/sectorpublico/organization/gobierno-aragon type of http://schema.org/GovernmentOrganization
http://www.aragon.es/EnlacesPie/Aviso_Legal type of http://schema.org/WebPage
