# Get JSON-LD from a source URL


In [1]:
import json
import sonormal.getjsonld

# One-time sonormal setup before any JSON-LD handling.
# NOTE(review): presumably registers locally cached schema.org contexts so
# that context resolution does not hit the network -- confirm in sonormal.
sonormal.prepareSchemaOrgLocalContexts()

# A minimal static page that embeds one JSON-LD <script> block in its <head>.
url = "https://datadavev.github.io/examples/jsonld/ex01.html"

# Top-level `await` relies on the notebook kernel's autoawait support.
# Later cells read response["response"] (the fetched page, via .text) and
# response["document"] (the extracted JSON-LD) from this result.
response = await sonormal.getjsonld.downloadJsonAsync(url)

In [2]:
# Condense the download result into a JSON-serializable summary (the request
# that was sent plus the list of responses received) and pretty-print it.
response_summary = sonormal.getjsonld.responseSummary(response)
print(json.dumps(response_summary, indent=2))

{
  "request": {
    "url": "https://datadavev.github.io/examples/jsonld/ex01.html",
    "headers": {
      "User-Agent": "python-requests/2.25.1",
      "Accept-Encoding": "gzip, deflate",
      "Accept": "application/ld+json;q=1.0, application/json;q=0.9, text/html;q=0.8, application/xml+xhtml;q=0.7, */*;q=0.1",
      "Connection": "keep-alive"
    }
  },
  "responses": [
    {
      "url": "https://datadavev.github.io/examples/jsonld/ex01.html",
      "status_code": 200,
      "result": "<< body >>",
      "elapsed": 0.063864,
      "headers": {
        "connection": "keep-alive",
        "content-length": "342",
        "server": "GitHub.com",
        "content-type": "text/html; charset=utf-8",
        "permissions-policy": "interest-cohort=()",
        "last-modified": "Thu, 22 Apr 2021 21:21:27 GMT",
        "access-control-allow-origin": "*",
        "etag": "W/\"6081e8d7-271\"",
        "expires": "Mon, 12 Jul 2021 15:19:18 GMT",
        "cache-control": "max-age=600",
        

The downloaded HTML:

In [4]:
# response["response"] is the fetched page; .text is its body as a string.
print(response["response"].text)

<!DOCTYPE html>
<html>

<head>
    <title>Hello JSON-LD</title>
    <link rel="stylesheet" href="../css/common.css" />
    <script src="../js/common.js"></script>
    <script type="application/ld+json">
            {
                "@context":{
                    "@vocab":"https://example.net/"
                },
                "@id": "http://data.example.net/example_01",
                "name": "This is example 01"
            }
        </script>
</head>

<body>
    <p>Hello JSON-LD.</p>
    <p><a href="https://github.com/datadavev/examples/blob/main/docs/jsonld/ex01.html">View the source.</a></p>
</body>

</html>


The extracted JSON-LD:

In [5]:
# response["document"] holds the JSON-LD extracted from the page above.
print(json.dumps(response["document"], indent=2))

{
  "@context": {
    "@vocab": "https://example.net/"
  },
  "@id": "http://data.example.net/example_01",
  "name": "This is example 01"
}


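Retrieve from a service with a few redirects. For the URL used here, the log output below reports that no JSON-LD was found in the plain page source, and the request headers in the summary carry a headless-Chrome user agent.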

In [7]:
url = "https://earthref.org/MagIC/17099"
url = "https://search.dataone.org/view/https%3A%2F%2Fpasta.lternet.edu%2Fpackage%2Fmetadata%2Feml%2Fknb-lter-cap%2F686%2F1"
response = await sonormal.getjsonld.downloadJsonAsync(url)

response_summary = sonormal.getjsonld.responseSummary(response)
print(json.dumps(response_summary, indent=2))

No JSON-LD in plain source https://search.dataone.org/view/https%3A%2F%2Fpasta.lternet.edu%2Fpackage%2Fmetadata%2Feml%2Fknb-lter-cap%2F686%2F1


{
  "request": {
    "url": "https://search.dataone.org/view/https%3A%2F%2Fpasta.lternet.edu%2Fpackage%2Fmetadata%2Feml%2Fknb-lter-cap%2F686%2F1",
    "headers": {
      "upgrade-insecure-requests": "1",
      "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_16_0) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/71.0.3542.0 Safari/537.36",
      "accept": "application/ld+json;q=1.0, application/json;q=0.9, text/html;q=0.8, application/xml+xhtml;q=0.7, */*;q=0.1"
    }
  },
  "responses": [
    {
      "url": "https://search.dataone.org/view/https%3A%2F%2Fpasta.lternet.edu%2Fpackage%2Fmetadata%2Feml%2Fknb-lter-cap%2F686%2F1",
      "status_code": 200,
      "result": "<< body >>",
      "elapsed": 0.272944,
      "headers": {
        "date": "Mon, 12 Jul 2021 15:14:27 GMT",
        "server": "Apache/2.4.48 (Ubuntu)",
        "last-modified": "Fri, 09 Apr 2021 20:19:29 GMT",
        "etag": "\"287b-5bf8fe18ed61a-gzip\"",
        "accept-ranges": "bytes",
        "vary": "Acc

The downloaded HTML:

In [9]:
# Body of the page returned for the second example, as a string.
# The output can be very long; consider clearing it before sharing.
print(response["response"].text)

<!DOCTYPE html><html lang="en" data-framework="backbonejs" class=""><head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  <script async="" src="//www.google-analytics.com/analytics.js"></script><script type="text/javascript" async="" src="https://maps.googleapis.com/maps/api/js?v=3&amp;key=AIzaSyDuQ9r_7EeSfspKYs2SET7sv4c8FysLIk4&amp;callback=__async_req_1__"></script><script type="text/javascript">
    // The path to your configuration file for MetacatUI. This can be any web-accessible location.
    var appConfigPath = "/js/themes/dataone/config.js";
  </script>

  <style>#loading-app{width:100%; height: 100%; left: 0px; top: 0px; margin-left:auto; margin-right:auto; position:absolute; background-color:#FFF;}.loading-icon{position:relative; margin-top:100px; margin-bottom:20px; margin-left: 40%; width:90px;height:90px}.loading-icon .wBall{position:absolute;width:86px;height:86px;opacity:0;-moz-transform:rotate(225deg);-moz-animation:orbit 4.4s infinite;-webkit-

The extracted JSON-LD:

In [10]:
# JSON-LD extracted for the second example; here it is a list of documents.
print(json.dumps(response["document"], indent=2))

[
  {
    "@context": {
      "@vocab": "https://schema.org/"
    },
    "@type": "Dataset",
    "@id": "https://dataone.org/datasets/https%3A%2F%2Fpasta.lternet.edu%2Fpackage%2Fmetadata%2Feml%2Fknb-lter-cap%2F686%2F1",
    "datePublished": "2021-03-01T00:00:00Z",
    "dateModified": "2021-03-01T20:45:34.873Z",
    "publisher": {
      "@type": "Organization",
      "name": "LTER Network Member Node"
    },
    "identifier": "https://pasta.lternet.edu/package/metadata/eml/knb-lter-cap/686/1",
    "url": "https://dataone.org/datasets/https%3A%2F%2Fpasta.lternet.edu%2Fpackage%2Fmetadata%2Feml%2Fknb-lter-cap%2F686%2F1",
    "schemaVersion": "https://eml.ecoinformatics.org/eml-2.2.0",
    "isAccessibleForFree": true,
    "name": "Urban Ecological Infrastructure (UEI) in the greater Phoenix, Arizona metropolitan area and surrounding Sonoran desert region (2010-2017)",
    "creator": [
      {
        "@type": "Person",
        "name": "Jeffrey Brown"
      },
      {
        "@type": "Perso