# Get JSON-LD from a source URL


In [1]:
import json
import sonormal.getjsonld

sonormal.installDocumentLoader()

url = "https://datadavev.github.io/examples/jsonld/ex01.html"

jsonld, response = await sonormal.getjsonld.downloadJsonAsync(url)

In [2]:
# Build a summary of the request/response exchange, then show it as
# indented JSON so the nested headers are readable.
response_summary = sonormal.getjsonld.responseSummary(response)
summary_text = json.dumps(response_summary, indent=2)
print(summary_text)

{
  "request": {
    "url": "https://datadavev.github.io/examples/jsonld/ex01.html",
    "headers": {
      "User-Agent": "python-requests/2.25.1",
      "Accept-Encoding": "gzip, deflate",
      "Accept": "application/ld+json;q=1.0, applcation/json;q=0.9, text/html;q=0.8, application/xml+xhtml;q=0.7",
      "Connection": "keep-alive"
    }
  },
  "responses": [
    {
      "url": "https://datadavev.github.io/examples/jsonld/ex01.html",
      "status_code": 200,
      "result": "<< body >>",
      "elapsed": 0.079823,
      "headers": {
        "connection": "keep-alive",
        "content-length": "342",
        "server": "GitHub.com",
        "content-type": "text/html; charset=utf-8",
        "last-modified": "Tue, 23 Feb 2021 20:40:09 GMT",
        "access-control-allow-origin": "*",
        "etag": "W/\"60356829-271\"",
        "expires": "Tue, 02 Mar 2021 14:47:57 GMT",
        "cache-control": "max-age=600",
        "content-encoding": "gzip",
        "x-proxy-cache": "MISS",
   

The downloaded HTML:

In [3]:
# Show the text body carried by the response object.
html_text = response.text
print(html_text)

<!DOCTYPE html>
<html>

<head>
    <title>Hello JSON-LD</title>
    <link rel="stylesheet" href="../css/common.css" />
    <script src="../js/common.js"></script>
    <script type="application/ld+json">
            {
                "@context":{
                    "@vocab":"https://example.net/"
                },
                "@id": "http://data.example.net/example_01",
                "name": "This is example 01"
            }
        </script>
</head>

<body>
    <p>Hello JSON-LD.</p>
    <p><a href="https://github.com/datadavev/examples/blob/main/docs/jsonld/ex01.html">View the source.</a></p>
</body>

</html>


The extracted JSON-LD:

In [4]:
# Serialize the extracted JSON-LD with two-space indentation and print it.
jsonld_text = json.dumps(jsonld, indent=2)
print(jsonld_text)

{
  "@context": {
    "@vocab": "https://example.net/"
  },
  "@id": "http://data.example.net/example_01",
  "name": "This is example 01"
}


Retrieve JSON-LD from a service that responds with a few redirects:

In [5]:
url = "https://earthref.org/MagIC/17099"
#url = "https://search.dataone.org/view/https%3A%2F%2Fpasta.lternet.edu%2Fpackage%2Fmetadata%2Feml%2Fknb-lter-cap%2F686%2F1"
jsonld, response = await sonormal.getjsonld.downloadJsonAsync(url)

response_summary = sonormal.getjsonld.responseSummary(response)
print(json.dumps(response_summary, indent=2))

No JSON-LD in plain source https://earthref.org/MagIC/17099


IN: %s https://earthref.org/MagIC/17099
OT: %s https://earthref.org/MagIC/17099
IN: %s http://earthref.org/MagIC/17099/
OT: %s http://earthref.org/MagIC/17099/
IN: %s https://earthref.org/MagIC/17099/
OT: %s https://earthref.org/MagIC/17099/
IN: %s https://www2.earthref.org/MagIC/17099
OT: %s https://www2.earthref.org/MagIC/17099
IN: %s https://www2.earthref.org/3c54dae4bcf905f42f5c9e7667736fd08ffd7826.css?meteor_css_resource=true
IN: %s https://www2.earthref.org/lib/semantic-ui/compiled/2.4.2/semantic.css
IN: %s https://www2.earthref.org/6be857144aa9f22087118eaa43bbb0bbecb2f9f9.js?meteor_js_resource=true
OT: %s https://www2.earthref.org/3c54dae4bcf905f42f5c9e7667736fd08ffd7826.css?meteor_css_resource=true
OT: %s https://www2.earthref.org/6be857144aa9f22087118eaa43bbb0bbecb2f9f9.js?meteor_js_resource=true
OT: %s https://www2.earthref.org/lib/semantic-ui/compiled/2.4.2/semantic.css
IN: %s https://fonts.googleapis.com/css?family=Open%20Sans:400,700,400italic,700italic&subset=latin
OT: %s

The downloaded HTML:

In [6]:
# Show the text body carried by the most recent response object.
html_text = response.text
print(html_text)

<!DOCTYPE html><html><head>
  <link rel="stylesheet" type="text/css" class="__meteor-css__" href="/3c54dae4bcf905f42f5c9e7667736fd08ffd7826.css?meteor_css_resource=true">
<link rel="stylesheet" href="/lib/semantic-ui/compiled/2.4.2/semantic.css">

<title>MagIC Search | EarthRef.org</title><link rel="canonical" href="https://earthref.org/MagIC/17099" data-react-helmet="true"><script id="schemaorg" type="application/ld+json" data-react-helmet="true">{
  "@context": {
    "@vocab": "https://schema.org/",
    "geosci-time": "http://schema.geoschemas.org/contexts/temporal#"
  },
  "@type": "Dataset",
  "identifier": "http://dx.doi.org/10.7288/V4/MAGIC/{cid}",
  "url": "https://earthref.org/MagIC/{cid}",
  "isAccessibleForFree": true,
  "license": "https://creativecommons.org/licenses/by/4.0/",
  "provider": {
    "@id": "https://earthref.org/MagIC",
    "type": "Organization",
    "legalName": "Magnetics Information Consortium (MagIC) Data Repository",
    "name": "MagIC",
    "url": "https

The extracted JSON-LD:

In [7]:
# Serialize the most recently extracted JSON-LD with two-space indentation
# and print it.
jsonld_text = json.dumps(jsonld, indent=2)
print(jsonld_text)

[
  {
    "@context": {
      "@vocab": "https://schema.org/",
      "geosci-time": "http://schema.geoschemas.org/contexts/temporal#"
    },
    "@type": "Dataset",
    "identifier": "http://dx.doi.org/10.7288/V4/MAGIC/{cid}",
    "url": "https://earthref.org/MagIC/{cid}",
    "isAccessibleForFree": true,
    "license": "https://creativecommons.org/licenses/by/4.0/",
    "provider": {
      "@id": "https://earthref.org/MagIC",
      "type": "Organization",
      "legalName": "Magnetics Information Consortium (MagIC) Data Repository",
      "name": "MagIC",
      "url": "https://earthref.org/MagIC"
    },
    "publisher": {
      "@id": "https//earthref.org/MagIC"
    },
    "sdPublisher": "EarthRef.org",
    "sdLicense": "https://creativecommons.org/licenses/by/4.0/",
    "sdDatePublished": "2021-03-02T16:31:57.278Z",
    "labNames": "Paleomagnetic Lab (Helmholtz Centre Potsdam, Germany)",
    "distribution": {
      "@type": "DataDownload",
      "contentUrl": "https://earthref.org/Ma