# Normalizing schema.org JSON-LD to `https` based term definitions with `jsonld.js`

This example is for [`jsonld.js`](https://github.com/digitalbazaar/jsonld.js), which is a JavaScript library implementing the [JSON-LD specification](https://w3c.github.io/json-ld-syntax/).

`Schema.org` is in the process of moving from `http` to `https` as the prefix for all terms. 

This can be problematic for consumers, since the same property name from each scheme is considered different by RDF processors. So `http://schema.org/Identifier !== https://schema.org/Identifier`.

Here we provide an example of how consumers may work with either in a prefix agnostic manner by normalizing content to use a schema.org prefix of `https://schema.org/`.

This involves changing any IRIs from `http://schema.org/` to `https://schema.org/`. Ideally this can happen in one location, the value of `@vocab` in the context. However, the source representations can vary considerably in their structure. For example, each of `data["d1"]` through `data["d6"]` is an identical representation of the same RDF except for the value of the name. Extracting the `name` value from each of these as plain JSON objects would be cumbersome:

In [1]:
// Load the jsonld.js library
const jsonld = require('jsonld');

// Container for a few example documents. Every entry describes a single
// schema.org Dataset; the entries differ only in how the JSON-LD is written
// (and in the value of the name). `var` is kept so that later cells can keep
// adding entries to the same binding.
var data = {
    // Remote schema.org context referenced with the http scheme
    "d1": {
        "@context": "http://schema.org/",
        "@type": "Dataset",
        "name": "http remote @context"
    },

    // Remote schema.org context referenced with the https scheme
    "d2": {
        "@context": "https://schema.org/",
        "@type": "Dataset",
        "name": "https remote @context"
    },

    // Inline context that only sets @vocab
    "d3": {
        "@context": { "@vocab": "http://schema.org/" },
        "@type": "Dataset",
        "name": "http @vocab only"
    },

    // Inline context that declares a namespace prefix (SO)
    "d4": {
        "@context": { "SO": "http://schema.org/" },
        "@type": "SO:Dataset",
        "SO:name": "http with namespace prefix = SO"
    },

    // Prefix declared once at the top level, node wrapped in @graph
    "d5": {
        "@context": { "SO": "http://schema.org/" },
        "@graph": [
            {
                "@type": "SO:Dataset",
                "SO:name": "Single dataset graph with global http namespace prefix = SO"
            }
        ]
    },

    // Prefix declared in a context local to the node inside @graph
    "d6": {
        "@graph": [
            {
                "@context": { "SO": "http://schema.org/" },
                "@type": "SO:Dataset",
                "SO:name": "Single dataset graph with dataset local @context http with namespace prefix = SO"
            }
        ]
    }
};
console.log(`Loaded ${Object.keys(data).length} instances.`);

Loaded 6 instances.


The [JSON-LD API](https://www.w3.org/TR/json-ld11-api/) provides functionality to assist with the processing of JSON-LD using plain JavaScript object access. [Removing the context](https://www.w3.org/TR/json-ld11-api/#expansion) from each of `d1-d6` demonstrates that they are identical structures; only the value of the name varies:

In [2]:
// Note: The documents are processed strictly one after another (each expansion
// is awaited before the next one starts) so that the printed output stays in
// order. A production implementation should take advantage of an asynchronous
// pattern for more efficient processing.

/**
 * Expand each JSON-LD document in `sources` and print the expanded form.
 *
 * @param {Object} sources - Map of label -> JSON-LD document.
 * @returns {Promise<void>} Resolves once every document has been printed.
 */
async function expandAll(sources) {
    for (const label in sources) {
        const expandedDoc = await jsonld.expand(sources[label]);
        console.log("dataset: ", label);
        console.log(JSON.stringify(expandedDoc, null, 2));
    }
}

// Expand and print every example document held in `data`.
await expandAll( data );

dataset:  d1
[
  {
    "@type": [
      "http://schema.org/Dataset"
    ],
    "http://schema.org/name": [
      {
        "@value": "http remote @context"
      }
    ]
  }
]
dataset:  d2
[
  {
    "@type": [
      "http://schema.org/Dataset"
    ],
    "http://schema.org/name": [
      {
        "@value": "https remote @context"
      }
    ]
  }
]
dataset:  d3
[
  {
    "@type": [
      "http://schema.org/Dataset"
    ],
    "http://schema.org/name": [
      {
        "@value": "http @vocab only"
      }
    ]
  }
]
dataset:  d4
[
  {
    "@type": [
      "http://schema.org/Dataset"
    ],
    "http://schema.org/name": [
      {
        "@value": "http with namespace prefix = SO"
      }
    ]
  }
]
dataset:  d5
[
  {
    "@type": [
      "http://schema.org/Dataset"
    ],
    "http://schema.org/name": [
      {
        "@value": "Single dataset graph with global http namespace prefix = SO"
      }
    ]
  }
]
dataset:  d6
[
  {
    "@type": [
      "http://schema.org/Dataset"
    ]

Since all the various sources are now in a common structure, it is trivial to change the namespace: first [compact](https://www.w3.org/TR/json-ld11-api/#compaction) the data graphs to set a common context, then change the value of `@vocab` by directly manipulating the JSON, and finally recompact with a `https://schema.org/` namespace. The final compaction step is necessary for any source content that was already using the `https` variant of the namespace, since its terms would not have been compacted in the first compaction step.

During the compaction process, the `graph:true` option is enabled here to handle situations where more than one graph may be present in the source (such as the `d7` example injected below):


In [3]:
// Compaction context that maps unprefixed terms to the http variant of schema.org.
let SO_HTTP_CONTEXT = {
    "@context": { "@vocab": "http://schema.org/" }
};

// Compaction context that maps unprefixed terms to the https variant of schema.org.
// This is the context of the final, normalized documents.
let SO_HTTPS_CONTEXT = {
    "@context": { "@vocab": "https://schema.org/" }
};

/**
 * Normalize a JSON-LD document so that schema.org terms are expressed
 * against the `https://schema.org/` vocabulary.
 *
 * Steps: expand the document (dropping its original context), compact it
 * against SO_HTTP_CONTEXT, rewrite the resulting `@vocab` to the https
 * namespace, then compact once more against SO_HTTPS_CONTEXT.
 *
 * @param {Object} o - The JSON-LD document to normalize.
 * @returns {Promise<Object>} The result of the final compaction.
 */
async function normalizeSchemaOrg( o ) {
    // Expand to strip the source context, then compact against the http vocabulary
    const httpCompacted = await jsonld.compact(
        await jsonld.expand( o ),
        SO_HTTP_CONTEXT,
        { "graph": true }
    );

    // Re-point the compacted document at the https namespace
    httpCompacted["@context"]["@vocab"] = "https://schema.org/";

    // Compact again so terms that were already https get shortened as well
    const httpsCompacted = await jsonld.compact(
        httpCompacted,
        SO_HTTPS_CONTEXT,
        { "graph": true }
    );
    return httpsCompacted;
}


/**
 * Normalize every document in `sources` with normalizeSchemaOrg().
 *
 * Documents are processed one at a time, in key enumeration order.
 *
 * @param {Object} sources - Map of label -> JSON-LD document.
 * @returns {Promise<Object>} Map of the same labels -> normalized document.
 */
async function normalizeAll(sources) {
    const normalizedByLabel = {};
    for (const label in sources) {
        normalizedByLabel[label] = await normalizeSchemaOrg( sources[label] );
    }
    return normalizedByLabel;
}

// Additional example: two Dataset nodes in one @graph, sharing a top-level
// context that declares the SO prefix for the http namespace.
data["d7"] = {
    "@context": { "SO": "http://schema.org/" },
    "@graph": [
        {
            "@id": "./d7a",
            "@type": "SO:Dataset",
            "SO:name": "Double dataset graph with global http namespace prefix = SO"
        },
        {
            "@id": "./d7b",
            "@type": "SO:Dataset",
            "SO:name": "Double dataset graph with global http namespace prefix = SO"
        }
    ]
};

// Normalize every example document and print each result as formatted JSON.
let normalized = await normalizeAll(data);
Object.entries(normalized).forEach(([label, doc]) => {
    console.log(`Dataset ${label}:`);
    console.log(JSON.stringify(doc, null, 2));
});

Dataset d1:
{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      "name": "http remote @context"
    }
  ]
}
Dataset d2:
{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      "name": "https remote @context"
    }
  ]
}
Dataset d3:
{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      "name": "http @vocab only"
    }
  ]
}
Dataset d4:
{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      "name": "http with namespace prefix = SO"
    }
  ]
}
Dataset d5:
{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      "name": "Single dataset graph with global http namespace prefix = SO"
    }
  ]
}
Dataset d6:
{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      

Now properties of the objects can be easily extracted with plain JavaScript, since each object now has a structure like:

```javascript
{
  "@context": {"@vocab":"https://schema.org/"},
  "@graph": [
    { object 0 }, 
    { object 1 }, 
    ...
  ]
}
```

For example, extracting the names of all Datasets in each structure:

In [4]:
/**
 * Print the name of every Dataset node found in a normalized document.
 *
 * @param {Object} ds - Document with a top-level "@graph" array of nodes
 *     (the shape produced by normalizeSchemaOrg()).
 * @returns {void} One line is logged per Dataset node; the reported number
 *     is the node's position within "@graph".
 */
function printDatasetName(ds) {
    ds["@graph"].forEach( (node, index) => {
        // Compacted JSON-LD holds "@type" as a string for a single type and
        // as an array for several. Normalize to an array so that multi-typed
        // Dataset nodes are matched too, without relying on `==` coercion.
        const types = [].concat(node["@type"]);
        if (types.includes("Dataset")) {
            console.log(`Dataset ${index} has name = ${node.name}`);
        }
    });
}

// Report the Dataset names found in each normalized document, with a blank
// line after each document.
for (const label of Object.keys(normalized)) {
    console.log(`Normaized ${label}:`);
    printDatasetName(normalized[label]);
    console.log();
}

Normaized d1:
Dataset 0 has name = http remote @context

Normaized d2:
Dataset 0 has name = https remote @context

Normaized d3:
Dataset 0 has name = http @vocab only

Normaized d4:
Dataset 0 has name = http with namespace prefix = SO

Normaized d5:
Dataset 0 has name = Single dataset graph with global http namespace prefix = SO

Normaized d6:
Dataset 0 has name = Single dataset graph with dataset local @context http with namespace prefix = SO

Normaized d7:
Dataset 0 has name = Double dataset graph with global http namespace prefix = SO
Dataset 1 has name = Double dataset graph with global http namespace prefix = SO



This approach works regardless of whether the source is using the `http://schema.org/` or `https://schema.org/` variants of the namespace. For example:

In [5]:
// The same Dataset written twice with an @vocab-only context:
// once against the http namespace ...
let http_data = {
    "@context": { "@vocab": "http://schema.org/" },
    "@type": "Dataset",
    "name": "test http, vocab only"
};

// ... and once against the https namespace.
let https_data = {
    "@context": { "@vocab": "https://schema.org/" },
    "@type": "Dataset",
    "name": "test https, vocab only"
};

// Normalize each variant and print the Dataset name that was found.
console.log("Dataset using http://schema.org/");
printDatasetName(await normalizeSchemaOrg(http_data));

console.log("");
console.log("Dataset using https://schema.org/");
printDatasetName(await normalizeSchemaOrg(https_data));


Dataset using http://schema.org/
Dataset 0 has name = test http, vocab only

Dataset using https://schema.org/
Dataset 0 has name = test https, vocab only


Note that the process of normalization ignores other namespaces; their terms are left fully expanded as a result of the first (expansion) step of the normalization process. Other expected namespaces can be included in the final compaction context if desired. For example:

In [6]:
// Example document that mixes schema.org terms with a second namespace (dbpedia).
let dataset = {
    "@context": {
        "@vocab": "http://schema.org/",
        "dbpedia": "http://dbpedia.org/resource/"
    },
    "@type": "Dataset",
    "name": "Removal of organic carbon by natural bacterioplankton communities as a function of pCO2 from laboratory experiments between 2012 and 2016",
    "spatialCoverage": {
        "@type": "Place",
        "geo": {
            "@type": "GeoShape",
            "line": "39.3280,120.1633 40.445,123.7878"
        },
        "additionalProperty": {
            "@type": ["PropertyValue", "dbpedia:Spatial_reference_system"],
            "@id": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"
        }
    }
};

// First pass: the final compaction context only defines @vocab.
let dataset_normalized = await normalizeSchemaOrg(dataset);
console.log(JSON.stringify(dataset_normalized, null, 2));

// Add the dbpedia prefix to the shared https compaction context. This mutates
// SO_HTTPS_CONTEXT, so every later normalizeSchemaOrg() call sees the prefix.
SO_HTTPS_CONTEXT["@context"]["dbpedia"] = "http://dbpedia.org/resource/";
console.log("\nAdjusted context:");

// Second pass: normalize the same document with the adjusted compaction context.
dataset_normalized = await normalizeSchemaOrg(dataset);
console.log(JSON.stringify(dataset_normalized, null, 2));

{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      "name": "Removal of organic carbon by natural bacterioplankton communities as a function of pCO2 from laboratory experiments between 2012 and 2016",
      "spatialCoverage": {
        "@type": "Place",
        "additionalProperty": {
          "@id": "http://www.opengis.net/def/crs/OGC/1.3/CRS84",
          "@type": [
            "PropertyValue",
            "http://dbpedia.org/resource/Spatial_reference_system"
          ]
        },
        "geo": {
          "@type": "GeoShape",
          "line": "39.3280,120.1633 40.445,123.7878"
        }
      }
    }
  ]
}

Adjusted context:
{
  "@context": {
    "@vocab": "https://schema.org/",
    "dbpedia": "http://dbpedia.org/resource/"
  },
  "@graph": [
    {
      "@type": "Dataset",
      "name": "Removal of organic carbon by natural bacterioplankton communities as a function of pCO2 from laboratory experiments between 2012 and 