# Validate XML against the B2FIND / EUDAT Core schema

* https://b2find.eudat.eu/site/forproviders/#eudat-core-metadata-schema
* https://gitlab.eudat.eu/eudat-metadata/eudat-core-schema
* https://pypi.org/project/xmlschema/

In [1]:
import xmlschema
import json
from pprint import pprint


## Load Eudat-Core example

In [2]:
# Schema URL that the example document's xsi:schemaLocation points to.
eudat_core_xsd = 'http://schema.eudat.eu/meta/kernel-core-1.0/schema.xsd'

# Raw-file root of the eudat-core-schema GitLab repository (master branch);
# both the XSD and the full example document live underneath it.
_gitlab_raw_root = 'https://gitlab.eudat.eu/eudat-metadata/eudat-core-schema/-/raw/master'
eudat_core_gitlab_xsd = _gitlab_raw_root + '/eudat-core.xsd'
eudat_core_example_xml = _gitlab_raw_root + '/examples/core-full-example.xml'


In [3]:
# Build the schema object from the XSD at the GitLab URL (requires network access).
xs = xmlschema.XMLSchema(eudat_core_gitlab_xsd)

## Validate eudat core example

In [4]:
# Check the upstream full example against the EUDAT Core schema; the result is a bool.
xs.is_valid(eudat_core_example_xml)

True

In [5]:
# Decode the example XML into nested Python dicts/lists using the schema, then pretty-print it.
pprint(xs.to_dict(eudat_core_example_xml))

{'@xmlns': 'http://schema.eudat.eu/schema/kernel-1',
 '@xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
 '@xsi:schemaLocation': 'http://schema.eudat.eu/schema/kernel-1 '
                        'http://schema.eudat.eu/meta/kernel-core-1.0/schema.xsd',
 'community': 'EUDAT community',
 'contacts': {'contact': ['info@community.org',
                          'https://community.org/contact']},
 'contributors': {'contributor': [{'contributorName': 'Miller, Elizabeth'},
                                  {'contributorName': 'PaNOSC'},
                                  {'contributorName': 'Deutsches '
                                                      'Klimarechenzentrum'}]},
 'creators': {'creator': [{'creatorName': 'Miller, Elizabeth'},
                          {'creatorName': 'Fuller, Barry'}]},
 'descriptions': {'description': ['XML example of all EUDAT Core Metadata '
                                  'Schema v1.0 properties.']},
 'disciplines': {'discipline': ['English lite

## Validate local b2f example

In [6]:
# Check the local b2f example file (path relative to the working directory)
# against the EUDAT Core schema.
xs.is_valid("b2f-example.xml")

False

In [7]:
# validate() raises on the first schema violation instead of returning a bool;
# print that error to see why the document is rejected. Only schema validation
# errors are caught, so unrelated problems (missing file, malformed XML, ...)
# still surface as real failures instead of being printed and ignored.
try:
    xs.validate("b2f-example.xml")
except xmlschema.XMLSchemaValidationError as err:
    print(err)

failed validating <Element '{http://schema.eudat.eu/schema/kernel-1}instruments' at 0x7fe5afe357c0> with XsdGroup(model='sequence', occurs=[1, 1]):

Reason: The content of element '{http://schema.eudat.eu/schema/kernel-1}instruments' is not complete. Tag 'instrument' expected.

Schema:

  <xs:complexType xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <xs:sequence>
      <xs:element name="instrument" minOccurs="1" maxOccurs="unbounded">
        <xs:annotation>
          <xs:documentation>The technical instrument(s) used to produce the data in the resource.</xs:documentation>
        </xs:annotation>
        <xs:complexType>
          <xs:simpleContent>
            <xs:extension base="nonEmptyContentStringType">
              <xs:attribute name="instrumentIdentifier" type="nonEmptyContentStringType" use="optional" />
              <xs:attribute name="instrumentIdentifierType" type="openIdentifierType" use="optional" />
            </xs:extension>
          </xs:simpleContent>
        <

## Use local b2f schema

In [8]:
# Build a second schema object from the local b2f.xsd (path relative to the working directory).
b2f_xs = xmlschema.XMLSchema("b2f.xsd")

In [9]:
# Check the local b2f example against the local b2f schema; the result is a bool.
b2f_xs.is_valid("b2f-example.xml")

True

## Export to json

* https://xmlschema.readthedocs.io/en/latest/usage.html#decoding-to-json
* https://xmlschema.readthedocs.io/en/latest/converters.html

In [10]:
# Decode the example with decimals rendered as strings so the result is
# JSON-serializable, then dump it as indented JSON text.
core_example_decoded = xs.to_dict(eudat_core_example_xml, decimal_type=str)
print(json.dumps(core_example_decoded, indent=4))

{
    "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
    "@xmlns": "http://schema.eudat.eu/schema/kernel-1",
    "@xsi:schemaLocation": "http://schema.eudat.eu/schema/kernel-1 http://schema.eudat.eu/meta/kernel-core-1.0/schema.xsd",
    "titles": {
        "title": [
            {
                "@{http://www.w3.org/XML/1998/namespace}lang": "en",
                "$": "EUDAT Core metadata schema full example"
            },
            {
                "@{http://www.w3.org/XML/1998/namespace}lang": "en",
                "$": "Here you find exapmles for the EUDAT Core metadata schema"
            }
        ]
    },
    "community": "EUDAT community",
    "identifiers": {
        "identifier": [
            {
                "@identifierType": "DOI",
                "$": "10.1594/WDCC/CCSRNIES_SRES_B2"
            },
            {
                "@identifierType": "ARK",
                "$": "2013A&A...558A.149B"
            },
            {
                "@identifierTyp

## JSON schema

* https://json-schema.org/
* https://github.com/json-schema-org
* https://pypi.org/project/jsonschema/

In [11]:
# Minimal JSON Schema (draft 2020-12) for a B2Find record:
#   - "identifier" and "title" are required, each an array of strings
#   - "community" is an optional string
# The unused "$defs"/"veggie" definition (a leftover from the json-schema.org
# tutorial, never referenced via "$ref") has been dropped.
b2f_json_schema = {
  "$id": "https://example.com/b2f.schema.json",
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "B2Find",
  "required": [ "identifier", "title" ],
  "type": "object",
  "properties": {
    "identifier": {
      "type": "array",
      "items": {
        "type": "string"
      },
    },
    "title": {
      "type": "array",
      "items": {
        "type": "string"
      },
    },
    "community": {
      "type": "string",
    },
  },
}

In [12]:
# Sample record to validate: both required array fields plus the optional community.
b2f_data = dict(
    identifier=["https://b2share.eudat.eu/records/bb8964ff899c4711a0e8875b87ab2800"],
    title=["JSON schema example"],
    community="EUDAT community",
)

In [13]:
from jsonschema import validate

In [14]:
# jsonschema.validate raises an exception when the instance violates the schema;
# a silent run (no output) means b2f_data is valid.
validate(instance=b2f_data, schema=b2f_json_schema)