# Install the package using pip 

In [23]:
!pip install data2rdf



# Load packages

In [1]:
%load_ext autoreload
%autoreload 2

from data2rdf import AnnotationPipeline, Parser

import os



The following examples run the pipeline and demonstrate its input arguments:

* `raw_data`: the raw data to be processed:
    * either the path to a csv-, xlsx-/xlsm- or json-file, or a python-`dict`
* `mapping`: either a csv-, xlsx-/xlsm- or json-file, or a python-`dict`, with a mapping from a concept in the `raw_data` to a class IRI.
* `extra_triples` [optional]: a ttl-file with extra triples which are added on top of the graph produced from the `mapping`.
* `parser_args` [optional]: a python-`dict` with additional arguments needed by the parser, e.g. `header_length` for the `Parser.csv`
* `config` [optional]: a python-`dict` with additional arguments to set for the pipeline run, e.g. `graph_identifier` for the identifier of the `rdflib.Graph`



# CSV Example

In [6]:
# Locate the CSV pipeline test fixtures relative to this notebook.
working_folder = os.path.join("../", "tests", "csv_pipeline_test")
input_folder = os.path.join(working_folder, "input")

# Raw data file, concept-to-IRI mapping and the method graph used as extra triples.
raw_data = os.path.join(input_folder, "data", "DX56_D_FZ2_WR00_43.TXT")
mapping_file = os.path.join(input_folder, "mapping", "tensile_test_mapping.csv")
extra = os.path.join(input_folder, "method-graph", "tensile_test_method_v6.mod.ttl")

# Arguments handed to the csv parser: tab as header and column separator,
# and a header length of 20.
parser_args = dict(header_sep="\t", column_sep="\t", header_length=20)

pipeline = AnnotationPipeline(
    raw_data=raw_data,
    mapping=mapping_file,
    parser=Parser.csv,
    parser_args=parser_args,
    extra_triples=extra,
)

# Show the serialized graph produced by the pipeline.
print(pipeline.graph.serialize())

@prefix csvw: <http://www.w3.org/ns/csvw#> .
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix fileid: <https://www.example.org/> .
@prefix foaf1: <http://xmlns.com/foaf/spec/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix qudt: <http://qudt.org/schema/qudt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

fileid:TensileTestExperiment a prov:Activity ;
    prov:generated fileid:AbsoluteCrossheadTravel,
        fileid:Extension,
        fileid:Remark,
        fileid:StandardForce,
        fileid:TimeStamp,
        fileid:dataset ;
    prov:hadPlan fileid:TestStandard ;
    prov:used fileid:DisplacementTransducer,
        fileid:ForceMeasuringDevice,
        fileid:TensileTestSpecimen,
        fileid:TensileTestingMachine,
        fileid:TestingFacility ;
    prov:wasAssociatedWith fileid:Tester ;
    prov:wasInfluencedBy fileid:ExperimentPreparation .

fileid:TestingS

# Excel parser example

In [8]:
# Locate the Excel pipeline test fixtures relative to this notebook.
working_folder = os.path.join("../" ,"tests", "xls_pipeline_test")

# Method graph (extra triples), raw Excel workbook and concept-to-IRI mapping.
extra = os.path.join(working_folder, "input" , "method-graph", "tensile_test_method_v6.mod.ttl")
raw_data = os.path.join(working_folder,"input" , "data" ,"AFZ1-Fz-S1Q.xlsm")
mapping = os.path.join(working_folder, "input" , "mapping" ,"tensile_test_mapping.csv")

# The parser is selected via `Parser.excel`; the previously assigned
# `parser = "excel"` string was never used and has been removed.
pipeline = AnnotationPipeline(
    raw_data=raw_data,
    parser=Parser.excel,
    mapping=mapping,
    extra_triples=extra,
)

# Show the serialized graph produced by the pipeline.
print(pipeline.graph.serialize())

                                  does not have a value at location `UU31`.
                                  Concept will be omitted in graph.
                                  


@prefix csvw: <http://www.w3.org/ns/csvw#> .
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix fileid: <https://www.example.org/> .
@prefix foaf1: <http://xmlns.com/foaf/spec/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix qudt: <http://qudt.org/schema/qudt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

fileid:TensileTestExperiment a prov:Activity ;
    prov:generated fileid:AbsoluteCrossheadTravel,
        fileid:Extension,
        fileid:Remark,
        fileid:StandardForce,
        fileid:TimeStamp,
        fileid:dataset ;
    prov:hadPlan fileid:TestStandard ;
    prov:used fileid:DisplacementTransducer,
        fileid:ForceMeasuringDevice,
        fileid:TensileTestSpecimen,
        fileid:TensileTestingMachine,
        fileid:TestingFacility ;
    prov:wasAssociatedWith fileid:Tester ;
    prov:wasInfluencedBy fileid:ExperimentPreparation .

fileid:TestingS

# JSON Example

In [9]:
# Locate the JSON pipeline test fixtures relative to this notebook.
working_folder = os.path.join("../", "tests", "json_pipeline_test")
input_folder = os.path.join(working_folder, "input")

# Raw JSON document and the concept-to-IRI mapping.
raw_data = os.path.join(input_folder, "data", "sample_data.json")
mapping_file = os.path.join(input_folder, "mapping", "tensile_test_mapping.csv")

# Only the raw data, the parser and the mapping are passed in this example.
pipeline = AnnotationPipeline(raw_data=raw_data, parser=Parser.json, mapping=mapping_file)

# Show the serialized graph produced by the pipeline.
print(pipeline.graph.serialize())

@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix fileid: <https://www.example.org/> .
@prefix foaf1: <http://xmlns.com/foaf/spec/> .
@prefix ns1: <prov:> .
@prefix qudt: <http://qudt.org/schema/qudt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

fileid:dataset a dcat:Dataset ;
    dcterms:hasPart fileid:Dictionary ;
    dcat:distribution [ a dcat:Distribution ;
            dcat:accessURL "https://www.example.org/download/"^^xsd:anyURI ;
            dcat:mediaType "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ] .

fileid:Dictionary a ns1:Dictionary ;
    ns1:hadDictionaryMember [ a ns1:KeyEntityPair ;
            ns1:pairEntity [ a ns1:Entity ;
                    qudt:quantity fileid:WidthChange ] ;
            ns1:pairKey "Breitenaenderung"^^xsd:string ],
        [ a ns1:KeyEntityPair ;
            ns1:pairEntity [ a ns1:Entity ;
           

## Dict Example 

In [10]:
# Raw data given directly as a python dict instead of a path to a json-file.
raw_data = {
  "data": {
    "Breitenaenderung": {
      "unit": "mm",
      "value": 1.0
    },
    "Dehnung": [
      1.0,
      2.0,
      3.0
    ],
    "Standardkraft": {
      "array": [
        2.0,
        3.0,
        4.0
      ],
      "unit": "kN"
    }
  },
  "details": {
    "Bemerkungen": "foobar"
  }
}

# Mapping given as a python dict: each entry names the concept key, its class IRI
# and dotted paths (`value_location` / `unit_location`) that correspond to the
# nesting of `raw_data` above; `Dehnung` carries its unit inline instead.
mapping = {
  "Bemerkungen": {
    "iri": "https://w3id.org/steel/ProcessOntology/Remark",
    "key": "Bemerkungen",
    "value_location": "details.Bemerkungen"
  },
  "Breitenaenderung": {
    "iri": "https://w3id.org/steel/ProcessOntology/WidthChange",
    "key": "Breitenaenderung",
    "unit_location": "data.Breitenaenderung.unit",
    "value_location": "data.Breitenaenderung.value"
  },
  "Dehnung": {
    "iri": "https://w3id.org/steel/ProcessOntology/PercentageElongation",
    "key": "Dehnung",
    "unit": "%",
    "value_location": "data.Dehnung"
  },
  "Standardkraft": {
    "iri": "https://w3id.org/steel/ProcessOntology/Force",
    "key": "Standardkraft",
    "unit_location": "data.Standardkraft.unit",
    "value_location": "data.Standardkraft.array"
  }
}


# Pass the `mapping` dict defined in this cell. The original passed `mapping_file`,
# a path left over from an earlier cell, so the dict above was never used and the
# cell depended on hidden kernel state.
pipeline = AnnotationPipeline(
    raw_data=raw_data,
    parser=Parser.json,
    mapping=mapping
)

# Show the serialized graph produced by the pipeline.
print(pipeline.graph.serialize())

@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix fileid: <https://www.example.org/> .
@prefix foaf1: <http://xmlns.com/foaf/spec/> .
@prefix ns1: <prov:> .
@prefix qudt: <http://qudt.org/schema/qudt/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

fileid:dataset a dcat:Dataset ;
    dcterms:hasPart fileid:Dictionary ;
    dcat:distribution [ a dcat:Distribution ;
            dcat:accessURL "https://www.example.org/download/"^^xsd:anyURI ;
            dcat:mediaType "https://www.iana.org/assignments/media-types/application/json"^^xsd:anyURI ] .

fileid:Dictionary a ns1:Dictionary ;
    ns1:hadDictionaryMember [ a ns1:KeyEntityPair ;
            ns1:pairEntity [ a ns1:Entity ;
                    dcterms:hasPart fileid:Remark ] ;
            ns1:pairKey "Bemerkungen"^^xsd:string ],
        [ a ns1:KeyEntityPair ;
            ns1:pairEntity [ a ns1:Entity ;
                   