<a href="https://colab.research.google.com/github/MarkusSchilling/py_scripts/blob/main/RDF_generation_TT_data_RDFlib_based.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**RDF creation of tensile test data**

In this notebook, data originating from an arbitrary tensile test is read in as CSV data. This data is then transformed to RDF by creating triples using the RDFlib library and some helper functions. To this end, the values read from the CSV are assigned to concepts of the tensile test ontology (TTO) developed within the PMD project (the TTO, a PMD application ontology, can be found here: https://github.com/materialdigital/application-ontologies/tree/main/tensile_test_ontology_TTO). The RDF data is stored directly in a graph. Furthermore, SPARQL queries may be performed for consistency checks.

First, the relevant Python packages are installed and imported.

In [1]:
# Installing relevant python package of rdflib
!pip install rdflib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rdflib
  Downloading rdflib-6.3.2-py3-none-any.whl (528 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m528.1/528.1 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Collecting isodate<0.7.0,>=0.6.0
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: isodate, rdflib
Successfully installed isodate-0.6.1 rdflib-6.3.2


In [2]:
# Import of relevant package parts 
from rdflib import Graph, Literal, URIRef, BNode, Namespace
from rdflib.term import Identifier
from rdflib.collection import Collection
from rdflib.namespace import RDF, RDFS, SKOS, XSD, OWL
import rdflib.plugins.sparql.update
import pandas as pd
import io

In [3]:
# Definition of helper methods / functions

# Helper method 'add' used to write triples to the global RDF graph 'g'
def add(s,p,o):
    """Add the triple (s, p, o) to the global graph ``g``.

    Parameters
    ----------
    s, p : str
        IRIs of subject and predicate; both are wrapped in ``URIRef``.
    o : str
        Object of the triple. A string starting with ``http://`` or
        ``https://`` is stored as an IRI (object property). Any other string
        is stored as a literal (datatype property): typed ``xsd:float`` if
        ``float(o)`` succeeds, otherwise as a plain literal.
    """
    subject, predicate = URIRef(s), URIRef(p)

    # in this case p is "ObjectProperty"
    if o.startswith(('http://', 'https://')):
        g.add((subject, predicate, URIRef(o)))
        return

    # in this case p is "DatatypeProperty"
    # Only the float conversion is guarded: a failure while writing to the
    # graph must surface instead of silently triggering the string fallback.
    try:
        value = Literal(float(o), datatype=XSD.float)
    except ValueError:
        value = Literal(o)
    g.add((subject, predicate, value))
            

# Module-wide running number that can be used to mint arbitrary, unique instance names
instanceCounter = 0
def nextInstanceNum():
    """Advance the global ``instanceCounter`` by one and return the new value as a string."""
    global instanceCounter
    instanceCounter += 1
    return f"{instanceCounter}"

In [4]:
# 'Import' of CSV data: the original tensile test data is pasted into the string 'CSV'
# (a plain multi-line string, not a dataframe).
# Layout: first line = column headers, one experiment per following line, ';' as separator.
# Missing entries appear as empty fields, as "#N/A" or as "#WERT!".
# NOTE(review): the rows with testpiece_id "none" carry Rm = -1 - presumably a placeholder
#   for "not measured"; confirm before using these values.
# NOTE(review): in the ABR7RTd1 row the columns 'ageing duration;ageing temperature' read 160;350
#   while its clear name says "160 °C, 350 h"; all other aged rows use the order duration;temperature
#   (e.g. 15000;190 for "190 °C, 15000 h") - the two values look swapped, please confirm.
# NOTE(review): "Alt VII" is used as material state name for both ABR8 and ABR7 - verify.
CSV="""experiment_id;testpiece_id;machine;test_temperature;Rp01;Rp02;Rm;Rp01_in_Pa;Rp02_in_Pa;Rm_in_Pa;A;A/100;test_date;operator;strain_rate;l_0;Fracture position;reduction of area;reduction/100;valid;file format temperature;idx_mech_start;idx_mech_end;diameter;material_state;ageing duration;ageing temperature;material state name;material state name clear
ABR5RTzl1;22-1;#N/A;20;341;365;428;341000000;365000000;428000000;5.30%;0.00053;2/2/2017;anonymous;1.0E-04;29877;BiM;9.59%;0.000959;True;A;94;5055;5.97;ABR5;0;0;T61;T61
ABR58zl2;22-2;#N/A;80;321;340;407;321000000;340000000;407000000;7%;0.0007;2/2/2017;anonymous;1.0E-04;29877;BiM;9.32%;0.000932;True;A;1294;6601;5.98;ABR5;0;0;T61;T61
ABR516zl3;22-3;#N/A;160;313;324;346;313000000;324000000;346000000;13%;0.0013;2/3/2017;anonymous;1.0E-04;29877;BiM;15.54%;0.001554;True;A;2900;9612;6.01;ABR5;0;0;T61;T61
ABR518zl4;22-4;#N/A;180;288;300;320;288000000;300000000;320000000;11%;0.0011;2/3/2017;anonymous;1.0E-04;29877;BiM;16.23%;0.001623;True;A;2917;9055;6.01;ABR5;0;0;T61;T61
ABR519zl8;22-8;#N/A;190;280;292;307;280000000;292000000;307000000;12%;0.0012;2/28/2017;anonymous;1.0E-04;29877;BiM;#N/A;#WERT!;True;A;5180;13100;6;ABR5;0;0;T61;T61
ABR8RTzl1;43-1;CERT;20;226;246;342;226000000;246000000;342000000;7.90%;0.00079;9/8/2017;anonymous;1.0E-04;29772;BiM;14.14%;0.001414;True;B;1090;-10;6.01;ABR8;15000;190;Alt VII;190 °C, 15000 h
ABR819zl2;43-2;CERT;190;181;196;216;181000000;196000000;216000000;20%;0.002;9/12/2017;anonymous;1.0E-04;29772;BiM;19.68%;0.001968;True;B;69894;-10;6.001;ABR8;15000;190;Alt VII;190 °C, 15000 h
ABR9RTzl1;1-1;CERT;20;222;236;330;222000000;236000000;330000000;8.30%;0.00083;9/8/2017;anonymous;1.0E-04;29772;BiM;18.36%;0.001836;True;B;692;-10;5.994;ABR9;25000;190;Alt IX;190 °C, 25000 h
ABR919zl2;1-2;CERT;190;168;183;210;168000000;183000000;210000000;19.50%;0.00195;9/11/2017;anonymous;1.0E-04;29772;BiM;18.38%;0.001838;True;B;49617;-10;5.999;ABR9;25000;190;Alt IX;190 °C, 25000 h
ABR10RTzl1;197-1;CERT;20;263;280;362;263000000;280000000;362000000;8.20%;0.00082;9/8/2017;anonymous;1.0E-04;29772;BiM;64.29%;0.006429;True;B;870;-10;5.993;ABR10;15000;160;Alt X;160 °C, 15000 h
ABR1016zl2;197-2;CERT;160;230;245;268;230000000;245000000;268000000;17.30%;0.00173;9/12/2017;anonymous;1.0E-04;29772;BiM;56.12%;0.005612;True;B;77010;-10;5.999;ABR10;15000;160;Alt X;160 °C, 15000 h
ABR11RTzl1;3-1;CERT;20;264;279;360;264000000;279000000;360000000;7.70%;0.00077;9/11/2017;anonymous;1.0E-04;29772;BiM;47.47%;0.004747;True;B;590;-10;5.997;ABR11;25000;160;Alt XI;160 °C, 25000 h
ABR1116zl2;3-2;CERT;160;215;233;257;215000000;233000000;257000000;15.70%;0.00157;9/12/2017;anonymous;1.0E-04;29772;BiM;41.19%;0.004119;True;B;84836;-10;5.92;ABR11;25000;160;Alt XI;160 °C, 25000 h
ABR1RTd6;none;;20;267;278;-1;;;;;;;;;;;;;;;;;;ABR1;1000;190;Alt I;190 °C, 1000 h
ABR2RTd3;none;;20;237;250;-1;;;;;;;;;;;;;;;;;;ABR2;8800;190;Alt II;190 °C, 8800 h
ABR3RTd2;none;;20;292;306;-1;;;;;;;;;;;;;;;;;;ABR3;1000;160;Alt III;160 °C, 1000 h
ABR4RTd2;none;;20;273;286;-1;;;;;;;;;;;;;;;;;;ABR4;8800;160;Alt IV;160 °C, 8800 h
ABR7RTd1;none;;20;122;131;-1;;;;;;;;;;;;;;;;;;ABR7;160;350;Alt VII;160 °C, 350 h
"""

In [11]:
# Create the (initially empty) RDF graph 'g'; the cells below add all triples to it.
# Optionally, the PMD core ontology (PMDco, see also: https://github.com/materialdigital/core-ontology)
# can be parsed into the graph so that all PMDco concepts are included - this step is currently disabled.
g = Graph()
# g.parse("https://w3id.org/pmd/co")

In [12]:
# Definition of IRI prefixes (namespaces)
base = Namespace("https://w3id.org/pmd/co/")
g.bind("base", base)
unit = Namespace("http://qudt.org/vocab/unit/")
g.bind("unit", unit)
# NOTE(review): this IRI looks like the DCMI documentation page rather than the
# term namespace (commonly http://purl.org/dc/terms/). It is left unchanged here
# because queries elsewhere may rely on it - please confirm.
DC = Namespace("https://www.dublincore.org/specifications/dublin-core/dcmi-terms/")
g.bind("DC", DC)

# Application namespace
prefix = Namespace("https://w3id.org/pmd/ao/tte/")
g.bind("prefix", prefix)

# Turtle style abbreviation for RDF.type
a = RDF.type

# Declare the generated data as an OWL ontology that imports the core ontology,
# so that it can be checked like a real ontology
onto = URIRef(prefix)
g.add((onto, a, OWL.Ontology))
g.add((onto, OWL.imports, URIRef(base)))

# Add creators (identified by their ORCID iD):
markus = URIRef("https://orcid.org/0000-0002-7094-5371")
bernd = URIRef("https://orcid.org/0000-0002-3717-7104")
joerg = URIRef("https://orcid.org/0000-0001-7192-7143")
philipp = URIRef("https://orcid.org/0000-0003-4971-3645")

# Each creator is linked to the ontology and labelled with his OWN name.
# (Previously all four labels were attached to 'bernd'.)
for creatorIRI, creatorName in (
    (markus, "Markus Schilling"),
    (bernd, "Bernd Bayerlein"),
    (joerg, "Jörg Waitelonis"),
    (philipp, "Philipp von Hartrott"),
):
    g.add((onto, DC.creator, creatorIRI))
    g.add((creatorIRI, RDFS.label, Literal(creatorName, datatype=XSD.string)))

# Load data from the string above. Also possible: Load data from a URL.
# With its default settings, pandas reads empty fields and "#N/A" as NaN.
data = pd.read_csv(io.StringIO(CSV), sep=';')


def addValue(node, value, datatype):
    """Attach ``value`` to ``node`` via ``base.value`` - unless the CSV cell was empty.

    Parameters
    ----------
    node : URIRef
        Subject the value belongs to.
    value
        Cell content as delivered by pandas (str, number or NaN).
    datatype : URIRef
        XSD datatype of the literal to create.

    Missing cells (NaN) are skipped: writing them would produce literals such as
    "nan"^^xsd:float or "nan"^^xsd:string, i.e. invalid or misleading data.
    """
    if pd.isna(value):
        return
    g.add((node, base.value, Literal(value, datatype=datatype)))


# Iterate over the full data table.
# (To process only the first n rows, iterate over data.iloc[0:n].iterrows() instead.)
for idx, row in data.iterrows():
    # Row could also be used directly in the place where we create the statements
    experiment_id     = row["experiment_id"]
    testpiece_id      = row["testpiece_id"]
    machine           = row["machine"]
    temperature       = row["test_temperature"]
    rp01              = row["Rp01"]
    rp02              = row["Rp02"]
    rm                = row["Rm"]
    operator          = row["operator"]
    strainrate        = row["strain_rate"]
    agingtime         = row["ageing duration"]
    agingtemp         = row["ageing temperature"]
    matstatename      = row["material state name clear"]

    # Creation of triples for all instances (considering all connections between them). There is about one "block" per instance.
    experimentIRI = URIRef(prefix + experiment_id)
    g.add((experimentIRI, a, base.ProcessIdentifier))
    g.add((experimentIRI, base.value, Literal(experiment_id, datatype=XSD.string)))

    processIRI = URIRef(experimentIRI + "_process")
    g.add((processIRI, a, base.TensileTest))
    g.add((processIRI, base.characteristic, experimentIRI))

    # Test piece as input prior to tensile test
    # NOTE(review): several rows use the testpiece_id "none"; they all map to the
    # same test piece IRIs and therefore share one test piece in the graph - confirm
    # whether that is intended.
    testpieceID_IRI = URIRef(prefix + f"testpiece_name/{testpiece_id}")
    g.add((testpieceID_IRI, a, base.TestPieceName))
    g.add((testpieceID_IRI, a, base.Metadata))
    g.add((testpieceID_IRI, base.value, Literal(testpiece_id, datatype=XSD.string)))

    testpieceIRI = URIRef(testpieceID_IRI  + f"_testpiece/{testpiece_id}")
    g.add((testpieceIRI, a, base.TestPiece))
    g.add((testpieceIRI, base.characteristic, testpieceID_IRI))

    # Test piece(s) as output after tensile test (typically, 2 fractured parts will be formed)
    testpieceAfterTest1ID_IRI = URIRef(testpieceID_IRI + "_afterTest_1")
    g.add((testpieceAfterTest1ID_IRI, a, base.TestPieceName))
    g.add((testpieceAfterTest1ID_IRI, a, base.Metadata))
    g.add((testpieceAfterTest1ID_IRI, base.value, Literal(testpiece_id + "_afterTest_1", datatype=XSD.string)))

    testpieceAfterTest1IRI = URIRef(testpieceIRI  + "_testpiece_afterTest_1")
    g.add((testpieceAfterTest1IRI, a, base.TestPiece))
    g.add((testpieceAfterTest1IRI, base.characteristic, testpieceAfterTest1ID_IRI))

    testpieceAfterTest2ID_IRI = URIRef(testpieceID_IRI + "_afterTest_2")
    g.add((testpieceAfterTest2ID_IRI, a, base.TestPieceName))
    g.add((testpieceAfterTest2ID_IRI, a, base.Metadata))
    g.add((testpieceAfterTest2ID_IRI, base.value, Literal(testpiece_id + "_afterTest_2", datatype=XSD.string)))

    testpieceAfterTest2IRI = URIRef(testpieceIRI  + "_testpiece_afterTest_2")
    g.add((testpieceAfterTest2IRI, a, base.TestPiece))
    g.add((testpieceAfterTest2IRI, base.characteristic, testpieceAfterTest2ID_IRI))

    g.add((processIRI, base.input, testpieceIRI))
    g.add((processIRI, base.output, testpieceAfterTest1IRI))
    g.add((processIRI, base.output, testpieceAfterTest2IRI))

    # The next section contains some secondary data.
    rp01IRI = URIRef(experimentIRI + "_rp01")
    g.add((rp01IRI, a, base.Rp01))
    g.add((rp01IRI, a, base.SecondaryData))
    g.add((rp01IRI, a, base.Measurement))
    addValue(rp01IRI, rp01, XSD.float)
    g.add((rp01IRI, base.unit, unit.MegaPa))
    g.add((processIRI, base.output, rp01IRI))

    rp02IRI = URIRef(experimentIRI + "_rp02")
    # Was typed base.Rp01 (copy-paste slip).
    # NOTE(review): confirm that the ontology provides the class Rp02 under this name.
    g.add((rp02IRI, a, base.Rp02))
    g.add((rp02IRI, a, base.SecondaryData))
    g.add((rp02IRI, a, base.Measurement))
    addValue(rp02IRI, rp02, XSD.float)
    g.add((rp02IRI, base.unit, unit.MegaPa))
    g.add((processIRI, base.output, rp02IRI))

    # NOTE(review): some rows carry Rm = -1, presumably a placeholder for "not measured";
    # it is written to the graph unchanged - confirm the intended handling.
    rmIRI = URIRef(experimentIRI + "_tensileStrength")
    g.add((rmIRI, a, base.TensileStrength))
    g.add((rmIRI, a, base.SecondaryData))
    g.add((rmIRI, a, base.Measurement))
    addValue(rmIRI, rm, XSD.float)
    g.add((rmIRI, base.unit, unit.MegaPa))
    g.add((processIRI, base.output, rmIRI))

    # The next section contains some metadata.
    machineIRI = URIRef(experimentIRI + "_machine")
    g.add((machineIRI, a, base.TensileTestingMachine))
    g.add((machineIRI, a, base.Metadata))
    addValue(machineIRI, machine, XSD.string)
    g.add((processIRI, base.characteristic, machineIRI))

    # Measured test temperature
    temperatureIRI = URIRef(experimentIRI + "_temperature")
    g.add((temperatureIRI, a, base.EnvironmentalTemperature))
    g.add((temperatureIRI, a, base.Metadata))
    g.add((temperatureIRI, a, base.Measurement))
    addValue(temperatureIRI, temperature, XSD.float)
    g.add((temperatureIRI, base.unit, unit.DEG_C))
    g.add((processIRI, base.characteristic, temperatureIRI))

    # Set point of the test temperature. The triples are attached to the set point
    # node itself (previously they were all added to temperatureIRI by mistake).
    setTemperatureIRI = URIRef(experimentIRI + "_setTemperature")
    g.add((setTemperatureIRI, a, base.EnvironmentalTemperature))
    g.add((setTemperatureIRI, a, base.Metadata))
    g.add((setTemperatureIRI, a, base.SetPoint))
    addValue(setTemperatureIRI, temperature, XSD.float)
    g.add((setTemperatureIRI, base.unit, unit.DEG_C))
    g.add((processIRI, base.characteristic, setTemperatureIRI))

    operatorIRI = URIRef(experimentIRI + "_operator")
    g.add((operatorIRI, a, base.Operator))
    g.add((operatorIRI, a, base.Metadata))
    addValue(operatorIRI, operator, XSD.string)
    g.add((processIRI, base.characteristic, operatorIRI))

    strainrateIRI = URIRef(experimentIRI + "_strainRate")
    g.add((strainrateIRI, a, base.StrainRate))
    g.add((strainrateIRI, a, base.Metadata))
    g.add((strainrateIRI, a, base.SetPoint))
    addValue(strainrateIRI, strainrate, XSD.float)
    g.add((processIRI, base.characteristic, strainrateIRI))

    agingtimeIRI = URIRef(experimentIRI + "_agingTime")
    g.add((agingtimeIRI, a, base.AgingTime))
    g.add((agingtimeIRI, a, base.Metadata))
    g.add((agingtimeIRI, a, base.SetPoint))
    addValue(agingtimeIRI, agingtime, XSD.float)
    g.add((agingtimeIRI, base.unit, unit.HR))
    g.add((processIRI, base.characteristic, agingtimeIRI))

    agingtemperatureIRI = URIRef(experimentIRI + "_agingTemperature")
    g.add((agingtemperatureIRI, a, base.AgingTemperature))
    g.add((agingtemperatureIRI, a, base.Metadata))
    g.add((agingtemperatureIRI, a, base.SetPoint))
    addValue(agingtemperatureIRI, agingtemp, XSD.float)
    g.add((agingtemperatureIRI, base.unit, unit.DEG_C))
    g.add((processIRI, base.characteristic, agingtemperatureIRI))

    matstatenameIRI = URIRef(experimentIRI + "_materialStateName")
    g.add((matstatenameIRI, a, base.MaterialDesignation))
    g.add((matstatenameIRI, a, base.Metadata))
    addValue(matstatenameIRI, matstatename, XSD.string)
    g.add((processIRI, base.characteristic, matstatenameIRI))


In [13]:
# Write the complete graph 'g' in Turtle format to the file "RDF-TTO-Orowan.ttl"
# (relative path). The value returned by the call is echoed as the cell output.
g.serialize("RDF-TTO-Orowan.ttl", format="ttl")

<Graph identifier=N94a3c08c67bd45349b20fcdc8ebbac14 (<class 'rdflib.graph.Graph'>)>