# RDFLIB

In [1]:
import re 

def cleaning_label(label):
    pattern = r"[&\-\(\)\\/]"  
    # Replace them with a space  
    label = re.sub(pattern, ' ', label)

    return label

In [45]:
import rdflib
from rdflib import Graph, URIRef, Namespace
from rdflib.namespace import SKOS, RDF, DCTERMS, OWL, XSD
from rdflib import Literal as LiteralRDF

def add_concept(taxonomy: Graph, namespace: str, concept:dict, level: int) -> None:
    uri = get_uri(namespace, concept, level)

    # Concept
    taxonomy.add((URIRef(uri), RDF.type, SKOS.Concept)) 

    #taxonomy.add((URIRef(uri), SKOS.altLabel, LiteralRDF("altLabel", lang="fr")))
    taxonomy.add((URIRef(uri), SKOS.broader, URIRef(get_uri(namespace, concept, level-1))))
    taxonomy.add((URIRef(uri), SKOS.definition, LiteralRDF(concept[f"Description Catégorie L{level}"], lang="fr")))
    taxonomy.add((URIRef(uri), DCTERMS.identifier, LiteralRDF(concept[f"ID catégorie L{level}"])))
    taxonomy.add((URIRef(uri), SKOS.inScheme, URIRef(get_uri(namespace, concept, 2))))
    #taxonomy.add((URIRef(uri), DCTERMS.isReplacedBy, URIRef(get_uri(namespace, concept, level-1))))
    taxonomy.add((URIRef(uri), SKOS.prefLabel, LiteralRDF(cleaning_label(concept[f"Titre Catégorie L{level}"]), lang="fr")))
    #taxonomy.add((URIRef(uri), DCTERMS.replaces, URIRef(get_uri(namespace, concept, level-1))))
    taxonomy.add((URIRef(uri), URIRef("http://publications.europa.eu/ontology/euvoc#status"), URIRef("http://publications.europa.eu/resource/authority/concept-status/CURRENT")))
    taxonomy.add((URIRef(uri), OWL.versionInfo, LiteralRDF("0.0.1")))

def add_topConcept(taxonomy: Graph, namespace: str, concept:dict, level: int) -> None:
    uri = get_uri(namespace, concept, level)
    # Concept
    taxonomy.add((URIRef(uri), RDF.type, SKOS.Concept)) 

    #taxonomy.add((URIRef(uri), SKOS.altLabel, LiteralRDF("", lang="fr")))
    taxonomy.add((URIRef(uri), SKOS.definition, LiteralRDF(concept[f"Description Catégorie L{level}"], lang="fr")))
    #taxonomy.add((URIRef(uri), DCTERMS.identifier, LiteralRDF(concept[f"ID catégorie L{level}"])))
    taxonomy.add((URIRef(uri), SKOS.inScheme, URIRef(get_uri(namespace, concept, 2))))
    #taxonomy.add((URIRef(uri), DCTERMS.isReplacedBy, URIRef(get_uri(namespace, concept, level-1))))
    taxonomy.add((URIRef(uri), SKOS.prefLabel, LiteralRDF(cleaning_label(concept[f"Titre Catégorie L{level}"]), lang="fr")))
    #taxonomy.add((URIRef(uri), DCTERMS.replaces, URIRef(get_uri(namespace, concept, level-1))))
    taxonomy.add((URIRef(uri), URIRef("http://publications.europa.eu/ontology/euvoc#status"), URIRef("http://publications.europa.eu/resource/authority/concept-status/CURRENT")))
    taxonomy.add((URIRef(uri), SKOS.topConceptOf, URIRef(get_uri(namespace, concept, 2))))
    taxonomy.add((URIRef(uri), OWL.versionInfo, LiteralRDF("0.0.1")))

    taxonomy.add((URIRef(get_uri(namespace, concept, 2)), SKOS.hasTopConcept, URIRef(uri)))

def add_conceptScheme(taxonomy: Graph, namespace: str, concept:dict, level: int) -> None:
    uri = get_uri(namespace, concept, level)

    # Concept
    taxonomy.add((URIRef(uri), RDF.type, SKOS.ConceptScheme))
    
    taxonomy.add((URIRef(uri), DCTERMS.created, LiteralRDF("2024-12-18", datatype=XSD.date)))
    #taxonomy.add((URIRef(uri), DCTERMS.issued, LiteralRDF("")))
    #taxonomy.add((URIRef(uri), DCTERMS.modified, LiteralRDF("")))
    taxonomy.add((URIRef(uri), DCTERMS.identifier, LiteralRDF(concept[f"ID catégorie L{level}"])))
    #taxonomy.add((URIRef(uri), DCTERMS.isReplacedBy, URIRef(get_uri(namespace, concept, level-1))))
    taxonomy.add((URIRef(uri), SKOS.prefLabel, LiteralRDF(cleaning_label(concept[f"Titre Catégorie L{level}"]), lang="fr")))
    #taxonomy.add((URIRef(uri), DCTERMS.replaces, URIRef(get_uri(namespace, concept, level-1))))
    taxonomy.add((URIRef(uri), DCTERMS.title, LiteralRDF(concept[f"Titre Catégorie L{level}"], lang="fr")))
    taxonomy.add((URIRef(uri), OWL.versionInfo, LiteralRDF("0.0.1")))

def get_uri(namespace: str, concept:dict, level: int) -> str:
    slug = concept[f"Slug Catégorie L{level}"].lower().replace(" ", "_")
    uri = namespace + slug

    return uri

In [3]:
import pandas as pd 
import json

def rename_columns(excel: pd.DataFrame, excel_info: json, level: int) -> None:

    if excel_info["Information by level"]["altLabel"] == "":
        excel.rename(columns={excel_info["Information by level"]["Concept"] + str(level): f"Slug Catégorie L{level}", 
                            excel_info["Information by level"]["prefLabel"] + str(level): f"Titre Catégorie L{level}", 
                            excel_info["Information by level"]["Definition"] + str(level): f"Definition Catégorie L{level}"}, inplace=True)
    else:
        excel.rename(columns={excel_info["Information by level"]["Concept"] + str(level): f"Slug Catégorie L{level}", 
                            excel_info["Information by level"]["prefLabel"] + str(level): f"Titre Catégorie L{level}", 
                            excel_info["Information by level"]["Definition"] + str(level): f"Definition Catégorie L{level}",
                            excel_info["Information by level"]["altLabel"] + str(level): f"Autre Titre Catégorie L{level}"}, inplace=True)


In [46]:
import pandas as pd
import json

EXCEL_PATH = r"C:\Users\ecaudron001\Downloads\2024-10-08_D4W_taxxo-complete.xlsx"
NAMESPACE = "http://www.data4wallonia.be/Test-Taxonomy#"

# Open and read the JSON file
with open('excel_info.json', 'r', encoding="utf-8") as file:
    EXCEL_INFO = json.load(file)

# Read taxonomy from excel
taxo_excel = pd.read_excel(EXCEL_PATH)

# Create rdf version of taxonomy
taxo_graph = Graph()
taxo_graph.bind("d4w", NAMESPACE)
taxo_graph.bind("status", "http://publications.europa.eu/resource/authority/concept-status/")
taxo_graph.bind("eurovoc", "http://publications.europa.eu/ontology/euvoc#")

# Add categories to the rdf
for level in range(int(EXCEL_INFO["highest level"]), int(EXCEL_INFO["lowest level"])):

    try:     
        unique_concepts = taxo_excel.drop_duplicates(subset=f"Titre Catégorie L{level}")
        
        for index in unique_concepts.index:
            if level > 3: 
                add_concept(taxo_graph, NAMESPACE, unique_concepts.loc[index], level)
            elif level == 3: 
                add_topConcept(taxo_graph, NAMESPACE, unique_concepts.loc[index], level)
            else: 
                add_conceptScheme(taxo_graph, NAMESPACE, unique_concepts.loc[index], level)
    except:
        rename_columns(taxo_excel, EXCEL_INFO, level)
        unique_concepts = taxo_excel.drop_duplicates(subset=f"Titre Catégorie L{level}")
        
        for index in unique_concepts.index:
            if level > 3: 
                add_concept(taxo_graph, NAMESPACE, unique_concepts.loc[index], level)
            elif level == 3: 
                add_topConcept(taxo_graph, NAMESPACE, unique_concepts.loc[index], level)
            else: 
                add_conceptScheme(taxo_graph, NAMESPACE, unique_concepts.loc[index], level)

# Save rdf file
taxo_graph.serialize("taxo_2_rdf.ttl", format="ttl")
    

<Graph identifier=N16bdf2ba96134085b4b3f5b8d32d7bc4 (<class 'rdflib.graph.Graph'>)>

In [50]:
import logging
import requests  

turtle_data = taxo_graph.serialize(format="turtle")#.decode("utf-8")  
  
# Prepare the API request payload  
payload = {  
    "contentToValidate": turtle_data,  
    "contentSyntax": "text/turtle",  
    "validationType": "v1.0.0"  
}  

# Send the POST request  
response = requests.post("http://localhost:8080/shacl/d4wta-ap/api/validate", json=payload)  

# Check the response  
if response.status_code == 200:  
    logging.info("Validation successful")
    
    if response.json().get("sh:conforms"):
        logging.info("No errors in the taxonomy")
        print("No error in the taxonomy")
    else: 
        logging.info("Errors detected:\n" + response.text) 
        print("Errors detected:\n" + response.text)
else:  
    print("Validation failed:", response.status_code, response.text)

Errors detected:
{
    "@graph": [
        {
            "@id": "_:b0",
            "sh:resultMessage": "Property needs to have at least 1 value",
            "sh:resultPath": {
                "@id": "dc:identifier"
            },
            "sh:focusNode": {
                "@id": "d4w:filiere-du-bois"
            },
            "sh:sourceShape": {
                "@id": "https://DigitalWallonia.github.io/D4WTA-AP/releases/1.0.0/#ConceptShape/6c7c1bd2ab3b7761e5be759a078a9fd93f411a57"
            },
            "sh:sourceConstraintComponent": {
                "@id": "sh:MinCountConstraintComponent"
            },
            "sh:resultSeverity": {
                "@id": "sh:Violation"
            },
            "@type": "sh:ValidationResult"
        },
        {
            "@id": "_:b1",
            "sh:resultMessage": "Property needs to have at least 1 value",
            "sh:resultPath": {
                "@id": "dc:identifier"
            },
            "sh:focusNode": {
       

In [51]:
from tqdm import tqdm

for index in tqdm([1], desc="SHACL validation"):
    payload = {  
    "contentToValidate": turtle_data,  
    "contentSyntax": "text/turtle",  
    "validationType": "v1.0.0"  
}  

    # Send the POST request  
    response = requests.post("http://localhost:8080/shacl/d4wta-ap/api/validate", json=payload)  

# Check the response  
if response.status_code == 200:  
    logging.info("Validation successful")
    
    if response.json().get("sh:conforms"):
        logging.info("No errors in the taxonomy")
        print("No error in the taxonomy")
    else: 
        logging.info("Errors detected:\n" + response.text) 
        print("Errors detected:\n" + response.text)
else:  
    print("Validation failed:", response.status_code, response.text)

SHACL validation: 100%|██████████| 1/1 [00:00<00:00,  2.76it/s]

Errors detected:
{
    "@graph": [
        {
            "@id": "_:b0",
            "sh:resultMessage": "Property needs to have at least 1 value",
            "sh:resultPath": {
                "@id": "dc:identifier"
            },
            "sh:focusNode": {
                "@id": "d4w:nature"
            },
            "sh:sourceShape": {
                "@id": "https://DigitalWallonia.github.io/D4WTA-AP/releases/1.0.0/#ConceptShape/6c7c1bd2ab3b7761e5be759a078a9fd93f411a57"
            },
            "sh:sourceConstraintComponent": {
                "@id": "sh:MinCountConstraintComponent"
            },
            "sh:resultSeverity": {
                "@id": "sh:Violation"
            },
            "@type": "sh:ValidationResult"
        },
        {
            "@id": "_:b1",
            "sh:resultMessage": "Property needs to have at least 1 value",
            "sh:resultPath": {
                "@id": "dc:identifier"
            },
            "sh:focusNode": {
                




# OWLREADY2

In [33]:
from owlready2 import *

onto = get_ontology("http://test.org/onto2.owl")

### Creating the model

In [34]:
NAMESPACE = "http://www.data4wallonia.be/Test-Taxonomy#"

with onto:
    # Define a new namespace for SKOS
    skos = onto.get_namespace("http://www.w3.org/2004/02/skos/core#")
    owl = onto.get_namespace("http://www.w3.org/2002/07/owl#")
    dcterms = onto.get_namespace("http://purl.org/dc/terms/")
    eurovoc = onto.get_namespace("http://publications.europa.eu/ontology/euvoc#")
    status = onto.get_namespace("http://publications.europa.eu/resource/authority/concept-status/")
    d4w = onto.get_namespace(NAMESPACE)

    # Create the skos:ConceptScheme class
    class ConceptScheme(Thing):
        namespace = skos
    # Create the skos:Concept class
    class Concept(Thing):
        namespace = skos

    # Create the skos:ConceptScheme attributes
    class created(DataProperty):
        namespace = dcterms
        domain = [ConceptScheme]
        range = [datetime.date]
    class identifier(DataProperty):
        namespace = dcterms
        domain = [ConceptScheme]
        range = [str]
    class issued(DataProperty):
        namespace = dcterms
        domain = [ConceptScheme]
        range = [datetime.date]
    class modified(DataProperty):
        namespace = dcterms
        domain = [ConceptScheme]
        range = [datetime.date]
    class prefLabel(DataProperty):
        namespace = skos
        domain = [ConceptScheme, Concept]
        range = [str]
    class title(DataProperty):
        namespace = dcterms
        domain = [ConceptScheme]
        range = [str]
    class versionInfo(DataProperty):
        namespace = owl
        domain = [ConceptScheme, Concept]
        range = [str]

    # Create the skos:ConceptScheme properties
    class ReplacedBy(ObjectProperty):
        skos = dcterms
        domain = [ConceptScheme, Concept]
        range = [ConceptScheme, Concept]
    class Replaces(ObjectProperty):
        namespace = dcterms
        domain = [ConceptScheme, Concept]
        range = [ConceptScheme, Concept]
    class hasTopConcept(ObjectProperty):
        namespace = skos
        domain = [ConceptScheme]
        range = [Concept]

    # Create the skos:Concept attributes
    class altLabel(DataProperty):
        namespace = skos
        domain = [Concept]
        range = [str]
    class definition(DataProperty):
        namespace = skos
        domain = [Concept]
        range = [str]


    # Define the skos:inScheme property
    class inScheme(ObjectProperty):
        namespace = skos
        domain = [Concept]  # Specifies that the domain is skos:Concept
        range = [ConceptScheme]  # Specifies that the range is skos:ConceptScheme
    class topConceptOf(ObjectProperty):
        namespace = skos
        domain = [Concept]
        range = [ConceptScheme]
    class broader(ObjectProperty):
        namespace = skos
        domain = [Concept]
        range = [Concept]
    class status(ObjectProperty):
        namespace = eurovoc
        domain = [Concept]
        range = [Concept]

### Instantiating

In [35]:
def add_concept(onto: OWL, namespace: str, concept:dict, level: int) -> None:
    with onto:
        Concept = skos.Concept(concept[f"Slug Catégorie L{level}"])
        
        # Concept.altLabel.append(LiteralRDF("altLabel", lang="fr"))
        Concept.broader = [skos.Concept(concept[f"Slug Catégorie L{level-1}"])]
        Concept.definition.append(locstr(concept[f"Description Catégorie L{level}"], "fr"))
        Concept.inScheme.append(skos.ConceptScheme(concept[f"Slug Catégorie L2"]))
        # Concept.isReplacedBy.append(skos.concept())
        Concept.prefLabel.append(locstr(concept[f"Titre Catégorie L{level}"], "fr"))
        # Concept.replaces(skos.concept())
        Concept.status.append(skos.Concept(r"http://publications.europa.eu/resource/authority/concept-status/CURRENT"))
        Concept.versionInfo.append("0.0.1")

def add_topConcept(onto: OWL, namespace: str, concept:dict, level: int) -> None:
    with onto:
        Concept = skos.Concept(concept[f"Slug Catégorie L{level}"])
        
        # Concept.altLabel.append(LiteralRDF("altLabel", lang="fr"))
        Concept.definition.append(locstr(concept[f"Description Catégorie L{level}"], "fr"))
        Concept.inScheme.append(skos.ConceptScheme(concept[f"Slug Catégorie L2"]))
        # Concept.isReplacedBy.append(skos.concept())
        Concept.prefLabel.append(locstr(concept[f"Titre Catégorie L{level}"], "fr"))
        # Concept.replaces(skos.concept())
        Concept.status.append(skos.Concept(r"http://publications.europa.eu/resource/authority/concept-status/CURRENT"))
        Concept.topConceptOf.append(skos.ConceptScheme(concept[f"Slug Catégorie L2"]))
        Concept.versionInfo.append("0.0.1")

def add_conceptScheme(onto: OWL, namespace: str, concept:dict, level: int) -> None:
    with onto:
        conceptScheme = skos.ConceptScheme(concept[f"Slug Catégorie L{level}"])

        conceptScheme.created.append(datetime.date(2024, 12, 18))
        # conceptScheme.issued.append(LiteralRDF("2024-12-18", datatype=XSD.date))
        # conceptScheme.modified.append(LiteralRDF("2024-12-18", datatype=XSD.date))
        conceptScheme.identifier.append(locstr(concept[f"ID catégorie L{level}"], "fr"))
        # conceptScheme.isReplacedBy.append(skos.ConceptScheme())
        conceptScheme.prefLabel.append(locstr(concept[f"Titre Catégorie L{level}"],"fr")) 
        # conceptScheme.replaces(skos.ConceptScheme())
        conceptScheme.title.append(locstr(concept[f"Titre Catégorie L{level}"],"fr"))
        conceptScheme.versionInfo.append("0.0.1")

In [36]:
import pandas as pd
import json

EXCEL_PATH = r"C:\Users\ecaudron001\Downloads\2024-10-08_D4W_taxxo-complete.xlsx"
NAMESPACE = "http://www.data4wallonia.be/Test-Taxonomy#"

# Open and read the JSON file
with open('excel_info.json', 'r', encoding="utf-8") as file:
    EXCEL_INFO = json.load(file)

# Read taxonomy from excel
taxo_excel = pd.read_excel(EXCEL_PATH)

# Add categories to the rdf
for level in range(int(EXCEL_INFO["highest level"]), int(EXCEL_INFO["lowest level"])):

    unique_concepts = taxo_excel.drop_duplicates(subset=f"Titre Catégorie L{level}")
    
    for index in unique_concepts.index:
        if level > 3: 
            add_concept(onto, NAMESPACE, unique_concepts.loc[index], level)
        elif level == 3: 
            add_topConcept(onto, NAMESPACE, unique_concepts.loc[index], level)
        else: 
            add_conceptScheme(onto, NAMESPACE, unique_concepts.loc[index], level)

In [37]:
#onto.bind("owl", "http://www.w3.org/2002/07/owl#", override=True)
onto.save(file = "d4w_taxo2", format = "rdfxml")

In [32]:
onto.destroy()