In [7]:
from rdflib import Graph, RDF, RDFS, OWL, BNode, Literal, URIRef, Namespace
from collections import defaultdict
import pandas as pd

SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")

# --------------------------------------------------
# Einstellungen
# --------------------------------------------------

TTL_PATH = "ontology.ttl"  # Pfad zu eurer Ontologie-Datei

ONLY_LOCAL_PREFIX = "https://agriculture.ld.admin.ch/plant-protection/"
# → setze auf None, wenn du ALLE Klassen willst (inkl. schema/chebi/wikidata)

INCLUDE_LABELS = True
OUTPUT_CSV = "ontology_class_docs_full.csv"

LANGS = ["de", "en", "fr", "it"]

# Wenn True: anonyme OWL-Ausdrücke (Restrictions, unionOf, oneOf, ...) als Text exportieren
INCLUDE_ANON_EXPRESSIONS = True

SKOS = Namespace("http://www.w3.org/2004/02/skos/core#")

# --------------------------------------------------
# Hilfsfunktionen
# --------------------------------------------------

def literal_map(literals):
    """Mappt Literale nach Sprache (de/en/fr/it)"""
    out = defaultdict(list)
    for lit in literals:
        if isinstance(lit, Literal) and lit.language in LANGS:
            txt = str(lit).strip()
            if txt:
                out[lit.language].append(txt)
    return out

def join_vals(values):
    """Mehrere Werte stabil und CSV-freundlich zusammenführen"""
    return "\n---\n".join(dict.fromkeys(values))

def as_text(node):
    """Kompakte Textdarstellung für CSV (IRI oder Literal oder Blank Node)"""
    if isinstance(node, URIRef):
        return str(node)
    if isinstance(node, Literal):
        return str(node).strip()
    if isinstance(node, BNode):
        return f"_:{str(node)}"
    return str(node)

def rdf_list_items(g, head):
    """Liest RDF-Listen (rdf:first/rest)"""
    items = []
    cur = head
    while cur and cur != RDF.nil:
        first = next(g.objects(cur, RDF.first), None)
        if first is None:
            break
        items.append(first)
        cur = next(g.objects(cur, RDF.rest), None)
    return items

def describe_anon_expr(g, bnode):
    """
    Versucht anonyme OWL-Ausdrücke knapp zu beschreiben:
    - owl:Restriction
    - owl:unionOf
    - owl:oneOf
    Sonst: generischer Platzhalter
    """
    # Restriction?
    if (bnode, RDF.type, OWL.Restriction) in g:
        on_prop = next(g.objects(bnode, OWL.onProperty), None)
        card = next(g.objects(bnode, OWL.cardinality), None)
        minc = next(g.objects(bnode, OWL.minCardinality), None)
        maxc = next(g.objects(bnode, OWL.maxCardinality), None)
        allv = next(g.objects(bnode, OWL.allValuesFrom), None)
        somev = next(g.objects(bnode, OWL.someValuesFrom), None)

        parts = []
        if on_prop: parts.append(f"onProperty {as_text(on_prop)}")
        if card:   parts.append(f"cardinality {as_text(card)}")
        if minc:   parts.append(f"minCardinality {as_text(minc)}")
        if maxc:   parts.append(f"maxCardinality {as_text(maxc)}")
        if allv:   parts.append(f"allValuesFrom {as_text(allv)}")
        if somev:  parts.append(f"someValuesFrom {as_text(somev)}")

        return "Restriction(" + "; ".join(parts) + ")" if parts else "Restriction"

    # unionOf?
    union = next(g.objects(bnode, OWL.unionOf), None)
    if union:
        items = [as_text(x) for x in rdf_list_items(g, union)]
        return "unionOf(" + ", ".join(items) + ")"

    # oneOf?
    oneof = next(g.objects(bnode, OWL.oneOf), None)
    if oneof:
        items = [as_text(x) for x in rdf_list_items(g, oneof)]
        return "oneOf(" + ", ".join(items) + ")"

    # Some ontologies use intersections etc. (optional)
    inter = next(g.objects(bnode, OWL.intersectionOf), None)
    if inter:
        items = [as_text(x) for x in rdf_list_items(g, inter)]
        return "intersectionOf(" + ", ".join(items) + ")"

    return "AnonymousClassExpression"


def export_multilang_literal_cols(g, subject, predicate, prefix):
    """
    Gibt dict mit prefix_de/en/fr/it und prefix_nolang zurück
    für ein gegebenes subject/predicate.
    """
    lits = list(g.objects(subject, predicate))
    lm = literal_map(lits)
    out = {}
    for lang in LANGS:
        out[f"{prefix}_{lang}"] = join_vals(lm.get(lang, []))
    nolang = [
        str(l).strip()
        for l in lits
        if isinstance(l, Literal) and l.language is None and str(l).strip()
    ]
    out[f"{prefix}_nolang"] = join_vals(nolang)
    return out


def export_named_and_anon(g, subject, predicate, col_named, col_anon):
    """
    Exportiert Objektwerte eines Prädikats getrennt:
    - benannte (URI/Literal)
    - anonyme (BNode) → kurze Beschreibung
    """
    named = []
    anon = []
    for obj in g.objects(subject, predicate):
        if isinstance(obj, BNode):
            if INCLUDE_ANON_EXPRESSIONS:
                anon.append(describe_anon_expr(g, obj))
        else:
            named.append(as_text(obj))
    return {
        col_named: join_vals(named),
        col_anon: join_vals(anon),
    }


# --------------------------------------------------
# Ontologie laden
# --------------------------------------------------

g = Graph()
g.parse(TTL_PATH, format="turtle")

rows = []

# --------------------------------------------------
# Klassen extrahieren
# --------------------------------------------------

for cls in sorted(set(g.subjects(RDF.type, OWL.Class)), key=lambda x: str(x)):
    if isinstance(cls, BNode):
        continue

    iri = str(cls)
    if ONLY_LOCAL_PREFIX and not iri.startswith(ONLY_LOCAL_PREFIX):
        continue

    row = {"class_iri": iri}

    # rdfs:label
    if INCLUDE_LABELS:
        row.update(export_multilang_literal_cols(g, cls, RDFS.label, "label"))

    # rdfs:comment
    row.update(export_multilang_literal_cols(g, cls, RDFS.comment, "comment"))

    # rdfs:scopeNote
    row.update(export_multilang_literal_cols(g, cls, SKOS.scopeNote, "scopeNote"))

    # rdfs:seeAlso
    see_also = list(g.objects(cls, RDFS.seeAlso))
    row["seeAlso"] = join_vals([as_text(x) for x in see_also])

    # rdfs:isDefinedBy
    is_def = list(g.objects(cls, RDFS.isDefinedBy))
    row["isDefinedBy"] = join_vals([as_text(x) for x in is_def])

    # rdfs:subClassOf (named + anonymous)
    row.update(export_named_and_anon(g, cls, RDFS.subClassOf, "subClassOf_named", "subClassOf_anonymous"))

    # owl:disjointWith (named + anonymous)
    row.update(export_named_and_anon(g, cls, OWL.disjointWith, "disjointWith_named", "disjointWith_anonymous"))

    # owl:equivalentClass (named + anonymous)
    row.update(export_named_and_anon(g, cls, OWL.equivalentClass, "equivalentClass_named", "equivalentClass_anonymous"))

    # skos:altLabel (mehrsprachig)
    row.update(export_multilang_literal_cols(g, cls, SKOS.altLabel, "altLabel"))

    # skos:definition (mehrsprachig)
    row.update(export_multilang_literal_cols(g, cls, SKOS.definition, "definition"))

    rows.append(row)

df = pd.DataFrame(rows).sort_values("class_iri").reset_index(drop=True)
df.to_csv(OUTPUT_CSV, index=False, encoding="utf-8")

df, f"{len(df)} Klassen exportiert → {OUTPUT_CSV}"


(                                            class_iri  \
 0   https://agriculture.ld.admin.ch/plant-protecti...   
 1   https://agriculture.ld.admin.ch/plant-protecti...   
 2   https://agriculture.ld.admin.ch/plant-protecti...   
 3   https://agriculture.ld.admin.ch/plant-protecti...   
 4   https://agriculture.ld.admin.ch/plant-protecti...   
 5   https://agriculture.ld.admin.ch/plant-protecti...   
 6   https://agriculture.ld.admin.ch/plant-protecti...   
 7   https://agriculture.ld.admin.ch/plant-protecti...   
 8   https://agriculture.ld.admin.ch/plant-protecti...   
 9   https://agriculture.ld.admin.ch/plant-protecti...   
 10  https://agriculture.ld.admin.ch/plant-protecti...   
 11  https://agriculture.ld.admin.ch/plant-protecti...   
 12  https://agriculture.ld.admin.ch/plant-protecti...   
 13  https://agriculture.ld.admin.ch/plant-protecti...   
 14  https://agriculture.ld.admin.ch/plant-protecti...   
 15  https://agriculture.ld.admin.ch/plant-protecti...   
 16  https://a

In [None]:
#!conda install -y rdflib

Retrieving notices: ...working... done
Channels:
 - defaults
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/martin.hertach@blv.admin.ch/miniconda3/envs/lumpy-skin

  added / updated specs:
    - rdflib


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2025.12.2  |       h06a4308_0         125 KB
    certifi-2026.01.04         |  py313h06a4308_0         149 KB
    rdflib-7.4.0               |  py313h06a4308_0         3.9 MB
    ------------------------------------------------------------
                                           Total:         4.2 MB

The following NEW packages will be INSTALLED:

  rdflib             pkgs/main/linux-64::rdflib-7.4.0-py313h06a4308_0 

The following packages will be UPDATED:

  ca-certificates    conda-forge/noarch::ca-certific