# Creating the WordNet Knowledge Base

In [18]:
import types
from nltk.corpus import wordnet as wn
from owlready2 import get_ontology, Thing, AnnotationProperty

onto = get_ontology("http://example.org/wordnet.owl")

with onto:
    # SKOS and DCTERMS annotation properties.
    #
    # owlready2 derives an entity's IRI from the base IRI of its namespace
    # plus the class name. Declaring the properties with `namespace = onto`
    # and a class-body `iri = "..."` attribute is not a documented way to pick
    # the IRI, so each property is instead placed in a namespace that carries
    # the vocabulary's own base IRI. The namespaces are obtained from `onto`,
    # which keeps the declarations part of this ontology.
    skos = onto.get_namespace("http://www.w3.org/2004/02/skos/core#")
    dcterms = onto.get_namespace("http://purl.org/dc/terms/")

    # skos:altLabel - spelling variants of the synset's lemmas
    class altLabel(AnnotationProperty):
        namespace = skos

    # skos:definition - the WordNet gloss
    class definition(AnnotationProperty):
        namespace = skos

    # skos:example - WordNet usage examples
    class example(AnnotationProperty):
        namespace = skos

    # skos:notation - the synset name, e.g. "dog.n.01"
    class notation(AnnotationProperty):
        namespace = skos

    # dcterms:identifier - the "<offset>-<pos>" WordNet ID
    class identifier(AnnotationProperty):
        namespace = dcterms

    # Cache by WordNet ID ("<offset>-<pos>" -> ontology class)
    synset_to_class = {}

    def normalize_variants(lemma: str):
        """Return the set of altLabel spellings derived from one lemma.

        The set holds the lemma exactly as given, the lemma with underscores
        turned into spaces, and the lowercase form of that spaced version.
        No capitalized or uppercase forms are generated. Identical spellings
        collapse, so the result has between one and three entries.
        """
        with_spaces = lemma.replace("_", " ")
        return {lemma, with_spaces, with_spaces.lower()}

    def get_or_create_class(synset):
        """Return the ontology class for a WordNet synset, creating it on first use.

        The class is named ``C_<offset>-<pos>`` and is a direct subclass of
        Thing. It receives a label (first lemma, underscores replaced by
        spaces), altLabel variants of every lemma except the label itself,
        the definition, the usage examples, the synset name as notation and
        the ``<offset>-<pos>`` ID as identifier.

        Annotation values are appended only when they are not present yet.
        ``get_ontology`` returns the already-created ontology for a known IRI
        while ``synset_to_class`` starts empty on every run of the cell, so
        re-running the cell in a live kernel would otherwise append every
        annotation a second time.

        Args:
            synset: an NLTK WordNet synset.

        Returns:
            The ontology class cached under the synset's WordNet ID.
        """
        wn_id = f"{synset.offset():08d}-{synset.pos()}"  # stable WordNet ID
        if wn_id in synset_to_class:
            return synset_to_class[wn_id]

        cls = types.new_class(f"C_{wn_id}", (Thing,))

        def add_missing(attr, candidates):
            """Append to annotation `attr` of `cls` every candidate it lacks."""
            for value in candidates:
                # Fetch the list once per value, as a plain `cls.attr.append` would.
                values = getattr(cls, attr)
                if value not in values:
                    values.append(value)

        # Labels and synonyms (dict.fromkeys drops repeated lemmas, keeps order)
        lemmas = list(dict.fromkeys(synset.lemma_names()))
        if lemmas:
            # main label as a string
            main_label = lemmas[0].replace("_", " ")
            cls.label = main_label

            # All lemmas (including first) -> altLabel variations
            alt_labels = set()
            for lemma in lemmas:
                alt_labels |= normalize_variants(lemma)

            # Remove the main label (so it doesn't appear both as rdfs:label and skos:altLabel)
            alt_labels.discard(main_label)

            # Sorted so the labels are written in a deterministic order
            add_missing("altLabel", sorted(alt_labels))

        # Definition (looked up once; skipped when empty)
        definition_text = synset.definition()
        if definition_text:
            add_missing("definition", [definition_text])

        # Examples
        add_missing("example", synset.examples())

        # Notation (WordNet synset name, e.g. "dog.n.01")
        add_missing("notation", [synset.name()])

        # Identifier (WordNet offset-pos)
        add_missing("identifier", [wn_id])

        synset_to_class[wn_id] = cls
        return cls


    # Build ontology: one class per synset, plus one superclass entry for each
    # hypernym link. Only `hypernyms()` is consulted here.
    for synset in list(wn.all_synsets()):
        cls = get_or_create_class(synset)
        parent_classes = (get_or_create_class(hyper) for hyper in synset.hypernyms())
        for parent_cls in parent_classes:
            # Skip parents that are already recorded for this class.
            if parent_cls in cls.is_a:
                continue
            cls.is_a.append(parent_cls)

# --- Cleanup redundant Thing axioms ---
# Every class is created as a direct subclass of Thing. Once it has at least
# one other superclass, the explicit Thing entry is dropped.
for cls in list(onto.classes()):
    superclasses = cls.is_a
    if len(superclasses) > 1 and Thing in superclasses:
        superclasses.remove(Thing)

# Serialize the ontology as RDF/XML into the working directory.
onto.save(file="wn.owl", format="rdfxml")
print("Ontology saved to wn.owl")


Ontology saved to wn.owl


In [12]:
# Two sample synsets reused by the following cell.
tree, car = (wn.synset(name) for name in ('tree.n.01', 'car.n.01'))
wn.synonyms('car')  # value is discarded: only the last expression of a cell is displayed
tree.hypernyms()

[Synset('woody_plant.n.01')]

In [13]:
# Print part meronyms, then substance meronyms, for each sample synset.
for sample in (tree, car):
    print(sample.part_meronyms())
    print(sample.substance_meronyms())

[Synset('stump.n.01'), Synset('crown.n.07'), Synset('burl.n.02'), Synset('trunk.n.01'), Synset('limb.n.02')]
[Synset('sapwood.n.01'), Synset('heartwood.n.01')]
[Synset('car_window.n.01'), Synset('grille.n.02'), Synset('accelerator.n.01'), Synset('car_mirror.n.01'), Synset('first_gear.n.01'), Synset('stabilizer_bar.n.01'), Synset('car_door.n.01'), Synset('bumper.n.02'), Synset('reverse.n.02'), Synset('car_seat.n.01'), Synset('high_gear.n.01'), Synset('window.n.02'), Synset('tail_fin.n.02'), Synset('third_gear.n.01'), Synset('running_board.n.01'), Synset('air_bag.n.01'), Synset('automobile_engine.n.01'), Synset('hood.n.09'), Synset('luggage_compartment.n.01'), Synset('roof.n.02'), Synset('gasoline_engine.n.01'), Synset('auto_accessory.n.01'), Synset('sunroof.n.01'), Synset('automobile_horn.n.01'), Synset('rear_window.n.01'), Synset('buffer.n.06'), Synset('fender.n.01'), Synset('glove_compartment.n.01'), Synset('floorboard.n.02')]
[]


# Ontology with Membership and HasPart relations

In [30]:
import types
from nltk.corpus import wordnet as wn
from owlready2 import get_ontology, Thing, AnnotationProperty, ObjectProperty

onto = get_ontology("http://example.org/wordnet.owl")

with onto:
    # SKOS and DCTERMS annotation properties.
    #
    # owlready2 derives an entity's IRI from the base IRI of its namespace
    # plus the class name. Declaring the properties with `namespace = onto`
    # and a class-body `iri = "..."` attribute is not a documented way to pick
    # the IRI, so each property is instead placed in a namespace that carries
    # the vocabulary's own base IRI. The namespaces are obtained from `onto`,
    # which keeps the declarations part of this ontology.
    skos = onto.get_namespace("http://www.w3.org/2004/02/skos/core#")
    dcterms = onto.get_namespace("http://purl.org/dc/terms/")

    # skos:altLabel - spelling variants of the synset's lemmas
    class altLabel(AnnotationProperty):
        namespace = skos

    # skos:definition - the WordNet gloss
    class definition(AnnotationProperty):
        namespace = skos

    # skos:example - WordNet usage examples
    class example(AnnotationProperty):
        namespace = skos

    # skos:notation - the synset name, e.g. "dog.n.01"
    class notation(AnnotationProperty):
        namespace = skos

    # dcterms:identifier - the "<offset>-<pos>" WordNet ID
    class identifier(AnnotationProperty):
        namespace = dcterms


    # --- Object properties for meronym/holonym relations ---
    # Three pairs of mutually inverse properties. In each pair the "...Of"
    # property is declared first so that the "has..." property can name it as
    # its inverse_property.

    # Subject is the part, value is the whole (add_partof fills this from
    # part meronyms as partOf[part] -> whole).
    class partOf(ObjectProperty):
        namespace = onto

    # Inverse of partOf: subject is the whole, value is the part.
    class hasPart(ObjectProperty):
        namespace = onto
        inverse_property = partOf

    # Subject is the substance, value is the thing made of it (add_partof
    # fills this from substance meronyms as substanceOf[substance] -> whole).
    class substanceOf(ObjectProperty):
        namespace = onto

    # Inverse of substanceOf.
    class hasSubstance(ObjectProperty):
        namespace = onto
        inverse_property = substanceOf

    # Subject is the member, value is the group (add_membership fills this
    # from member meronyms as memberOf[member] -> group).
    class memberOf(ObjectProperty):
        namespace = onto

    # Inverse of memberOf.
    class hasMember(ObjectProperty):
        namespace = onto
        inverse_property = memberOf

    # Cache by WordNet ID: maps the "<offset>-<pos>" string to the class
    # created for that synset. It is filled by get_or_create_class and starts
    # empty again each time this cell runs.
    synset_to_class = {}

    def normalize_variants(lemma: str):
        """Return the set of altLabel spellings derived from one lemma.

        The set holds the lemma exactly as given, the lemma with underscores
        turned into spaces, and the lowercase form of that spaced version.
        No capitalized or uppercase forms are generated. Identical spellings
        collapse, so the result has between one and three entries.
        """
        with_spaces = lemma.replace("_", " ")
        return {lemma, with_spaces, with_spaces.lower()}

    def get_or_create_class(synset):
        """Return the ontology class for a WordNet synset, creating it on first use.

        The class is named ``C_<offset>-<pos>`` and is a direct subclass of
        Thing. It receives a label (first lemma, underscores replaced by
        spaces), altLabel variants of every lemma except the label itself,
        the definition, the usage examples, the synset name as notation and
        the ``<offset>-<pos>`` ID as identifier.

        Annotation values are appended only when they are not present yet.
        ``get_ontology`` returns the already-created ontology for a known IRI
        while ``synset_to_class`` starts empty on every run of the cell, so
        running this cell in a kernel that already built the ontology would
        otherwise append every annotation a second time.

        Args:
            synset: an NLTK WordNet synset.

        Returns:
            The ontology class cached under the synset's WordNet ID.
        """
        wn_id = f"{synset.offset():08d}-{synset.pos()}"  # stable WordNet ID
        if wn_id in synset_to_class:
            return synset_to_class[wn_id]

        cls = types.new_class(f"C_{wn_id}", (Thing,))

        def add_missing(attr, candidates):
            """Append to annotation `attr` of `cls` every candidate it lacks."""
            for value in candidates:
                # Fetch the list once per value, as a plain `cls.attr.append` would.
                values = getattr(cls, attr)
                if value not in values:
                    values.append(value)

        # Labels and synonyms (dict.fromkeys drops repeated lemmas, keeps order)
        lemmas = list(dict.fromkeys(synset.lemma_names()))
        if lemmas:
            # main label as a string
            main_label = lemmas[0].replace("_", " ")
            cls.label = main_label

            # All lemmas (including first) -> altLabel variations
            alt_labels = set()
            for lemma in lemmas:
                alt_labels |= normalize_variants(lemma)

            # Remove the main label (so it doesn't appear both as rdfs:label and skos:altLabel)
            alt_labels.discard(main_label)

            # Sorted so the labels are written in a deterministic order
            add_missing("altLabel", sorted(alt_labels))

        # Definition (looked up once; skipped when empty)
        definition_text = synset.definition()
        if definition_text:
            add_missing("definition", [definition_text])

        # Examples
        add_missing("example", synset.examples())

        # Notation (WordNet synset name, e.g. "dog.n.01")
        add_missing("notation", [synset.name()])

        # Identifier (WordNet offset-pos)
        add_missing("identifier", [wn_id])

        synset_to_class[wn_id] = cls
        return cls


    # Build ontology: one class per synset, plus one superclass entry for each
    # hypernym link. Only `hypernyms()` is consulted here.
    for synset in list(wn.all_synsets()):
        cls = get_or_create_class(synset)
        parent_classes = (get_or_create_class(hyper) for hyper in synset.hypernyms())
        for parent_cls in parent_classes:
            # Skip parents that are already recorded for this class.
            if parent_cls in cls.is_a:
                continue
            cls.is_a.append(parent_cls)

# --- Add part-of relations ---
def add_partof():
    """Assert part and substance relations between the synset classes.

    Direction convention: ``partOf[part] -> whole`` with the inverse
    ``hasPart[whole] -> part``, and likewise ``substanceOf[substance] -> whole``
    with ``hasSubstance[whole] -> substance``.

    * Meronyms of a synset are its parts/substances, so the synset's class is
      the whole.
    * Holonyms of a synset are the wholes it belongs to, so the synset's
      class is the part/substance. Treating it as the whole here would state
      the relation backwards.

    The meronym and holonym passes can state the same fact from both ends,
    so a value is appended only when it is not already present.

    Raises:
        KeyError: if a synset has no entry in ``synset_to_class`` (the build
            loop has not been run). Related synsets without a class are
            skipped silently.
    """
    def class_for(related):
        """Return the cached class of a related synset, or None if absent."""
        return synset_to_class.get(f"{related.offset():08d}-{related.pos()}")

    def link(forward, inverse, source, target):
        """Record `source forward target` and `target inverse source` once each."""
        forward_values = forward[source]
        if target not in forward_values:
            forward_values.append(target)
        # Looked up after the forward append so an inverse value that is
        # already present is not added twice.
        inverse_values = inverse[target]
        if source not in inverse_values:
            inverse_values.append(source)

    for synset in wn.all_synsets():
        wn_id = f"{synset.offset():08d}-{synset.pos()}"
        cls = synset_to_class[wn_id]

        # Part meronyms -> cls_part partOf cls
        for m in synset.part_meronyms():
            part_cls = class_for(m)
            if part_cls is not None:
                link(partOf, hasPart, part_cls, cls)

        # Substance meronyms -> cls_substance substanceOf cls
        for m in synset.substance_meronyms():
            substance_cls = class_for(m)
            if substance_cls is not None:
                link(substanceOf, hasSubstance, substance_cls, cls)

        # Part holonyms -> cls partOf cls_whole
        for h in synset.part_holonyms():
            whole_cls = class_for(h)
            if whole_cls is not None:
                link(partOf, hasPart, cls, whole_cls)

        # Substance holonyms -> cls substanceOf cls_whole
        for h in synset.substance_holonyms():
            whole_cls = class_for(h)
            if whole_cls is not None:
                link(substanceOf, hasSubstance, cls, whole_cls)

# --- Add membership relations ---
def add_membership():
    """Assert membership relations between the synset classes.

    Direction convention: ``memberOf[member] -> group`` with the inverse
    ``hasMember[group] -> member``.

    * Member meronyms of a synset are its members, so the synset's class is
      the group.
    * Member holonyms of a synset are the groups it belongs to, so the
      synset's class is the member. Recording ``hasMember[cls] -> group``
      here would state the relation backwards.

    Both directions of the inverse pair are recorded, matching add_partof.
    The two passes can state the same fact from both ends, so a value is
    appended only when it is not already present.

    Raises:
        KeyError: if a synset has no entry in ``synset_to_class`` (the build
            loop has not been run). Related synsets without a class are
            skipped silently.
    """
    def class_for(related):
        """Return the cached class of a related synset, or None if absent."""
        return synset_to_class.get(f"{related.offset():08d}-{related.pos()}")

    def link_member(member_cls, group_cls):
        """Record `member memberOf group` and `group hasMember member` once each."""
        groups = memberOf[member_cls]
        if group_cls not in groups:
            groups.append(group_cls)
        # Looked up after the memberOf append so an inverse value that is
        # already present is not added twice.
        members = hasMember[group_cls]
        if member_cls not in members:
            members.append(member_cls)

    for synset in wn.all_synsets():
        wn_id = f"{synset.offset():08d}-{synset.pos()}"
        cls = synset_to_class[wn_id]

        # Member meronyms -> cls_member memberOf cls
        for m in synset.member_meronyms():
            member_cls = class_for(m)
            if member_cls is not None:
                link_member(member_cls, cls)

        # Member holonyms -> cls memberOf cls_group
        for h in synset.member_holonyms():
            group_cls = class_for(h)
            if group_cls is not None:
                link_member(cls, group_cls)

# Populate the object properties: part/substance relations first, then membership.
for populate in (add_partof, add_membership):
    populate()


# --- Cleanup redundant Thing axioms ---
# Every class is created as a direct subclass of Thing. Once it has at least
# one other superclass, the explicit Thing entry is dropped.
for cls in list(onto.classes()):
    superclasses = cls.is_a
    if len(superclasses) > 1 and Thing in superclasses:
        superclasses.remove(Thing)

# Serialize the ontology as RDF/XML into the working directory.
onto.save(file="wn_full.owl", format="rdfxml")
print("Ontology saved to wn_full.owl")


Ontology saved to wn_full.owl
