# Loading the animal ontology into Python

Our goal here is to:
- load the ontology from the owl file
- extract a list of animals and their features
- create a mapping from features to numbers
- map each species name to a feature vector
- create a function that takes a feature vector and returns a species name

In [56]:
import owlready2
import os
import itertools
import numpy

# Add the current working directory (as an absolute path) to owlready2's
# ontology search path.
working_dir = os.path.abspath(".")
owlready2.onto_path.append(working_dir)

# Resolve the animal ontology by its IRI and load its contents.
animals_onto = owlready2.get_ontology(
    "https://raw.githubusercontent.com/Flo3171/DS51_Project/master/code/animal_ontologie.rdf"
).load()

In [67]:
# Look up the root `Animal` class once; it is used both as the search root and
# to recognise (and skip) the root itself in the loop below.
animal_root = animals_onto["Animal"]
animals = animals_onto.search(subclass_of = animal_root)
properties = animals_onto.search(subclass_of = animals_onto["a_pour_caractéristique"])

# Species name -> set of feature names found on that species' class.
animal_features = {}

for animal in animals:
    # The root class is not a species of its own.
    if animal == animal_root:
        continue

    animal_features[animal.name] = species_features = set()
    # Each property is, for instance, `a_sur_la_peau`.
    for prop in animal.get_class_properties():
        # Value(s) of the property defined for this class, for instance `Fourrure`.
        values = prop[animal]
        # Exactly one value per property is expected.
        assert len(values) == 1
        species_features.add(values[0].name)

# Union of every feature name seen on any species.
all_features = {
    feature
    for feature_set in animal_features.values()
    for feature in feature_set
}
print(all_features)

# Remove animals with no features (optional)
# animal_features = dict(filter(lambda animal: len(animal[1]) > 0, animal_features.items()))
animal_features

{'Terrestre', 'Aérien', 'Poiles', 'Omnivore', 'Carnivore', 'Ecailles', 'Plumes', 'Marin'}


{'Aigle': {'Aérien', 'Carnivore', 'Plumes'},
 'Loup': {'Carnivore', 'Poiles', 'Terrestre'},
 'Ours': {'Omnivore', 'Poiles', 'Terrestre'},
 'Passerin': {'Aérien', 'Omnivore', 'Plumes'},
 'Poisson_Rouge': {'Ecailles', 'Marin', 'Omnivore'},
 'Requin': {'Carnivore', 'Ecailles', 'Marin'}}

In [68]:
# Assign a column index to each feature name.
# NOTE: `all_features` is a set, so the numbering follows its iteration order.
feature_indices = dict(enumerate(all_features))


# Generate the class name -> feature vector dict
def get_vector(features: set[str]) -> numpy.ndarray:
    """Encode a set of feature names as a 0/1 vector.

    Args:
        features: names of the features to switch on.

    Returns:
        A float array with one entry per known feature; entry ``i`` is 1.0
        when ``feature_indices[i]`` is in ``features`` and 0.0 otherwise.
    """
    res = numpy.zeros(len(feature_indices))
    for index, name in feature_indices.items():
        if name in features:
            res[index] = 1

    return res


# Species name -> feature vector, built from the `animal_features` mapping
# produced by the previous cell.
feature_dict = {name: get_vector(features) for name, features in animal_features.items()}
feature_dict

{'Aigle': array([0., 1., 0., 0., 1., 0., 1., 0.]),
 'Loup': array([1., 0., 1., 0., 1., 0., 0., 0.]),
 'Ours': array([1., 0., 1., 1., 0., 0., 0., 0.]),
 'Passerin': array([0., 1., 0., 1., 0., 0., 1., 0.]),
 'Poisson_Rouge': array([0., 0., 0., 1., 0., 1., 0., 1.]),
 'Requin': array([0., 0., 0., 0., 1., 1., 0., 1.])}

In [96]:
# Method 1: round the input with numpy.round and return the first animal whose
# feature vector is exactly equal to the rounded input.
# The lookup below scans every animal (n animals, c features each). The
# original author notes it could be implemented in O(max(c, log(n))) time with
# careful sorting of the features and a binary tree.
def get_animal_from_features(input_features: list[float]) -> str | None:
    """Return the name of the animal matching the rounded input, if any.

    Args:
        input_features: one value per feature, in `feature_dict` vector order.

    Returns:
        The first key of `feature_dict` whose vector equals the rounded
        input, or None when no vector matches.
    """
    target = numpy.round(input_features)
    matching_names = (
        name
        for name, vector in feature_dict.items()
        if numpy.array_equal(vector, target)
    )
    return next(matching_names, None)

# Sanity check: an animal's own vector maps back to that animal.
get_animal_from_features(feature_dict["Aigle"])

'Aigle'

In [97]:
import scipy
# A bare `import scipy` is not guaranteed to expose the `spatial` subpackage on
# every SciPy version, so the submodule is imported explicitly.
import scipy.spatial.distance

# Method 2: find the animal whose vector has the least euclidean distance to the input.
# The lookup computes the distance to every animal (n animals, c features each).
# The original author notes it could be optimized down to O(log(n) * c) by
# applying pruning to a tree traversal algorithm.
names_list = list(feature_dict.keys())
# One row per animal, in the same order as `names_list`.
features_list = numpy.array(list(feature_dict.values()))

def get_animal_from_features2(input_features: list[float]) -> str:
    """Return the name of the animal closest to `input_features`.

    Args:
        input_features: one value per feature, in `feature_dict` vector order.

    Returns:
        The entry of `names_list` whose row in `features_list` has the
        smallest euclidean distance to the input.
    """
    # `dists` has one row per animal and a single column (one query vector),
    # so the flat argmin is the row index of the closest animal.
    dists = scipy.spatial.distance.cdist(features_list, [input_features], metric="euclidean")

    return names_list[numpy.argmin(dists)]

# Sanity check: an animal's own vector maps back to that animal.
get_animal_from_features2(feature_dict["Aigle"])

'Aigle'

## Putting it all together

In [2]:
import owlready2
import itertools
import numpy
import scipy
import typing

# Returns the mapping dictionary from species name to feature vector,
# a function that returns the closest species given a feature vector,
# and the names of each feature.
def load_features_mapping(
    iri: str = "https://raw.githubusercontent.com/Flo3171/DS51_Project/master/code/animal_ontologie.rdf"
) -> typing.Tuple[
    dict[str, numpy.ndarray],
    typing.Callable[[numpy.ndarray], str],
    dict[int, str]
]:
    """Load the animal ontology and build the species <-> feature-vector mapping.

    Args:
        iri: IRI of the ontology to load.

    Returns:
        A 3-tuple ``(feature_dict, get_animal_from_features2, feature_indices)``:

        * ``feature_dict`` maps each species name to its 0/1 feature vector.
        * ``get_animal_from_features2`` takes a feature vector and returns the
          name of the species whose vector has the smallest euclidean
          distance to it.
        * ``feature_indices`` maps each vector position to its feature name.
          Feature names are sorted, so positions are stable between runs.

    Raises:
        ValueError: if a class property of an animal does not carry exactly
            one value.
    """
    # Imported explicitly: a bare `import scipy` is not guaranteed to expose
    # the `spatial` subpackage on every SciPy version.
    from scipy.spatial.distance import cdist

    # The ontology must be loaded before it can be queried.
    animals_onto = owlready2.get_ontology(iri).load()

    animal_root = animals_onto["Animal"]
    animals = animals_onto.search(subclass_of = animal_root)

    # Species name -> set of feature names.
    animal_features = dict()

    for animal in animals:
        # The root class is not a species of its own.
        if animal == animal_root:
            continue

        animal_features[animal.name] = set()
        # Prop is for instance `a_sur_la_peau`
        for prop in animal.get_class_properties():
            # Get the restriction defined for the class, for instance `Fourrure`
            prop_range_restriction = prop[animal]
            if len(prop_range_restriction) != 1:
                raise ValueError(
                    f"expected exactly one value for property {prop.name!r} "
                    f"on {animal.name!r}, found {len(prop_range_restriction)}"
                )
            feature = prop_range_restriction[0].name
            # store it
            animal_features[animal.name].add(feature)

    # Get the set of all features found
    all_features = set(itertools.chain.from_iterable(animal_features.values()))

    # Assign a number to each feature. Sorting makes the numbering independent
    # of set iteration order.
    feature_indices = dict(enumerate(sorted(all_features)))

    # Generate the class name -> feature vector dict
    def get_vector(features: set[str]) -> numpy.ndarray:
        res = numpy.zeros(len(feature_indices))
        for index, name in feature_indices.items():
            if name in features:
                res[index] = 1

        return res

    feature_dict = {name: get_vector(features) for name, features in animal_features.items()}

    names_list = list(feature_dict.keys())
    # One row per species, in the same order as `names_list`.
    features_list = numpy.array(list(feature_dict.values()))

    def get_animal_from_features2(input_features: list[float]) -> str:
        # `dists` has one row per species and a single column, so the flat
        # argmin is the row index of the closest species.
        dists = cdist(features_list, [input_features], metric="euclidean")

        return names_list[numpy.argmin(dists)]

    return (feature_dict, get_animal_from_features2, feature_indices)