In [1]:
from gensim.models import KeyedVectors
from SPARQLWrapper import SPARQLWrapper, JSON

# Shared client for the Odeuropa SPARQL endpoint. JSON is requested so that
# queryAndConvert() hands back nested dicts/lists rather than an XML document.
sparql = SPARQLWrapper(endpoint="http://data.odeuropa.eu/repositories/odeuropa")
sparql.setReturnFormat(JSON)

In [2]:
def _label_rank(literal):
    """Sort key ranking a SPARQL-JSON literal by how preferable its language is.

    Order of preference: literals without a language tag, then English
    (``en``), then every other language alphabetically by tag.

    The key is always a tuple so that tagged and untagged literals stay
    mutually comparable; mixing ``int`` and ``str`` keys raises ``TypeError``
    on Python 3.
    """
    lang = literal.get('xml:lang')
    if lang is None:
        return (0, '')
    if lang == 'en':
        return (1, '')
    return (2, lang)


def label(uri, default="smell"):
    """Return a human-readable label for ``uri`` from the SPARQL endpoint.

    Looks up the ``skos:prefLabel`` / ``rdfs:label`` values of the resource
    and returns the most preferable one (see ``_label_rank``). When several
    labels share the best rank, the first one returned by the endpoint wins.

    Parameters
    ----------
    uri : str
        IRI of the resource. It is interpolated verbatim between ``<`` and
        ``>`` in the query, so it must be a plain IRI without angle brackets.
    default : str, optional
        Value returned when the endpoint has no label for ``uri``.
        Defaults to ``"smell"``.

    Returns
    -------
    str
        The lexical value of the chosen label, or ``default``.
    """
    # NOTE(review): `FROM onto:explicit` presumably limits the lookup to
    # explicitly asserted (non-inferred) statements on GraphDB — confirm.
    q = '''
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX onto: <http://www.ontotext.com/>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

        SELECT ?label
        FROM onto:explicit
        WHERE {
            <%s> skos:prefLabel|rdfs:label ?label
        }
    ''' % uri
    sparql.setQuery(q)
    ret = sparql.queryAndConvert()
    candidates = [binding['label'] for binding in ret['results']['bindings']]
    if not candidates:
        return default
    # min() returns the first minimal element, i.e. the same item a stable
    # sort would put at index 0, without reordering the list.
    return min(candidates, key=_label_rank)['value']

In [3]:
# Vocabulary embeddings in plain-text word2vec layout; the file is loaded with
# no_header=True, i.e. without a leading "<count> <dimensions>" line.
voc_emb_path = './embeddings/voc/embeddings.txt'
voc_emb = KeyedVectors.load_word2vec_format(
    fname=voc_emb_path,
    no_header=True,
    binary=False,
)

In [4]:
# Ten nearest neighbours of one vocabulary concept, rendered as
# "similarity   URI   label" strings.
res = voc_emb.most_similar(
    'http://data.odeuropa.eu/vocabulary/olfactory-objects/269',  # incense
    topn=10,
)
['%.4f   %s   %s' % (score, uri, label(uri)) for uri, score in res]

['0.9865   http://data.odeuropa.eu/vocabulary/olfactory-objects/399   Reukwerk',
 '0.9706   http://data.odeuropa.eu/vocabulary/olfactory-objects/172   Patchouli',
 '0.9704   http://data.odeuropa.eu/vocabulary/olfactory-objects/25   Candle',
 '0.9695   http://data.odeuropa.eu/vocabulary/olfactory-objects/48   Geosmin',
 '0.9690   http://data.odeuropa.eu/vocabulary/olfactory-objects/245   Wet earth',
 '0.9678   http://data.odeuropa.eu/vocabulary/olfactory-objects/533   Body',
 '0.9668   http://vocab.getty.edu/aat/300011851   parchment (animal material)',
 '0.9660   http://data.odeuropa.eu/vocabulary/olfactory-objects/15   Beer',
 '0.9648   http://data.odeuropa.eu/vocabulary/olfactory-gestures/26   Garbage collecting',
 '0.9646   http://data.odeuropa.eu/vocabulary/olfactory-objects/166   Vomit']

In [5]:
# Smell embeddings in plain-text word2vec layout; the file is loaded with
# no_header=True, i.e. without a leading "<count> <dimensions>" line.
smell_emb_path = './embeddings/smells/embeddings.txt'
smell_emb = KeyedVectors.load_word2vec_format(
    fname=smell_emb_path,
    no_header=True,
    binary=False,
)

In [6]:
# Ten nearest neighbours of one smell instance, rendered as
# "similarity   URI   label" strings.
res = smell_emb.most_similar(
    'http://data.odeuropa.eu/smell/b6cdd9fe-a1a1-5aa3-bf4c-162a5c2d1ead',
    topn=10,
)
['%.4f   %s   %s' % (score, uri, label(uri)) for uri, score in res]

['0.4490   http://data.odeuropa.eu/smell/3943e0da-86c5-5a89-9d5e-3dcaf2b2cc35   odours',
 '0.4081   http://data.odeuropa.eu/smell/e6bf7e6d-3ac7-5b1f-a662-e45813eaa2cd   smell',
 '0.4077   http://data.odeuropa.eu/smell/e40db11a-1250-5d92-bbf0-34359070b89b   stoffel',
 '0.4021   http://data.odeuropa.eu/smell/d8b49a00-d3be-5dd1-bd20-8b564b929ce0   smell',
 '0.4000   http://data.odeuropa.eu/smell/d83c1772-b006-5931-ba2e-937dc1ecb4c5   stinke',
 '0.3855   http://data.odeuropa.eu/smell/14e51303-d75e-5b38-b5b9-394a1d2f0e60   scent',
 '0.3808   http://data.odeuropa.eu/smell/c8d43d04-56ac-5949-bdcd-e0c1be33e5a9   fragrance',
 '0.3803   http://data.odeuropa.eu/smell/5e037566-5c74-5829-85a2-92cab03682f8   odour',
 '0.3794   http://data.odeuropa.eu/smell/1a1bc6df-6938-51d8-a59f-2cf1f0685dfa   Embassadour',
 '0.3785   http://data.odeuropa.eu/smell/061a7641-cfd8-5bf6-8187-4870f88301e4   odour']