In [3]:
from SPARQLWrapper import SPARQLWrapper, JSON
from sentence_transformers import SentenceTransformer, util

# Rank Wikidata items whose English label equals `headword` by how closely
# their English description matches `definition`, using sentence-embedding
# cosine similarity, then print the candidates from best to worst match.

# Load a pre-trained Sentence Transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')  # Lightweight and fast

# Define your SPARQL endpoint and query
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
headword = "Lund"
definition = "city in the province of Scania"

# Escape backslashes and double quotes so that a headword containing them
# cannot terminate the SPARQL string literal early or alter the query.
escaped_headword = headword.replace("\\", "\\\\").replace('"', '\\"')

query = f"""
SELECT ?item ?itemLabel ?description WHERE {{
  ?item rdfs:label "{escaped_headword}"@en.
  ?item schema:description ?description.
  FILTER(LANG(?description) = "en").
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
}}
LIMIT 10
"""
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Extract results: one dict per binding with the entity URI, label and
# English description.
items = [
    {
        "uri": result["item"]["value"],
        "label": result["itemLabel"]["value"],
        "description": result["description"]["value"],
    }
    for result in results["results"]["bindings"]
]

descriptions = [item["description"] for item in items]

if descriptions:
    # Encode definition and descriptions into semantic embeddings
    definition_embedding = model.encode(definition, convert_to_tensor=True)
    description_embeddings = model.encode(descriptions, convert_to_tensor=True)

    # Compute cosine similarity between definition and descriptions.
    # cos_sim returns a (1, n) matrix for one query against n descriptions;
    # take row 0 instead of squeeze() so that a single result (n == 1) still
    # yields an iterable 1-D tensor rather than a 0-d scalar.
    cosine_scores = util.cos_sim(definition_embedding, description_embeddings)[0]

    # Attach scores to items and sort by similarity
    for item, score in zip(items, cosine_scores):
        item["score"] = score.item()
    items = sorted(items, key=lambda x: x["score"], reverse=True)

    # Display results
    for item in items:
        print(f"URI: {item['uri']}")
        print(f"Label: {item['label']}")
        print(f"Description: {item['description']}")
        print(f"Score: {item['score']:.4f}")
        print()
else:
    # Nothing to rank: skip encoding and similarity on an empty list.
    print(f"No Wikidata items found with English label {headword!r}.")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

URI: http://www.wikidata.org/entity/Q2167
Label: Lund
Description: city in the southern Swedish province of Scania
Score: 0.7734

URI: http://www.wikidata.org/entity/Q2636551
Label: Lund
Description: town in Nevada, USA
Score: 0.3837

URI: http://www.wikidata.org/entity/Q2110120
Label: Lund
Description: småort in Järfälla Municipality, Sweden
Score: 0.2177

URI: http://www.wikidata.org/entity/Q103952968
Label: Lund
Description: encyclopedia article
Score: 0.2170

URI: http://www.wikidata.org/entity/Q2275546
Label: Lund
Description: former småort in Skellefteå Municipality, Sweden
Score: 0.2144

URI: http://www.wikidata.org/entity/Q231412
Label: Lund
Description: Wikimedia disambiguation page
Score: 0.2103

URI: http://www.wikidata.org/entity/Q105590218
Label: Lund
Description: unisex given name
Score: 0.1499

URI: http://www.wikidata.org/entity/Q29599
Label: Lund
Description: family name
Score: 0.1485

URI: http://www.wikidata.org/entity/Q2599743
Label: Lund
Description: village and ci