In [2]:
import spacy
from SPARQLWrapper import SPARQLWrapper, JSON

# Load the English NLP model from spaCy
nlp = spacy.load("en_core_web_sm")

# Function to convert natural language to a SPARQL query for DBpedia
def convert_to_sparql_query(query_text):
    # Perform natural language processing on the query
    doc = nlp(query_text)

    # Extract entities and relationships from the query
    entities = [token.text for token in doc if token.ent_type_]
    relationships = [token.text for token in doc if token.dep_ == "ROOT"]

    # Build a SPARQL query template
    sparql_query_template = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dbo: <http://dbpedia.org/ontology/>

        SELECT ?subject ?property ?object
        WHERE {{
            ?subject dbo:{entity} ?object.
            OPTIONAL {{ ?object ?property ?value. }}
        }}
        LIMIT 1
    """

    # Populate the template with the extracted entities
    sparql_query = sparql_query_template.format(entity=entities[0])

    return sparql_query

# Function to execute the SPARQL query and return the result
def execute_sparql_query(query):
    # Set up the SPARQL endpoint for DBpedia
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Execute the query
    results = sparql.query().convert()

    return results['results']['bindings']

# Example usage
query_text = "What is the capital of France?"
sparql_query = convert_to_sparql_query(query_text)
query_result = execute_sparql_query(sparql_query)

# Print the result
for result in query_result:
    subject = result['subject']['value'].split('/')[-1]
    property_name = result.get('property', {}).get('value', '').split('/')[-1]
    object_value = result['object']['value'].split('/')[-1]
    print(f"{subject} {property_name}: {object_value}")


In [9]:
import spacy
from SPARQLWrapper import SPARQLWrapper, JSON

# Load the English NLP model from spaCy
nlp = spacy.load("en_core_web_sm")

# Function to process a natural language query and return the results from DBpedia
def query_dbpedia(query_text):
    # Perform natural language processing on the query
    doc = nlp(query_text)

    # Extract entities from the query
    entities = [token.text for token in doc if token.ent_type_]
    print(entities)
    # Build and execute the SPARQL query
    sparql_query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbo: <http://dbpedia.org/ontology/>

    SELECT ?property ?value
    WHERE {{
        dbo:{entities[0]} ?property ?value.
    }}
    """

    # Set up the SPARQL endpoint for DBpedia
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)

    # Execute the query
    results = sparql.query().convert()

    # Return the results
    return results['results']['bindings']

# Example usage
query_result = query_dbpedia("Tell me about Barack Obama.")
for result in query_result:
    property_name = result['property']['value'].split('/')[-1]
    value = result['value']['value']
    print(f"{property_name}: {value}")


['Barack', 'Obama']


https://medium.com/mlearning-ai/semantic-search-via-knowledge-graphs-cc9da99dab4d

In [6]:
import spacy

nlp = spacy.load('en_core_web_sm')

def extract_triplets(sentence):
    doc = nlp(sentence)  # Parse the sentence with spaCy
    triplets = []
    for noun_chunk in doc.noun_chunks:  # Iterate over the noun chunks in the sentence
        # Extract the text of the noun chunk and use it as the subject of the triplet
        subject = noun_chunk.text
        # Find the verb that governs the noun chunk and use it as the predicate of the triplet
        for token in noun_chunk.root.children:
            if token.dep_ == "ROOT":
                predicate = token.text
                print(predicate)
        # Find the direct object of the verb and use it as the object of the triplet
        for token in noun_chunk.root.children:
            if token.dep_ == "dobj":
                object = token.text
                print(object)
        triplets.append((subject, predicate, object))
    return triplets

# Define a sentence to parse
sentence = "Music bands based in Jönköping"
doc = nlp(sentence)

for token in noun_chunk.root.children:
    print(token.text)
# for tok in doc:
#   print(tok.text, "...", tok.dep_)
# Extract the triplets from the sentence
# triplets = extract_triplets(sentence)

# # Print the triplets
# for triplet in triplets:
#     print(triplet)