In [6]:
import spacy
from SPARQLWrapper import SPARQLWrapper, JSON

# Load the English NLP model from spaCy
nlp = spacy.load("en_core_web_sm")

# Function to convert natural language to a SPARQL query for DBpedia
def convert_to_sparql_query(query_text):
    # Perform natural language processing on the query
    doc = nlp(query_text)

    # Extract entities and relationships from the query
    entities = [token.text for token in doc if token.ent_type_]
    relationships = [token.text for token in doc if token.dep_ == "ROOT"]

    # Build a SPARQL query template
    sparql_query_template = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX dbo: <http://dbpedia.org/ontology/>

        SELECT ?subject ?property ?object
        WHERE {{
            ?subject dbo:{entity} ?object.
            OPTIONAL {{ ?object ?property ?value. }}
        }}
        LIMIT 1
    """

    # Populate the template with the extracted entities
    sparql_query = sparql_query_template.format(entity=entities[0])

    return sparql_query

# Function to execute the SPARQL query and return the result
def execute_sparql_query(query):
    # Set up the SPARQL endpoint for DBpedia
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Execute the query
    results = sparql.query().convert()

    return results['results']['bindings']

# Example usage
query_text = "What is the capital of France?"
sparql_query = convert_to_sparql_query(query_text)
query_result = execute_sparql_query(sparql_query)

# Print the result
for result in query_result:
    subject = result['subject']['value'].split('/')[-1]
    property_name = result.get('property', {}).get('value', '').split('/')[-1]
    object_value = result['object']['value'].split('/')[-1]
    print(f"{subject} {property_name}: {object_value}")





In [9]:
import spacy
from SPARQLWrapper import SPARQLWrapper, JSON

# Load the English NLP model from spaCy
nlp = spacy.load("en_core_web_sm")

# Function to process a natural language query and return the results from DBpedia
def query_dbpedia(query_text):
    # Perform natural language processing on the query
    doc = nlp(query_text)

    # Extract entities from the query
    entities = [token.text for token in doc if token.ent_type_]
    print(entities)
    # Build and execute the SPARQL query
    sparql_query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbo: <http://dbpedia.org/ontology/>

    SELECT ?property ?value
    WHERE {{
        dbo:{entities[0]} ?property ?value.
    }}
    """

    # Set up the SPARQL endpoint for DBpedia
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)

    # Execute the query
    results = sparql.query().convert()

    # Return the results
    return results['results']['bindings']

# Example usage
query_result = query_dbpedia("Tell me about Barack Obama.")
for result in query_result:
    property_name = result['property']['value'].split('/')[-1]
    value = result['value']['value']
    print(f"{property_name}: {value}")


['Barack', 'Obama']


https://medium.com/mlearning-ai/semantic-search-via-knowledge-graphs-cc9da99dab4d

In [6]:
import spacy

nlp = spacy.load('en_core_web_sm')

def extract_triplets(sentence):
    doc = nlp(sentence)  # Parse the sentence with spaCy
    triplets = []
    for noun_chunk in doc.noun_chunks:  # Iterate over the noun chunks in the sentence
        # Extract the text of the noun chunk and use it as the subject of the triplet
        subject = noun_chunk.text
        # Find the verb that governs the noun chunk and use it as the predicate of the triplet
        for token in noun_chunk.root.children:
            if token.dep_ == "ROOT":
                predicate = token.text
                print(predicate)
        # Find the direct object of the verb and use it as the object of the triplet
        for token in noun_chunk.root.children:
            if token.dep_ == "dobj":
                object = token.text
                print(object)
        triplets.append((subject, predicate, object))
    return triplets

# Define a sentence to parse
sentence = "Music bands based in Jönköping"
doc = nlp(sentence)

for token in noun_chunk.root.children:
    print(token.text)
# for tok in doc:
#   print(tok.text, "...", tok.dep_)
# Extract the triplets from the sentence
# triplets = extract_triplets(sentence)

# # Print the triplets
# for triplet in triplets:
#     print(triplet)

OpenAI api

In [1]:
import openai

# Set your OpenAI API key
openai.api_key = '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'

def generate_sparql_query(prompt):
    # Make a request to the ChatGPT API for generating SPARQL queries
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=150,
        temperature=0.7,
        stop=None
    )

    # Extract the generated SPARQL query from the API response
    generated_query = response['choices'][0]['text'].strip()

    return generated_query

# Example usage
natural_language_input = "List books written by George Orwell."
sparql_query = generate_sparql_query(natural_language_input)

print("Natural Language Input:", natural_language_input)
print("Generated SPARQL Query:", sparql_query)

RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

Transformers pretrained models

In [2]:
from transformers import pipeline
from SPARQLWrapper import SPARQLWrapper, JSON

# Set up the question-answering pipeline using a pre-trained model
nlp_qa = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

def convert_question_to_query(question):
    try:
        # Use the question-answering model to get an answer
        answer = nlp_qa(question=question, context="DBpedia is a large multi-domain ontology...")

        # Extract the answer and construct a simple SPARQL query
        if answer['answer']:
            entity_name = answer['answer']
            query_string = f"""
                SELECT ?property ?value
                WHERE {{
                    dbpedia:{entity_name} ?property ?value.
                }}
            """

            return query_string
        else:
            print("Unable to extract entity from the question.")
            return None

    except Exception as e:
        print(f"Error: {e}")
        return None

def execute_query(query):
    try:
        # Set the DBpedia endpoint URL
        sparql = SPARQLWrapper("http://dbpedia.org/sparql")

        # Set the query string
        sparql.setQuery(query)

        # Set the return format to JSON
        sparql.setReturnFormat(JSON)

        # Execute the query and parse the results
        results = sparql.query().convert()

        return results

    except Exception as e:
        print(f"Error executing SPARQL query: {e}")
        return None

# Example usage
question = "What is the capital of Germany?"
query = convert_question_to_query(question)

if query:
    results = execute_query(query)
    if results:
        # Process and print the query results
        for result in results["results"]["bindings"]:
            for var in result:
                print(f"{var}: {result[var]['value']}")
    else:
        print("No results.")


All PyTorch model weights were used when initializing TFDistilBertForQuestionAnswering.

All the weights of TFDistilBertForQuestionAnswering were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForQuestionAnswering for predictions without further training.


Error executing SPARQL query: QueryBadFormed: A bad request has been sent to the endpoint: probably the SPARQL query is badly formed. 

Response:
b"Virtuoso 37000 Error SP030: SPARQL compiler, line 5: Undefined namespace prefix in prefix:localpart notation at 'dbpedia:DBpedia' before '?property'\n\nSPARQL query:\n#output-format:application/sparql-results+json\n\n                SELECT ?property ?value\n                WHERE {\n                    dbpedia:DBpedia ?property ?value.\n                }\n            \n"
No results.


In [5]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

def generate_sparql(query, model, tokenizer):
    input_text = f"translate English to SPARQL: {query}"
    input_ids = tokenizer.encode(input_text, return_tensors="pt")

    output = model.generate(input_ids)
    sparql_query = tokenizer.decode(output[0], skip_special_tokens=True)

    return sparql_query

# Load pre-trained T5 model and tokenizer
model_name = "t5-base"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Example natural language query
natural_language_query = "List all the songs of Beatles."

# Generate SPARQL query
sparql_query = generate_sparql(natural_language_query, model, tokenizer)

# Print the result
print("Natural Language Query:", natural_language_query)
print("SPARQL Query:", sparql_query)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Natural Language Query: List all the songs of Beatles.
SPARQL Query: SPARQL: Liste alle Lieder von Beatles.
