In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from tqdm.autonotebook import tqdm, trange

  from tqdm.autonotebook import tqdm, trange


In [2]:
import faiss
import numpy as np

In [3]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [4]:
sparql = SPARQLWrapper("http://localhost:7200/repositories/amd_repo")

In [5]:
sparql.setQuery("""
    SELECT ?subject ?predicate ?object
    WHERE {
        ?subject ?predicate ?object
    }
    LIMIT 1000
""")

In [6]:
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [7]:
kg_triples = []
for result in results["results"]["bindings"]:
    subject = result["subject"]["value"]
    predicate = result["predicate"]["value"]
    object = result["object"]["value"]
    kg_triples.append(f"{subject} {predicate} {object}")

In [8]:
embeddings = HuggingFaceEmbeddings()

In [9]:
kg_embeddings = []
for kg_triple in kg_triples:
    emb = embeddings.embed_query(kg_triple)
    kg_embeddings.append(emb)

In [10]:
kg_embeddings = np.array(kg_embeddings, dtype=np.float32)

In [11]:
d = kg_embeddings.shape[1]  # Dimension of embeddings
index = faiss.IndexFlatL2(d)
index.add(kg_embeddings)  # Add embeddings to the index

# Save the index and mappings
faiss.write_index(index, 'kg_index.faiss')
np.save('kg_triples.npy', kg_triples)

In [12]:
index = faiss.read_index('kg_index.faiss')
kg_triples = np.load('kg_triples.npy', allow_pickle=True)

In [13]:
def search_kg(query, top_k=5):
    query_embedding = embeddings.embed_query(query)
    query_embedding = np.array(query_embedding, dtype=np.float32)
    if len(query_embedding.shape) == 1:
        query_embedding = np.expand_dims(query_embedding, axis=0)
    
    distances, indices = index.search(query_embedding, top_k)
    return [kg_triples[i] for i in indices[0]]

In [14]:
query = "How is AMD related to blindness?"
relevant_kg_elements = search_kg(query)

In [15]:
relevant_kg_elements

['http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#irreversible_severe_central_visual_loss',
 'http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#permanent_loss_of_central_vision',
 'http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#severe_vision_loss',
 'http://www.semanticweb.org/lecualexa

In [16]:
# Construct prompt
prompt = f"Query: {query}\nRelevant Information: {', '.join(relevant_kg_elements)}\nAnswer:"

In [17]:
from transformers import AutoTokenizer, AutoModelForCausalLM

falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
falcon_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-7b-instruct", device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device device because they were offloaded to the cpu.


In [18]:
def generate_response(prompt, max_length=1000):
    inputs = falcon_tokenizer(prompt, return_tensors="pt")
    outputs = falcon_model.generate(inputs.input_ids, max_length=max_length, num_return_sequences=1)
    return falcon_tokenizer.decode(outputs[0], skip_special_tokens=True)

In [19]:
# Example query
query = "How is AMD related to blindness?"
relevant_kg_elements = search_kg(query)

In [20]:
response = generate_response(prompt)
print(response)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Query: How is AMD related to blindness?
Relevant Information: http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#irreversible_severe_central_visual_loss, http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#permanent_loss_of_central_vision, http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#sever