In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from tqdm.autonotebook import tqdm, trange

  from tqdm.autonotebook import tqdm, trange


In [2]:
import faiss
import numpy as np

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [4]:
# SPARQL endpoint of the `amd_repo` repository served on the local machine.
SPARQL_ENDPOINT = "http://localhost:7200/repositories/amd_repo"
sparql = SPARQLWrapper(SPARQL_ENDPOINT)

In [5]:
# Unfiltered triple pattern: every (subject, predicate, object) in the
# repository matches, and the result set is capped at 1000 rows.
TRIPLE_QUERY = """
    SELECT ?subject ?predicate ?object
    WHERE {
        ?subject ?predicate ?object
    }
    LIMIT 1000
"""
sparql.setQuery(TRIPLE_QUERY)

In [6]:
# Request JSON so the converted response can be indexed like nested
# dicts/lists (results["results"]["bindings"]) in the following cell.
sparql.setReturnFormat(JSON)
query_response = sparql.query()
results = query_response.convert()

In [7]:
# Flatten every SPARQL binding into one "subject predicate object" string.
#
# A comprehension is used instead of a loop with named temporaries so that the
# per-row values stay local: the previous loop assigned to a global called
# `object`, which shadowed the builtin for every later cell in the kernel.
kg_triples = [
    f"{row['subject']['value']} {row['predicate']['value']} {row['object']['value']}"
    for row in results["results"]["bindings"]
]

In [8]:
# Pin the sentence-embedding checkpoint explicitly. This is the library's
# documented default, so vectors are unchanged, but the FAISS index written to
# disk below is only meaningful for the exact model that produced it; naming
# the model here keeps index and query embeddings in sync if the default moves.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [9]:
# Embed all triples in one batched call instead of one model invocation per
# triple. The result is a list with one embedding (list of floats) per entry
# of `kg_triples`, in the same order.
# `list(...)` makes this robust to `kg_triples` being a NumPy array, which is
# what it becomes after it is reloaded from disk further down the notebook.
kg_embeddings = embeddings.embed_documents(list(kg_triples))

In [10]:
# Stack the per-triple embeddings into a single float32 array, the dtype the
# FAISS index in the next cell is fed with.
kg_embeddings = np.array(kg_embeddings, dtype="float32")

In [11]:
# Fail early with a readable message: an empty SPARQL result yields a 1-D
# array, for which `shape[1]` would otherwise raise an opaque IndexError.
if kg_embeddings.ndim != 2 or kg_embeddings.shape[0] == 0:
    raise ValueError(
        "No embeddings to index: expected a non-empty 2-D array, "
        f"got shape {kg_embeddings.shape}."
    )
# Index row i must correspond to kg_triples[i]; search_kg relies on this.
if kg_embeddings.shape[0] != len(kg_triples):
    raise ValueError(
        f"Embedding/triple count mismatch: {kg_embeddings.shape[0]} vectors "
        f"for {len(kg_triples)} triples."
    )

d = kg_embeddings.shape[1]  # Dimension of embeddings
index = faiss.IndexFlatL2(d)  # flat index using L2 distance
index.add(kg_embeddings)  # Add embeddings to the index

# Save the index and the parallel list of triple strings
faiss.write_index(index, 'kg_index.faiss')
np.save('kg_triples.npy', kg_triples)

In [13]:
# Reload the persisted index and its parallel array of triple strings.
index = faiss.read_index('kg_index.faiss')
# The triples were saved from a list of `str`, which NumPy stores as a
# fixed-width unicode array, so no pickling is involved. Leaving
# `allow_pickle` at its default (False) avoids executing arbitrary code if
# the .npy file is ever replaced or tampered with.
kg_triples = np.load('kg_triples.npy')

In [14]:
def search_kg(query, top_k=5):
    """Return the knowledge-graph triples closest to ``query``.

    The query text is embedded with the notebook-level ``embeddings`` object
    and looked up in the notebook-level FAISS ``index``; the returned row
    numbers are mapped back to entries of ``kg_triples``.

    Args:
        query: Natural-language question to search for.
        top_k: Maximum number of triples to return.

    Returns:
        A list of at most ``top_k`` entries of ``kg_triples``, in the order
        the index returned them. Empty when ``top_k`` is not positive.
    """
    if top_k <= 0:
        return []

    # The index is searched with a 2-D batch, so promote the single query
    # vector to shape (1, dim).
    query_embedding = np.asarray(embeddings.embed_query(query), dtype=np.float32)
    query_embedding = np.atleast_2d(query_embedding)

    _distances, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with label -1 when the index holds fewer than
    # `top_k` vectors; without this filter, kg_triples[-1] would silently
    # return the last triple as if it were a real hit.
    return [kg_triples[i] for i in indices[0] if i >= 0]

In [15]:
# Example question; retrieval uses search_kg's default number of results.
query = "How is AMD related to blindness?"
relevant_kg_elements = search_kg(query=query)

In [16]:
# Display the retrieved triples (bare last expression -> rich cell output).
relevant_kg_elements

['http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#irreversible_severe_central_visual_loss',
 'http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#permanent_loss_of_central_vision',
 'http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#Age_-_related_macular_degeneration_(_AMD_) http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#cause http://www.semanticweb.org/lecualexandru/ontologies/2024/1/untitled-ontology-6#severe_vision_loss',
 'http://www.semanticweb.org/lecualexa

In [17]:
# Assemble the LLM prompt: the question, the retrieved triples as one
# comma-separated context line, and a trailing cue for the answer.
kg_context = ', '.join(relevant_kg_elements)
prompt = f"Query: {query}\nRelevant Information: {kg_context}\nAnswer:"

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Tokenizer and weights must come from the same checkpoint, so the id is
# written once and shared by both loaders.
FALCON_CHECKPOINT = "tiiuae/falcon-7b-instruct"
falcon_tokenizer = AutoTokenizer.from_pretrained(FALCON_CHECKPOINT)
falcon_model = AutoModelForCausalLM.from_pretrained(FALCON_CHECKPOINT)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Tokenizer and weights must come from the same checkpoint, so the id is
# written once and shared by both loaders.
LLAMA_CHECKPOINT = "meta-llama/Meta-Llama-3-8B"
tokenizer = AutoTokenizer.from_pretrained(LLAMA_CHECKPOINT)
model = AutoModelForCausalLM.from_pretrained(LLAMA_CHECKPOINT)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]