In [2]:
import os
import networkx as nx
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [3]:
# Checkpoint identifiers used by this notebook
MODEL_NAME = "t5-small"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

# Generator side: tokenizer and seq2seq LLM (a T5 model) loaded from HuggingFace
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Retriever side: SentenceTransformer used for embedding generation
embedder = SentenceTransformer(EMBEDDING_MODEL_NAME)

# Example knowledge base: document id -> text of the document/fact
knowledge_base = dict(
    doc1="GraphRAG is a library for creating graph-based retrieval systems.",
    doc2="HuggingFace Transformers provides state-of-the-art NLP models.",
    doc3="Retrieval-Augmented Generation improves the accuracy of LLMs by using external knowledge sources.",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [4]:
# Precompute embeddings for the knowledge base (document id -> embedding)
kb_embeddings = {doc_id: embedder.encode(content) for doc_id, content in knowledge_base.items()}

# Build a simple undirected graph using NetworkX
graph = nx.Graph()

# Add one node per document, keeping the text in the node's `data` attribute
for doc_id, content in knowledge_base.items():
    graph.add_node(doc_id, data=content)

# Add edges weighted by cosine similarity (every node is connected to every
# other node). The graph is undirected, so each unordered pair is visited
# exactly once: looping over both (id1, id2) and (id2, id1) would compute the
# same similarity twice and simply overwrite the same edge.
node_ids = list(knowledge_base.keys())
for i, id1 in enumerate(node_ids):
    for id2 in node_ids[i + 1:]:
        similarity = cosine_similarity(
            [kb_embeddings[id1]], [kb_embeddings[id2]]
        )[0][0]
        graph.add_edge(id1, id2, weight=similarity)

## 1. Version 1: retrieval by query-to-document embedding similarity

In [22]:
# Function to retrieve relevant documents
def retrieve_relevant_documents(query, top_k=2):
    """Return the texts of the knowledge-base documents most similar to `query`.

    The query is embedded with `embedder`, scored against every entry of
    `kb_embeddings` by cosine similarity, and the documents are ranked from
    highest to lowest score.

    Args:
        query: Text to search for.
        top_k: Upper bound of the slice taken from the ranked documents.

    Returns:
        A list of document texts from `knowledge_base`, best match first.
    """
    query_vec = embedder.encode(query)

    # Score each stored document against the query
    scores = {}
    for doc_id, doc_vec in kb_embeddings.items():
        scores[doc_id] = cosine_similarity([query_vec], [doc_vec])[0][0]

    # Rank document ids by score, highest first
    ranked_ids = sorted(scores, key=scores.get, reverse=True)
    return [knowledge_base[doc_id] for doc_id in ranked_ids[:top_k]]

# Function to generate an answer using the LLM
def generate_answer(query, context):
    """Generate an answer to `query` with the seq2seq model, conditioned on `context`.

    Both strings are merged into a single "question: ... context: ..." prompt,
    tokenized as PyTorch tensors with truncation at 512 tokens, and passed to
    `model.generate` with `max_length=150`.

    Args:
        query: The question text.
        context: Supporting text placed after the question in the prompt.

    Returns:
        The first generated sequence decoded to a string, special tokens skipped.
    """
    prompt = f"question: {query} context: {context}"
    encoded = tokenizer(prompt, max_length=512, truncation=True, return_tensors="pt")
    generated = model.generate(**encoded, max_length=150)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [8]:
# Example Q&A interaction
def main():
    """Run one retrieve-then-generate round for a fixed question and print the result."""
    query = "What is RAG?"

    # Retrieval: fetch the relevant documents and merge them into one context string
    context = " ".join(retrieve_relevant_documents(query))

    # Generation: answer the question from that context
    answer = generate_answer(query, context)

    for line in (f"Question: {query}", f"Answer: {answer}"):
        print(line)

if __name__ == "__main__":
    main()

Question: What is RAG?
Answer: a library for creating graph-based retrieval systems


In [23]:
# Cell-level rerun of the steps in main(), so the retrieved context can be inspected.
# The names assigned here (query, relevant_docs, context, answer) become notebook globals.
query = "What is RAG?"

# Retrieve relevant documents and join them into a single context string
relevant_docs = retrieve_relevant_documents(query)
context = " ".join(relevant_docs)

# Generate an answer
print(context)  # show the text the generator is conditioned on
answer = generate_answer(query, context)  # NOTE(review): computed but not printed in this cell

HuggingFace Transformers provides state-of-the-art NLP models. GraphRAG is a library for creating graph-based retrieval systems.


In [12]:
# Scratch cell: repeats the ranking step of retrieve_relevant_documents at cell level.
# Relies on `query` having been defined by an earlier cell.
query_embedding = embedder.encode(query)
# Cosine similarity between the query and each stored document embedding, keyed by doc id
similarities = {
    doc_id: cosine_similarity([query_embedding], [embedding])[0][0]
    for doc_id, embedding in kb_embeddings.items()
}
# (doc_id, similarity) pairs ordered from highest to lowest similarity
sorted_docs = sorted(similarities.items(), key=lambda x: x[1], reverse=True)

In [15]:
# Inspect which document ids have a precomputed embedding
kb_embeddings.keys()

dict_keys(['doc1', 'doc2', 'doc3'])

## 2. Version 2: top document expanded with its graph neighbours

In [26]:
# Function to retrieve relevant documents using the graph
def retrieve_relevant_documents(query, top_k=2):
    """Return the best-matching document plus its strongest graph neighbours.

    The query is embedded and compared (cosine similarity) with every entry
    of `kb_embeddings`. The single most similar document is the seed; the
    remaining slots are filled with the seed's neighbours in `graph`, ordered
    by edge `weight` from highest to lowest.

    NOTE(review): in this notebook the edge weights are document-to-document
    similarities, so the neighbours are ranked by closeness to the seed
    document, not to the query.

    Args:
        query: Text to search for.
        top_k: Maximum number of documents to return.

    Returns:
        A list of at most `top_k` document texts from `knowledge_base`, seed
        document first. Empty when `top_k <= 0` or the knowledge base has no
        embeddings.
    """
    # A non-positive top_k used to reach the slice below as a negative bound
    # (neighbors[:top_k - 1]) and return the seed plus almost every neighbour;
    # an empty knowledge base used to raise IndexError when picking the seed.
    if top_k <= 0 or not kb_embeddings:
        return []

    query_embedding = embedder.encode(query)

    # Compute similarity of query to all nodes
    similarities = {
        doc_id: cosine_similarity([query_embedding], [kb_embeddings[doc_id]])[0][0]
        for doc_id in kb_embeddings
    }

    # Only the best match is needed, so take the maximum instead of sorting;
    # ties resolve to the first document in insertion order, as before.
    top_doc = max(similarities, key=similarities.get)

    # Expand from the top document through the graph, strongest edges first
    neighbors = sorted(
        graph[top_doc].items(), key=lambda item: item[1]["weight"], reverse=True
    )

    # Collect up to top_k documents: the seed followed by its neighbours
    retrieved_docs = [top_doc] + [doc_id for doc_id, _ in neighbors[:top_k - 1]]

    return [knowledge_base[doc_id] for doc_id in retrieved_docs]

# Function to generate an answer using the LLM
def generate_answer(query, context):
    """Produce the model's answer to `query` given the retrieved `context`.

    Builds a "question: ... context: ..." prompt, tokenizes it into PyTorch
    tensors (truncated at 512 tokens), calls `model.generate` with
    `max_length=150`, and decodes the first returned sequence.

    Args:
        query: The question text.
        context: Retrieved text appended to the prompt after the question.

    Returns:
        The decoded answer string with special tokens skipped.
    """
    prompt = f"question: {query} context: {context}"
    model_inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    generated_ids = model.generate(**model_inputs, max_length=150)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [27]:
# Example Q&A interaction
def main():
    """Answer one fixed question via retrieval + generation and print the outcome."""
    query = "What is RAG?"

    # Retrieve the supporting documents and flatten them into a single string
    retrieved_texts = retrieve_relevant_documents(query)
    joined_context = " ".join(retrieved_texts)

    # Let the LLM answer from the retrieved context
    answer = generate_answer(query, joined_context)

    for line in (f"Question: {query}", f"Answer: {answer}"):
        print(line)

if __name__ == "__main__":
    main()

Question: What is RAG?
Answer: NLP models


In [28]:
# Cell-level rerun of the steps in main(), additionally printing the retrieved context.
# The names assigned here (query, relevant_docs, context, answer) become notebook globals.
query = "What is RAG?"

# Retrieve relevant documents and join them into a single context string
relevant_docs = retrieve_relevant_documents(query)
context = " ".join(relevant_docs)

# Generate an answer
print(context)  # show the text the generator is conditioned on
answer = generate_answer(query, context)
print(f"Question: {query}")
print(f"Answer: {answer}")

HuggingFace Transformers provides state-of-the-art NLP models. Retrieval-Augmented Generation improves the accuracy of LLMs by using external knowledge sources.
Question: What is RAG?
Answer: NLP models
