In [2]:
!pip install faiss-cpu # Install FAISS library

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import faiss
import numpy as np

# Step 1: Toy document collection that the retriever searches over
knowledge_base = [
    "The capital of France is Paris.",
    "The Great Wall of China is over 13,000 miles long.",
    "Python is a popular programming language for machine learning.",
    "The Eiffel Tower is located in Paris, France.",
    "The Mona Lisa is displayed in the Louvre Museum."
]

# Step 2: Build the FAISS retriever
# Placeholder embeddings: every document is represented only by its position
# in the list (0.0, 1.0, 2.0, ...) as a single float32 feature. Retrieval with
# this index is therefore NOT semantic -- it only compares numbers.
document_embeddings = np.arange(len(knowledge_base), dtype="float32").reshape(-1, 1)
# Exact (brute-force) L2 index whose dimension matches the embedding width (1).
faiss_index = faiss.IndexFlatL2(document_embeddings.shape[1])
faiss_index.add(document_embeddings)

# Step 3: Initialize the generator (Hugging Face seq2seq model)
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

# When the tokenizer has no usable maximum length, `truncation=True` is ignored
# and transformers warns "Default to no truncation". Cap the tokenizer at the
# model's positional-embedding limit so truncation requested by later
# generator calls actually takes effect.
if tokenizer.model_max_length > model.config.max_position_embeddings:
    tokenizer.model_max_length = model.config.max_position_embeddings

generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# Step 4: Define a RAG pipeline
def rag_pipeline(query, top_k=2):
    """Answer ``query`` by retrieving documents and passing them to the generator.

    Uses the module-level ``faiss_index``, ``knowledge_base`` and ``generator``.

    Args:
        query: The user's question as a string.
        top_k: Maximum number of documents to retrieve (default 2, the value
            previously hard-coded in the function body).

    Returns:
        The ``generated_text`` string of the generator's first result.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Fake query embedding (for simplicity): the number of whitespace-separated
    # words in the query, as a (1, 1) float32 array matching the 1-D index.
    query_embedding = np.array([[len(query.split())]], dtype="float32")

    # Retrieve the top-k nearest documents; distances are not used.
    _distances, indices = faiss_index.search(query_embedding, top_k)

    # FAISS fills unused result slots with -1 when the index holds fewer than
    # top_k vectors. Skip those: a raw -1 would silently select the last
    # document through Python's negative indexing.
    retrieved_docs = [knowledge_base[int(idx)] for idx in indices[0] if idx >= 0]

    # Combine retrieved documents as context and build the generator prompt.
    context = " ".join(retrieved_docs)
    input_text = f"Context: {context} Question: {query}"

    return generator(input_text, max_length=50, truncation=True)[0]["generated_text"]

# Step 5: Try the pipeline on a sample question and show question + answer
query = "Where is the Eiffel Tower located?"
response = rag_pipeline(query)
print(f"Query: {query}", f"Response: {response}", sep="\n")


Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Query: Where is the Eiffel Tower located?
Response: The Mona Lisa is displayed in the Louvre Museum. The Eiffel Tower is located in Paris, France. Where is the Eiffer Tower located? Visit CNN.com/EifferTower for more information.


In [None]:
!pip install -U sentence-transformers faiss-cpu

In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Step 1: Toy document collection that the retriever searches over
knowledge_base = [
    "The capital of France is Paris.",
    "The Great Wall of China is over 13,000 miles long.",
    "Python is a popular programming language for machine learning.",
    "The Eiffel Tower is located in Paris, France.",
    "The Mona Lisa is displayed in the Louvre Museum."
]

# Step 2: Build the FAISS retriever on top of Sentence-BERT embeddings
# Sentence-embedding model used for both the documents and, later, the queries.
model_name = "all-mpnet-base-v2"
encoder = SentenceTransformer(model_name)

# Embed every document, then move the resulting tensor to host memory as a
# NumPy array so it can be handed to FAISS.
document_embeddings = encoder.encode(knowledge_base, convert_to_tensor=True).cpu().numpy()

# Exact (brute-force) L2 index sized to the embedding width.
faiss_index = faiss.IndexFlatL2(document_embeddings.shape[1])
faiss_index.add(document_embeddings)

# Step 3: Initialize the generator (Hugging Face seq2seq model)
tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-cnn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")

# When the tokenizer has no usable maximum length, `truncation=True` is ignored
# and transformers warns "Default to no truncation". Cap the tokenizer at the
# model's positional-embedding limit so truncation requested by later
# generator calls actually takes effect.
if tokenizer.model_max_length > model.config.max_position_embeddings:
    tokenizer.model_max_length = model.config.max_position_embeddings

generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# Step 4: Define a RAG pipeline
def rag_pipeline(query, top_k=2):
    """Answer ``query`` by retrieving similar documents and passing them to the generator.

    Uses the module-level ``encoder``, ``faiss_index``, ``knowledge_base`` and
    ``generator``.

    Args:
        query: The user's question as a string.
        top_k: Maximum number of documents to retrieve (default 2, the value
            previously hard-coded in the function body).

    Returns:
        The ``generated_text`` string of the generator's first result.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError("top_k must be at least 1")

    # Encode the query as a one-element batch so the result is already a
    # (1, dim) NumPy array; float32 is the dtype FAISS expects.
    query_embedding = np.asarray(
        encoder.encode([query], convert_to_numpy=True), dtype="float32"
    )

    # Retrieve the top-k nearest documents; distances are not used.
    _distances, indices = faiss_index.search(query_embedding, top_k)

    # FAISS fills unused result slots with -1 when the index holds fewer than
    # top_k vectors. Skip those: a raw -1 would silently select the last
    # document through Python's negative indexing.
    retrieved_docs = [knowledge_base[int(idx)] for idx in indices[0] if idx >= 0]

    # Combine retrieved documents as context and build the generator prompt.
    context = " ".join(retrieved_docs)
    input_text = f"Context: {context} Question: {query}"

    return generator(input_text, max_length=50, truncation=True)[0]["generated_text"]

# Step 5: Try the pipeline on a sample question and show question + answer
query = "Where is the Eiffel Tower located?"
response = rag_pipeline(query)
print(f"Query: {query}", f"Response: {response}", sep="\n")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Query: Where is the Eiffel Tower located?
Response: The Eiffel Tower is located in Paris, France. The capital of France is Paris. It is located on the banks of the River Seine, in the center of the city. It was built in 1903 and is one of
