# Quickstart with RAGStack

This notebook demonstrates how to set up a simple RAG pipeline with RAGStack. At the end of this notebook, you will have a fully functioning Question/Answer model that can answer questions using your supplied documents.

A RAG pipeline requires, at minimum, a vector store, an embedding model, and an LLM. In this tutorial, you will use an Astra DB vector store, a locally run `sentence-transformers` embedding model (`all-MiniLM-L6-v2`), a Llama model served through the Groq API as the LLM, and LangChain to tie the retrieval pieces together.

In [None]:
! pip install -q ragstack-ai datasets

In [None]:
!pip install --upgrade langchain-core langchain langserve langgraph

In [None]:
# NOTE: a hard-coded Astra client ID and secret were removed from this cell.
# Never paste credentials into a notebook. Keep them in environment variables or
# in a local `.env` file (picked up by `load_dotenv()`), for example:
#   ASTRA_DB_API_ENDPOINT=...
#   ASTRA_DB_APPLICATION_TOKEN=...
# Any credential that was ever saved in this notebook should be treated as
# compromised and rotated.

In [None]:
import cassio
import os
from dotenv import load_dotenv

# Load variables from a local `.env` file (if present) into the process environment.
load_dotenv()

# Astra DB settings. Later cells read the same environment variables again, so a
# missing value is reported here, by name, instead of surfacing later as `None`.
ASTRA_DB_API_ENDPOINT = os.getenv("ASTRA_DB_API_ENDPOINT")
ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
# Optional: not required by the cells shown in this notebook; stays None when unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

_missing_vars = [
    name
    for name, value in (
        ("ASTRA_DB_API_ENDPOINT", ASTRA_DB_API_ENDPOINT),
        ("ASTRA_DB_APPLICATION_TOKEN", ASTRA_DB_APPLICATION_TOKEN),
    )
    if not value
]
if _missing_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_vars)
        + ". Set them in your shell or in a .env file before running this notebook."
    )

## Create RAG Pipeline

In [None]:
from sentence_transformers import SentenceTransformer
from langchain_astradb import AstraDBVectorStore
from langchain_core.embeddings import Embeddings
import os

# Use SentenceTransformer directly for embeddings (avoids langchain_community import issues)
class CustomHuggingFaceEmbeddings(Embeddings):
    """LangChain ``Embeddings`` implementation backed by a ``SentenceTransformer`` model."""

    def __init__(self, model_name="all-MiniLM-L6-v2"):
        """Instantiate the ``SentenceTransformer`` identified by ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        """Encode ``texts`` with the model and return the result converted via ``.tolist()``."""
        encoded_batch = self.model.encode(texts)
        return encoded_batch.tolist()

    def embed_query(self, text):
        """Encode a single ``text`` (as a one-item batch) and return its first row via ``.tolist()``."""
        encoded_batch = self.model.encode([text])
        query_vector = encoded_batch[0]
        return query_vector.tolist()

# Embedding wrapper handed to the vector store below.
embedding = CustomHuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

print(f"Using embedding model: {type(embedding).__name__}")

# Astra DB connection settings, read from the environment.
astra_connection = {
    "token": os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
    "api_endpoint": os.getenv("ASTRA_DB_API_ENDPOINT"),
}

# Vector store bound to the "test" collection.
vstore = AstraDBVectorStore(
    collection_name="test",
    embedding=embedding,
    **astra_connection,
)
print("Astra vector store configured with HuggingFaceEmbeddings")

The code block above uses a small custom `Embeddings` wrapper around the `sentence-transformers` model `all-MiniLM-L6-v2` in place of `OpenAIEmbeddings`, so embeddings are computed locally rather than through the OpenAI API. Make sure you have the `sentence-transformers` library installed.

In [None]:
from datasets import load_dataset

# Load the "train" split of the sample dataset and show one record.
philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]
print("An example entry:")
print(philo_dataset[16])

# Case-insensitive substring search over the quotes for sleep, brain, or restore.
banner = "=" * 80
print("\n" + banner)
print("Searching for quotes about sleep, brain, or restore...")
print(banner)
keywords = ["sleep", "brain", "restore"]
for keyword in keywords:
    needle = keyword.lower()
    matching = []
    for q in philo_dataset:
        if needle in q["quote"].lower():
            matching.append(q)
    print(f"\nQuotes containing '{keyword}': {len(matching)}")
    # Only the first three matches are printed.
    for quote in matching[:3]:
        print(f"  - {quote['quote']}")
        print(f"    Author: {quote['author']}, Tags: {quote['tags']}")

In [None]:
from langchain_core.documents import Document

# Constructs a set of documents from your data. Documents can be used as inputs to your vector store.
# Each document carries the quote as its text, the author, and one "y" flag per tag.
docs = []
for entry in philo_dataset:
    metadata = {"author": entry["author"]}
    if entry["tags"]:
        # Tags arrive as a single ";"-separated string.
        for tag in entry["tags"].split(";"):
            # Skip empty fragments (e.g. from "a;;b" or a trailing ";") and never let a
            # tag literally named "author" overwrite the author field set above.
            if tag and tag != "author":
                metadata[tag] = "y"
    # Create a LangChain document with the quote and metadata tags
    docs.append(Document(page_content=entry["quote"], metadata=metadata))

In [None]:
# Preview only the first few documents; displaying the whole list floods the cell output.
docs[:5]

In [None]:
# Test if you can connect to Astra DB.
# Constructing the client object presumably does not contact the database by itself,
# so the collection list is also requested before success is reported.
try:
    from astrapy.db import AstraDB
    db = AstraDB(
        token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
        api_endpoint=os.environ["ASTRA_DB_API_ENDPOINT"],
    )
    # NOTE(review): expected to raise on a bad token or endpoint; confirm for the installed astrapy version.
    db.get_collections()
    print("Connection successful!")
except Exception as e:
    # A missing environment variable (KeyError from os.environ) is reported here as well.
    print(f"Connection failed: {e}")

In [None]:
# Create embeddings by inserting your documents into the vector store.
# Up to `max_retries` attempts are made with a 5-second pause between them; the
# exception from the final attempt is reported and then re-raised.
import time

max_retries = 3
for attempt in range(max_retries):
    try:
        inserted_ids = vstore.add_documents(docs)
        print(f"\nInserted {len(inserted_ids)} documents.")
        break
    except Exception as e:
        out_of_attempts = attempt >= max_retries - 1
        if out_of_attempts:
            print(f"Failed after {max_retries} attempts: {e}")
            raise
        print(f"Attempt {attempt + 1} failed: {type(e).__name__}")
        print("Retrying in 5 seconds...")
        time.sleep(5)

In [None]:
# Checks your collection to verify the documents are embedded:
# query the "test" collection and print whatever `find()` returns.
test_collection = vstore.astra_db.collection("test")
find_result = test_collection.find()
print(find_result)

### Basic Retrieval

Retrieve context from your vector database, and pass it to the model with a prompt.

In [None]:
# ChatOpenAI could be used here, but no free OpenAI API key is available, so this notebook uses Groq instead.

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from groq import Groq
import os
from getpass import getpass

# Get Groq API Key: prompt (without echoing) when the variable is missing or blank.
# A present-but-empty value would otherwise be handed to the client as if it were a key.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API Key: ")

# Initialize Groq client
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Retriever over the vector store, configured with search_kwargs k=3.
retriever = vstore.as_retriever(search_kwargs=dict(k=3))

# Prompt text with {context} and {question} placeholders.
prompt_template = """
Answer the question based only on the supplied context. If you don't know the answer, say you don't know the answer.
Context: {context}
Question: {question}
Your answer:
"""
prompt = ChatPromptTemplate.from_template(prompt_template)

# Execute RAG chain
question = "What is the importance of sleep for the brain?"
retrieved_docs = retriever.invoke(question)

# Show what came back before building the prompt.
separator = "-" * 80
print(f"Retrieved {len(retrieved_docs)} documents:")
print(separator)
for i, doc in enumerate(retrieved_docs):
    print(f"\nDocument {i+1}:")
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

# The context is the retrieved page contents, one per line.
page_texts = [retrieved.page_content for retrieved in retrieved_docs]
context = "\n".join(page_texts)
formatted_prompt = prompt_template.format(context=context, question=question)

# Call Groq API using groq Python client: the formatted prompt is sent as a single user message.
chat_messages = [{"role": "user", "content": formatted_prompt}]
response = client.chat.completions.create(
    messages=chat_messages,
    model="llama-3.1-8b-instant",
    temperature=0.3,
)

# Print the text of the first returned choice.
first_choice = response.choices[0]
result = first_choice.message.content
rule = "=" * 80
print("\n" + rule)
print("RAG Response:")
print(rule)
print(result)

# Extract and display tags from first retrieved document.
# Tags are the metadata keys whose value is 'y'.
if retrieved_docs:
    top_metadata = retrieved_docs[0].metadata
    rule = "=" * 80
    print("\n" + rule)
    print("Tags from source:")
    print(rule)
    tags = [key for key, value in top_metadata.items() if value == 'y']
    print(f"Tags: {', '.join(tags)}")
    print(f"Author: {top_metadata.get('author', 'Unknown')}")