# Lab 5: Putting It Together — Simple RAG

**Estimated Time:** 9 minutes

---

## Step 1: Set Up OCI Generative AI

In [None]:
import os
import oci

# Build the OCI Generative AI inference client from environment-driven settings.
# OCI_CONFIG_PATH falls back to the default SDK config location when unset.
oci_config_path = os.getenv("OCI_CONFIG_PATH", "~/.oci/config")
oci_config = oci.config.from_file(oci_config_path)
genai_client = oci.generative_ai_inference.GenerativeAiInferenceClient(
    config=oci_config,
    service_endpoint=os.getenv("ENDPOINT"),
)

# Compartment OCID read from the environment (None when COMPARTMENT_OCID is unset).
COMPARTMENT_ID = os.getenv("COMPARTMENT_OCID")

print("OCI Generative AI client ready.")

In [None]:
def generate_response(
    prompt,
    temperature=0.0,
    model_id="meta.llama-3.2-90b-vision-instruct",
    top_p=0.9,
):
    """Call OCI Generative AI to generate a response.

    Sends ``prompt`` as a single user message through the module-level
    ``genai_client``, scoped to ``COMPARTMENT_ID``, using on-demand serving.

    Args:
        prompt: Text sent as the only user message of the chat request.
        temperature: Sampling temperature forwarded to the chat request.
        model_id: On-demand model to serve the request. Defaults to the model
            this notebook was written against.
        top_p: Nucleus-sampling value forwarded to the chat request.

    Returns:
        The text of the first content part of the first returned choice.
    """
    # Short alias: every request object below lives in the same models module.
    models = oci.generative_ai_inference.models

    user_message = models.UserMessage(
        content=[models.TextContent(text=prompt)]
    )
    chat_detail = models.ChatDetails(
        compartment_id=COMPARTMENT_ID,
        chat_request=models.GenericChatRequest(
            messages=[user_message],
            temperature=temperature,
            top_p=top_p,
        ),
        serving_mode=models.OnDemandServingMode(model_id=model_id),
    )
    response = genai_client.chat(chat_detail)
    return response.data.chat_response.choices[0].message.content[0].text

# Smoke test: one round trip through generate_response before building on it.
test_prompt = "Respond with exactly: 'LLM connection verified.'"
test = generate_response(test_prompt)
print(f"Test response: {test}")

## Step 2: Build the Retrieval Function

In [None]:
def retrieve_chunks(question, top_k=3):
    """Fetch the ``top_k`` closest knowledge chunks for ``question``.

    The question is embedded in the database with ``doc_model`` and compared
    to each chunk embedding by cosine distance (rounded to 4 decimals); rows
    are ordered by that distance and limited to ``top_k``.

    NOTE(review): ``connection`` is not defined in this part of the notebook;
    presumably it is an open database connection created in an earlier
    lab/cell — confirm before running on a fresh kernel.

    Returns:
        A list of dicts with keys ``chunk_id``, ``text``, ``source`` (the
        knowledge-base title) and ``distance``, in query result order.
    """
    bind_values = {"question": question, "top_k": top_k}

    with connection.cursor() as cursor:
        cursor.execute("""
            SELECT c.chunk_id,
                   c.chunk_text,
                   kb.title,
                   ROUND(VECTOR_DISTANCE(c.embedding,
                       VECTOR_EMBEDDING(doc_model USING :question),
                       COSINE), 4) AS distance
            FROM city_knowledge_chunks c
            JOIN city_knowledge_base kb ON c.doc_id = kb.doc_id
            ORDER BY distance
            FETCH APPROXIMATE FIRST :top_k ROWS ONLY
        """, bind_values)
        rows = cursor.fetchall()

        # chunk_text may come back as a LOB-like object; read it while the
        # cursor is still open, otherwise use the value as returned.
        return [
            {
                "chunk_id": chunk_id,
                "text": body.read() if hasattr(body, 'read') else body,
                "source": title,
                "distance": distance,
            }
            for chunk_id, body, title, distance in rows
        ]

# Smoke test for retrieval: show distance and a truncated title per hit.
test_query = "bridge vibration anomaly response"
test_results = retrieve_chunks(test_query)
print(f"Retrieved {len(test_results)} chunks:")
for hit in test_results:
    title_preview = hit['source'][:60]
    print(f"  [{hit['distance']}] {title_preview}")

## Step 3: Build the RAG Pipeline

In [None]:
def rag_query(question, top_k=3):
    """Full RAG pipeline: retrieve context, build prompt, generate answer.

    Args:
        question: Natural-language question to answer.
        top_k: Number of chunks to request from ``retrieve_chunks``.

    Returns:
        A ``(answer, chunks)`` tuple: the generated answer text and the list
        of chunk dicts that were retrieved. When retrieval returns nothing,
        the model is not called and ``answer`` is a fixed message saying no
        context was found.
    """
    print(f"Question: {question}\n")

    # Step 1: Retrieve relevant chunks
    print("1. Retrieving relevant context...")
    chunks = retrieve_chunks(question, top_k=top_k)
    print(f"   Found {len(chunks)} relevant chunks.\n")

    # With no context the prompt would be ungrounded; skip the LLM call and
    # say so explicitly instead of sending an empty CONTEXT section.
    if not chunks:
        print("   No context retrieved; skipping generation.\n")
        return (
            "No relevant context was found in the knowledge base, "
            "so this question cannot be answered from the source documents.",
            chunks,
        )

    # Step 2: Build context from retrieved chunks, each tagged with its source
    context = "\n\n".join(
        f"[Source: {chunk['source']}]\n{chunk['text']}" for chunk in chunks
    )

    # Step 3: Build the prompt
    print("2. Building prompt with retrieved context...")
    prompt = f"""You are a CityPulse operations assistant. Answer the question using ONLY
the provided context below. Be specific and reference relevant details from the
source documents. If the context doesn't fully answer the question, say so.

CONTEXT:
{context}

QUESTION: {question}

ANSWER:"""

    # Step 4: Generate response
    print("3. Generating response with OCI Generative AI...\n")
    answer = generate_response(prompt)

    return answer, chunks

print("RAG pipeline ready.")

## Step 4: Test the RAG Pipeline

In [None]:
# Demo 1: bridge vibration question, with full banner output and distances.
bridge_question = "What should I do if a bridge sensor shows elevated vibration readings?"
answer, sources = rag_query(bridge_question)

divider = "=" * 70
print(divider)
print("ANSWER:")
print(divider)
print(answer)
print("\n" + divider)
print("SOURCES USED:")
print(divider)
for src in sources:
    print(f"  • {src['source']} (distance: {src['distance']})")

In [None]:
# Demo 2: water main break detection and response.
water_question = "How do we detect and respond to a possible water main break?"
answer, sources = rag_query(water_question)

# One print call emits the same three lines as separate prints would.
print("ANSWER:", answer, "\nSOURCES:", sep="\n")
for src in sources:
    print(f"  • {src['source']}")

In [None]:
# Demo 3: dependency and failure-impact question for Substation Gamma.
substation_question = (
    "What infrastructure depends on Substation Gamma and what happens if it fails?"
)
answer, sources = rag_query(substation_question)

# One print call emits the same three lines as separate prints would.
print("ANSWER:", answer, "\nSOURCES:", sep="\n")
for src in sources:
    print(f"  • {src['source']}")

In [None]:
# Demo 4: vibration sensor calibration procedure and cadence.
calibration_question = (
    "How do I calibrate a vibration sensor and how often should it be done?"
)
answer, sources = rag_query(calibration_question)

# One print call emits the same three lines as separate prints would.
print("ANSWER:", answer, "\nSOURCES:", sep="\n")
for src in sources:
    print(f"  • {src['source']}")

**Congratulations!** You've built a complete RAG pipeline.

Raw text → Chunks → Embeddings → HNSW Index → Vector Search → RAG Pipeline