<a href="https://colab.research.google.com/github/SujanKarna/RAG_ChatModel/blob/master/Langchain_chatModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install chromadb sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import openai
import os

In [None]:
# Initialize the ChromaDB client, passing "./rag_store" as the persist directory.
# NOTE(review): whether this setting alone yields on-disk persistence depends on
# the installed chromadb version -- confirm against the version in use.
client = chromadb.Client(Settings(persist_directory="./rag_store"))

In [None]:
# Get the "rag_docs" collection from the client, creating it if it is missing
collection = client.get_or_create_collection(name="rag_docs")

In [None]:
# Prepare the documents: a small in-memory list of example sentences that the
# later cells embed and store in the collection.
documents = [
    "Retrieval-Augmented Generation (RAG) combines search and generation.",
    "FAISS is a fast similarity search library developed by Facebook.",
    "ChromaDB supports metadata filtering and persistent storage.",
    "SentenceTransformers can convert text into semantic embeddings."
]

In [None]:
# Embed the documents
# Load the "all-MiniLM-L6-v2" SentenceTransformer model; the same `embedder`
# is reused later to encode queries.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Encode every document and convert the result to plain Python lists.
doc_embeddings = embedder.encode(documents).tolist()

In [None]:
# Store each document in ChromaDB together with its precomputed embedding,
# using positional ids of the form "doc_0", "doc_1", ...
collection.add(
    ids=[f"doc_{position}" for position, _ in enumerate(documents)],
    embeddings=doc_embeddings,
    documents=documents,
)

In [None]:
# Retrieve top-K relevant documents
def retrieve(query, k=3):
    """Return the stored document texts that best match ``query``.

    The query is encoded with the module-level ``embedder`` and looked up in
    the module-level ChromaDB ``collection``.

    Args:
        query: The question or search text.
        k: Number of results to request from the collection.

    Returns:
        The list of document texts returned for the query.
    """
    # encode() receives a one-element batch, so take the single resulting vector.
    encoded_batch = embedder.encode([query]).tolist()
    query_vector = encoded_batch[0]
    hits = collection.query(query_embeddings=[query_vector], n_results=k)
    # Only one query embedding was submitted, so read the first result group.
    return hits["documents"][0]

### For Open-Source LLMs - Mistral

In [None]:
# using local llm from huggingface
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM
import torch
from huggingface_hub import login

In [None]:
# Generate answers with a local LLM
# Load the Mistral model used for generation

# `userdata` is needed for the Hugging Face token below. Import it in this cell
# so running the notebook top to bottom does not hit a NameError (the only other
# import of it lives in a later cell).
from google.colab import userdata

# Authenticate with the Hugging Face Hub using the token stored in Colab secrets.
login(token=userdata.get("HUGGINGFACE_TOKEN"))
gen_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_name)
# Load the weights as float16 and let the library choose device placement.
gen_model = AutoModelForCausalLM.from_pretrained(
    gen_model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)




# Format prompt
def generate_answer_local(context, query):
    """Answer ``query`` from ``context`` using the locally loaded model.

    Builds an instruction-style prompt, samples a completion from the
    module-level ``gen_model``, and returns the text that follows the last
    ``### Answer:`` marker in the decoded output, stripped of surrounding
    whitespace.

    Args:
        context: Retrieved text to ground the answer on.
        query: The user's question.

    Returns:
        The answer text as a string.
    """
    prompt = f"""### Instruction:
              Use the following context to answer the question.
              ### Context:{context}
              ### Question: {query}
              ### Answer:"""
    # Tokenize, then move the tensors to the device the model reports.
    encoded = gen_tokenizer(prompt, return_tensors="pt")
    encoded = encoded.to(gen_model.device)
    sampling_options = {
        "max_new_tokens": 512,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
    }
    # Generation only, so gradient tracking is switched off.
    with torch.no_grad():
        generated = gen_model.generate(**encoded, **sampling_options)
    decoded = gen_tokenizer.decode(generated[0], skip_special_tokens=True)
    # Keep only what comes after the final answer marker in the decoded text.
    _, _, answer = decoded.rpartition("### Answer:")
    return answer.strip()



In [None]:
# Retrieve the matching documents for the question and join them, one per line,
# into a single context string.
context = "\n".join(retrieve("What is RAG and how does it work?"))
# Ask the local model the same question, grounded on that context.
answer = generate_answer_local(context, "What is RAG and how does it work?")
print("🔍 Answer:\n", answer)

### For the OpenAI API

In [None]:
# Read the OpenAI API key from Colab's userdata store and set it on the
# openai module.
from google.colab import userdata
openai.api_key = userdata.get("OPENAI_API_KEY")

In [None]:
pip install --upgrade openai

In [None]:
# ✅ Securely retrieve API key from Colab Secrets
api_key = userdata.get("OPENAI_API_KEY")
# NOTE(review): this rebinds `client`, which earlier held the ChromaDB client.
# `collection` was created before the rebinding, but re-running the earlier
# collection cell after this one would use the OpenAI client instead.
client = openai.OpenAI(api_key=api_key)

# 🧾 Generate answer using OpenAI GPT
def generate_answer(query):
    """Answer ``query`` with an OpenAI chat model, grounded on retrieved docs.

    Retrieves documents for the query via ``retrieve``, joins them into a
    context block, and sends a single user message to the module-level
    OpenAI ``client``.

    Args:
        query: The user's question.

    Returns:
        The message content of the first choice in the completion.
    """
    retrieved_docs = retrieve(query)
    context = "\n".join(retrieved_docs)
    prompt = f"Use the following context to answer the question:\n{context}\n\nQuestion: {query}"

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}]
    )
    # The `openai.OpenAI` client returns a typed response object rather than a
    # dict, so the reply is read through attributes; subscripting it with
    # ['choices'] raises TypeError.
    return response.choices[0].message.content


In [None]:
# Ask a question through the retrieval + OpenAI pipeline and print the answer
# (the query is embedded inside retrieve(), which generate_answer() calls).
query = "What is RAG and how does it work?"
answer = generate_answer(query)
print("Answer:", answer)