In [None]:
import os
from getpass import getpass

# Never hardcode API keys in a notebook: they leak through version control,
# shared links and saved copies of the file. Read the key from the environment
# and only prompt (without echoing) when it is not already set.
# Any key that was ever pasted into this cell should be revoked and regenerated.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

**Step 1: Set Up Environment & Dependencies**

In [None]:
## Install libraries
!pip install langchain openai tiktoken chromadb faiss-cpu unstructured
!pip install sentence-transformers  # or
!pip install openai  # if using OpenAI embeddings
!pip install langchain openai chromadb tiktoken
!pip install sentence-transformers

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

**Step 2: Load and Preprocess the JSON Data**

In [None]:
import json
from langchain.schema import Document

# Load the structured policy. The encoding is stated explicitly so decoding
# does not depend on the platform's default locale.
with open('/content/drive/MyDrive/RAG/json/structured_policy1.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Convert to LangChain Documents: one Document per entry, using the entry's
# "content" as the text and keeping its "section" value as metadata.
# Both keys are required; a missing one raises KeyError.
docs = [
    Document(page_content=section["content"], metadata={"section": section["section"]})
    for section in data
]


**Step 3: Embed Documents and Store in Vector DB (Chroma)**

In [None]:
# Import from langchain_community (as the FAISS import in the import cell
# already does) instead of the deprecated `langchain.*` re-exports.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# Sentence-transformers model used to embed the documents.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed `docs` and store them in a Chroma collection backed by persist_directory.
# NOTE(review): re-running this cell appears to add the same documents again to
# the existing on-disk store - confirm, and clear the directory between runs if so.
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory="/content/chroma_store"
)
# NOTE(review): recent chromadb releases persist automatically and flag this
# call as deprecated - confirm against the installed version before removing it.
vectordb.persist()


**Step 4: Set Up Retriever and LLM Chain**

In [None]:
from langchain.chains import RetrievalQA
# Import ChatOpenAI from langchain_openai, matching the import cell at the top
# of the notebook. Importing it from langchain.chat_models would rebind the
# name to the deprecated implementation.
from langchain_openai import ChatOpenAI

# Retriever over the Chroma store, with default search settings.
retriever = vectordb.as_retriever()

# temperature=0.2 keeps sampling randomness low.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.2)

# return_source_documents=True makes the chain output carry the retrieved
# documents under "source_documents" next to the answer under "result".
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)


**Step 5: Ask Questions**

In [None]:
# Instruction text prepended to the question. Fixes over the earlier wording:
# the corpus is a policy (not a transcript), sentences are separated by a
# space, and "Question:" is followed by a space before the question itself.
template = (
    "You are a helpful assistant. Answer ONLY from the provided policy context. "
    "If the context is insufficient, just say you don't know. Question: "
)
query = template + "What is the purpose of the policy?"
# NOTE(review): the full string, instructions included, is also what the
# retriever searches with; consider moving the instructions into the chain's
# prompt so that only the question is used for retrieval.
# .invoke() replaces the deprecated direct call `rag_chain(query)`.
result = rag_chain.invoke({"query": query})

print("Answer:", result['result'])
for doc in result['source_documents']:
    print("\nSource:", doc.metadata['section'])
    print(doc.page_content[:300])


**Building a Chain**

In [None]:
# Step 1: Install required packages
!pip install -q langchain faiss-cpu sentence-transformers huggingface_hub

# Step 2: Imports
import json
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# Step 3: Load and parse structured_policy1.json
def load_policy_json(json_path):
    """Read a policy JSON file and return its entries as LangChain Documents.

    Args:
        json_path: Path to a UTF-8 encoded JSON file whose top level is
            iterated entry by entry; every entry must provide a "content" key.

    Returns:
        A list with one Document per entry. The text is the entry's "content";
        the "section" metadata is the entry's "section" value, falling back to
        its "title" value and then to "N/A".

    Raises:
        KeyError: if an entry has no "content" key.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        entries = json.load(handle)
    return [
        Document(
            page_content=entry["content"],
            metadata={"section": entry.get("section", entry.get("title", "N/A"))},
        )
        for entry in entries
    ]

json_path = "/content/drive/MyDrive/RAG/json/structured_policy1.json"
documents = load_policy_json(json_path)

# Step 4: Text splitting
# Split into chunks of at most 500 characters, with 100 characters of overlap
# between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
split_docs = splitter.split_documents(documents)

# Step 5: Embeddings + Vector Store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(split_docs, embedding_model)
# The number of results has to be passed through search_kwargs; a bare `k=3`
# keyword to as_retriever() is not applied to the search.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Step 6: Setup RAG chain with HuggingFaceHub LLM (Flan-T5)
import os
from getpass import getpass
from huggingface_hub import login

# Take the token from the environment, or prompt for it without echoing,
# instead of pasting it into the notebook source where it would be saved
# and shared with the file.
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass("Enter your Hugging Face token: ")
# HUGGINGFACEHUB_API_TOKEN is the variable the LangChain HuggingFaceHub
# wrapper looks up when no token is passed to it explicitly.
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token
login(hf_token)

llm = HuggingFaceHub(
    repo_id="google/flan-t5-base",
    model_kwargs={"temperature": 0.2, "max_length": 512}
)

# chain_type="stuff" puts all retrieved chunks into a single prompt;
# return_source_documents=True returns those chunks with the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type="stuff"
)

# Step 7: Ask questions interactively
while True:
    try:
        # Strip surrounding whitespace so inputs such as "exit " still quit.
        user_query = input("\n🔍 Ask your policy question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the prompt ends the session instead of raising.
        break
    if user_query.lower() == "exit":
        break
    if not user_query:
        # Nothing typed: ask again rather than sending an empty query to the chain.
        continue
    # .invoke() replaces the deprecated direct call `rag_chain(user_query)`.
    result = rag_chain.invoke({"query": user_query})
    print("\n🧠 Answer:")
    print(result['result'])
    print("\n📄 Sources:")
    for doc in result["source_documents"]:
        print(f"\n→ Section: {doc.metadata.get('section', 'N/A')}")
        snippet = doc.page_content
        # Only mark the preview as truncated when text was actually cut off.
        print(snippet[:300] + ("..." if len(snippet) > 300 else ""))
