In [None]:


import os

from dotenv import load_dotenv
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_pinecone import PineconeVectorStore
from langchain_together import ChatTogether
from pinecone import Pinecone, ServerlessSpec

# Configuration.
# Fixed settings for this script:
DATA_DIR = "data/"  # folder that holds the source PDFs
INDEX_NAME = "policy-index"
PINECONE_REGION = "us-east-1"

# Secrets come from the process environment. load_dotenv() runs first so that
# anything it adds to the environment is visible to the lookups below; either
# key is None when the corresponding variable is not set.
load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
TOGETHER_API_KEY = os.environ.get("TOGETHER_API_KEY")

# Load the PDFs and cut them into overlapping chunks for embedding.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 64

# Only files directly matching "*.pdf" under DATA_DIR are picked up.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
loader = DirectoryLoader(DATA_DIR, glob="*.pdf", loader_cls=PyPDFLoader)

docs = loader.load()
texts = splitter.split_documents(docs)

# Pinecone setup: embedding model, client, index bootstrap, and retriever.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
pc = Pinecone(api_key=PINECONE_API_KEY)

# The index is created and populated only the first time it is found missing;
# when it already exists the documents loaded above are NOT re-uploaded.
existing_indexes = {idx.name for idx in pc.list_indexes()}
if INDEX_NAME not in existing_indexes:
    # Ingestion happens exactly once, at creation time. Creating the index with
    # nothing to upload would leave it empty forever (later runs see that it
    # exists and skip ingestion), so refuse to create it in that case.
    if not texts:
        raise ValueError(
            f"No PDF content was loaded from {DATA_DIR!r}; "
            f"refusing to create an empty index {INDEX_NAME!r}."
        )
    # NOTE(review): dimension=384 must equal the embedding size of the model
    # above -- confirm against the model card if the model is ever changed.
    pc.create_index(
        name=INDEX_NAME,
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region=PINECONE_REGION),
    )
    PineconeVectorStore.from_documents(texts, embedding=embedding, index_name=INDEX_NAME)

# Always attach to the index by name so both paths (fresh / pre-existing)
# end up with the same kind of vector store object.
vectorstore = PineconeVectorStore.from_existing_index(index_name=INDEX_NAME, embedding=embedding)
retriever = vectorstore.as_retriever()

# Together AI chat model shared by the chains built further down.
llm_settings = {
    "model": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "api_key": TOGETHER_API_KEY,
    "temperature": 0.3,
    "max_tokens": 512,
}
llm = ChatTogether(**llm_settings)

# Prompt templates
# Parsing prompt: asks the model to pull the listed fields out of the
# free-text query and answer in JSON. Its only template variable is {input}.
PARSE_TEMPLATE = """
Extract key details from the following query and return JSON:
- Age
- Gender
- Procedure
- Location
- Policy Duration

Query: {input}
"""
parse_prompt = ChatPromptTemplate.from_template(PARSE_TEMPLATE)

# Decision prompt: combines the extracted details with the policy clauses and
# asks for a JSON verdict. It has two template variables, {parsed} and
# {context}; both must be supplied when the prompt is formatted.
DECISION_TEMPLATE = """
You are a policy decision assistant.

Based on the extracted details:
{parsed}

And the following policy clauses:
{context}

Determine:
- decision: approve/reject
- amount: (in ₹)
- justification: cite relevant clause(s)

Return JSON with these keys.
"""
decision_prompt = ChatPromptTemplate.from_template(DECISION_TEMPLATE)

# Chains.
# The parsing step has no documents to stuff into the prompt, and
# create_stuff_documents_chain rejects any prompt without a {context}
# variable (parse_prompt only has {input}). Use a plain prompt -> model ->
# string pipeline instead; invoking it with {"input": ...} returns the model's
# reply as a str.
parser_chain = parse_prompt | llm | StrOutputParser()

# Decision step: retrieved documents are stuffed into decision_prompt's
# {context}; the retrieval chain looks documents up using the "input" key and
# forwards the remaining keys of the invoke payload to the prompt.
qa_chain = create_stuff_documents_chain(llm, decision_prompt)
rag_chain = create_retrieval_chain(retriever, qa_chain)

# Sample input
query = "46-year-old male, knee surgery in Pune, 3-month-old insurance policy"

# Step 1: extract the structured details from the free-text query.
parsed_result = parser_chain.invoke({"input": query})
print("Parsed:", parsed_result)

# Step 2: retrieve policy clauses and ask for a decision.
# "input" is what the retriever searches with; "parsed" fills the {parsed}
# placeholder of decision_prompt, which would otherwise be missing when the
# prompt is formatted.
parsed_text = str(parsed_result)
final_result = rag_chain.invoke({"input": parsed_text, "parsed": parsed_text})

# final_result is a dict that also carries the input and the retrieved
# documents; only the model's answer is the decision itself.
print("Decision:", final_result["answer"])
