In [46]:
# %pip install langchain_community
# %pip install langchain_experimental
# %pip install pypdf
# %pip install sentence-transformers
# %pip install langchain_pinecone pinecone

In [None]:
# Library imports
from langchain_community.document_loaders import PyPDFLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import pinecone
import os

In [48]:
# Populate the process environment via python-dotenv, then read the two API
# keys this notebook needs (each is None when the variable is not set)
load_dotenv()
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY')
API_KEY = os.environ.get('OPENROUTER_API_KEY')


In [49]:
# Location of the source PDF. Defaults to the original local path, but can be
# overridden through the INSTITUTIONAL_PDF_PATH environment variable so the
# notebook is runnable on machines other than the author's.
file_path = os.getenv(
    "INSTITUTIONAL_PDF_PATH",
    r"E:\chatbotProject\chatbotProject\backend\WebsiteContent\Institutional_information.pdf",
)

# Load the PDF
loader = PyPDFLoader(file_path)
documents = loader.load()

# Keep the text of the first loaded document as a plain string.
# NOTE(review): only documents[0] is captured here — presumably just the first
# page of the PDF; confirm that is intended (chunking below uses `documents`).
content = documents[0].page_content

In [50]:
# Hugging Face sentence-embedding model; the resulting `embeddings` object is
# reused for semantic chunking and for writing vectors to Pinecone
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
)

In [51]:
# Sanity check: embed a short probe string and display how many dimensions
# the model produces
# NOTE(review): presumably the Pinecone index must be created with this same dimension — confirm
sample_vector = embeddings.embed_query("hello world")
embedding_dim = len(sample_vector)
embedding_dim

384

In [52]:
# Break the loaded documents into chunks with the embedding-driven splitter
text_splitter = SemanticChunker(embeddings=embeddings)
docs = text_splitter.split_documents(documents=documents)

In [53]:
# Name of the Pinecone index that receives the chunks
index_name = "chatbotproject"

# Pinecone client built from the key read from the environment
# NOTE(review): `pc` is not referenced again in the visible cells; the vector
# store call below presumably picks up its credentials from the environment — confirm
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

# Embed every chunk and store it in the index
# NOTE(review): re-running this cell presumably uploads the same chunks again — confirm
vectorstore = PineconeVectorStore.from_documents(
    docs,
    embeddings,
    index_name=index_name,
)

In [54]:
# Chat model client pointed at the OpenRouter API endpoint
chat_model = ChatOpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY,
    model="openai/gpt-3.5-turbo",
)

In [55]:
# Prompt that tells the model to answer only from the supplied context;
# it is filled with the `context` and `question` values produced by the chain
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
      You are a helpful assistent.
      Answer ONLY from the provided context.
      If the context is insufficient, just say don't know.
      {context}
      Question: {question}
    """,
)

In [56]:
# Retriever over the vector store: similarity search returning the top 4 matches
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [57]:
# Combine retriever and chat model in a chain
def format_docs(retrived_docs):
  """Merge retrieved documents into a single context string.

  Args:
    retrived_docs: iterable of objects exposing a ``page_content`` string.

  Returns:
    The ``page_content`` values joined with a blank line between them
    (an empty string when nothing was retrieved).
  """
  page_texts = [doc.page_content for doc in retrived_docs]
  return "\n\n".join(page_texts)

# Converts the chat model's reply into a plain string
parser = StrOutputParser()

# Feed the incoming question to two branches at once: one retrieves and
# formats the context, the other passes the question through unchanged
parallel_chain = RunnableParallel(
  context=retriever | RunnableLambda(format_docs),
  question=RunnablePassthrough(),
)

# Full pipeline: gather inputs -> fill prompt -> call model -> parse to text
main_chain = parallel_chain | prompt | chat_model | parser

In [59]:
# Test the chain: send one sample question through retrieval, prompting and
# the chat model; the returned string is displayed as the cell output
main_chain.invoke("Fee submition?")

'A: Fees can be paid online through net banking, UPI, or credit/debit cards. Offline payments are accepted at the campus accounts office.'