# Document Q&A Chatbot — LangChain + Groq

In [9]:
pip install langchain langchain-groq sentence-transformers pinecone-client pdfplumber

Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-win_amd64.whl.metadata (48 kB)
Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)
Downloading pypdfium2-4.30.1-py3-none-win_amd64.whl (3.0 MB)
   ---------------------------------------- 0.0/3.0 MB ? eta -:--:--
   -------------- ------------------------- 1.0/3.0 MB 7.1 MB/s eta 0:00:01
   ----------------- ---------------------- 1.3/3.0 MB 3.2 MB/s eta 0:00:01
   ----------------------------------- ---- 2.6/3.0 MB 4.3 MB/s eta 0:00:01
   ---------------------------------------- 3.0/3.0 MB 4.2 MB/s eta 0:00:00
Installing collected packages: pypdfium2, pdfplumber

   ---------------------------------------- 0/2 [pypdfium2]
   ---------------------------------------- 0/2 [pypdfium2]
   ---------------------------------------- 0/2 [pypdfium2]
   ---------------------------------------- 0/2 [pypdfium2]
   --------

## PDF Loader + Chunking

In [11]:
import pdfplumber
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

def load_and_split_pdf(file_path, chunk_size=500, chunk_overlap=50):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        file_path: Location of the PDF; passed unchanged to ``pdfplumber.open``.
        chunk_size: ``chunk_size`` forwarded to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` forwarded to
            ``RecursiveCharacterTextSplitter``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.create_documents`` returns for
        the newline-joined text of all pages that yielded text.
    """
    # Load PDF
    with pdfplumber.open(file_path) as pdf:
        # Call extract_text() once per page so each page is parsed a single
        # time instead of once for the filter and again for the value.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Pages that yield no text (None or "") are dropped before joining.
    text = "\n".join(page_text for page_text in page_texts if page_text)

    # Split text
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    return splitter.create_documents([text])

## Embed and Store in Pinecone or FAISS

In [None]:
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS  # or use Pinecone from langchain_community.vectorstores
from langchain.embeddings import HuggingFaceEmbeddings

# Configuration: embedding model identifier and the source document to index.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
PDF_PATH = "MLBOOK.pdf"

# Embedding
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
docs = load_and_split_pdf(PDF_PATH)

# A PDF with no extractable text (e.g. scanned images) yields no chunks;
# stop here with a clear message instead of attempting to index nothing.
if not docs:
    raise ValueError(
        f"No extractable text found in {PDF_PATH!r}; cannot build a vector index."
    )

# FAISS store
vectorstore = FAISS.from_documents(docs, embedding_model)