## Load Data 

In [1]:
from langchain_community.document_loaders import PyMuPDFLoader
pdf_load = PyMuPDFLoader("ML Engineer.pdf")
pdf_docs = pdf_load.load()
print(len(pdf_docs))


2


## Splitting Data

In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
recursive_splitter = RecursiveCharacterTextSplitter(
    separators = ["\n\n","\n"," "],
    chunk_size = 200,
    chunk_overlap = 40
)
docs = recursive_splitter.split_documents(pdf_docs)

## Enviorments

In [3]:
from dotenv import load_dotenv
load_dotenv()
import os 
api_key_groq = os.getenv('GROQ_API_KEY')
api_key_hf = os.getenv('HF_TOKEN')
# Quick debug print to ensure presence (do NOT print actual key!)
print('GROQ key present:', bool(api_key_groq))
print('HF token present:', bool(api_key_hf))

GROQ key present: True
HF token present: True


## Embedding 

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2",
                                   model_kwargs={"device": "cpu"})

In [6]:
# Embeddings test - verify embedding vector shape (requires HF_TOKEN)
try:
    sample_embedding = embeddings.embed_documents(["Test sentence for embeddings"])
    print('Embedding success, first vector length:', len(sample_embedding[0]))
except Exception as e:
    print('Embedding test failed:', e)

Embedding success, first vector length: 768


## Vectore Store Data Base

In [7]:
from langchain_community.vectorstores import FAISS
vector = FAISS.from_documents(
    documents = docs,
    embedding = embeddings
)

## Retrieve

In [8]:
retriever = vector.as_retriever(
    search_type = "similarity",
    search_kwargs = {"k":3}
)

## Prompt 

In [9]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template(
    """
    Answer the following Question according to the context given below
    context: {context}
    Question: {input}
    Answer:
    """
)

## LLM Define 

In [10]:
from langchain_groq import ChatGroq
llm = ChatGroq(model_name="llama-3.1-8b-instant", groq_api_key=api_key_groq)
llm

ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 8192, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000001239E08A810>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001239E16B6E0>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

## Aware The the LLM with the Prompt

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

ModuleNotFoundError: No module named 'langchain.chains'

## RAG chain Using LCEL 

In [None]:
# Correct runnable imports: use 'RunnablePassthrough' (not 'RunnablePassThrough')
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableWithMessageHistory


ImportError: cannot import name 'RunnablePassThrough' from 'langchain_core.runnables' (c:\Users\Prime Laptops\Desktop\Gen\venv\Lib\site-packages\langchain_core\runnables\__init__.py)

In [None]:
# Test the RAG chain
test_question = "What is machine learning?"
try:
    response = rag_chain.invoke(test_question)
    print("Question:", test_question)
    print("\nAnswer:")
    print(response)
except Exception as e:
    print("Error testing RAG chain:", e)


In [None]:
# Build RAG chain using LCEL (LangChain Expression Language)
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Format documents function
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Create the RAG chain using LCEL
rag_chain = (
    {"context": retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print("RAG chain created successfully using LCEL!")
print("You can now use rag_chain.invoke('your question here') to query")

ERROR: create_stuff_documents_chain failed: No module named 'langchain_core.chains'
ERROR: create_retrieval_chain failed: No module named 'langchain_core.chains'


In [None]:
# Quick test: runnable imports
try:
    from langchain_core.runnables import RunnablePassthrough
    from langchain_core.runnables.history import RunnableWithMessageHistory
    print('Runnable imports: OK')
except Exception as e:
    print('ERROR: Runnable imports failed:', e)