# **RAG Application** with LangChain and HuggingFace LLM

In [None]:
# Install the necessary packages
!pip install torch -q
!pip install transformers -q
!pip install numpy -q
!pip install langchain -q
!pip install langchain_community -q
!pip install langchain-chroma -q
!pip install sentence_transformers -q

In [None]:
import os
from google.colab import userdata

### Initialize HuggingFace LLM

Model repo url: https://huggingface.co/mistralai/Mistral-7B-v0.1

In [None]:
import gc
import os

import torch
from google.colab import userdata
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Define the model ID
model_id = "mistralai/Mistral-7B-v0.1"

# Read the Hugging Face access token from the HF_TOKEN environment variable,
# falling back to the Colab secret of the same name. Never paste a token into
# the notebook itself: a token that has been committed or shared is exposed to
# every reader and must be revoked.
huggingface_api_token = os.environ.get("HF_TOKEN") or userdata.get("HF_TOKEN")

# Load the tokenizer and model (fp16 weights, placed automatically on the
# available devices)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_api_token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    token=huggingface_api_token,
)

# Create a text generation pipeline. The model and tokenizer are already
# instantiated (and the model already placed) above, so no loading options
# such as device_map or trust_remote_code are passed here.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.1,
)

# Initialize the HuggingFace llm using the pipeline
llm = HuggingFacePipeline(pipeline=pipe)

# `llm` holds the pipeline (and through it the model and tokenizer), so deleting
# the local names would not release the model's GPU memory. Only collect garbage
# and return any unused cached CUDA blocks.
gc.collect()
torch.cuda.empty_cache()


### Initialize Embedding Model

Model url: https://sbert.net/

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings
import torch

# Release any cached CUDA memory before the embedding model is loaded
torch.cuda.empty_cache()

# The embedding model is pinned to the CPU through model_kwargs
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs=dict(device="cpu"),
)

### Initialize Output Parser

In [None]:
from langchain_core.output_parsers import StrOutputParser

# String output parser instance, created once for reuse
output_parser = StrOutputParser()

### Load PDF Document

In [None]:
!pip install pypdf -qU

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Point the loader at the lecture PDF
loader = PyPDFLoader(
    file_path="/content/2025-S1-IT3021-Lecture-01-Introduction.pdf",
)

# Read the PDF into a list of Document objects
docs = loader.load()

In [None]:
# Number of Document objects returned by the loader
len(docs)

**Output:** `27`

In [None]:
# Inspect the first loaded Document (its metadata and page_content)
docs[0]

**Sample Output:**
```
Document(metadata={'producer': 'Microsoft® PowerPoint® for Microsoft 365', 'creator': 'Microsoft® PowerPoint® for Microsoft 365', 'creationdate': '2023-02-13T22:59:45+05:30', 'moddate': '2023-02-13T22:59:45+05:30', 'title': 'IT3021: Data Warehousing and Business Intelligence', 'source': '/content/2025-S1-IT3021-Lecture-01-Introduction.pdf', 'total_pages': 27, 'page': 0, 'page_label': '1'}, page_content='BSc in IT: Specialising in Data Science\nIT3021: Data Warehousing \nand Business Intelligence\nLecture 01\nIntroduction to DW & BI')
```

### Split Documents into Chunks

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Build the splitter with the chunk size and overlap used in this notebook
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=50,
)

# Break the loaded documents into chunks
splits = text_splitter.split_documents(documents=docs)

In [None]:
# Number of chunks produced by the text splitter
len(splits)

**Output:** `42` (document chunks after splitting)

### Create Vector Store and Retriever

In [None]:
from langchain_chroma import Chroma

# Embed the chunks with embedding_model and index them in a Chroma vector store
# NOTE(review): no collection name or persist directory is given; re-running
# this cell in the same kernel may append duplicate chunks to the same default
# collection — confirm before relying on re-runs.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding_model,
)

In [None]:
# Create a retriever from the vector store; no arguments are passed, so the
# vector store's default retrieval settings apply
retriever = vectorstore.as_retriever()

In [None]:
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

def docs_to_text(docs):
    """Join the page_content of every document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents separated by a blank line; an empty string when
        ``docs`` is empty.
    """
    page_texts = (document.page_content for document in docs)
    return "\n\n".join(page_texts)

# Wrap docs_to_text in a RunnableLambda so it can be used as a step in a chain
# (list of Documents → string)
doc_text_runnable = RunnableLambda(docs_to_text)

### Define Prompt Template

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt template text, written line by line so that every newline (including
# the leading and trailing ones) is explicit. {question} and {context} are the
# placeholders filled in when the prompt is formatted.
template = (
    "\n"
    "Answer this question using the provided context only.\n"
    "\n"
    "{question}\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Answer:\n"
)

# Build the prompt template object from the template string
prompt = ChatPromptTemplate.from_template(template)

In [None]:
# Display the prompt template object
prompt

### Chain Retriever and Prompt Template with LLM

In [None]:
# Build the RAG chain:
#   1. the incoming question is sent to the retriever, whose Documents are
#      flattened to plain text by doc_text_runnable ("context"), while the
#      question itself is passed through unchanged ("question");
#   2. both values fill the prompt template;
#   3. the LLM output is handed to the output parser.
# Composing `retriever | doc_text_runnable` directly replaces the previous
# extra assign step, which re-invoked the runnable by hand inside a lambda.
chain = (
    {"context": retriever | doc_text_runnable, "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

print("✓ RAG chain created successfully")

**Output:** `✓ RAG chain created successfully`

#### Invoke RAG Chain with Example Questions

In [None]:
# Ask the first example question
question = "what is data warehousing?"
response = chain.invoke(question)

# Show the question, then the answer with surrounding whitespace trimmed
print("Question:", question)
print("\nAnswer:", response.strip(), sep="\n")

**Example Output** (the pipeline samples with `do_sample=True`, so the exact wording may differ between runs):
```
Question: what is data warehousing?

Answer:
Data warehousing is a set of processes, architectures and technologies for collecting and managing data from various sources to support deriving meaningful business insights from raw data. Data collection involves data gathering, transforming and storing. It also includes database creation and data integration process development.
```

In [None]:
# Ask a second example question
question2 = "What does data collection involve?"
response2 = chain.invoke(question2)

# Show the question, then the answer with surrounding whitespace trimmed
print("Question:", question2)
print("\nAnswer:", response2.strip(), sep="\n")

**Example Output** (the pipeline samples with `do_sample=True`, so the exact wording may differ between runs):
```
Question: What does data collection involve?

Answer:
Data collection involves data gathering, transforming and storing. It includes the processes of collecting data from various sources, processing and organizing it for storage in a data warehouse.
```