# **RAG Application** with LangChain and HuggingFace LLM

In [94]:
# Install the necessary packages
!pip install torch -q
!pip install transformers -q
!pip install numpy -q
!pip install langchain -q
!pip install langchain_community -q
!pip install langchain-chroma -q
!pip install sentence_transformers -q

In [95]:
import os
from google.colab import userdata

### Initialize HuggingFace LLM

Model repo url: https://huggingface.co/mistralai/Mistral-7B-v0.1

In [96]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline
import torch

# Define the model ID
model_id = "mistralai/Mistral-7B-v0.1"

# Get your API token
huggingface_api_token = "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"

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_api_token)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype=torch.float16, token=huggingface_api_token)

# Create a text generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.1, # Use temperature here as before
    trust_remote_code=True,
    device_map="auto",
)

# Initialize the HuggingFace llm using the pipeline
llm = HuggingFacePipeline(pipeline=pipe)

# Delete the model and tokenizer to free up GPU memory
del model
del tokenizer
del pipe
import gc
gc.collect()
torch.cuda.empty_cache()


Loading weights:   0%|          | 0/291 [00:00<?, ?it/s]



### Initialize Embedding Model

Model url: https://sbert.net/

In [97]:
from langchain_community.embeddings import HuggingFaceEmbeddings
import torch

# Ensure CUDA cache is empty before trying to load embedding model
torch.cuda.empty_cache()

embedding_model = HuggingFaceEmbeddings(
  model_name="sentence-transformers/all-mpnet-base-v2",
  model_kwargs={'device': 'cpu'} # Explicitly load the embedding model to CPU
)

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

MPNetModel LOAD REPORT from: sentence-transformers/all-mpnet-base-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


### Initialize Output Parser

In [100]:
from langchain_core.output_parsers import StrOutputParser

output_parser=StrOutputParser()

### Load PDF Document

In [101]:
!pip install pypdf -qU

In [102]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF document
loader = PyPDFLoader("/content/2025-S1-IT3021-Lecture-01-Introduction.pdf")

docs = loader.load()

In [103]:
len(docs)

27

In [104]:
docs[0]

Document(metadata={'producer': 'Microsoft® PowerPoint® for Microsoft 365', 'creator': 'Microsoft® PowerPoint® for Microsoft 365', 'creationdate': '2023-02-13T22:59:45+05:30', 'moddate': '2023-02-13T22:59:45+05:30', 'title': 'IT3021: Data Warehousing  and Business Intelligence', 'source': '/content/2025-S1-IT3021-Lecture-01-Introduction.pdf', 'total_pages': 27, 'page': 0, 'page_label': '1'}, page_content='BSc in IT: Specialising in Data Science\nIT3021: Data Warehousing \nand Business Intelligence\nLecture 01\nIntroduction to DW & BI')

### Split Documents into Chunks

In [105]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)

# Split the documents into chunks
splits = text_splitter.split_documents(docs)

In [106]:
len(splits)

42

### Create Vector Store and Retriever

In [108]:
from langchain_chroma import Chroma

# Create a vector store from the document chunks
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

In [109]:
# Create a retriever from the vector store
retriever = vectorstore.as_retriever()

### Define Prompt Template

In [87]:
from langchain_core.prompts import ChatPromptTemplate

# Define prompt template
template = """
Answer this question using the provided context only.

{question}

Context:
{context}

Answer:
"""

prompt=ChatPromptTemplate.from_template(template)

In [110]:
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\nAnswer this question using the provided context only.\n\n{question}\n\nContext:\n{context}\n\nAnswer:\n'), additional_kwargs={})])

### Chain Retriever and Prompt Template with LLM

In [111]:
from langchain_core.runnables import RunnablePassthrough

chain = (
    {"context": retriever,  "question": RunnablePassthrough()}
    | prompt
    | llm
    | output_parser
)

#### Invoke RAG Chain with Example Questions

In [112]:
response = chain.invoke("what is data warehousing?")
print(response)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Human: 
Answer this question using the provided context only.

what is data warehousing?

Context:
[Document(id='ff1bacba-4d2b-40da-860b-62fe27c3ce1f', metadata={'page_label': '10', 'source': '/content/All in One Lectures - IT3 021-DWBI.pdf', 'creationdate': '', 'page': 9, 'producer': 'iLovePDF', 'creator': 'PyPDF', 'total_pages': 338, 'moddate': '2025-05-20T15:45:31+00:00'}, page_content='11\nWhat is DW & BI?\n• Data Warehousing(DW)\n• It is a set of processes, architectures and technologies for collecting and\nmanaging data from various sources to support deriving meaningful business\ninsights from raw data\n• Data collection involvesdata gathering, transformingand storing\n• It also includes database creation and data integration process development'), Document(id='8fcfb7ae-3ae1-4ad1-997b-6d6014a3f778', metadata={'page': 9, 'moddate': '2023-02-13T22:59:45+05:30', 'source': '/content/2025-S1-IT3021-Lecture-01-Introduction.pdf', 'producer': 'Microsoft® PowerPoint® for Microsoft 365', 