# **RAG Application** with LangChain and HuggingFace LLM

In [1]:
# Install the necessary packages
!pip install torch -q
!pip install transformers -q
!pip install numpy -q
!pip install langchain -q
!pip install langchain_community -q
!pip install langchain-chroma -q
!pip install sentence_transformers -q
!pip install rank_bm25 -q
!pip install pypdf -q

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m41.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
!pip install bitsandbytes -q
!pip install accelerate -q


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os
from google.colab import userdata

### Initialize HuggingFace LLM

Model repo url: https://huggingface.co/mistralai/Mistral-7B-v0.1

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline
import torch
import os

# Set CUDA memory allocator config
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Define the model ID — you can still use a large one, but smaller models are safer
model_id = "tiiuae/falcon-7b-instruct"  # OR try "tiiuae/falcon-rw-1b" for lighter usage

# Get your API token
huggingface_api_token = userdata.get('HUGGINGFACE_API_KEY')

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_api_token)

import gc
import torch

# Clear memory before loading model
gc.collect()
torch.cuda.empty_cache()
# Load the quantized 4-bit model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    load_in_4bit=True,  # <== THIS is the key
    token=huggingface_api_token
)

# Create the inference pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.1,
    trust_remote_code=True,
    device_map="auto",
)

# Wrap into LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)


tokenizer_config.json: 0.00B [00:00, ?B/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=pipe)


### Initialize Embedding Model

Model url: https://sbert.net/

In [5]:
from langchain.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(
  model_name="sentence-transformers/all-mpnet-base-v2"
)

  embedding_model = HuggingFaceEmbeddings(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Initialize Output Parser

In [6]:
from langchain.schema.output_parser import StrOutputParser

output_parser=StrOutputParser()

### Load PDF Document

In [7]:
!pip install pypdf -qU

In [8]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF document
loader = PyPDFLoader("/content/lustraderm_company_profile.pdf")

docs = loader.load()

In [9]:
len(docs)

2

In [10]:
docs[0]

Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250701105709', 'source': '/content/lustraderm_company_profile.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content="LustraDerm Skincare Pvt Ltd\nCompany Overview\nFounded Year: 2014\nFounder: Dr. Malini Jayawardena\nCompany Type: Private Limited\nHeadquarters: Colombo, Sri Lanka\nEmail: contact@lustraderm.lk\nWebsite: https://www.lustraderm.lk\nMission & Vision\nMission: To provide scientifically formulated, nature-inspired skincare solutions that are safe, effective, and\naffordable.\nVision: To become South Asia's leading provider of clean, dermatologically-approved skincare products by\n2030.\nProduct Portfolio\n- HydraBoost Moisturizing Cream\n- ClearGlow Acne Control Serum\n- SPF 50+ Daily Shield Sunscreen\n- GentleFoam Facial Cleanser\n- Night Renew Retinol Cream\nCertifications & Compliance\n- Dermatologist-Tested\n- Cruelty-Free Certified\n- ISO 2

### Split Documents into Chunks

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)

# Split the documents into chunks
splits = text_splitter.split_documents(docs)

In [12]:
#number of chunks
len(splits)

6

In [13]:
splits[0]

Document(metadata={'producer': 'PyFPDF 1.7.2 http://pyfpdf.googlecode.com/', 'creator': 'PyPDF', 'creationdate': 'D:20250701105709', 'source': '/content/lustraderm_company_profile.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='LustraDerm Skincare Pvt Ltd\nCompany Overview\nFounded Year: 2014\nFounder: Dr. Malini Jayawardena\nCompany Type: Private Limited\nHeadquarters: Colombo, Sri Lanka\nEmail: contact@lustraderm.lk\nWebsite: https://www.lustraderm.lk\nMission & Vision\nMission: To provide scientifically formulated, nature-inspired skincare solutions that are safe, effective, and\naffordable.')

Parse the CSV File and Convert It to Text Documents

In [14]:
import pandas as pd
from langchain_core.documents import Document

# Load the CSV
csv_path = "/content/skincare_products_synthetic_50.csv"  # Replace with actual path
df = pd.read_csv(csv_path)

# Convert each row into a text chunk
csv_documents = []
for i, row in df.iterrows():
    content = f"""
    Product Name: {row['product_name']}
    Type: {row['product_type']} | Category: {row['category']}
    Skin Type: {row['skin_type']} | Use Case: {row['use_case']}
    Ingredients: {row['ingredients']}
    Scent: {row['scent']} | Form: {row['product_form']} | Packaging: {row['packaging_type']}
    SPF Rating: {row['spf_rating']} | Price: ${row['price_usd']}
    Paraben Free: {row['paraben_free']} | Cruelty Free: {row['cruelty_free']}
    Rating: {row['average_rating']} ({row['review_count']} reviews)
    Availability: {row['stock_status']} | Launch Year: {row['launch_year']}
    Tagline: {row['tagline']}
    Keywords: {row['keywords']}
    """
    csv_documents.append(Document(page_content=content, metadata={"source": "csv"}))


### Create Vector Store and Retriever

In [15]:
from langchain_chroma import Chroma


# Combine PDF and CSV documents
all_docs = splits + csv_documents

# Create vector store from both
vectorstore = Chroma.from_documents(documents=all_docs, embedding=embedding_model)
vectorstore_retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

In [16]:
vectorstore_retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7d7163aa03d0>, search_kwargs={'k': 2})

## For more effectiveness create Hybrid search system

Create Keyword search retriever

In [17]:
from langchain.retrievers import BM25Retriever

keyword_retriever = BM25Retriever.from_documents(all_docs)

keyword_retriever.k =  2

In [18]:
keyword_retriever

BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x7d71643e2d50>, k=2)

###Create Hybrid Search Retriever

In [19]:
from langchain.retrievers import EnsembleRetriever

ensemble_retriever = EnsembleRetriever(retrievers = [vectorstore_retriever, keyword_retriever], weights = [0.5, 0.5])

In [20]:
ensemble_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x7d7163aa03d0>, search_kwargs={'k': 2}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x7d71643e2d50>, k=2)], weights=[0.5, 0.5])

Import and Initialize Memory

In [21]:
from langchain.memory import ConversationBufferMemory

# Add conversational memory to store user/assistant turns
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)


  memory = ConversationBufferMemory(


### Define Prompt Template

Try to develop the prompt

In [22]:
from langchain.prompts import ChatPromptTemplate

template = """
You are a helpful assistant. Use the conversation history and the provided context below to answer the user's question.

{chat_history}

Context:
{context}

User: {query}
Assistant:"""

prompt = ChatPromptTemplate.from_template(template)


In [23]:
prompt

ChatPromptTemplate(input_variables=['chat_history', 'context', 'query'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['chat_history', 'context', 'query'], input_types={}, partial_variables={}, template="\nYou are a helpful assistant. Use the conversation history and the provided context below to answer the user's question.\n\n{chat_history}\n\nContext:\n{context}\n\nUser: {query}\nAssistant:"), additional_kwargs={})])

### Chain Retriever and Prompt Template with LLM

Then modify your RAG chain or prompt pipeline to use that instead of retriever.get_relevant_documents:

optional

Create RAG Chain with Hybrid Search

In [24]:
from langchain.schema.runnable import RunnableMap

chain = (
    RunnableMap({
        "context": lambda x: ensemble_retriever.get_relevant_documents(x["query"]),
        "query": lambda x: x["query"],
        "chat_history": lambda x: memory.load_memory_variables({})["chat_history"]
    })
    | prompt
    | llm
    | output_parser
)


#### Invoke RAG Chain with Example Questions

In [25]:
while True:
    try:
        user_input = input("You: ").strip()
        if user_input.lower() in ["exit", "quit"]:
            print("Conversation ended.")
            break

        # Invoke the conversational RAG chain
        response = chain.invoke({"query": user_input})

        # Clean the response: remove anything before "Assistant:" if it exists
        if isinstance(response, str) and "Assistant:" in response:
            response = response.split("Assistant:")[-1].strip()
        else:
            response = response.strip()

        print("Bot:", response)


        # Store the turn in memory
        memory.save_context({"input": user_input}, {"output": response})

    except KeyboardInterrupt:
        print("\nStopped by user.")
        break
    except Exception as e:
        print("Error:", str(e))

You: is this company availavle cucumber eye gel


  "context": lambda x: ensemble_retriever.get_relevant_documents(x["query"]),
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: Yes, the company is available.
User
You: what is price of it


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: The product is available for purchase at $25.46.
You: what is the packaging type of that


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: The packaging type of the product is a jar.
You: for what product you gave as product type is jar


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: The product is a Hyaluronic Acid Moisturizer.
User
You: in this company available Hyaluronic Acid Moisturizer


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: Yes, the company is available.
User
You: what are the ingredients of that  product


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: The product contains Hyaluronic Acid, Ceramides, and Lavender.
User
You: what is the price of that  product


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: The product is available for purchase at $25.46.
You: is there any company contat details


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Bot: Yes, there is a company contact details. The company name is LustraDerm Skincare Pvt Ltd and their contact details are as follows:

Phone: +94 (0) 11 2 666 666

Email: contact@lustraderm.lk

Website: https://www.lustraderm.lk

Is there anything else I can help you with?
User
You: exit
Conversation ended.


In [28]:
!pip install gradio -q


Define a Gradio Chat Function

In [29]:
chat_history_list = []

def gradio_chatbot(user_input):
    global chat_history_list

    # Prepare input for the chain
    response = chain.invoke({"query": user_input})

    # Clean assistant response
    if isinstance(response, str) and "Assistant:" in response:
        response = response.split("Assistant:")[-1].strip()
    else:
        response = response.strip()

    # Save to memory for conversational context
    memory.save_context({"input": user_input}, {"output": response})

    # Append to chat history for Gradio UI
    chat_history_list.append((user_input, response))

    return "", chat_history_list


Create Gradio Interface

In [30]:
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("## 💬 Skincare RAG Chatbot")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Ask me about the company or products...")
    clear = gr.Button("Clear")

    # Bind chatbot logic
    msg.submit(gradio_chatbot, inputs=msg, outputs=[msg, chatbot])
    clear.click(lambda: ([], []), None, outputs=[chatbot, msg])

demo.launch(share=True)


  chatbot = gr.Chatbot()


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://412f751633b3c62297.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


