## Installing libraries

In [1]:
!pip install transformers datasets langchain-core langchain langchain-community langchain-google-genai langchain_chroma jq langchain-text-splitters sentence-transformers langchain-huggingface

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core
  Downloading langchain_core-0.2.10-py3-none-any.whl (332 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m332.8/332.8 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.2.6-py3-none-any.whl (975 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m975.5/975.5 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-community
  Downloading langchain_community-0.2.6-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-google-genai
  Downloading langchain_google_genai-1.0.7-py3-none-any.whl (36 kB)
Collecting langchain_chroma
  Downloading langchai

## Necessary Import

In [1]:
import json
from datasets import load_dataset
from langchain import hub
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import JSONLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

## loading dataset from huggingface

In [2]:
# Load the dataset from Hugging Face
dataset = load_dataset("MakTek/Customer_support_faqs_dataset")

dataset

Downloading readme:   0%|          | 0.00/2.65k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/46.5k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/200 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 200
    })
})

* Dataset consist of 2 features `Questions` and `Answers`
* Total examples are 200

* Randomly print some rows for examination

In [3]:
import random

# Get the number of rows in the dataset.
num_rows = dataset["train"].num_rows

# Choose a random sample of rows.
random_rows = random.sample(range(num_rows), 5)

# Print the selected rows.
for row in random_rows:
  print(f"Question: {dataset['train'][row]['question']}")
  print(f"Answer: {dataset['train'][row]['answer']}")
  print("-"*50)

Question: How can I apply for a job at your company?
Answer: To apply for a job at our company, visit our Careers page, where you can find current job openings and submit your application.
--------------------------------------------------
Question: What is the warranty on your products?
Answer: The warranty on our products varies by item. Please refer to the product page for specific warranty information or contact our customer support team.
--------------------------------------------------
Question: How do I unsubscribe from your newsletter?
Answer: To unsubscribe from our newsletter, click on the 'Unsubscribe' link at the bottom of any of our newsletter emails or update your preferences in your account settings.
--------------------------------------------------
Question: What is the warranty on your products?
Answer: The warranty on our products varies by item. Please refer to the product page for specific warranty information or contact our customer support team.
----------------

* Load the data (The data is already is loaded we need to convert it into relevant format)

## Our data seems fine next we will follow RAG pipline

In [4]:
# convert and save the data in relevant formate
custom_json_data = []
for example in dataset['train']:
    custom_json_data.append({"question": example["question"], "answer": example["answer"]})

import json
with open("FAQ.json", "w") as f:
    json.dump(custom_json_data, f)

## Load

In [5]:
import json
from pathlib import Path
from pprint import pprint
# load the FAQ data from directory
file_path='/content/FAQ.json'
data = json.loads(Path(file_path).read_text())

pprint(data[:10])

[{'answer': "To create an account, click on the 'Sign Up' button on the top "
            'right corner of our website and follow the instructions to '
            'complete the registration process.',
  'question': 'How can I create an account?'},
 {'answer': 'We accept major credit cards, debit cards, and PayPal as payment '
            'methods for online orders.',
  'question': 'What payment methods do you accept?'},
 {'answer': 'You can track your order by logging into your account and '
            "navigating to the 'Order History' section. There, you will find "
            'the tracking information for your shipment.',
  'question': 'How can I track my order?'},
 {'answer': 'Our return policy allows you to return products within 30 days of '
            'purchase for a full refund, provided they are in their original '
            'condition and packaging. Please refer to our Returns page for '
            'detailed instructions.',
  'question': 'What is your return policy?'},

## Split

### Using `JSON` loader

In [6]:
from langchain_community.document_loaders import JSONLoader
from langchain_core.documents import Document

loader = JSONLoader(
    file_path='FAQ.json',
    jq_schema='.[]',
    text_content=False)

data = loader.load()

# Process the data
data = [Document(page_content=doc.page_content) for doc in data]

# Print the processed data
pprint(data[:10])

[Document(page_content='{"question": "How can I create an account?", "answer": "To create an account, click on the \'Sign Up\' button on the top right corner of our website and follow the instructions to complete the registration process."}'),
 Document(page_content='{"question": "What payment methods do you accept?", "answer": "We accept major credit cards, debit cards, and PayPal as payment methods for online orders."}'),
 Document(page_content='{"question": "How can I track my order?", "answer": "You can track your order by logging into your account and navigating to the \'Order History\' section. There, you will find the tracking information for your shipment."}'),
 Document(page_content='{"question": "What is your return policy?", "answer": "Our return policy allows you to return products within 30 days of purchase for a full refund, provided they are in their original condition and packaging. Please refer to our Returns page for detailed instructions."}'),
 Document(page_content=

## Store

### Generate Embeddings

* Load embedding Model from HuggingFaceHub

In [7]:
# load embedding model from Hugging Face
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

### Chroma Vector Store

* Convert FAQs into embeddings
* Save the vector store using Croma
* `retriever` retrieves similar 5 documents to query.
* Then evaluated.

In [8]:
# save to disk
db2 = Chroma.from_documents(data, embeddings, persist_directory="./chroma_db")

## Retriever and Generation: Retrieve

In [9]:
retriever = db2.as_retriever(search_type="similarity", search_kwargs={"k": 5})
retrieved_docs = retriever.invoke("What should I do if my package is lost or damaged?")
pprint(retrieved_docs)

[Document(page_content='{"question": "What should I do if my package is lost or damaged?", "answer": "If your package is lost or damaged during transit, please contact our customer support team immediately. We will initiate an investigation and take the necessary steps to resolve the issue."}'),
 Document(page_content='{"question": "Can I return a product if it was damaged during shipping?", "answer": "If your product was damaged during shipping, please contact our customer support team immediately. We will guide you through the return and replacement process."}'),
 Document(page_content='{"question": "Can I return a product if it was damaged due to mishandling during shipping?", "answer": "If your product was damaged due to mishandling during shipping, please contact our customer support team immediately. We will assist you with the necessary steps for return and replacement."}'),
 Document(page_content='{"question": "Can I return a product if I no longer have the original packaging?"

## Retrieval and Generation: Generate


Let’s put it all together into a chain that takes a question, retrieves relevant documents, constructs a prompt, passes that to a model, and parses the output.

We’ll use the GOOGLE `Gemini` chat model.

In [46]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
                             temperature=0.1,
                             max_tokens=100,
                             top_p=0.9,
                             google_api_key="AIzaSyAwFJZW0I1gA_954Wih96vDb3T0b-L9p84")

system_prompt = (
"""
**System Prompt:**

As a virtual assistant for our bustling online marketplace, your role is to provide swift and accurate responses to customer inquiries. Our customers are the lifeblood of our business, and we want to ensure they feel valued and supported. Your mission is to enhance their experience by offering immediate assistance with order status updates, guiding them through our various payment methods, and clarifying our return policies. With your help, our customer support team can focus their energies on addressing more complex issues, knowing that you've got the basics covered.

Remember, clarity and brevity are key. Please provide concise responses in no more than three sentences, and always remain friendly and professional. The satisfaction of our customers depends on it!

**Context:**
    - For detailed guidance use the context below.
    - Look for {{"Question": "question asked", "Answer": "answer to the question"}} in the context.
    - Remainder: if you don't know Simply say You don't know with appropriate tone.
    \n\n
    {context}
    """
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{Question}"),
    ]
)

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "Question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Example user query
user_query = "Can I change my order after it has been placed?"

# Create a prompt template for correcting the user query
correction_prompt_template = PromptTemplate(
    input_variables=["query"],
    template="Please correct the following user query as a standalone question for grammar and spelling if there are any errors. Otherwise, return the same query: `{query}`",
)

# Use the LLM to create a standalone question
standalone_question = llm.invoke(
    correction_prompt_template.format(query=user_query)
).content

# # Process the standalone question with the RAG chain
result = rag_chain.invoke(standalone_question)

print(f"Standalone question: {standalone_question}\nResult: {result}")

# for chunk in rag_chain.stream("What should I do if my package is lost or damaged?"):
#     print(chunk, end="", flush=True)

# rag_chain.invoke("Can I change my order after it has been placed?")

Can I change my order after it has been placed? 

If you need to change your order, please contact our customer support team as soon as possible. We will do our best to accommodate your request if the order has not been processed yet. 



In [30]:
retrieved_docs = retriever.invoke("How long shipping take?")

In [31]:
pprint(retrieved_docs)

[Document(page_content='{"question": "How long does shipping take?", "answer": "Shipping times vary depending on the destination and the shipping method chosen. Standard shipping usually takes 3-5 business days, while express shipping can take 1-2 business days."}'),
 Document(page_content='{"question": "What is your price adjustment policy?", "answer": "If a product you purchased goes on sale within 7 days of your purchase, we offer a one-time price adjustment. Please contact our customer support team with your order details to request the adjustment."}'),
 Document(page_content='{"question": "Do you offer international shipping?", "answer": "Yes, we offer international shipping to select countries. The availability and shipping costs will be calculated during the checkout process based on your location."}'),
 Document(page_content='{"question": "Do you offer expedited shipping?", "answer": "Yes, we offer expedited shipping options for faster delivery. During the checkout process, you

In [43]:
rag_chain.invoke("How to track my order?")

'Where can I find the status of my order? \n'

### Now, the code uses an LLM to create a standalone question from the user query before processing it with the RAG chain.

In [42]:
from langchain import LLMChain, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Create the LLM model
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",
                             temperature=0.1,
                             max_tokens=100,
                             top_p=0.9,
                             google_api_key="AIzaSyAwFJZW0I1gA_954Wih96vDb3T0b-L9p84")

# Define the system prompt
system_prompt = (
    """
    **System Prompt:**

    As a virtual assistant for our bustling online marketplace, your role is to provide swift and accurate responses to customer inquiries. Our customers are the lifeblood of our business, and we want to ensure they feel valued and supported. Your mission is to enhance their experience by offering immediate assistance with order status updates, guiding them through our various payment methods, and clarifying our return policies. With your help, our customer support team can focus their energies on addressing more complex issues, knowing that you've got the basics covered.

    Remember, clarity and brevity are key. Please provide concise responses in no more than three sentences, and always remain friendly and professional. The satisfaction of our customers depends on it!
    """
)

# Define the chat prompt template
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "Please rephrase the following user query as a standalone question: {query}"),
    ]
)

# Define the RAG chain
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "query": RunnablePassthrough()}
    | prompt_template
    | llm
    | StrOutputParser()
)

# Example user query
user_query = "Can I change my order after it has been placed?"

# Create a prompt template for correcting the user query
correction_prompt_template = PromptTemplate(
    input_variables=["query"],
    template="Please correct the following user query as a standalone question for grammar and spelling if there are any errors. Otherwise, return the same query: `{query}`",
)

# Use the LLM to create a standalone question
standalone_question = llm.invoke(
    correction_prompt_template.format(query=user_query)
).content

# Process the standalone question with the RAG chain
result = rag_chain.invoke(standalone_question)

print(f"Standalone question: {standalone_question}\nResult: {result}")

Result: Is it possible to modify an order after it has been placed? 



'Is it possible to modify an order after it has been placed? \n'