In [1]:
import os
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatHuggingFace
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.document_loaders import CSVLoader 
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from dotenv import load_dotenv 

In [2]:
load_dotenv()

True

In [3]:
loader = CSVLoader('train_expanded.csv')
data = loader.load()

In [4]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=150
)

In [5]:
chunks = text_splitter.split_documents(data)
len(chunks)

200

In [None]:
docs = [chunk.page_content for chunk in chunks]
docs

In [6]:
## Embedding Using Huggingface
huggingface_embeddings=HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",  
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}

)

  from tqdm.autonotebook import tqdm, trange


In [7]:
huggingface_embeddings.embed_query(chunks[0].page_content)

[-0.01635267212986946,
 -0.04538872465491295,
 -0.03867955505847931,
 0.004661990329623222,
 -0.05257056653499603,
 0.09981826692819595,
 0.04248807951807976,
 0.009214378893375397,
 0.01786765083670616,
 -0.015138610266149044,
 0.02676539309322834,
 -0.01927202381193638,
 -0.002228539204224944,
 -0.011905294843018055,
 0.04070347175002098,
 0.05208386108279228,
 -0.07902739942073822,
 0.007584760896861553,
 -0.03155503422021866,
 0.0566803403198719,
 -0.04390163719654083,
 0.009192083962261677,
 0.028469102457165718,
 0.0011489397147670388,
 0.010585298761725426,
 -0.039674315601587296,
 -0.0009086871868930757,
 0.045808710157871246,
 -0.036040209233760834,
 -0.053454820066690445,
 0.012678314931690693,
 -0.06173388659954071,
 0.0521811805665493,
 -0.027669573202729225,
 0.0060242242179811,
 -0.011176963336765766,
 0.008166760206222534,
 -0.0017111579654738307,
 -0.016264716163277626,
 -0.0038718378636986017,
 -0.002357251476496458,
 -0.022841207683086395,
 -0.03287297487258911,
 -0.0

In [19]:
# Assuming 'chunks' is your list of documents and 'huggingface_embeddings' is your embedding function
# embeddings = huggingface_embeddings.embed_documents(docs)

# Create the FAISS vectorstore
vectorstore = FAISS.from_documents(chunks, embedding=huggingface_embeddings)
vectorstore.save_local("faiss_index")

In [18]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [21]:
vectorstore = FAISS.load_local("faiss_index", embeddings=huggingface_embeddings, allow_dangerous_deserialization=True)

In [22]:
## Query using Similarity Search
query="Can I return a product if I changed my mind?"
relevant_docments=vectorstore.similarity_search(query)
# relevant_docments=retriever

print(relevant_docments[0].page_content)

question: Can I return a product if I changed my mind?
answer: Yes, you can return a product if you changed your mind. Please ensure the product is in its original condition and packaging, and refer to our return policy for instructions.


In [23]:
chat = ChatGroq(
    temperature=0,
    model="llama3-70b-8192"
)

In [12]:
from langchain_community.llms import HuggingFaceHub

llm=HuggingFaceHub(repo_id="meta-llama/Meta-Llama-3-8B")
# llm = HuggingFaceHub(repo_id="Tayyab-44/Phi-3-8B-Instruct-Customer-Support-RAG") 
chat_model = ChatHuggingFace(llm=llm)

  warn_deprecated(


tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [24]:
PROMPT_TEMPLATE =  """Virtual Assistant: You're in a bustling virtual marketplace where customers seek assistance. Your task is to help customers by providing accurate answers to their inquiries using the given information.

Context:
{context}

Customer's Question:
{question}
"""

prompt=PromptTemplate(template=PROMPT_TEMPLATE,input_variables=["context","question"])

In [25]:
retrievalQA=RetrievalQA.from_chain_type(
    llm=chat,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt":prompt}
)

In [26]:
query="""Can I order a product if it is listed as 'coming soon' and not available for pre-order?"""

In [27]:
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])

If a product is listed as 'coming soon' but not available for pre-order, you will need to wait until it is officially released and becomes available for purchase.
