In [1]:
import openai
import pandas as pd
import streamlit as st
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.chains import RetrievalQA
import langchain
langchain.verbose = True

In [2]:
openai.api_key = st.secrets["OPENAI_API_KEY"]

In [3]:
pd.set_option("display.max_colwidth", None)
file_name = "./documents/customer_review.csv"
df = pd.read_csv(file_name)

df.head()

Unnamed: 0,Product Name,Customer Review
0,Radiant Glow Highlighting Powder,This powder is awful! It clumps and looks chalky on the skin. Not the radiant glow I was hoping for.
1,LushLips Matte Lipstick in Cherry Bliss,"The color is beautiful, but it's so drying on the lips. Will not be repurchasing."
2,VividEyes Intense Mascara in Jet Black,Clumpy and smudges easily. I looked like a raccoon by the end of the day. Highly disappointed.
3,SkinPerfector BB Cream in Medium,It's okay. Doesn't provide much coverage and feels a bit oily. Good for a light makeup day.
4,ShineNoMore Oil Control Setting Spray,"My makeup stayed put all day! However, it does have a strong chemical smell."


In [4]:
loader = CSVLoader(file_path=file_name)
docs = loader.load()

In [5]:
chunk_size = 128
chunk_overlap = 32

r_text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = chunk_size,
    chunk_overlap = chunk_overlap,
    length_function = len,
    add_start_index = True
)
pages = r_text_splitter.split_documents(docs)

In [6]:
print(docs[0])
print(pages[0])
print(pages[1])

page_content='Product Name: Radiant Glow Highlighting Powder\nCustomer Review: This powder is awful! It clumps and looks chalky on the skin. Not the radiant glow I was hoping for.' metadata={'source': './documents/customer_review.csv', 'row': 0}
page_content='Product Name: Radiant Glow Highlighting Powder' metadata={'source': './documents/customer_review.csv', 'row': 0, 'start_index': 0}
page_content='Customer Review: This powder is awful! It clumps and looks chalky on the skin. Not the radiant glow I was hoping for.' metadata={'source': './documents/customer_review.csv', 'row': 0, 'start_index': 47}


In [7]:
# Creating the Vector DB
embedding = OpenAIEmbeddings(api_key="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")
persist_directory = 'persist_chroma'

vectordb = Chroma.from_documents(
    documents = pages,
    embedding = embedding,
    persist_directory = persist_directory
)

In [8]:
print(vectordb._collection.count())

246


In [9]:
question = "What is the review for Radiant Glow Highlighting Powder?"
docs = vectordb.similarity_search(question)
docs

[Document(page_content='Product Name: Radiant Glow Highlighting Powder', metadata={'row': 0, 'source': './documents/customer_review.csv', 'start_index': 0}),
 Document(page_content='Product Name: Radiant Glow Highlighting Powder', metadata={'row': 70, 'source': './documents/customer_review.csv', 'start_index': 0}),
 Document(page_content='Product Name: Radiant Glow Highlighting Powder', metadata={'row': 50, 'source': './documents/customer_review.csv', 'start_index': 0}),
 Document(page_content='Product Name: Radiant Glow Highlighting Powder', metadata={'row': 60, 'source': './documents/customer_review.csv', 'start_index': 0})]

In [10]:
def pretty_print(text, words_per_line = 15):
  words = text.split()

  for i in range(0, len(words), words_per_line):
    line = ' '.join(words[i:i+words_per_line])
    print(line)

In [11]:
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(api_key="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", model=llm_name, temperature=0)

qa_chain_default = RetrievalQA.from_chain_type(
    llm,
    retriever = vectordb.as_retriever(search_kwargs={"k":3}),
    chain_type="stuff",
    return_source_documents=True,
)

In [12]:
question = "What do customers say about Radiant Glow Highlighting Powder?"
result = qa_chain_default({"query": question})

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mSystem: Use the following pieces of context to answer the user's question. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Product Name: Radiant Glow Highlighting Powder

Product Name: Radiant Glow Highlighting Powder

Product Name: Radiant Glow Highlighting Powder
Human: What do customers say about Radiant Glow Highlighting Powder?[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


In [13]:
result

{'query': 'What do customers say about Radiant Glow Highlighting Powder?',
 'result': "I don't have access to specific customer reviews or feedback for the Radiant Glow Highlighting Powder. You may want to check online beauty websites, social media platforms, or beauty forums for customer reviews and opinions on this product.",
 'source_documents': [Document(page_content='Product Name: Radiant Glow Highlighting Powder', metadata={'row': 0, 'source': './documents/customer_review.csv', 'start_index': 0}),
  Document(page_content='Product Name: Radiant Glow Highlighting Powder', metadata={'row': 50, 'source': './documents/customer_review.csv', 'start_index': 0}),
  Document(page_content='Product Name: Radiant Glow Highlighting Powder', metadata={'row': 60, 'source': './documents/customer_review.csv', 'start_index': 0})]}

In [14]:
pretty_print(result.get("result"))

I don't have access to specific customer reviews or feedback for the Radiant Glow Highlighting
Powder. You may want to check online beauty websites, social media platforms, or beauty forums
for customer reviews and opinions on this product.
