In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment;
# OPENAI_API_KEY is read from os.environ further down in this notebook.
load_dotenv()

True

### Data

In [2]:
import  pandas as pd

In [9]:
# Load the sample review data; the CSV's first column becomes the row index.
df_reviews = pd.read_csv("dummy_data_new.csv", index_col=0)

In [10]:
# Sanity check: print the frame's dimensions and column names, then show the first 3 rows.
print(df_reviews.shape, df_reviews.columns)
df_reviews.head(3)

(109, 4) Index(['Product Name', 'Product Description', 'Review Text', 'Rating'], dtype='object')


Unnamed: 0,Product Name,Product Description,Review Text,Rating
0,iPhone 15,The Apple iPhone 15 redefines smartphone innov...,The iPhone 15 is a masterpiece! The sleek desi...,"{""durability"": 5, ""ease of use"": 5, ""pleasant ..."
1,MacBook Pro 2023,Experience the ultimate in computing power wit...,The MacBook Pro 2023 is a game-changer! The pe...,"{""durability"": 5, ""ease of use"": 5, ""pleasant ..."
2,Kindle Paperwhite,"Enjoy reading your favorite books anytime, any...",The Kindle Paperwhite is a must-have for book ...,"{""durability"": 5, ""ease of use"": 5, ""pleasant ..."


In [11]:
# Draw one random product name to use as input.
# No random_state is passed, so the sampled value changes between runs.
df_reviews["Product Name"].sample(1).iloc[0]

'Olay Regenerist Micro-Sculpting Cream'

### Review Generation

#### a) LangChain - OpenAI

In [12]:
# Use the %pip magic (not !pip) so packages are installed into the environment
# of the running kernel rather than whichever pip is first on the shell PATH.
%pip install --quiet langchain langchain-community langchain-openai chromadb


In [13]:
from langchain.chains import RetrievalQA
# from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_text_splitters import Language
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.schema.document import Document

In [14]:
def get_text_chunks(text, chunk_size=500, chunk_overlap=100):
    """Split ``text`` into overlapping chunks wrapped as LangChain documents.

    Args:
        text: Raw text to split (e.g. a product description).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
            Defaults to 100, the value previously hard-coded.

    Returns:
        list: One ``Document`` per chunk, in the order produced by the
        splitter, with the chunk text stored in ``page_content``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return [Document(page_content=chunk) for chunk in text_splitter.split_text(text)]

def embed_texts_openai(texts, openai_api_key):
    """Embed ``texts`` with OpenAI embeddings and index them in a Chroma store.

    Prints a short progress line: the number of texts before embedding starts
    and a check mark once the store has been built.

    Args:
        texts: Documents to index (passed to ``Chroma.from_documents``).
        openai_api_key: API key forwarded to ``OpenAIEmbeddings``.

    Returns:
        The Chroma vector store returned by ``Chroma.from_documents``.
    """
    print(f"Embedding {len(texts)} texts...", end=' ')

    # Build the embedder inline and let Chroma populate the store from it.
    vector_store = Chroma.from_documents(
        texts,
        OpenAIEmbeddings(openai_api_key=openai_api_key),
    )

    print("✅")
    return vector_store

def run_qa(doc_search, prompt, openai_api_key, model_name="gpt-3.5-turbo", k=1):
    """Answer ``prompt`` with a retrieval-augmented "stuff" QA chain.

    Prints a short progress line before the chain is built and a check mark
    once the chain has answered.

    Args:
        doc_search: Vector store exposing ``as_retriever`` (e.g. the Chroma
            store returned by ``embed_texts_openai``).
        prompt: Query passed to the chain's ``invoke``.
        openai_api_key: API key forwarded to ``ChatOpenAI``.
        model_name: Chat model name forwarded to ``ChatOpenAI``. Defaults to
            "gpt-3.5-turbo", the value previously hard-coded here.
        k: Maximum number of documents the retriever returns per query.
            Defaults to 1, the value previously hard-coded here.

    Returns:
        The ``"result"`` entry of the chain's response.
    """
    print("Running QA...", end=' ')

    llm = ChatOpenAI(model_name=model_name, openai_api_key=openai_api_key)
    retriever = doc_search.as_retriever(search_kwargs={"k": k})

    # chain_type="stuff": the retrieved documents are inserted ("stuffed") into
    # a single prompt, which only works while they are small enough to fit.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )

    answer = qa.invoke(prompt)
    print("✅")
    return answer["result"]

In [15]:
# Inputs for the review-generation demo below.

# API key taken from the environment; None when OPENAI_API_KEY is not set.
OPEN_API_KEY = os.getenv('OPENAI_API_KEY')

# Text that gets chunked, embedded, and inserted into the review prompt.
PRODUCT_DESCRIPTION = "Nike Men's Revolution 5 Running Shoes"

# Score per review criterion; generate_review turns each score into a sentence.
RATINGS = {
    "Material": 3,
    "Fit": 5,
    "Packaging": 2,
    "Design": 1,
    "Comfort": 4,
}


In [16]:
# Split the product description into Document chunks ready for embedding
chunks = get_text_chunks(PRODUCT_DESCRIPTION)

In [17]:
# Embed the chunks with OpenAI and index them in a Chroma vector store,
# then display the store object to confirm it was created.
doc_search = embed_texts_openai(chunks, OPEN_API_KEY)
doc_search

Embedding 1 texts... ✅


<langchain_community.vectorstores.chroma.Chroma at 0x7f23aed92da0>

In [22]:
# Review generation with template-based prompt generation

def generate_review(product_description, ratings, doc_search, openai_api_key):
    """Build a review-generation prompt from ratings and run it through the QA chain.

    The prompt consists of the filled-in template (product description plus one
    "criterion: rating" line per entry) followed by one descriptive sentence
    per criterion, chosen by the rating tier.

    Args:
        product_description: Product text inserted into the template and into
            every per-criterion sentence.
        ratings: Mapping of criterion name to a numeric rating; iteration
            order determines the order of lines and sentences in the prompt.
        doc_search: Vector store handed to ``run_qa``.
        openai_api_key: API key handed to ``run_qa``.

    Returns:
        Whatever ``run_qa`` returns for the assembled prompt.
    """
    prompt_template = """
    Product Description: {}
    Ratings:
    {}
    Based on the description and ratings provided, generate 3 different reviews for this product. \
    Write them from a personal perspective as someone who has bought the product \
    without being too informal and \
    without explicitly referring to the fact that the reviews are based on a rating or provided information. \
    Add a different tone of speech to each review so that they do not sound equally.
    """

    def tier_sentence(criteria, rating):
        # Tiers are checked from highest to lowest; the first match wins.
        if rating >= 4:
            return f"The {product_description} excels in {criteria.lower()} as it offers exceptional {criteria.lower()}.\n"
        if rating >= 3:
            return f"The {product_description} performs well in terms of {criteria.lower()} with {criteria.lower()} that meet expectations.\n"
        if rating >= 2:
            return f"The {product_description} has average {criteria.lower()}, providing satisfactory {criteria.lower()}.\n"
        return f"The {product_description} could improve its {criteria.lower()} as the current {criteria.lower()} is below expectations.\n"

    rating_text = ''.join(f"{criteria}: {rating}\n" for criteria, rating in ratings.items())
    header = prompt_template.format(product_description, rating_text)
    sentences = [tier_sentence(criteria, rating) for criteria, rating in ratings.items()]

    return run_qa(doc_search, ''.join([header, *sentences]), openai_api_key)



# Generate the reviews for the demo product (this runs the retrieval QA chain)
review = generate_review(PRODUCT_DESCRIPTION, RATINGS, doc_search, OPEN_API_KEY)

# Show the product name first, then the generated text exactly as returned.
print(f"Product: {PRODUCT_DESCRIPTION}\n")
print(f"Generated Reviews:\n{review}")

Running QA... ✅
Product: Nike Men's Revolution 5 Running Shoes

Generated Reviews:
1. The Nike Men's Revolution 5 Running Shoes showcase a solid build with materials that deliver on durability and performance. I found the material quality to be top-notch, ensuring a reliable running experience.

2. When it comes to fit, the Nike Men's Revolution 5 Running Shoes truly stand out. The shoes provide a snug and comfortable fit that enhances my runs and keeps my feet secure throughout my workout sessions.

3. While the packaging of the Nike Men's Revolution 5 Running Shoes was decent, the design left something to be desired. I believe an upgrade in the design department would make these shoes a true standout in both style and function.
