In [36]:
# from langchain.chat_models import ChatOpenAI
from langchain.agents import Tool, initialize_agent
from langchain.vectorstores import FAISS
# from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
# from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
# from langchain.tools import tool
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_community.document_loaders import JSONLoader
from langchain import HuggingFaceHub

In [37]:
import os
from dotenv import load_dotenv
load_dotenv()

# Load environment variables
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT")
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING")
LANGSMITH_ENDPOINT = os.getenv("LANGSMITH_ENDPOINT")
os.environ["HUGGINGFACE_API_KEY"] = HUGGINGFACE_API_KEY
os.environ["GROQ_API_KEY"] = "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"
os.environ["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY
os.environ["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT
os.environ["LANGSMITH_TRACING"] = LANGSMITH_TRACING
os.environ["LANGSMITH_ENDPOINT"] = LANGSMITH_ENDPOINT


In [24]:
import json
import numpy as np
from langchain_core.documents import Document

reviews = []
# Load review JSON
with open("All_Beauty.jsonl", "r") as f:
    for line in f:
        review = json.loads(line)
        reviews.append({
            "asin": review["asin"],
            "user_id": review["user_id"],
            "timestamp": review["timestamp"],
            "title": review["title"],
            "text": review["text"]
        })

# Convert to LangChain Documents with metadata
documents = []
for r in reviews:
    title = r["title"]
    text = r["text"]
    metadata = {
        "asin": r["asin"],
        "user_id": r["user_id"],
        "timestamp": r["timestamp"]
    }
    documents.append(Document(page_content=title + ", " + text, metadata=metadata))
    if(len(documents) > 10000):
        break

# Generate embeddings using OpenAI or other LangChain-compatible embedder
embedding_model = HuggingFaceEmbeddings()

# Create FAISS index from the documents
faiss_index = FAISS.from_documents(documents, embedding_model)

# Save index locally
faiss_index.save_local("faiss_review_index")

  embedding_model = HuggingFaceEmbeddings()


In [38]:
from datetime import datetime, timedelta
from langchain.vectorstores import FAISS
from langchain.schema import Document

def add_embedding(review_title, review_text, metadata):
    # vectorstore = FAISS.load_local("faiss_review_index", HuggingFaceEmbeddings())
    review_doc = Document(
        page_content=f"{review_title}\n{review_text}",
        metadata=metadata
    )
    faiss_index = FAISS.load_local("faiss_review_index", HuggingFaceEmbeddings(), allow_dangerous_deserialization=True)
    ### check if the review is already in the index
    existing_docs = faiss_index.similarity_search(
        query=review_title,
        k=1,
    )

    if len(existing_docs) == 0:
        faiss_index.add_documents([review_doc])

def check_review_spike(metadata):
    user_id = metadata.get("user_id")
    # vectorstore = FAISS.load_local("faiss_review_index", HuggingFaceEmbeddings())
    # faiss_review_index = FAISS(vectorstore=vectorstore)
    faiss_index = FAISS.load_local("faiss_review_index", HuggingFaceEmbeddings(), allow_dangerous_deserialization=True)
    user_reviews = faiss_index.similarity_search(
        query=".",
        k=100,
        filter={"user_id": user_id}
    )

    currtime = datetime.now()
    time_threshold = currtime - timedelta(minutes=60)
    recent_reviews = [
        review for review in user_reviews
        if datetime.fromisoformat(review.metadata["timestamp"]) >= time_threshold
    ]

    if len(recent_reviews) > 10:
        return True, f"User {user_id} has posted {len(recent_reviews)} reviews in the last hour, which is more than the allowed limit."
    return False, f"User {user_id} has posted {len(recent_reviews)} reviews in the last hour, which is within the allowed limit."


In [39]:
import re
from datetime import datetime

def extract_review_fields(review_str):
    def extract_value(key, text):
        pattern = rf"{key}:\s*(.+)"
        match = re.search(pattern, text)
        return match.group(1).strip() if match else ""
    
    return {
        "title": extract_value("Title", review_str),
        "text": extract_value("Text", review_str),
        "user_id": extract_value("User ID", review_str),
        "asin": extract_value("ASIN", review_str),
        "timestamp": extract_value("Timestamp", review_str) or datetime.now().isoformat()
    }


In [40]:
def sentiment_analysis_tool(review_str):
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_groq import ChatGroq

    fields = extract_review_fields(review_str)
    full_review = fields["title"] + ". " + fields["text"]

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant that analyzes product reviews."),
        ("user", "Analyze the sentiment of the following review: {review} and classify it as Positive, Negative, or Neutral.")
    ])

    chain = prompt | ChatGroq(model_name="llama3-8b-8192", temperature=0.1)
    sentiment = chain.invoke({"review": full_review})
    return sentiment.content


In [41]:
def ai_detection_tool(review_str):
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_groq import ChatGroq

    fields = extract_review_fields(review_str)
    full_review = fields["title"] + ". " + fields["text"]

    prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant that detects AI-generated text."),
        ("user", "Determine if the following review is AI-generated or human-written: {review} and classify it as AI-generated or Human-written.")
    ])

    chain = prompt | ChatGroq(model_name="llama3-8b-8192", temperature=0.1)
    ai_verdict = chain.invoke({"review": full_review})
    return ai_verdict.content


In [42]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# assume faiss_index is globally available or injected

def similarity_search_tool(review_str):
    fields = extract_review_fields(review_str)
    full_review = fields["title"] + ". " + fields["text"]

    embedding = HuggingFaceEmbeddings().embed_query(full_review)
    faiss_index = FAISS.load_local("faiss_review_index", HuggingFaceEmbeddings(), allow_dangerous_deserialization=True)
    results = faiss_index.similarity_search_by_vector(embedding, k=100)
    similar_reviews = [res.page_content for res in results]

    return {
        "flagged": len(similar_reviews) > 0,
        "matches": similar_reviews
    }


In [43]:
def spike_detection_tool(review_str):
    fields = extract_review_fields(review_str)
    metadata = {
        "user_id": fields["user_id"],
        "timestamp": fields["timestamp"]
    }

    # Placeholder for your own spike logic
    spike = check_review_spike(metadata)  # assumed to be implemented
    return spike


In [44]:
def add_embedding_tool(review_str):
    fields = extract_review_fields(review_str)
    full_review = fields["title"] + ". " + fields["text"]
    
    metadata = {
        "user_id": fields["user_id"],
        "asin": fields["asin"],
        "timestamp": fields["timestamp"]
    }

    add_embedding(fields["title"], fields["text"], metadata)  # your existing method
    return "Embedding added successfully."


In [45]:
tools = [
    Tool(name="SentimentAnalysis", func=sentiment_analysis_tool, description="Analyze review sentiment."),
    Tool(name="AIDetection", func=ai_detection_tool, description="Detect if a review is AI-generated."),
    Tool(name="SimilarityCheck", func=similarity_search_tool, description="Check review similarity."),
    Tool(name="SpikeDetection", func=spike_detection_tool, description="Detect review spike by user."),
    Tool(name="AddEmbedding", func=add_embedding_tool, description="Store review in vector DB."),
]


In [47]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

test_title = "Such a lovely scent but not overpowering."
test_review = "This spray is really nice. It smells really good, goes on really fine, and does the trick. I will say it feels like you need a lot of it though to get the texture I want. I have a lot of hair, medium thickness. I am comparing to other brands with yucky chemicals so I'm gonna stick with this. Try it!"
test_user_id = "AGKHLEW2SOWHNMFQIJGBECAF7INQ"
metadata = {
    "asin": "B000123456",
    "user_id": "AGKHLEW2SOWHNMFQIJGBECAF7INQ",
    "timestamp": datetime.now().isoformat()
}

review = {
    "title": test_title,
    "text": test_review,
    "metadata": metadata
}

review_input = f"""Review Analysis Request:
Title: {review['title']}
Text: {review['text']}
User ID: {review['metadata']['user_id']}
ASIN: {review['metadata']['asin']}
Timestamp: {review['metadata']['timestamp']}
"""
# connect tool to the agent
# tools = [analyze_review_tool]
# Initialize the agent with the Groq model and tools
agent = initialize_agent(
    tools=tools,
    llm=ChatGroq(model_name="llama3-8b-8192", temperature=0.1),
    agent_type="zero-shot-react-description",
    verbose=True
)

agent_prompt = """You are a helpful assistant that analyzes product reviews.
You will receive a review.
Your tasks are:
1. Analyze the sentiment of the review and classify it as Positive, Negative, or Neutral.
2. Determine if the review is AI-generated or human-written.
3. Check if the review is similar to existing reviews in the database.
4. Detect if the user has posted too many reviews in a short time.
5. Store the review in the vector database for future reference.
You will use the following tools:
- SentimentAnalysis: Analyze review sentiment.
- AIDetection: Detect if a review is AI-generated.
- SimilarityCheck: Check review similarity.
- SpikeDetection: Detect review spike by user.
- AddEmbedding: Store review in vector DB.

Respond with the analysis results in a structured format from all the tools used.
"""
# Set the agent's prompt
prompt = ChatPromptTemplate.from_messages([
    ("system", agent_prompt),
    ("user", "{input}")
])

chain = prompt | agent

# Pass the review dictionary directly to the agent
response = chain.invoke({"input": review_input})
print(response["output"])



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to analyze the given review and use the provided tools to extract relevant information.

Action: SentimentAnalysis
Action Input: HumanMessage(content="Review Analysis Request:\nTitle: Such a lovely scent but not overpowering.\nText: This spray is really nice. It smells really good, goes on really fine, and does the trick. I will say it feels like you need a lot of it though to get the texture I want. I have a lot of hair, medium thickness. I am comparing to other brands with yucky chemicals so I'm gonna stick with this. Try it!\nUser ID: AGKHLEW2SOWHNMFQIJGBECAF7INQ\nASIN: B000123456\nTimestamp: 2025-06-18T19:58:44.315544\n")[0m
Observation: [36;1m[1;3mAfter analyzing the review, I would classify it as Positive. Here's why:

* The reviewer uses positive adjectives such as "lovely", "nice", "really good", and "fine" to describe the product.
* They mention that the product "does the trick", implying that it m

  embedding = HuggingFaceEmbeddings().embed_query(full_review)
  faiss_index = FAISS.load_local("faiss_review_index", HuggingFaceEmbeddings(), allow_dangerous_deserialization=True)



Observation: [38;5;200m[1;3m{'flagged': True, 'matches': ["Such a lovely scent but not overpowering., This spray is really nice. It smells really good, goes on really fine, and does the trick. I will say it feels like you need a lot of it though to get the texture I want. I have a lot of hair, medium thickness. I am comparing to other brands with yucky chemicals so I'm gonna stick with this. Try it!", 'FAVORITE SCENT, One of mine and my mother’s favorite scents! I went to the store to buy her some and they didn’t have it anymore so I was happy to find that they sell the lotion and spray here! The scent is perfect and the scent of the lotion lasts all day!', "Good product!, I love Circle of Friends products and this one is no exception.  The scent is nice, but subtle; not too over-powering.  Works really well on my daughter's fine, often tangled hair.  easy enough for the youngest ones to spray themselves.", 'This product is amazing! I ordered the coconut scent and it smells ..., Thi

APIStatusError: Error code: 413 - {'error': {'message': 'Request too large for model `llama3-8b-8192` in organization `org_01jwhbvzwmfrxveygh1dha0tm7` service tier `on_demand` on tokens per minute (TPM): Limit 6000, Requested 10232, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}

In [None]:
# Correct usage: pass the review dictionary
print(analyze_review(review))
