In [1]:
import nltk

# NLTK data needed further down: the English stop-word list and the Punkt
# tokenizer tables that word_tokenize relies on.
for _resource in ('stopwords', 'punkt_tab'):
    # quiet=True keeps the per-package progress log (which prints the local
    # nltk_data directory) out of the saved notebook output; error messages
    # are still shown. download() returns a falsy value on failure, so stop
    # here instead of hitting a LookupError later inside the chatbot.
    if not nltk.download(_resource, quiet=True):
        raise RuntimeError(f"Failed to download NLTK resource '{_resource}'")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\MongkolChut\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\MongkolChut\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
from typing import List, Dict
from fuzzywuzzy import fuzz
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Load product catalog
def load_products(file_path: str) -> List[Dict]:
    """Read the product catalog from a JSON file.

    Args:
        file_path: Path to a JSON document containing the catalog.

    Returns:
        The parsed JSON content. The rest of this notebook treats it as a
        list of product dicts.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8. Without it, open() falls back to the
    # platform locale (e.g. cp1252 on Windows), which garbles or rejects
    # non-ASCII characters in product names and descriptions.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Product recommendation chatbot with RAG
class ProductRecommendationChatbot:
    """Retrieval-augmented product recommender.

    Products are embedded with a sentence-transformer model and stored in a
    FAISS L2 index. A query is answered by retrieving the nearest products
    (with a fuzzy keyword fallback), optionally filtering them by price and
    category, and asking a text2text-generation pipeline to phrase a
    recommendation from the surviving products.

    Each product dict is expected to provide the keys 'id', 'name',
    'description', 'category' and 'price'.
    """

    # Number of products handed to the generator by recommend().
    DEFAULT_TOP_K = 2
    # Below this best vector similarity, fuzzy keyword search is also consulted.
    KEYWORD_FALLBACK_THRESHOLD = 0.5
    # Fixed score assigned to products found only by the keyword search.
    KEYWORD_RESULT_SCORE = 0.5
    # Below this best similarity, recommend() declines to answer.
    MIN_RELEVANCE = 0.3

    # (intent flag, instruction injected into the prompt), in prompt order.
    _INTENT_INSTRUCTIONS = (
        ("is_gift", "Emphasize why this product makes a great gift. "),
        ("budget_conscious", "Focus on affordability and value for money. "),
        ("for_travel", "Highlight portability and travel-friendly features. "),
        ("for_entertainment", "Focus on entertainment features like streaming or display quality. "),
        ("for_professional", "Emphasize productivity and professional use. "),
    )

    def __init__(self, products: List[Dict]):
        """Load the models, index the catalog and cache the stop-word list.

        Args:
            products: Non-empty list of product dicts.

        Raises:
            ValueError: If ``products`` is empty.
        """
        self.products = products
        self.retriever_model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')
        self.generator = pipeline('text2text-generation', model='google/flan-t5-large')
        self.index = self._build_index()
        self.stop_words = set(stopwords.words('english'))

    @staticmethod
    def _product_text(product: Dict) -> str:
        """Return the text used to embed and fuzzy-match a product."""
        return f"{product['name']} {product['description']} {product['category']}"

    def _build_index(self):
        """Embed every product and return a FAISS L2 index over the embeddings.

        Raises:
            ValueError: If the catalog is empty (there is nothing to index and
                the embedding dimension cannot be read from an empty batch).
        """
        if not self.products:
            raise ValueError("Cannot build a search index from an empty product catalog.")
        texts = [self._product_text(p) for p in self.products]
        embeddings = self.retriever_model.encode(
            texts,
            convert_to_numpy=True,
            show_progress_bar=False
        )
        dimension = embeddings.shape[1]
        index = faiss.IndexFlatL2(dimension)
        index.add(embeddings)
        return index

    def preprocess_query(self, query: str) -> str:
        """Lower-case and tokenize ``query``, dropping stop words and non-alphanumeric tokens."""
        tokens = word_tokenize(query.lower())
        filtered = [w for w in tokens if w not in self.stop_words and w.isalnum()]
        return ' '.join(filtered)

    def _encode_query(self, query: str):
        """Embed the preprocessed query as a 1-row array.

        Falls back to the raw query when preprocessing removes every token,
        so an empty string is never embedded.
        """
        processed_query = self.preprocess_query(query) or query
        return self.retriever_model.encode([processed_query], convert_to_numpy=True)

    def _similarities(self, query_embedding, products: List[Dict]):
        """Return the dot product of the query embedding with each product's embedding."""
        product_texts = [self._product_text(p) for p in products]
        product_embeddings = self.retriever_model.encode(product_texts, convert_to_numpy=True)
        return np.dot(query_embedding, product_embeddings.T).flatten()

    def keyword_search(self, query: str, k: int = 2) -> List[Dict]:
        """Return the ``k`` products whose text best fuzzy-matches ``query``.

        Scoring uses ``fuzz.partial_ratio`` on lower-cased text; ties keep
        catalog order because the sort is stable.
        """
        scores = []
        for product in self.products:
            text = self._product_text(product)
            score = fuzz.partial_ratio(query.lower(), text.lower())
            scores.append((score, product))
        scores.sort(reverse=True, key=lambda x: x[0])  # Sort by score
        return [product for _, product in scores[:k]]

    def retrieve_products(self, query: str, k: int = 2) -> List[Dict]:
        """Return up to ``k`` products relevant to ``query``, best first.

        Vector search runs first. If its best similarity is below
        ``KEYWORD_FALLBACK_THRESHOLD``, fuzzy keyword matches not already
        present are merged in with the fixed score ``KEYWORD_RESULT_SCORE``.

        Args:
            query: Free-text user query.
            k: Maximum number of products to return.

        Returns:
            At most ``k`` product dicts, sorted by descending score.
        """
        # Never ask FAISS for more neighbours than exist: it pads the result
        # with -1, which Python would silently resolve to the last product.
        k = min(k, len(self.products))
        if k <= 0:
            return []

        query_embedding = self._encode_query(query)
        _, indices = self.index.search(query_embedding, k)
        vector_results = [self.products[i] for i in indices[0] if i >= 0]
        if not vector_results:
            return self.keyword_search(query, k)

        # Compute vector similarities for weighting
        similarities = self._similarities(query_embedding, vector_results)

        # Keyword search as fallback if vector results are weak
        if similarities.max() < self.KEYWORD_FALLBACK_THRESHOLD:
            keyword_results = self.keyword_search(query, k)
        else:
            keyword_results = []

        # Combine results, de-duplicating on product id.
        combined = []
        seen_ids = set()
        for score, product in zip(similarities, vector_results):
            if product['id'] not in seen_ids:
                combined.append((score, product))
                seen_ids.add(product['id'])
        for product in keyword_results:
            if product['id'] not in seen_ids:
                # NOTE(review): the fallback only runs when every vector score
                # is below 0.5, so these fixed-score entries sort ahead of the
                # vector hits. Confirm that this priority is intended.
                combined.append((self.KEYWORD_RESULT_SCORE, product))
                seen_ids.add(product['id'])

        # Sort by score and take top k
        combined.sort(reverse=True, key=lambda x: x[0])
        return [product for _, product in combined][:k]

    def is_ambiguous(self, query: str) -> bool:
        """Return True when the query is too short or too generic to act on.

        A query is ambiguous when fewer than three distinct tokens survive
        preprocessing, or when any surviving token is one of "something",
        "cool" or "nice".
        """
        generic_terms = {"something", "cool", "nice"}
        tokens = set(self.preprocess_query(query).split())
        return len(tokens) < 3 or any(term in tokens for term in generic_terms)

    def detect_intent(self, query: str) -> Dict:
        """Return boolean intent flags derived from keywords in ``query``.

        Matching is a case-insensitive substring test, so a keyword also
        matches inside a longer word (e.g. "work" inside "network").
        """
        query_lower = query.lower()
        return {
            "is_gift": "gift" in query_lower,
            "budget_conscious": "cheap" in query_lower or "affordable" in query_lower,
            "for_travel": "travel" in query_lower or "portable" in query_lower,
            "for_entertainment": "entertainment" in query_lower or "movie" in query_lower or "tv" in query_lower,
            "for_professional": "work" in query_lower or "professional" in query_lower
        }

    def generate_response(self, query: str, retrieved_products: List[Dict]) -> str:
        """Ask the generator to recommend one of ``retrieved_products``.

        Args:
            query: The user's original query, included verbatim in the prompt.
            retrieved_products: Products offered to the generator as context.

        Returns:
            The generated text with surrounding whitespace stripped.
        """
        intent = self.detect_intent(query)
        context = "\n".join([f"{p['name']} ({p['category']}): {p['description']} - Price: ${p['price']}" for p in retrieved_products])

        # Include the instruction for every detected intent. Assigning in a
        # chain of independent ifs would keep only the last match, so a
        # "cheap gift" query would lose the gift guidance.
        extra_instruction = "".join(
            instruction
            for flag, instruction in self._INTENT_INSTRUCTIONS
            if intent[flag]
        )

        prompt = f"""
        You are a friendly product recommendation assistant. {extra_instruction}Based on the user's query and the provided product information, recommend one product in a conversational tone. Highlight why it suits the user's needs, mention key features, and include the price. Keep the response concise (2-3 sentences).

        User query: {query}
        Relevant products:
        {context}

        Recommendation:
        """

        # Sampling is enabled, so repeated calls may produce different text.
        response = self.generator(
            prompt,
            max_length=150,
            num_return_sequences=1,
            truncation=True,
            temperature=0.8,
            top_k=50,
            do_sample=True
        )[0]['generated_text']

        return response.strip()

    def recommend(self, query: str, max_price: float = None, category: str = None) -> str:
        """Produce a recommendation (or a clarification/apology) for ``query``.

        Args:
            query: Free-text user query.
            max_price: Optional inclusive price ceiling. ``None`` disables the
                filter; ``0`` is a real ceiling.
            category: Optional category name, compared case-insensitively.
                ``None`` or an empty string disables the filter.

        Returns:
            The generated recommendation, or a fixed message when the query is
            ambiguous, nothing passes the filters, or nothing is relevant enough.
        """
        if self.is_ambiguous(query):
            return "That’s a bit vague! Could you share more details, like what you’re using it for or your budget?"

        has_price_filter = max_price is not None
        has_category_filter = bool(category)

        # With filters active, rank the whole catalog and filter before
        # truncating. Filtering only the top hits would report "no match"
        # whenever the nearest neighbours happen to fail the filter.
        if has_price_filter or has_category_filter:
            pool_size = len(self.products)
        else:
            pool_size = self.DEFAULT_TOP_K
        filtered_products = self.retrieve_products(query, k=pool_size)

        # Apply filters
        if has_price_filter:
            filtered_products = [p for p in filtered_products if p['price'] <= max_price]
        if has_category_filter:
            filtered_products = [p for p in filtered_products if p['category'].lower() == category.lower()]
        filtered_products = filtered_products[:self.DEFAULT_TOP_K]

        if not filtered_products:
            return "Sorry, no products match your query with the specified filters. Try adjusting your preferences!"

        # Check relevance
        similarities = self._similarities(self._encode_query(query), filtered_products)
        if max(similarities) < self.MIN_RELEVANCE:
            return "Sorry, I couldn't find any products that match your request. Could you provide more details or try a different query?"

        # Sort by price if budget-conscious
        intent = self.detect_intent(query)
        if intent["budget_conscious"]:
            filtered_products = sorted(filtered_products, key=lambda x: x['price'])

        return self.generate_response(query, filtered_products)

# Main execution
if __name__ == "__main__":
    # Demo driver: load the catalog from 'products.json' (resolved against the
    # current working directory), construct the chatbot (which loads both
    # models and builds the search index), then print one answer per query.
    products = load_products('products.json')
    chatbot = ProductRecommendationChatbot(products)
    
    # Sample questions; recommend() is called without price or category filters.
    queries = [
        "What mouse would you recommend for everyday?",
        "What's perfect for laptop gaming?",
        "What exercise equipment would you recommend for home use?",
        "What’s available for home security?",
        "I want something cool"  # Deliberately vague: contains generic terms, so it should get the clarification reply
    ]
    
    for query in queries:
        response = chatbot.recommend(query)
        print(f"\nQ: {query}")
        print(f"A: {response}")

Device set to use cpu



Q: What mouse would you recommend for everyday?
A: Ergonomic Wireless Mouse is best for productivity and gaming.

Q: What's perfect for laptop gaming?
A: Gaming Laptop (Computers): Powerful gaming laptop with 16GB RAM, NVIDIA RTX 3060 GPU, and 1TB SSD. Great for gamers and professionals. - Price: $1299.99 Mechanical Gaming Keyboard (Accessories): RGB-backlit mechanical keyboard with customizable keys and fast response time. Perfect for competitive gaming.

Q: What exercise equipment would you recommend for home use?
A: Sorry, I couldn't find any products that match your request. Could you provide more details or try a different query?

Q: What’s available for home security?
A: Smart Home Security Camera (Smart Home): Wi-Fi-enabled security camera with night vision, motion detection, and mobile app control. Keeps your home safe.

Q: I want something cool
A: That’s a bit vague! Could you share more details, like what you’re using it for or your budget?
