In [39]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import re

# Fetch the NLTK resources this notebook asks for: the 'punkt' tokenizer
# models, the averaged-perceptron POS tagger, and the stopwords corpus
# (downloaded here even though no visible cell reads it).
# NOTE(review): newer NLTK releases may look for 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' instead -- confirm against the installed
# version.
for _resource in ('punkt', 'averaged_perceptron_tagger', 'stopwords'):
    nltk.download(_resource)

# A product ID is a standalone run of exactly ten characters from A-Z / 0-9
# (e.g. B07XYDG2R2). The lookahead additionally demands at least one digit so
# that an ordinary ten-letter word typed in capitals (e.g. "WATERPROOF") is
# not mistaken for an ID and returned instead of the real keywords.
_PRODUCT_ID_RE = re.compile(r'\b(?=[A-Z]*\d)[A-Z0-9]{10}\b')

# Filler words that carry no search value; compared against lowercased tokens.
_EXCLUDE_WORDS = frozenset({
    'provide', 'details', 'about', 'tell', 'me', 'for',
    'looking', 'what', 'is', 'the', 'product', 'id',
})

# Penn Treebank tags kept as keywords: nouns, proper nouns and adjectives.
_RELEVANT_POS = frozenset({'NN', 'NNS', 'JJ', 'NNP', 'NNPS'})


def refine_question(question):
    """Reduce a free-text product question to a compact search query.

    If the question contains a product ID, that ID alone is returned.
    Otherwise the question is tokenized and POS-tagged with NLTK, and only
    nouns/adjectives that are not filler words are kept, in original order.

    Args:
        question: The user's question as a string.

    Returns:
        The first product ID found in the question; otherwise the retained
        keywords joined by single spaces; otherwise, when no keyword
        survives the filtering, the original question unchanged.
    """
    # A product ID is the most specific query possible, so it short-circuits
    # the tokenizing/tagging work entirely.
    product_id_match = _PRODUCT_ID_RE.search(question)
    if product_id_match:
        return product_id_match.group(0)

    pos_tags = nltk.pos_tag(word_tokenize(question))

    # The tag filter uses the tagger's output for the original-case token;
    # the exclusion check is case-insensitive.
    keywords = [
        word
        for word, pos in pos_tags
        if pos in _RELEVANT_POS and word.lower() not in _EXCLUDE_WORDS
    ]

    # Fall back to the untouched question rather than returning an empty query.
    return ' '.join(keywords) if keywords else question

# Sample queries: one containing a product ID, three free-text descriptions.
question1 = "Provide details about B07XYDG2R2"
question2 = "Tell me about the bioworld grinch big face embroid winter hat green"
question3 = "Looking for a winter hat with Grinch embroidery"
question4 = "What is the product id for Heirloom Cloth Ladies Tank Top"

# Printed results, one line per query (as shown in the recorded cell output):
#   B07XYDG2R2
#   bioworld grinch big face winter hat green   <- 'embroid' is not kept,
#       presumably because the tagger does not label it as a noun/adjective
#   winter hat Grinch embroidery
#   Heirloom Cloth Ladies Tank Top
for _question in (question1, question2, question3, question4):
    print(refine_question(_question))


B07XYDG2R2
bioworld grinch big face winter hat green
winter hat Grinch embroidery
Heirloom Cloth Ladies Tank Top


[nltk_data] Downloading package punkt to /Users/noel_niko/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/noel_niko/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/noel_niko/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [40]:
# Re-run a free-text query (no product ID) through the keyword-extraction
# path; the recorded output below is "winter hat Grinch embroidery".
print(refine_question("Looking for a winter hat with Grinch embroidery")) 

winter hat Grinch embroidery


In [41]:
# The words "product" and "id" are in the exclusion list, so only the product
# name remains; the recorded output below is "Heirloom Cloth Ladies Tank Top".
print(refine_question("What is the product id for Heirloom Cloth Ladies Tank Top"))

Heirloom Cloth Ladies Tank Top
