# Product Attribute Extraction Using NLP

In [1]:
import spacy
import re

In [2]:
# Load the spaCy pipeline "en_core_web_sm" once at module level. extract_product_attributes
# calls this object on each description and reads the entities it yields (doc.ents).
# NOTE(review): the model package has to be installed separately from spaCy itself — confirm
# the environment setup before running on a fresh kernel.
nlp = spacy.load("en_core_web_sm")
# Sample free-text product descriptions used to exercise the extractor below. Each one
# embeds a pack size (weight such as "500g" or volume such as "1.5L") among marketing copy;
# none of them contains a price.
products = [
    "Nestle Whole Grain Pasta, 500g pack, high in fiber, perfect for healthy diets.",
    "Barilla Gluten-Free Pasta, 400g box, made with corn and rice, gluten-free alternative.",
    "Coca-Cola 1.5L bottle, refreshing drink, best served chilled.",
    "Pepsi 500ml bottle, carbonated soft drink, ideal for gatherings.",
    "Organic Apple Juice, 1L, cold-pressed, no added sugar, premium quality.",
    "Heinz Tomato Ketchup, 750ml bottle, made with vine-ripened tomatoes, no preservatives."
]

In [3]:
# Regexes for the measurements that appear in product descriptions.
#
# Group layout is the same for the weight and volume patterns:
#   group 1 = whole measurement, group 2 = optional decimal part, group 3 = unit.
#
# Two details matter for correctness:
#   * Alternatives are listed longest-first. Regex alternation takes the first branch
#     that matches, so a leading "g" / "l" would truncate "2 grams" to "2 g" and
#     "2 liters" to "2 l".
#   * The trailing \b requires the unit to end at a word boundary, so ordinary words that
#     merely start with a unit letter ("3 large eggs", "2 great flavors", "5lb") are
#     not reported as measurements.
# Singular forms ("gram", "liter") and the British "litre" spelling are accepted so that
# text which previously matched in truncated form still matches, now in full.
weight_pattern = re.compile(r"(\d+(\.\d+)?\s?(kilograms?|grams?|kg|g))\b", re.IGNORECASE)
volume_pattern = re.compile(
    r"(\d+(\.\d+)?\s?(millilit(?:er|re)s?|lit(?:er|re)s?|ml|l))\b", re.IGNORECASE
)
# Dollar amount with an optional two-digit cents part, e.g. "$4" or "$4.99".
price_pattern = re.compile(r"(\$\d+(\.\d{2})?)", re.IGNORECASE)

# Combine spaCy entity recognition with the measurement regexes to describe one product
def extract_product_attributes(product_description):
    """Return a dict of attributes found in a product description.

    Args:
        product_description: Free-text description of a single product.

    Returns:
        A dict containing only the keys that were found, in this order:
        'quantity' (text of an entity labelled QUANTITY or CARDINAL),
        'weight', 'volume' and 'price' (text of the first regex match for each).
    """
    parsed = nlp(product_description)
    found = {}

    # Every qualifying entity overwrites the previous one, so the last one in the
    # text is the value that ends up under 'quantity'.
    for entity in parsed.ents:
        if entity.label_ == 'QUANTITY' or entity.label_ == 'CARDINAL':
            found['quantity'] = entity.text

    # Each regex contributes at most one value: its first match in the description.
    regex_lookups = (
        ('weight', weight_pattern),
        ('volume', volume_pattern),
        ('price', price_pattern),
    )
    for key, pattern in regex_lookups:
        hit = pattern.search(product_description)
        if hit is not None:
            found[key] = hit.group(0)

    return found

In [4]:
# Run the extractor over every sample description and show the description next to
# what was extracted from it; each report starts with a blank line.
for product in products:
    attributes = extract_product_attributes(product)
    print(
        f"\nProduct Description: {product}",
        f"Extracted Attributes: {attributes}",
        sep="\n",
    )


Product Description: Nestle Whole Grain Pasta, 500g pack, high in fiber, perfect for healthy diets.
Extracted Attributes: {'quantity': '500', 'weight': '500g'}

Product Description: Barilla Gluten-Free Pasta, 400g box, made with corn and rice, gluten-free alternative.
Extracted Attributes: {'quantity': '400', 'weight': '400g'}

Product Description: Coca-Cola 1.5L bottle, refreshing drink, best served chilled.
Extracted Attributes: {'volume': '1.5L'}

Product Description: Pepsi 500ml bottle, carbonated soft drink, ideal for gatherings.
Extracted Attributes: {'volume': '500ml'}

Product Description: Organic Apple Juice, 1L, cold-pressed, no added sugar, premium quality.
Extracted Attributes: {'volume': '1L'}

Product Description: Heinz Tomato Ketchup, 750ml bottle, made with vine-ripened tomatoes, no preservatives.
Extracted Attributes: {'volume': '750ml'}
