In [1]:
import networkx as nx
import pandas as pd
import numpy as np
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Tuple


In [2]:
@dataclass
class Product:
    """A single catalog item.

    The catalog cell constructs instances positionally, so the order of the
    fields below is part of the constructor interface.
    """
    product_id: str   # catalog identifier, e.g. "P001"
    name: str         # display name; also the key used for lookup by name
    category: str     # e.g. "Dairy", "Bakery"
    brand: str
    price: float      # the catalog passes plain ints here
    in_stock: bool
    tags: List[str] = field(default_factory=list)  # fresh list per instance

@dataclass
class Recommendation:
    """A scored alternative product together with the reasons it was suggested."""
    product: Product
    score: float          # heuristic score; results are sorted by it, highest first
    rule_tags: List[str]  # machine-readable identifiers of the rules that fired
    explanation: str      # human-readable text built from rule_tags


In [3]:
# In-memory product catalog. Positional arguments follow the Product field
# order: product_id, name, category, brand, price, in_stock, tags.
# Brand names are stored as real Unicode text (the "CaféCool" brand had been
# saved as mis-decoded bytes); the JSON export writes UTF-8 with
# ensure_ascii=False, so the literal round-trips unchanged.
products_data = [
    # Dairy / Milk & Curd
    Product("P001", "Amul Toned Milk 1L", "Dairy", "Amul", 65, False, ["veg_only", "dairy", "milk", "breakfast_item"]),
    Product("P002", "Amul Lactose Free Milk 1L", "Dairy", "Amul", 90, True, ["veg_only", "lactose_free", "dairy", "milk", "breakfast_item"]),
    Product("P003", "Mother Dairy Toned Milk 1L", "Dairy", "Mother Dairy", 62, True, ["veg_only", "dairy", "milk", "breakfast_item"]),
    Product("P004", "Amul Cheese Slices 10pcs", "Dairy", "Amul", 120, True, ["veg_only", "dairy", "cheese", "sandwich"]),
    Product("P005", "Britannia Cheese Block 200g", "Dairy", "Britannia", 110, True, ["veg_only", "dairy", "cheese"]),
    Product("P006", "Amul Low Fat Curd 400g", "Dairy", "Amul", 60, True, ["veg_only", "low_fat", "dairy", "curd"]),
    Product("P007", "Mother Dairy Curd 400g", "Dairy", "Mother Dairy", 55, True, ["veg_only", "dairy", "curd"]),
    Product("P008", "Amul Probiotic Dahi 400g", "Dairy", "Amul", 70, False, ["veg_only", "probiotic", "dairy", "curd"]),

    # Extra dairy items
    Product("P031", "Amul Fresh Cream 200ml", "Dairy", "Amul", 75, True, ["veg_only", "dairy", "cream", "cooking"]),
    Product("P032", "Mother Dairy Paneer 200g", "Dairy", "Mother Dairy", 90, True, ["veg_only", "dairy", "paneer", "high_protein"]),
    Product("P033", "Amul Paneer 200g", "Dairy", "Amul", 95, True, ["veg_only", "dairy", "paneer", "high_protein"]),

    # Bakery / Bread & Buns
    Product("P009", "Britannia Whole Wheat Bread 400g", "Bakery", "Britannia", 45, True, ["veg_only", "high_fiber", "bread", "breakfast_item"]),
    Product("P010", "Britannia White Bread 400g", "Bakery", "Britannia", 40, True, ["veg_only", "bread", "breakfast_item"]),
    Product("P011", "Modern Multigrain Bread 400g", "Bakery", "Modern", 50, True, ["veg_only", "high_fiber", "bread", "breakfast_item"]),
    Product("P012", "Gluten Free Brown Bread 300g", "Bakery", "HealthLife", 80, True, ["veg_only", "gluten_free", "high_fiber", "bread"]),
    Product("P013", "Whole Wheat Burger Buns 4pcs", "Bakery", "Britannia", 60, True, ["veg_only", "bread", "buns", "snack"]),
    Product("P014", "Garlic Bread Loaf 250g", "Bakery", "BakeryFresh", 70, False, ["veg_only", "bread", "snack"]),

    # Extra bakery items (biscuits, rusk)
    Product("P034", "Britannia Marie Gold Biscuits 200g", "Bakery", "Britannia", 35, True, ["veg_only", "biscuit", "tea_time", "light_snack"]),
    Product("P035", "Parle G Biscuits 250g", "Bakery", "Parle", 30, True, ["veg_only", "biscuit", "tea_time"]),
    Product("P036", "Rusk Toast 200g", "Bakery", "LocalBake", 40, True, ["veg_only", "biscuit", "rusk", "tea_time"]),

    # Snacks / Namkeen & healthy snacks
    Product("P015", "Lays Classic Salted 52g", "Snacks", "Lays", 20, True, ["veg_only", "snack", "evening_snack"]),
    Product("P016", "Lays Baked Chips 50g", "Snacks", "Lays", 30, True, ["veg_only", "low_fat", "snack", "evening_snack"]),
    Product("P017", "Kurkure Masala Munch 50g", "Snacks", "Kurkure", 20, True, ["veg_only", "snack", "evening_snack"]),
    Product("P018", "Healthy Mix Nuts 100g", "Snacks", "NutriBox", 150, True, ["veg_only", "high_protein", "snack", "healthy_snack"]),
    Product("P019", "Roasted Chana 100g", "Snacks", "NutriBox", 60, True, ["veg_only", "high_protein", "low_fat", "snack", "healthy_snack"]),
    Product("P020", "Protein Snack Bar 40g", "Snacks", "FitLife", 80, True, ["veg_only", "high_protein", "gluten_free", "snack", "on_the_go"]),

    # Extra snacks
    Product("P037", "Salted Peanuts 100g", "Snacks", "NutriBox", 50, True, ["veg_only", "snack", "evening_snack"]),
    Product("P038", "Masala Fox Nuts (Makhana) 70g", "Snacks", "NutriBox", 120, True, ["veg_only", "healthy_snack", "low_fat", "snack"]),
    Product("P039", "Baked Nachos 75g", "Snacks", "FitLife", 70, True, ["veg_only", "snack", "low_fat"]),

    # Beverages / Juices & tea
    Product("P021", "Real Orange Juice 1L", "Beverages", "Real", 110, True, ["veg_only", "juice", "beverage"]),
    Product("P022", "Real Mixed Fruit Juice 1L", "Beverages", "Real", 115, True, ["veg_only", "juice", "beverage"]),
    Product("P023", "Sugar Free Green Tea 25 bags", "Beverages", "HealthLife", 130, True, ["veg_only", "sugar_free", "tea", "beverage"]),
    Product("P024", "Low Sugar Lemon Juice 1L", "Beverages", "Real", 100, False, ["veg_only", "low_sugar", "juice", "beverage"]),
    Product("P025", "Coconut Water 500ml", "Beverages", "HydroFresh", 45, True, ["veg_only", "beverage", "hydration"]),

    # Extra beverages
    Product("P040", "Masala Chai Instant Premix 200g", "Beverages", "TeaTime", 95, True, ["veg_only", "tea", "beverage"]),
    Product("P041", "Cold Coffee Can 200ml", "Beverages", "CaféCool", 60, True, ["veg_only", "beverage", "coffee"]),
    Product("P042", "Electrolyte Drink 500ml", "Beverages", "HydroFresh", 55, True, ["veg_only", "beverage", "hydration"]),

    # Health / Special
    Product("P026", "Lactose Free Yogurt 200g", "Health", "HealthLife", 55, True, ["veg_only", "lactose_free", "dairy", "curd"]),
    Product("P027", "Sugar Free Digestive Biscuits 200g", "Health", "HealthLife", 70, True, ["veg_only", "sugar_free", "high_fiber", "biscuit", "tea_time"]),
    Product("P028", "Gluten Free Protein Cookies 150g", "Health", "FitLife", 120, True, ["veg_only", "gluten_free", "high_protein", "biscuit", "healthy_snack"]),
    Product("P029", "High Protein Milk Drink 250ml", "Health", "Amul", 60, True, ["veg_only", "high_protein", "dairy", "beverage"]),
    Product("P030", "Sugar Free Dark Chocolate 80g", "Health", "ChocoFit", 150, True, ["veg_only", "sugar_free", "chocolate", "treat"]),

    # Extra health products
    Product("P043", "Multigrain Energy Bar 35g", "Health", "FitLife", 45, True, ["veg_only", "high_protein", "healthy_snack", "on_the_go"]),
    Product("P044", "Oats 1kg", "Health", "HealthLife", 120, True, ["veg_only", "high_fiber", "breakfast_item"]),
    Product("P045", "Chia Seeds 200g", "Health", "HealthLife", 160, True, ["veg_only", "high_fiber", "healthy_snack"]),
]

len(products_data)


45

In [4]:
# Cell 5: Quick view of catalog

# One row per product: each dataclass instance is flattened via its attribute dict.
product_rows = [vars(item) for item in products_data]
df_products = pd.DataFrame(product_rows)
df_products.head()


Unnamed: 0,product_id,name,category,brand,price,in_stock,tags
0,P001,Amul Toned Milk 1L,Dairy,Amul,65,False,"[veg_only, dairy, milk, breakfast_item]"
1,P002,Amul Lactose Free Milk 1L,Dairy,Amul,90,True,"[veg_only, lactose_free, dairy, milk, breakfas..."
2,P003,Mother Dairy Toned Milk 1L,Dairy,Mother Dairy,62,True,"[veg_only, dairy, milk, breakfast_item]"
3,P004,Amul Cheese Slices 10pcs,Dairy,Amul,120,True,"[veg_only, dairy, cheese, sandwich]"
4,P005,Britannia Cheese Block 200g,Dairy,Britannia,110,True,"[veg_only, dairy, cheese]"


In [5]:
# Cell 6: Define categories and "similar categories" (for related suggestions)

categories = ["Dairy", "Bakery", "Snacks", "Beverages", "Health"]

# Similarity between categories (domain knowledge, manual).
# The mapping is directional: a target only counts as similar when it is listed
# under the source key (e.g. "Dairy" lists "Snacks", but "Snacks" does not list
# "Dairy"). NOTE(review): confirm that this asymmetry is intended.
similar_categories = {
    "Dairy": ["Health", "Snacks"],          # health drinks, protein snacks
    "Bakery": ["Snacks", "Health"],         # biscuits, protein cookies
    "Snacks": ["Bakery", "Health"],         # multigrain/snack bars
    "Beverages": ["Health"],                # health drinks, green tea
    "Health": ["Dairy", "Snacks", "Bakery", "Beverages"],
}
similar_categories


{'Dairy': ['Health', 'Snacks'],
 'Bakery': ['Snacks', 'Health'],
 'Snacks': ['Bakery', 'Health'],
 'Beverages': ['Health'],
 'Health': ['Dairy', 'Snacks', 'Bakery', 'Beverages']}

In [6]:
# Cell 7: Build Knowledge Graph using NetworkX

# Single undirected graph shared by the cells below.
KG = nx.Graph()


def add_node(node_id: str, node_type: str, **attrs):
    """Add a node to KG, storing its kind under ``node_type`` ahead of any extra attributes."""
    node_data = {"node_type": node_type, **attrs}
    KG.add_node(node_id, **node_data)


def add_edge(src: str, dst: str, edge_type: str, **attrs):
    """Add an edge to KG, storing its kind under ``edge_type`` ahead of any extra attributes."""
    edge_data = {"edge_type": edge_type, **attrs}
    KG.add_edge(src, dst, **edge_data)

# 1. Category nodes, in the order given by `categories`.
for category_name in categories:
    add_node(f"category:{category_name}", "category", name=category_name)

# 2. Brand and attribute vocabularies are derived from the catalog (sorted, de-duplicated).
brands = sorted(set(item.brand for item in products_data))
attributes = sorted(set(tag_name for item in products_data for tag_name in item.tags))

for brand_name in brands:
    add_node(f"brand:{brand_name}", "brand", name=brand_name)

for attr_name in attributes:
    add_node(f"attr:{attr_name}", "attribute", name=attr_name)

# 3. Product nodes, each linked to its category, its brand and every one of its tags.
#    Insertion order is kept as-is because neighbor iteration order follows it.
for item in products_data:
    node_key = f"product:{item.product_id}"
    add_node(
        node_key,
        "product",
        product_id=item.product_id,
        name=item.name,
        category=item.category,
        brand=item.brand,
        price=item.price,
        in_stock=item.in_stock,
        tags=item.tags,
    )
    add_edge(node_key, f"category:{item.category}", "IS_A")      # product -> category
    add_edge(node_key, f"brand:{item.brand}", "HAS_BRAND")       # product -> brand
    for tag_name in item.tags:
        add_edge(node_key, f"attr:{tag_name}", "HAS_ATTRIBUTE")  # product -> attribute

KG.number_of_nodes(), KG.number_of_edges()


(102, 258)

In [7]:
# Cell 8: Add SIMILAR_TO edges between specific products (manual knowledge)

# Hand-curated SIMILAR_TO links between product IDs.
similar_pairs = [
    ("P001", "P003"),  # Amul Toned Milk ~ Mother Dairy Toned Milk
    ("P002", "P026"),  # Lactose free milk ~ Lactose free yogurt
    ("P009", "P011"),  # Whole wheat bread ~ Multigrain bread
    ("P012", "P020"),  # Gluten free bread ~ Protein snack bar
    ("P015", "P016"),  # Regular chips ~ baked chips
    ("P018", "P019"),  # Mix nuts ~ roasted chana
    ("P021", "P022"),  # Real juices similar
    ("P023", "P024"),  # Green tea ~ low sugar lemon juice
    ("P027", "P028"),  # Digestive biscuits ~ protein cookies
    ("P029", "P026"),  # Protein milk drink ~ lactose free yogurt
    ("P034", "P035"),  # biscuits
    ("P032", "P033"),  # paneer
    ("P037", "P038"),  # NutriBox snacks
]

# Adding an edge creates missing endpoints implicitly, so a mistyped ID would
# silently add an attribute-less phantom node. Validate every ID before linking.
unknown_ids = sorted(
    {pid for pair in similar_pairs for pid in pair if not KG.has_node(f"product:{pid}")}
)
if unknown_ids:
    raise ValueError(f"similar_pairs references unknown product IDs: {unknown_ids}")

for a, b in similar_pairs:
    add_edge(f"product:{a}", f"product:{b}", "SIMILAR_TO")

len(KG.edges())


271

In [8]:
# Cell 9: Helper functions to retrieve Product objects

# Lookup tables built in one pass: product ID -> Product, display name -> product ID.
id_to_product: Dict[str, Product] = {}
name_to_product_id: Dict[str, str] = {}
for item in products_data:
    id_to_product[item.product_id] = item
    name_to_product_id[item.name] = item.product_id

def get_product_by_id(product_id: str) -> Optional[Product]:
    """Return the catalog entry stored under ``product_id``, or ``None`` if the ID is unknown."""
    if product_id in id_to_product:
        return id_to_product[product_id]
    return None

def get_product_by_name(name: str) -> Optional[Product]:
    """Resolve an exact display name to its Product; ``None`` when no product has that name."""
    matched_id = name_to_product_id.get(name)
    return None if matched_id is None else id_to_product[matched_id]

def list_all_product_names() -> List[str]:
    """Return every product display name as a new, ascending-sorted list."""
    names = list(name_to_product_id)
    names.sort()
    return names

# Preview the first ten names of the sorted list as a quick sanity check.
list_all_product_names()[:10]


['Amul Cheese Slices 10pcs',
 'Amul Fresh Cream 200ml',
 'Amul Lactose Free Milk 1L',
 'Amul Low Fat Curd 400g',
 'Amul Paneer 200g',
 'Amul Probiotic Dahi 400g',
 'Amul Toned Milk 1L',
 'Baked Nachos 75g',
 'Britannia Cheese Block 200g',
 'Britannia Marie Gold Biscuits 200g']

In [9]:
# Cell 10: Mapping between Product and KG node IDs

def product_to_node_id(product_id: str) -> str:
    """Build the graph node key for a product by prefixing its ID with ``product:``."""
    return "product:{}".format(product_id)

def node_id_to_product(node_id: str) -> Optional[Product]:
    """Map a graph node key back to its Product.

    Returns ``None`` when ``node_id`` does not start with ``product:`` or when
    the remaining ID is not in the catalog.
    """
    prefix = "product:"
    if not node_id.startswith(prefix):
        return None
    # Strip only the leading prefix. Splitting on the prefix text and taking
    # the last piece would mangle any ID that itself contains "product:".
    return get_product_by_id(node_id[len(prefix):])


In [10]:
# Cell 11: Exact product availability check

def check_exact_product_availability(
    product_name: str,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str] = None,
) -> Tuple[Optional[Product], List[str]]:
    """
    Decide whether the product named ``product_name`` can be served as-is.

    Returns ``(product, rule_tags)`` when the product exists, matches the
    preferred brand (if one is given), costs no more than ``max_price`` (if
    given), is in stock and carries every required tag. Returns ``(None, [])``
    in every other case.
    """
    product = get_product_by_name(product_name)
    if product is None:
        return None, []

    # Checks run in the order brand, price, stock, tags; the first failure rejects.
    rejected = (
        (preferred_brand is not None and product.brand != preferred_brand)
        or (max_price is not None and product.price > max_price)
        or not product.in_stock
        or not set(required_tags).issubset(product.tags)
    )
    if rejected:
        return None, []

    fired = ["exact_match_available"]
    if preferred_brand is not None:
        fired.append("preferred_brand_respected")
    if required_tags:
        fired.append("all_required_tags_matched")
    return product, fired


In [11]:
# Cell 12: BFS traversal from requested product to find candidate products

from collections import deque

def bfs_candidates(start_prod: Product, max_depth: int = 3) -> List[Product]:
    """
    Breadth-first walk over KG from the node of ``start_prod``.

    Nodes at distance ``max_depth`` are still collected but not expanded.
    Returns the products reached, in order of first discovery, without the
    start product itself.
    """
    origin = product_to_node_id(start_prod.product_id)
    seen = {origin}
    frontier = deque([(origin, 0)])
    found: Dict[str, Product] = {}

    while frontier:
        current, hops = frontier.popleft()
        if hops < max_depth:
            for adjacent in KG.neighbors(current):
                if adjacent not in seen:
                    seen.add(adjacent)
                    frontier.append((adjacent, hops + 1))

                    # The origin is pre-seeded in `seen`, so it can never reach this
                    # point; only the node kind has to be checked.
                    if adjacent.startswith("product:"):
                        hit = node_id_to_product(adjacent)
                        if hit:
                            found[hit.product_id] = hit

    return list(found.values())


In [12]:
# Cell 13: Category closeness

def category_closeness(source_cat: str, target_cat: str) -> float:
    """
    Rate how close ``target_cat`` is to ``source_cat``:
      1.0 when both are the same category
      0.7 when ``target_cat`` is listed under ``source_cat`` in similar_categories
      0.0 in every other case (including an unknown ``source_cat``)
    """
    if source_cat == target_cat:
        return 1.0
    related = similar_categories.get(source_cat, [])
    return 0.7 if target_cat in related else 0.0

# Quick sanity check: same category, related category, unrelated category.
for probe_cat in ("Dairy", "Health", "Bakery"):
    print(category_closeness("Dairy", probe_cat))


1.0
0.7
0.0


In [13]:
# Cell 14: Scoring function (classical heuristic, no ML)

def score_candidate(
    requested: Product,
    candidate: Product,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str] = None,
) -> Tuple[Optional[float], List[str]]:
    """
    Score ``candidate`` as a substitute for ``requested``.

    Returns ``(score, rule_tags)`` when the candidate passes the hard filters,
    otherwise ``(None, [])`` -- callers must test the score for ``None``.

    Hard filters:
      - in stock
      - price <= max_price (if given)
      - contains all required tags
    Soft preferences added to the score, in this order:
      - category closeness (closeness * 4.0)
      - brand: +3.0 / -0.5 against ``preferred_brand`` when one is given,
        otherwise +2.0 for sharing the requested product's brand
      - +0.1 per candidate tag beyond the required ones (only when tags were required)
      - price versus the requested product: +1.0 cheaper, +0.5 equal, -0.2 dearer
    """
    # Hard filters
    if not candidate.in_stock:
        return None, []

    if max_price is not None and candidate.price > max_price:
        return None, []

    # Build both tag sets once; they are reused for the extra-attribute bonus.
    required = set(required_tags)
    candidate_tags = set(candidate.tags)
    if not required.issubset(candidate_tags):
        return None, []

    # Passed filters -> compute score. The order of the additions below is
    # kept fixed so that floating-point results stay reproducible.
    rule_tags: List[str] = []
    score = 0.0

    # Category closeness
    cat_score = category_closeness(requested.category, candidate.category)
    score += cat_score * 4.0
    if cat_score == 1.0:
        rule_tags.append("same_category")
    elif cat_score > 0:
        rule_tags.append("similar_category")

    # Brand preference
    if preferred_brand is not None:
        if candidate.brand == preferred_brand:
            score += 3.0
            rule_tags.append("preferred_brand_respected")
        else:
            # small penalty
            score -= 0.5
            rule_tags.append("different_brand_than_preferred")
    else:
        if candidate.brand == requested.brand:
            score += 2.0
            rule_tags.append("same_brand_as_requested")
        else:
            rule_tags.append("different_brand_than_requested")

    # Required tags all matched
    if required_tags:
        rule_tags.append("all_required_tags_matched")
        # extra attributes beyond required -> slight bonus
        score += 0.1 * len(candidate_tags - required)

    # Price advantage: cheaper is better
    if candidate.price < requested.price:
        score += 1.0
        rule_tags.append("cheaper_option")
    elif candidate.price == requested.price:
        score += 0.5
        rule_tags.append("same_price_as_requested")
    else:
        # little penalty for being more expensive
        score -= 0.2
        rule_tags.append("slightly_more_expensive")

    return score, rule_tags


In [14]:
# Cell 15: Rule tags -> human-readable explanation text (deterministic, no random text)

# Maps each rule tag to the sentence shown to the user. build_explanation
# skips any tag that has no entry in this table.
RULE_EXPLANATIONS = {
    "exact_match_available": "The exact product you requested is in stock and satisfies your constraints.",
    "preferred_brand_respected": "Matches your preferred brand.",
    "same_brand_as_requested": "Has the same brand as your requested product.",
    "different_brand_than_requested": "Brand is different from your requested product.",
    "different_brand_than_preferred": "Brand is different from your preferred brand.",
    "same_category": "Belongs to the same category as your requested product.",
    "similar_category": "Belongs to a related category.",
    "all_required_tags_matched": "Matches all of your required tags.",
    "cheaper_option": "This option is cheaper than your requested product.",
    "same_price_as_requested": "This option has the same price as your requested product.",
    "slightly_more_expensive": "This option is slightly more expensive than your requested product.",
}

def build_explanation(rule_tags: List[str], candidate: Product, requested: Product, required_tags: List[str]) -> str:
    """Join the sentences for the given rule tags into one explanation string.

    Tags without an entry in RULE_EXPLANATIONS are skipped. When tags were
    required and all matched, the required tags are listed at the end.
    ``candidate`` and ``requested`` are accepted but not read.
    """
    sentences = [RULE_EXPLANATIONS[tag] for tag in rule_tags if tag in RULE_EXPLANATIONS]

    if required_tags and "all_required_tags_matched" in rule_tags:
        tag_list = ", ".join(required_tags)
        sentences.append(f"Required tags satisfied: {tag_list}.")

    return " ".join(sentences).strip()


In [15]:
# Cell 16: Core reasoning function

def find_alternatives(
    requested_product_name: str,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str] = None,
    max_alternatives: int = 3,
) -> Dict[str, Any]:
    """
    Entry point of the recommender.

    Steps:
      1. Look the requested product up by name; stop if it is not in the catalog.
      2. Return it as an exact match if it satisfies every constraint.
      3. Otherwise collect candidates by BFS (depth 3), score them, and keep
         the ``max_alternatives`` best ones.

    The returned dict always has these keys:
      - "requested": Product or None
      - "exact_match": dict (product / explanation / rule_tags) or None
      - "alternatives": List[Recommendation], best score first
      - "message": str
    """
    requested = get_product_by_name(requested_product_name)
    outcome: Dict[str, Any] = {
        "requested": requested,
        "exact_match": None,
        "alternatives": [],
        "message": "",
    }

    if requested is None:
        outcome["message"] = "Requested product not found in catalog."
        return outcome

    # Step 2: the requested product itself, if it passes all constraints.
    exact_prod, exact_rules = check_exact_product_availability(
        requested_product_name,
        max_price,
        required_tags,
        preferred_brand,
    )
    if exact_prod is not None:
        outcome["exact_match"] = {
            "product": exact_prod,
            "explanation": build_explanation(exact_rules, exact_prod, requested, required_tags),
            "rule_tags": exact_rules,
        }
        outcome["message"] = "Exact product is available."
        return outcome

    # Step 3: score every product reachable from the requested one.
    ranked: List[Recommendation] = []
    for cand in bfs_candidates(requested, max_depth=3):
        score, rule_tags = score_candidate(
            requested,
            cand,
            max_price,
            required_tags,
            preferred_brand,
        )
        if score is not None:
            ranked.append(
                Recommendation(
                    product=cand,
                    score=score,
                    rule_tags=rule_tags,
                    explanation=build_explanation(rule_tags, cand, requested, required_tags),
                )
            )

    # Stable sort: equal scores keep their discovery order.
    ranked.sort(key=lambda rec: rec.score, reverse=True)
    top = ranked[:max_alternatives]

    if top:
        outcome["message"] = "Requested product not suitable / out of stock. Showing alternative suggestions."
        outcome["alternatives"] = top
    else:
        outcome["message"] = "Requested product is not suitable or out of stock, and no alternatives were found."

    return outcome


In [16]:
# Cell 17: Pretty printing helpers

def print_product(p: Product):
    """Print a four-line summary of a product: name and ID, category and brand, price and stock, tags."""
    print(f"{p.name} [{p.product_id}]")
    print(f"  Category: {p.category}, Brand: {p.brand}")
    # The rupee sign had been saved as mis-decoded bytes; use the real character.
    print(f"  Price: ₹{p.price}, In stock: {p.in_stock}")
    print(f"  Tags: {', '.join(p.tags)}")

def print_result(result: Dict[str, Any]):
    """Print a find_alternatives result: message, requested product, exact match, alternatives.

    Sections with no content are reported as ``None`` (the requested-product
    section is simply omitted when there is no requested product).
    """
    print("MESSAGE:", result["message"])
    print()

    requested = result["requested"]
    if requested:
        print("Requested product:")
        print_product(requested)
        print()

    exact = result["exact_match"]
    if not exact:
        print("Exact match: None")
    else:
        print("Exact match:")
        print_product(exact["product"])
        print("Explanation:", exact["explanation"])
        print("Rule tags:", exact["rule_tags"])
    print()

    alternatives = result["alternatives"]
    if not alternatives:
        print("Alternatives: None")
        return

    print("Alternatives:")
    for rank, rec in enumerate(alternatives, start=1):
        print(f"#{rank}: (score={rec.score:.2f})")
        print_product(rec.product)
        print("Explanation:", rec.explanation)
        print("Rule tags:", rec.rule_tags)
        print()


In [17]:
# Cell 18: Test 1 – Exact match scenario
# The requested bread (P009) is in stock at 45 and carries both required tags,
# so the exact-match path should answer and no alternatives are searched.

result1 = find_alternatives(
    requested_product_name="Britannia Whole Wheat Bread 400g",
    max_price=50,
    required_tags=["veg_only", "high_fiber"],
    preferred_brand=None,
)

print_result(result1)


MESSAGE: Exact product is available.

Requested product:
Britannia Whole Wheat Bread 400g [P009]
  Category: Bakery, Brand: Britannia
  Price: ‚Çπ45, In stock: True
  Tags: veg_only, high_fiber, bread, breakfast_item

Exact match:
Britannia Whole Wheat Bread 400g [P009]
  Category: Bakery, Brand: Britannia
  Price: ‚Çπ45, In stock: True
  Tags: veg_only, high_fiber, bread, breakfast_item
Explanation: The exact product you requested is in stock and satisfies your constraints. Matches all of your required tags. Required tags satisfied: veg_only, high_fiber.
Rule tags: ['exact_match_available', 'all_required_tags_matched']

Alternatives: None


In [18]:
# Cell 19: Test 2 – Out of stock, find alternatives
# The exact-match check fails on stock, so scored alternatives are expected;
# candidates from the preferred brand receive the brand bonus.

# Amul Toned Milk is out of stock in our data
result2 = find_alternatives(
    requested_product_name="Amul Toned Milk 1L",
    max_price=100,
    required_tags=["veg_only"],
    preferred_brand="Amul",  # try to keep same brand
)

print_result(result2)


MESSAGE: Requested product not suitable / out of stock. Showing alternative suggestions.

Requested product:
Amul Toned Milk 1L [P001]
  Category: Dairy, Brand: Amul
  Price: ‚Çπ65, In stock: False
  Tags: veg_only, dairy, milk, breakfast_item

Exact match: None

Alternatives:
#1: (score=8.30)
Amul Low Fat Curd 400g [P006]
  Category: Dairy, Brand: Amul
  Price: ‚Çπ60, In stock: True
  Tags: veg_only, low_fat, dairy, curd
Explanation: Belongs to the same category as your requested product. Matches your preferred brand. Matches all of your required tags. This option is cheaper than your requested product. Required tags satisfied: veg_only.
Rule tags: ['same_category', 'preferred_brand_respected', 'all_required_tags_matched', 'cheaper_option']

#2: (score=7.20)
Amul Lactose Free Milk 1L [P002]
  Category: Dairy, Brand: Amul
  Price: ‚Çπ90, In stock: True
  Tags: veg_only, lactose_free, dairy, milk, breakfast_item
Explanation: Belongs to the same category as your requested product. Matche

In [19]:
# Cell 20: Test 3 – Strict tag requirements
# The requested milk (P001) is out of stock and lacks the "lactose_free" tag,
# so only candidates carrying that tag can pass the hard filter.

result3 = find_alternatives(
    requested_product_name="Amul Toned Milk 1L",
    max_price=150,
    required_tags=["lactose_free"],
    preferred_brand=None,
)

print_result(result3)


MESSAGE: Requested product not suitable / out of stock. Showing alternative suggestions.

Requested product:
Amul Toned Milk 1L [P001]
  Category: Dairy, Brand: Amul
  Price: ‚Çπ65, In stock: False
  Tags: veg_only, dairy, milk, breakfast_item

Exact match: None

Alternatives:
#1: (score=6.20)
Amul Lactose Free Milk 1L [P002]
  Category: Dairy, Brand: Amul
  Price: ‚Çπ90, In stock: True
  Tags: veg_only, lactose_free, dairy, milk, breakfast_item
Explanation: Belongs to the same category as your requested product. Has the same brand as your requested product. Matches all of your required tags. This option is slightly more expensive than your requested product. Required tags satisfied: lactose_free.
Rule tags: ['same_category', 'same_brand_as_requested', 'all_required_tags_matched', 'slightly_more_expensive']

#2: (score=4.10)
Lactose Free Yogurt 200g [P026]
  Category: Health, Brand: HealthLife
  Price: ‚Çπ55, In stock: True
  Tags: veg_only, lactose_free, dairy, curd
Explanation: Belon

In [20]:
# Cell 21: Test 4 – Very strict constraints -> likely no alternative
# The requested chips (P015) cost 20, above max_price=10, and lack both
# required tags; the cheapest catalog item also costs 20, so every candidate
# fails the price filter and no alternative is expected.

result4 = find_alternatives(
    requested_product_name="Lays Classic Salted 52g",
    max_price=10,
    required_tags=["gluten_free", "high_protein"],
    preferred_brand="Lays",
)

print_result(result4)


MESSAGE: Requested product is not suitable or out of stock, and no alternatives were found.

Requested product:
Lays Classic Salted 52g [P015]
  Category: Snacks, Brand: Lays
  Price: ‚Çπ20, In stock: True
  Tags: veg_only, snack, evening_snack

Exact match: None

Alternatives: None


In [21]:
# Cell 22: Inspect KG structure (sanity checks)

# Graph size after all edges (including SIMILAR_TO) have been added.
node_total = KG.number_of_nodes()
edge_total = KG.number_of_edges()
print(f"Total nodes: {node_total}")
print(f"Total edges: {edge_total}")

# Attribute dict stored on one product node, shown as the cell result.
print("\nSample product node attributes:")
sample_node = product_to_node_id("P001")
KG.nodes[sample_node]


Total nodes: 102
Total edges: 271

Sample product node attributes:


{'node_type': 'product',
 'product_id': 'P001',
 'name': 'Amul Toned Milk 1L',
 'category': 'Dairy',
 'brand': 'Amul',
 'price': 65,
 'in_stock': False,
 'tags': ['veg_only', 'dairy', 'milk', 'breakfast_item']}

In [22]:
# Cell 23: Export KG as JSON-like dictionaries for later saving to file

import json

def _write_json(data: Any, path: str, label: str) -> None:
    """Write ``data`` to ``path`` as indented UTF-8 JSON and print a confirmation line."""
    with open(path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII text readable in the file.
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"Saved {label} to {path}")


def export_products_to_json(path: str = "products.json"):
    """Write every product in ``products_data`` to ``path`` as a list of attribute dicts."""
    _write_json([vars(p) for p in products_data], path, "products")


def export_categories_to_json(path: str = "categories.json"):
    """Write each category with its ``similar_to`` list (empty when none is defined) to ``path``."""
    data = [{"name": cat, "similar_to": similar_categories.get(cat, [])} for cat in categories]
    _write_json(data, path, "categories")


def export_attributes_to_json(path: str = "attributes.json"):
    """Write every attribute name in ``attributes`` to ``path`` as ``{"name": ...}`` records."""
    _write_json([{"name": attr} for attr in attributes], path, "attributes")

# These calls are active: they write products.json, categories.json and
# attributes.json using the default relative paths.
export_products_to_json()
export_categories_to_json()
export_attributes_to_json()


Saved products to products.json
Saved categories to categories.json
Saved attributes to attributes.json


In [34]:
# Cell: regenerate app.py with improved dark-blue UI

streamlit_app_code = r'''
import streamlit as st
import json
import os
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Tuple
import networkx as nx
from collections import deque
import matplotlib.pyplot as plt

# -----------------------------
# Data models
# -----------------------------

@dataclass
class Product:
    product_id: str
    name: str
    category: str
    brand: str
    price: float
    in_stock: bool
    tags: List[str] = field(default_factory=list)

@dataclass
class Recommendation:
    product: Product
    score: float
    rule_tags: List[str]
    explanation: str

# -----------------------------
# Load products from JSON
# -----------------------------

@st.cache_data
def load_products() -> List[Product]:
    current_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(current_dir, "products.json")
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return [Product(**item) for item in data]

products_data = load_products()
id_to_product: Dict[str, Product] = {p.product_id: p for p in products_data}
name_to_product_id: Dict[str, str] = {p.name: p.product_id for p in products_data}

def get_product_by_name(name: str) -> Optional[Product]:
    pid = name_to_product_id.get(name)
    if pid is None:
        return None
    return id_to_product[pid]

# -----------------------------
# Knowledge Graph construction
# -----------------------------

CATEGORIES = ["Dairy", "Bakery", "Snacks", "Beverages", "Health"]

similar_categories: Dict[str, List[str]] = {
    "Dairy": ["Health", "Snacks"],
    "Bakery": ["Snacks", "Health"],
    "Snacks": ["Bakery", "Health"],
    "Beverages": ["Health"],
    "Health": ["Dairy", "Snacks", "Bakery", "Beverages"],
}

def build_kg(products: List[Product]) -> nx.Graph:
    KG = nx.Graph()

    # Categories
    for cat in CATEGORIES:
        KG.add_node(f"category:{cat}", node_type="category", name=cat)

    # Brands and attributes
    brands = sorted({p.brand for p in products})
    attributes = sorted({tag for p in products for tag in p.tags})

    for brand in brands:
        KG.add_node(f"brand:{brand}", node_type="brand", name=brand)

    for attr in attributes:
        KG.add_node(f"attr:{attr}", node_type="attribute", name=attr)

    # Products
    for p in products:
        pid = f"product:{p.product_id}"
        KG.add_node(
            pid,
            node_type="product",
            name=p.name,
            product_id=p.product_id,
            category=p.category,
            brand=p.brand,
            price=p.price,
            in_stock=p.in_stock,
            tags=p.tags,
        )
        KG.add_edge(pid, f"category:{p.category}", edge_type="IS_A")
        KG.add_edge(pid, f"brand:{p.brand}", edge_type="HAS_BRAND")
        for tag in p.tags:
            KG.add_edge(pid, f"attr:{tag}", edge_type="HAS_ATTRIBUTE")

    # SIMILAR_TO pairs
    similar_pairs = [
        ("P001", "P003"), ("P002", "P026"), ("P009", "P011"),
        ("P012", "P028"), ("P015", "P016"), ("P018", "P019"),
        ("P021", "P022"), ("P023", "P024"), ("P027", "P028"),
        ("P029", "P026"),
    ]
    for a, b in similar_pairs:
        if a in id_to_product and b in id_to_product:
            KG.add_edge(f"product:{a}", f"product:{b}", edge_type="SIMILAR_TO")

    return KG

KG = build_kg(products_data)

# -----------------------------
# KG helpers
# -----------------------------

def product_to_node_id(product_id: str) -> str:
    return f"product:{product_id}"

def node_id_to_product(node_id: str) -> Optional[Product]:
    if not node_id.startswith("product:"):
        return None
    pid = node_id.split("product:")[-1]
    return id_to_product.get(pid)

def category_closeness(source_cat: str, target_cat: str) -> float:
    """Rate how related ``target_cat`` is to ``source_cat``.

    Returns 1.0 for the same category, 0.7 when ``target_cat`` is listed
    under ``source_cat`` in ``similar_categories`` (the lookup is done from
    the source side only), and 0.0 otherwise.
    """
    if source_cat == target_cat:
        return 1.0
    related = similar_categories.get(source_cat, [])
    return 0.7 if target_cat in related else 0.0

# -----------------------------
# BFS + scoring
# -----------------------------

# Maps each rule tag to the user-facing sentence for it. build_explanation
# joins the sentences in rule_tags order and silently skips any tag that has
# no entry here.
RULE_EXPLANATIONS = {
    "exact_match_available": "The exact item is in stock and matches your filters.",
    "preferred_brand_respected": "Matches your preferred brand.",
    "same_brand_as_requested": "Same brand as the requested product.",
    "different_brand_than_requested": "Different brand than the requested product.",
    "all_required_tags_matched": "Matches all required tags.",
    "cheaper_option": "Cheaper than the requested product.",
    "same_price_as_requested": "Same price as the requested product.",
    "slightly_more_expensive": "Slightly more expensive.",
    "same_category": "Same category.",
    "similar_category": "Related category.",
    "closer_in_graph": "Close to the requested item in the knowledge graph.",
}

def build_explanation(rule_tags: List[str], required_tags: List[str]) -> str:
    """Turn rule tags into a single human-readable explanation string.

    Each tag with an entry in ``RULE_EXPLANATIONS`` contributes its sentence,
    in the order given; unknown tags are ignored. When required tags were
    supplied and the ``all_required_tags_matched`` rule fired, the list of
    required tags is appended as a final sentence.
    """
    sentences: List[str] = []
    for tag in rule_tags:
        if tag in RULE_EXPLANATIONS:
            sentences.append(RULE_EXPLANATIONS[tag])

    if required_tags and "all_required_tags_matched" in rule_tags:
        sentences.append("Required tags: {}.".format(", ".join(required_tags)))

    return " ".join(sentences).strip()

def check_exact_product_availability(
    product: Product,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str] = None,
) -> Tuple[Optional[Product], List[str]]:
    """Check whether the requested product itself satisfies every filter.

    Args:
        product: The product the user asked for.
        max_price: Upper price bound, or ``None`` for no bound.
        required_tags: Tags the product must all carry.
        preferred_brand: Brand the product must have, or ``None`` for any.

    Returns:
        ``(product, rule_tags)`` when the product is in stock and passes the
        brand, price and tag filters; ``(None, [])`` otherwise.
    """
    fails_filters = (
        (preferred_brand is not None and product.brand != preferred_brand)
        or (max_price is not None and product.price > max_price)
        or (not product.in_stock)
        or not (set(required_tags) <= set(product.tags))
    )
    if fails_filters:
        return None, []

    matched_rules = ["exact_match_available"]
    if preferred_brand is not None:
        matched_rules.append("preferred_brand_respected")
    if required_tags:
        matched_rules.append("all_required_tags_matched")
    return product, matched_rules

def bfs_candidates_with_depth(requested: Product, max_depth: int = 2) -> Tuple[Dict[str, Tuple[Product,int]], int]:
    """Breadth-first walk of ``KG`` outward from the requested product.

    Args:
        requested: Product whose graph node is the starting point.
        max_depth: Nodes at this depth are dequeued but not expanded.

    Returns:
        ``(candidates, traversed)``. ``candidates`` maps product_id to
        ``(Product, depth)`` for every other product node discovered, and
        ``traversed`` is the number of nodes taken off the queue.
    """
    origin = product_to_node_id(requested.product_id)
    seen = {origin}
    frontier = deque([(origin, 0)])
    dequeued = 0
    found: Dict[str, Tuple[Product, int]] = {}

    while frontier:
        current, level = frontier.popleft()
        dequeued += 1
        if level < max_depth:
            next_level = level + 1
            for neighbour in KG.neighbors(current):
                if neighbour in seen:
                    continue
                seen.add(neighbour)
                frontier.append((neighbour, next_level))

                # Only product nodes other than the start can be candidates.
                if not neighbour.startswith("product:") or neighbour == origin:
                    continue
                hit = node_id_to_product(neighbour)
                if not hit:
                    continue
                known = found.get(hit.product_id)
                if known is None or next_level < known[1]:
                    found[hit.product_id] = (hit, next_level)

    return found, dequeued

def score_candidate(
    requested: Product,
    candidate: Product,
    depth: int,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str],
) -> Tuple[Optional[float], List[str]]:
    """Score ``candidate`` as a substitute for ``requested``.

    Hard filters come first: a candidate that is out of stock, priced above
    ``max_price`` or missing any required tag is rejected with ``(None, [])``.

    Score components for surviving candidates:
      * category:  4.0 x ``category_closeness(requested, candidate)``
      * brand:     +3.0 if it matches ``preferred_brand``; -0.5 if a
                   preference is set but not matched; with no preference,
                   +2.0 if it shares the requested product's brand
      * price:     +1.0 cheaper, +0.5 equal, -0.2 more expensive
      * proximity: 0.5 x ``max(0, 3 - depth)``

    Args:
        requested: Product the user asked for.
        candidate: Product being considered as an alternative.
        depth: Graph distance at which the candidate was found.
        max_price: Upper price bound, or ``None`` for no bound.
        required_tags: Tags the candidate must all carry.
        preferred_brand: Brand to favour, or ``None`` for no preference.

    Returns:
        ``(score, rule_tags)``, or ``(None, [])`` when the candidate is
        filtered out.
    """
    rule_tags: List[str] = []

    if not candidate.in_stock:
        return None, []
    if max_price is not None and candidate.price > max_price:
        return None, []
    if not set(required_tags).issubset(set(candidate.tags)):
        return None, []

    score = 0.0

    cat_score = category_closeness(requested.category, candidate.category)
    score += cat_score * 4.0
    if cat_score == 1.0:
        rule_tags.append("same_category")
    elif cat_score > 0:
        rule_tags.append("similar_category")

    same_brand_as_requested = candidate.brand == requested.brand
    if preferred_brand is not None:
        if candidate.brand == preferred_brand:
            score += 3.0
            rule_tags.append("preferred_brand_respected")
        else:
            score -= 0.5
            # The penalty is for missing the preferred brand, but the tag
            # describes the brand relative to the *requested* product, so it
            # must reflect that comparison rather than always saying
            # "different".
            rule_tags.append(
                "same_brand_as_requested"
                if same_brand_as_requested
                else "different_brand_than_requested"
            )
    else:
        if same_brand_as_requested:
            score += 2.0
            rule_tags.append("same_brand_as_requested")
        else:
            rule_tags.append("different_brand_than_requested")

    if candidate.price < requested.price:
        score += 1.0
        rule_tags.append("cheaper_option")
    elif candidate.price == requested.price:
        score += 0.5
        rule_tags.append("same_price_as_requested")
    else:
        score -= 0.2
        rule_tags.append("slightly_more_expensive")

    proximity_bonus = max(0, (3 - depth)) * 0.5
    score += proximity_bonus
    # Only claim graph closeness when it actually contributed to the score.
    if proximity_bonus > 0:
        rule_tags.append("closer_in_graph")

    if required_tags:
        rule_tags.append("all_required_tags_matched")

    return score, rule_tags

def find_alternatives(
    requested_product_name: str,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str],
    max_alternatives: int = 3,
) -> Dict[str, Any]:
    """Look up a product by name and rank in-graph substitutes for it.

    Args:
        requested_product_name: Name passed to ``get_product_by_name``.
        max_price: Upper price bound, or ``None`` for no bound.
        required_tags: Tags every result must carry.
        preferred_brand: Brand to favour, or ``None`` for no preference.
        max_alternatives: Maximum number of alternatives to return.

    Returns:
        A dict with keys ``requested``, ``exact_match``, ``alternatives``
        (list of ``Recommendation``, best score first), ``message`` and
        ``traversed_nodes``.
    """
    outcome: Dict[str, Any] = dict(
        requested=None,
        exact_match=None,
        alternatives=[],
        message="",
        traversed_nodes=0,
    )

    requested = get_product_by_name(requested_product_name)
    if requested is None:
        outcome["message"] = "Product not found."
        return outcome
    outcome["requested"] = requested

    # Is the requested item itself acceptable under the current filters?
    exact, exact_tags = check_exact_product_availability(
        requested, max_price, required_tags, preferred_brand
    )
    has_exact = exact is not None
    if has_exact:
        outcome["exact_match"] = {
            "product": exact,
            "rule_tags": exact_tags,
            "explanation": build_explanation(exact_tags, required_tags),
        }

    # Collect nearby products from the graph, then score and filter them.
    candidates_with_depth, traversed = bfs_candidates_with_depth(requested, max_depth=2)
    outcome["traversed_nodes"] = traversed

    ranked: List[Recommendation] = []
    for cand, depth in candidates_with_depth.values():
        value, rules = score_candidate(
            requested, cand, depth, max_price, required_tags, preferred_brand
        )
        if value is not None:
            ranked.append(
                Recommendation(
                    product=cand,
                    score=value,
                    rule_tags=rules,
                    explanation=build_explanation(rules, required_tags),
                )
            )

    shortlist = sorted(ranked, key=lambda rec: rec.score, reverse=True)[:max_alternatives]

    # Message depends on (exact match available?, any alternatives?).
    messages = {
        (True, False): "Exact product is available, but no better alternatives were found.",
        (False, False): "No alternatives found matching constraints.",
        (True, True): "Exact product is available. Showing additional alternatives.",
        (False, True): "Alternatives found.",
    }
    outcome["message"] = messages[(has_exact, bool(shortlist))]
    if shortlist:
        outcome["alternatives"] = shortlist

    return outcome

# -----------------------------
# Visualization: shortest paths
# -----------------------------

def visualize_search_path(root_product: Product, recs: List[Recommendation]) -> Optional[plt.Figure]:
    """Draw the shortest knowledge-graph paths from a product to its recommendations.

    The requested product is placed in the centre shell, intermediate nodes
    (categories, brands, attributes, other products) in the middle shell and
    the recommended products in the outer shell.

    Args:
        root_product: The product the user asked for.
        recs: Recommendations whose paths from ``root_product`` are drawn.

    Returns:
        The matplotlib ``Figure`` (suitable for ``st.pyplot``), or ``None``
        when ``recs`` is empty or no path to any recommendation exists.
    """
    if not recs:
        return None

    root_id = product_to_node_id(root_product.product_id)
    target_ids = [product_to_node_id(r.product.product_id) for r in recs]

    edges = set()
    for tid in target_ids:
        try:
            path = nx.shortest_path(KG, source=root_id, target=tid)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            # Unreachable or unknown node: skip this target instead of
            # failing the whole plot.
            continue
        edges.update(zip(path, path[1:]))

    if not edges:
        return None

    sub = KG.edge_subgraph(list(edges)).copy()

    # Only lay out nodes that are really in the subgraph, and derive the
    # shells from the graph's own node order so the layout is reproducible
    # (iterating a set of strings is not).
    drawn_targets = [t for t in dict.fromkeys(target_ids) if t in sub and t != root_id]
    target_set = set(drawn_targets)
    middle = [n for n in sub.nodes() if n != root_id and n not in target_set]
    shells = [shell for shell in ([root_id], middle, drawn_targets) if shell]
    pos = nx.shell_layout(sub, nlist=shells)

    # Light node colours so the black labels stay readable.
    type_colors = {
        "category": "#cfe2ff",
        "brand": "#ffd6a5",
        "attribute": "#ffccd5",
    }
    colors = []
    for n in sub.nodes():
        if n == root_id:
            colors.append("#ffe680")   # light yellow for requested
        elif n in target_set:
            colors.append("#b3ffb3")   # light green for recommended
        else:
            colors.append(type_colors.get(sub.nodes[n].get("node_type"), "#f0f0f0"))

    # Use an explicit Figure/Axes pair so the caller receives a real Figure
    # object rather than the pyplot module.
    fig, ax = plt.subplots(figsize=(10, 6))

    nx.draw_networkx_nodes(sub, pos, ax=ax, node_size=650, node_color=colors, edgecolors="#000000")
    nx.draw_networkx_edges(sub, pos, ax=ax, alpha=0.7, width=1.5, edge_color="#bbbbbb")

    labels = {n: sub.nodes[n].get("name", n) for n in sub.nodes()}
    nx.draw_networkx_labels(sub, pos, labels=labels, ax=ax, font_size=8, font_color="#000000")

    # The white title is drawn above the axes, on the figure background, so
    # the figure itself must be dark; colouring only the axes is not enough
    # (and the axes face may not be painted once the axis is switched off).
    fig.patch.set_facecolor("#050b16")
    ax.set_facecolor("#050b16")
    ax.set_title(f"Paths from '{root_product.name}' to recommended items", fontsize=10, color="#ffffff")
    ax.set_axis_off()
    return fig

# -----------------------------
# UI layout (dark blue theme)
# -----------------------------

# Page-level settings: wide layout and the browser tab title.
st.set_page_config(layout="wide", page_title="General Store - Product Finder")

# Inject a global stylesheet: dark navy page colours, restyled Streamlit
# widgets, and the .product-card / .product-title / .product-price /
# .product-meta classes used by the result cards rendered further down.
# unsafe_allow_html=True is needed for the raw <style> tag to be emitted.
st.markdown("""
    <style>
    body, .main, .stApp {
        background-color: #050b16;
        color: #e0e6f0;
    }
    .block-container {
        padding-top: 1.5rem;
        padding-bottom: 1.5rem;
    }
    .product-card {
        border: 1px solid #1f2a3a;
        border-radius: 10px;
        padding: 14px;
        margin-bottom: 10px;
        background: linear-gradient(135deg, #0b1626, #152338);
        box-shadow: 0 2px 6px rgba(0,0,0,0.6);
    }
    .product-title {
        font-weight: 700;
        font-size: 16px;
        margin-bottom: 4px;
        color: #ffffff;
    }
    .product-price {
        font-size: 15px;
        font-weight: 600;
        color: #4fe3c1;
    }
    .product-meta {
        font-size: 13px;
        color: #d0d6e0;
    }
    .stMarkdown, .stText, .stSelectbox label, .stSlider label {
        color: #e0e6f0 !important;
    }
    .stSelectbox div[data-baseweb="select"] > div {
        background-color: #0b1626;
        color: #e0e6f0;
    }
    .stSlider > div > div > div {
        background-color: #1b3b6f;
    }
    .stButton button {
        background: linear-gradient(90deg, #1b3b6f, #274a8a);
        color: #ffffff;
        border-radius: 8px;
        border: 1px solid #3b6ac9;
    }
    .stButton button:hover {
        background: linear-gradient(90deg, #274a8a, #3560b3);
        border-color: #4f82ff;
    }
    .stExpander {
        border: 1px solid #1f2a3a;
        border-radius: 8px;
        background-color: #081321;
    }
    </style>
""", unsafe_allow_html=True)

# Main page: title, sidebar filters, product picker and the result view.
# The currency sign and the cart emoji were mis-encoded (UTF-8 bytes decoded
# with a legacy code page) and are restored to the intended characters here.
st.title("🛒 Smart Catalog & Substitution Assistant")

# Sidebar filters
with st.sidebar:
    st.header("Filters")
    # A slider value of 0 means "no price limit" and is passed on as None.
    max_price_input = st.slider("Max Price (₹)", 0, 250, 0, 5)
    max_price_val = max_price_input if max_price_input > 0 else None

    all_tags = sorted({tag for p in products_data for tag in p.tags})
    required_tags = st.multiselect("Required Tags (optional)", options=all_tags, default=[])

    brands = sorted({p.brand for p in products_data})
    preferred_brand = st.selectbox("Preferred Brand (optional)", ["(no preference)"] + brands)
    preferred_brand_val = None if preferred_brand == "(no preference)" else preferred_brand

# Product selection
cat = st.selectbox("Category", CATEGORIES)
cat_products = sorted([p for p in products_data if p.category == cat], key=lambda x: x.name)
selected_name = st.selectbox("Product", [p.name for p in cat_products])

if st.button("Check stock & alternatives"):
    result = find_alternatives(
        selected_name, max_price_val, required_tags, preferred_brand_val
    )

    st.write("---")
    st.subheader("Result")
    st.info(result["message"])

    requested = result["requested"]

    # Exact
    if result["exact_match"]:
        em = result["exact_match"]["product"]
        st.markdown(f"""
        <div class="product-card">
            <div class="product-title">Exact product: {em.name}</div>
            <div class="product-price">₹{em.price}</div>
            <div class="product-meta">
                Brand: {em.brand} | In stock: {em.in_stock}
            </div>
            <div class="product-meta"><b>Why suggested:</b> {result["exact_match"]["explanation"]}</div>
        </div>
        """, unsafe_allow_html=True)

    # Alternatives
    if result["alternatives"]:
        st.markdown("### Alternatives")
        for rec in result["alternatives"]:
            p = rec.product
            st.markdown(f"""
            <div class="product-card">
                <div class="product-title">{p.name}</div>
                <div class="product-price">₹{p.price}</div>
                <div class="product-meta">
                    Brand: {p.brand} | In stock: {p.in_stock}
                </div>
                <div class="product-meta"><b>Why suggested:</b> {rec.explanation}</div>
            </div>
            """, unsafe_allow_html=True)
    else:
        st.write("No alternative suggestions.")

    with st.expander("Knowledge Graph reasoning (paths used)"):
        fig = None
        if requested and result["alternatives"]:
            fig = visualize_search_path(requested, result["alternatives"])
        # Also covers the case where alternatives exist but no figure could
        # be produced, so the expander is never left empty.
        if fig is not None:
            st.pyplot(fig)
        else:
            st.write("No paths to visualize.")
'''

# Save the generated Streamlit source to app.py in the working directory so
# it can be launched from a shell.
with open("app.py", mode="w", encoding="utf-8") as app_file:
    app_file.write(streamlit_app_code)

print("Written updated app.py to current directory. Run: streamlit run app.py")


Written updated app.py to current directory. Run: streamlit run app.py
