In [50]:
import networkx as nx
import pandas as pd
import numpy as np
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional, Tuple


In [51]:
@dataclass
class Product:
    """A single catalog item.

    The catalog cell constructs instances positionally, so the field order
    below is part of the constructor contract.
    """
    product_id: str   # catalog key, e.g. "P001"
    name: str         # display name; also the key used for exact-name lookup
    category: str     # category name, e.g. "Dairy"
    brand: str
    price: float      # compared against the caller's max_price
    in_stock: bool
    tags: List[str] = field(default_factory=list)  # attribute tags, e.g. "veg_only"

@dataclass
class Recommendation:
    """A scored alternative product together with the reasons it was suggested."""
    product: Product
    score: float           # heuristic score; alternatives are sorted by it, highest first
    rule_tags: List[str]   # identifiers of the scoring rules that fired for this candidate
    explanation: str       # human-readable text assembled from rule_tags


In [52]:
# Cell 4: Define a small but rich product catalog (hand-crafted, no ML)

# Positional fields per row: product_id, name, category, brand, price, in_stock, tags.
# P001, P008, P014 and P024 are marked out of stock (in_stock=False).
products_data = [
    # Dairy
    Product("P001", "Amul Toned Milk 1L", "Dairy", "Amul", 65, False, ["veg_only"]),
    Product("P002", "Amul Lactose Free Milk 1L", "Dairy", "Amul", 90, True, ["veg_only", "lactose_free"]),
    Product("P003", "Mother Dairy Toned Milk 1L", "Dairy", "Mother Dairy", 62, True, ["veg_only"]),
    Product("P004", "Amul Cheese Slices 10pcs", "Dairy", "Amul", 120, True, ["veg_only"]),
    Product("P005", "Britannia Cheese Block 200g", "Dairy", "Britannia", 110, True, ["veg_only"]),
    Product("P006", "Amul Low Fat Curd 400g", "Dairy", "Amul", 60, True, ["veg_only", "low_fat"]),
    Product("P007", "Mother Dairy Curd 400g", "Dairy", "Mother Dairy", 55, True, ["veg_only"]),
    Product("P008", "Amul Probiotic Dahi 400g", "Dairy", "Amul", 70, False, ["veg_only", "probiotic"]),

    # Bakery
    Product("P009", "Britannia Whole Wheat Bread 400g", "Bakery", "Britannia", 45, True, ["veg_only", "high_fiber"]),
    Product("P010", "Britannia White Bread 400g", "Bakery", "Britannia", 40, True, ["veg_only"]),
    Product("P011", "Modern Multigrain Bread 400g", "Bakery", "Modern", 50, True, ["veg_only", "high_fiber"]),
    Product("P012", "Gluten Free Brown Bread 300g", "Bakery", "HealthLife", 80, True, ["veg_only", "gluten_free", "high_fiber"]),
    Product("P013", "Whole Wheat Burger Buns 4pcs", "Bakery", "Britannia", 60, True, ["veg_only"]),
    Product("P014", "Garlic Bread Loaf 250g", "Bakery", "BakeryFresh", 70, False, ["veg_only"]),

    # Snacks
    Product("P015", "Lays Classic Salted 52g", "Snacks", "Lays", 20, True, ["veg_only"]),
    Product("P016", "Lays Baked Chips 50g", "Snacks", "Lays", 30, True, ["veg_only", "low_fat"]),
    Product("P017", "Kurkure Masala Munch 50g", "Snacks", "Kurkure", 20, True, ["veg_only"]),
    Product("P018", "Healthy Mix Nuts 100g", "Snacks", "NutriBox", 150, True, ["veg_only", "high_protein"]),
    Product("P019", "Roasted Chana 100g", "Snacks", "NutriBox", 60, True, ["veg_only", "high_protein", "low_fat"]),
    Product("P020", "Protein Snack Bar 40g", "Snacks", "FitLife", 80, True, ["veg_only", "high_protein", "gluten_free"]),

    # Beverages
    Product("P021", "Real Orange Juice 1L", "Beverages", "Real", 110, True, ["veg_only"]),
    Product("P022", "Real Mixed Fruit Juice 1L", "Beverages", "Real", 115, True, ["veg_only"]),
    Product("P023", "Sugar Free Green Tea 25 bags", "Beverages", "HealthLife", 130, True, ["veg_only", "sugar_free"]),
    Product("P024", "Low Sugar Lemon Juice 1L", "Beverages", "Real", 100, False, ["veg_only", "low_sugar"]),
    Product("P025", "Coconut Water 500ml", "Beverages", "HydroFresh", 45, True, ["veg_only"]),

    # Health / Special
    Product("P026", "Lactose Free Yogurt 200g", "Health", "HealthLife", 55, True, ["veg_only", "lactose_free"]),
    Product("P027", "Sugar Free Digestive Biscuits 200g", "Health", "HealthLife", 70, True, ["veg_only", "sugar_free", "high_fiber"]),
    Product("P028", "Gluten Free Protein Cookies 150g", "Health", "FitLife", 120, True, ["veg_only", "gluten_free", "high_protein"]),
    Product("P029", "High Protein Milk Drink 250ml", "Health", "Amul", 60, True, ["veg_only", "high_protein"]),
    Product("P030", "Sugar Free Dark Chocolate 80g", "Health", "ChocoFit", 150, True, ["veg_only", "sugar_free"]),
]
# Cell output: number of catalog rows.
len(products_data)


30

In [53]:
# Cell 5: Quick view of catalog

# One row per product; vars(p) is the same attribute dict as p.__dict__.
df_products = pd.DataFrame(list(map(vars, products_data)))
df_products.head(5)


Unnamed: 0,product_id,name,category,brand,price,in_stock,tags
0,P001,Amul Toned Milk 1L,Dairy,Amul,65,False,[veg_only]
1,P002,Amul Lactose Free Milk 1L,Dairy,Amul,90,True,"[veg_only, lactose_free]"
2,P003,Mother Dairy Toned Milk 1L,Dairy,Mother Dairy,62,True,[veg_only]
3,P004,Amul Cheese Slices 10pcs,Dairy,Amul,120,True,[veg_only]
4,P005,Britannia Cheese Block 200g,Dairy,Britannia,110,True,[veg_only]


In [54]:
# Cell 6: Define categories and "similar categories" (for related suggestions)

# Category names; each one becomes a "category:<name>" node in the knowledge graph.
categories = ["Dairy", "Bakery", "Snacks", "Beverages", "Health"]

# Similarity between categories (domain knowledge, manual)
# Keyed by the *requested* product's category; the value lists the categories
# that count as "related" when scoring a candidate.
# NOTE(review): the mapping is directional -- e.g. "Dairy" lists "Snacks" but
# "Snacks" does not list "Dairy". Confirm this asymmetry is intended.
similar_categories = {
    "Dairy": ["Health", "Snacks"],          # health drinks, protein snacks
    "Bakery": ["Snacks", "Health"],         # biscuits, protein cookies
    "Snacks": ["Bakery", "Health"],         # multigrain/snack bars
    "Beverages": ["Health"],                # health drinks, green tea
    "Health": ["Dairy", "Snacks", "Bakery", "Beverages"],
}
similar_categories


{'Dairy': ['Health', 'Snacks'],
 'Bakery': ['Snacks', 'Health'],
 'Snacks': ['Bakery', 'Health'],
 'Beverages': ['Health'],
 'Health': ['Dairy', 'Snacks', 'Bakery', 'Beverages']}

In [55]:
# Cell 7: Build Knowledge Graph using NetworkX

# nx.Graph is undirected, so the "->" arrows in the comments below describe
# intent only. Node ids are namespaced with a type prefix
# ("category:", "brand:", "attr:", "product:").
KG = nx.Graph()

# Node helper
def add_node(node_id: str, node_type: str, **attrs):
    """Add a node to the module-level KG, storing node_type plus any extra attributes."""
    KG.add_node(node_id, node_type=node_type, **attrs)

# Edge helper
def add_edge(src: str, dst: str, edge_type: str, **attrs):
    """Add an edge to the module-level KG, storing edge_type plus any extra attributes."""
    KG.add_edge(src, dst, edge_type=edge_type, **attrs)

# 1. Add category nodes
for cat in categories:
    add_node(f"category:{cat}", "category", name=cat)

# 2. Add brand nodes and attribute nodes from product data
# Sorted so node insertion order does not depend on set iteration order.
brands = sorted({p.brand for p in products_data})
attributes = sorted({tag for p in products_data for tag in p.tags})

for brand in brands:
    add_node(f"brand:{brand}", "brand", name=brand)

for attr in attributes:
    add_node(f"attr:{attr}", "attribute", name=attr)

# 3. Add product nodes and edges
for p in products_data:
    prod_id = f"product:{p.product_id}"
    # The node carries a copy of every Product field as a node attribute
    # (tags is the same list object as p.tags, not a copy).
    add_node(
        prod_id,
        "product",
        product_id=p.product_id,
        name=p.name,
        category=p.category,
        brand=p.brand,
        price=p.price,
        in_stock=p.in_stock,
        tags=p.tags,
    )
    # IS_A edge: product -> category
    add_edge(prod_id, f"category:{p.category}", "IS_A")

    # HAS_BRAND edge: product -> brand
    add_edge(prod_id, f"brand:{p.brand}", "HAS_BRAND")

    # HAS_ATTRIBUTE edges: product -> each attribute
    for tag in p.tags:
        add_edge(prod_id, f"attr:{tag}", "HAS_ATTRIBUTE")

# Cell output: (node count, edge count) after the catalog has been loaded.
len(KG.nodes()), len(KG.edges())


(57, 112)

In [56]:
# Cell 8: Add SIMILAR_TO edges between specific products (manual knowledge)

# Hand-curated pairs of product ids treated as direct substitutes.
similar_pairs = [
    ("P001", "P003"),  # Amul Toned Milk ~ Mother Dairy Toned Milk
    ("P002", "P026"),  # Lactose free milk ~ Lactose free yogurt
    ("P009", "P011"),  # Whole wheat bread ~ Multigrain bread
    ("P012", "P020"),  # Gluten free bread ~ Protein snack bar
    ("P015", "P016"),  # Regular chips ~ baked chips
    ("P018", "P019"),  # Mix nuts ~ roasted chana
    ("P021", "P022"),  # Real juices similar
    ("P023", "P024"),  # Green tea ~ low sugar lemon juice
    ("P027", "P028"),  # Digestive biscuits ~ protein cookies
    ("P029", "P026"),  # Protein milk drink ~ lactose free yogurt
]

# Link each pair with a SIMILAR_TO edge between the two product nodes.
for a, b in similar_pairs:
    add_edge("product:{}".format(a), "product:{}".format(b), edge_type="SIMILAR_TO")

# Cell output: total edge count after adding the SIMILAR_TO links.
KG.number_of_edges()


122

In [57]:
# Cell 9: Helper functions to retrieve Product objects

# Lookup tables built once from the catalog: id -> Product and exact name -> id.
id_to_product: Dict[str, Product] = dict((p.product_id, p) for p in products_data)
name_to_product_id: Dict[str, str] = dict((p.name, p.product_id) for p in products_data)

def get_product_by_id(product_id: str) -> Optional[Product]:
    """Return the Product with this catalog id, or None when the id is unknown."""
    return id_to_product.get(product_id, None)

def get_product_by_name(name: str) -> Optional[Product]:
    """Return the Product whose name matches exactly (case-sensitive), or None."""
    if name not in name_to_product_id:
        return None
    return id_to_product[name_to_product_id[name]]

def list_all_product_names() -> List[str]:
    """Return every catalog product name in ascending sorted order."""
    return sorted(name_to_product_id)

list_all_product_names()[:10]


['Amul Cheese Slices 10pcs',
 'Amul Lactose Free Milk 1L',
 'Amul Low Fat Curd 400g',
 'Amul Probiotic Dahi 400g',
 'Amul Toned Milk 1L',
 'Britannia Cheese Block 200g',
 'Britannia White Bread 400g',
 'Britannia Whole Wheat Bread 400g',
 'Coconut Water 500ml',
 'Garlic Bread Loaf 250g']

In [58]:
# Cell 10: Mapping between Product and KG node IDs

def product_to_node_id(product_id: str) -> str:
    """Return the knowledge-graph node id for a catalog product id ("product:<id>")."""
    return "product:{}".format(product_id)

def node_id_to_product(node_id: str) -> Optional[Product]:
    """Resolve a knowledge-graph node id back to its Product.

    Args:
        node_id: A graph node id such as "product:P001".

    Returns:
        The matching Product, or None when the node id does not carry the
        "product:" prefix or the id is not in the catalog.
    """
    prefix = "product:"
    if not node_id.startswith(prefix):
        return None
    # Strip only the leading prefix. Splitting on "product:" and taking the
    # last piece would truncate any id that itself contains that substring.
    pid = node_id[len(prefix):]
    return get_product_by_id(pid)


In [59]:
# Cell 11: Exact product availability check

def check_exact_product_availability(
    product_name: str,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str] = None,
) -> Tuple[Optional[Product], List[str]]:
    """
    Check whether the product with exactly this name can be offered as-is.

    The product qualifies only if it exists in the catalog, matches the
    preferred brand (when one is given), costs no more than max_price (when
    one is given), is in stock, and carries every required tag.

    Returns (product, rule_tags) on success, otherwise (None, []).
    """
    product = get_product_by_name(product_name)
    if product is None:
        return None, []

    # Constraints are evaluated in order -- brand, price, stock, tags -- and
    # the first failing one short-circuits the rest.
    rejected = (
        (preferred_brand is not None and product.brand != preferred_brand)
        or (max_price is not None and product.price > max_price)
        or not product.in_stock
        or not set(required_tags).issubset(set(product.tags))
    )
    if rejected:
        return None, []

    # Record which rules fired so the caller can build an explanation.
    fired = ["exact_match_available"]
    if preferred_brand is not None:
        fired.append("preferred_brand_respected")
    if required_tags:
        fired.append("all_required_tags_matched")

    return product, fired


In [60]:
# Cell 12: BFS traversal from requested product to find candidate products

from collections import deque

def bfs_candidates(start_prod: Product, max_depth: int = 3) -> List[Product]:
    """
    Breadth-first search over KG outward from the node of start_prod.

    Every node reachable within max_depth hops is visited once, in the same
    order a FIFO queue would produce. Each product node found along the way
    (never the start itself) is returned, in discovery order.
    """
    origin = product_to_node_id(start_prod.product_id)
    seen = {origin}
    found: Dict[str, Product] = {}

    # Expand one whole level per iteration instead of tagging queue entries
    # with their depth.
    frontier = [origin]
    level = 0
    while frontier and level < max_depth:
        next_frontier = []
        for current in frontier:
            for adjacent in KG.neighbors(current):
                if adjacent in seen:
                    continue
                seen.add(adjacent)
                next_frontier.append(adjacent)

                # The start node is already in `seen`, so any product node
                # reaching this point is a genuine candidate.
                if adjacent.startswith("product:"):
                    hit = node_id_to_product(adjacent)
                    if hit:
                        found[hit.product_id] = hit
        frontier = next_frontier
        level += 1

    return list(found.values())


In [61]:
# Cell 13: Category closeness

def category_closeness(source_cat: str, target_cat: str) -> float:
    """
    Rate how close target_cat is to source_cat.

    Returns:
      1.0 when both categories are identical,
      0.7 when target_cat is listed under source_cat in similar_categories,
      0.0 in every other case (including an unknown source_cat).
    """
    if source_cat == target_cat:
        return 1.0
    related = similar_categories.get(source_cat, [])
    return 0.7 if target_cat in related else 0.0

# Quick sanity check: same category, related category, unrelated category.
print(
    *(category_closeness("Dairy", other) for other in ("Dairy", "Health", "Bakery")),
    sep="\n",
)


1.0
0.7
0.0


In [62]:
# Cell 14: Scoring function (classical heuristic, no ML)

def score_candidate(
    requested: Product,
    candidate: Product,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str] = None,
) -> Tuple[Optional[float], List[str]]:
    """
    Score one candidate as a substitute for the requested product.

    Args:
        requested: The product the user asked for.
        candidate: The product being considered as a substitute.
        max_price: Upper price bound, or None for no bound.
        required_tags: Tags the candidate must carry; None is treated as empty.
        preferred_brand: Brand the user prefers, or None.

    Returns:
        (score, rule_tags) if the candidate passes the hard filters,
        else (None, []).

    Hard filters:
      - in stock
      - price <= max_price (if given)
      - contains all required tags
    Soft preferences included in score:
      - category closeness (closeness * 4.0)
      - brand match (+3.0 preferred brand, -0.5 otherwise; +2.0 for the
        requested product's brand when no preference is given)
      - extra attributes beyond the required ones (+0.1 each, only when
        required tags were given)
      - price relative to the requested product (+1.0 cheaper, +0.5 equal,
        -0.2 more expensive)
    """
    rule_tags: List[str] = []

    # Hard filters
    if not candidate.in_stock:
        return None, []

    if max_price is not None and candidate.price > max_price:
        return None, []

    # Build the tag sets once; they are reused for the bonus below.
    required = set(required_tags or ())
    candidate_tags = set(candidate.tags)
    if not required.issubset(candidate_tags):
        return None, []

    # Passed filters -> compute score
    score = 0.0

    # Category closeness
    cat_score = category_closeness(requested.category, candidate.category)
    score += cat_score * 4.0
    if cat_score == 1.0:
        rule_tags.append("same_category")
    elif cat_score > 0:
        rule_tags.append("similar_category")

    # Brand preference: an explicit preference replaces the comparison with
    # the requested product's own brand.
    if preferred_brand is not None:
        if candidate.brand == preferred_brand:
            score += 3.0
            rule_tags.append("preferred_brand_respected")
        else:
            # small penalty
            score -= 0.5
            rule_tags.append("different_brand_than_preferred")
    else:
        if candidate.brand == requested.brand:
            score += 2.0
            rule_tags.append("same_brand_as_requested")
        else:
            rule_tags.append("different_brand_than_requested")

    # Required tags all matched (guaranteed by the hard filter above)
    if required:
        rule_tags.append("all_required_tags_matched")
        # extra attributes beyond required -> slight bonus
        score += 0.1 * len(candidate_tags - required)

    # Price advantage: cheaper is better
    if candidate.price < requested.price:
        score += 1.0
        rule_tags.append("cheaper_option")
    elif candidate.price == requested.price:
        score += 0.5
        rule_tags.append("same_price_as_requested")
    else:
        # little penalty for being more expensive
        score -= 0.2
        rule_tags.append("slightly_more_expensive")

    return score, rule_tags


In [63]:
# Cell 15: Rule tags -> human-readable explanation text (deterministic, no random text)

# Sentence emitted for each rule tag; tags without an entry are skipped silently.
RULE_EXPLANATIONS = {
    "exact_match_available": "The exact product you requested is in stock and satisfies your constraints.",
    "preferred_brand_respected": "Matches your preferred brand.",
    "same_brand_as_requested": "Has the same brand as your requested product.",
    "different_brand_than_requested": "Brand is different from your requested product.",
    "different_brand_than_preferred": "Brand is different from your preferred brand.",
    "same_category": "Belongs to the same category as your requested product.",
    "similar_category": "Belongs to a related category.",
    "all_required_tags_matched": "Matches all of your required tags.",
    "cheaper_option": "This option is cheaper than your requested product.",
    "same_price_as_requested": "This option has the same price as your requested product.",
    "slightly_more_expensive": "This option is slightly more expensive than your requested product.",
}

def build_explanation(rule_tags: List[str], candidate: Product, requested: Product, required_tags: List[str]) -> str:
    """
    Turn rule tags into one space-separated explanation string.

    Sentences follow the order of rule_tags. When required tags were given and
    matched, a closing sentence listing them is appended. The candidate and
    requested arguments are accepted but not used.
    """
    sentences = [RULE_EXPLANATIONS[tag] for tag in rule_tags if tag in RULE_EXPLANATIONS]

    tags_were_matched = "all_required_tags_matched" in rule_tags
    if required_tags and tags_were_matched:
        sentences.append(f"Required tags satisfied: {', '.join(required_tags)}.")

    return " ".join(sentences).strip()


In [64]:
# Cell 16: Core reasoning function

def find_alternatives(
    requested_product_name: str,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str] = None,
    max_alternatives: int = 3,
) -> Dict[str, Any]:
    """
    Resolve a product request into an exact match or ranked alternatives.

    Steps:
      1. Exact match check.
      2. If not suitable, BFS-based candidate search (up to 3 hops in the KG).
      3. Scoring, rule tagging, explanation generation.

    Args:
        requested_product_name: Exact catalog name of the wanted product.
        max_price: Upper price bound, or None for no bound.
        required_tags: Tags every suggestion must carry.
        preferred_brand: Brand the user prefers, or None.
        max_alternatives: Maximum number of alternatives to return. Negative
            values are treated as 0; None keeps every scored alternative.

    Returns a dict with keys:
      - "requested": Product or None
      - "exact_match": dict or None
      - "alternatives": List[Recommendation]
      - "message": str
    """
    result: Dict[str, Any] = {
        "requested": None,
        "exact_match": None,
        "alternatives": [],
        "message": "",
    }

    requested = get_product_by_name(requested_product_name)
    if requested is None:
        result["message"] = "Requested product not found in catalog."
        return result

    result["requested"] = requested

    # 1. Check exact availability
    exact_prod, exact_rules = check_exact_product_availability(
        requested_product_name,
        max_price,
        required_tags,
        preferred_brand,
    )

    if exact_prod is not None:
        explanation = build_explanation(exact_rules, exact_prod, requested, required_tags)
        result["exact_match"] = {
            "product": exact_prod,
            "explanation": explanation,
            "rule_tags": exact_rules,
        }
        result["message"] = "Exact product is available."
        return result

    # 2. BFS-based candidates
    candidates = bfs_candidates(requested, max_depth=3)

    # 3. Keep only candidates that pass the hard filters (score is not None)
    scored: List[Recommendation] = []
    for cand in candidates:
        score, rule_tags = score_candidate(
            requested,
            cand,
            max_price,
            required_tags,
            preferred_brand,
        )
        if score is None:
            continue
        explanation = build_explanation(rule_tags, cand, requested, required_tags)
        scored.append(Recommendation(product=cand, score=score, rule_tags=rule_tags, explanation=explanation))

    # Sort by score descending; the sort is stable, so ties keep BFS order.
    scored_sorted = sorted(scored, key=lambda r: r.score, reverse=True)

    # Clamp the limit: a negative value used as a slice bound would drop items
    # from the *end* of the ranking instead of returning nothing.
    limit = None if max_alternatives is None else max(0, max_alternatives)
    top = scored_sorted[:limit]

    if not top:
        result["message"] = "Requested product is not suitable or out of stock, and no alternatives were found."
    else:
        result["message"] = "Requested product not suitable / out of stock. Showing alternative suggestions."
        result["alternatives"] = top

    return result


In [65]:
# Cell 17: Pretty printing helpers

def print_product(p: Product):
    """Print a four-line summary of a product: name/id, category/brand, price/stock, tags."""
    print(f"{p.name} [{p.product_id}]")
    print(f"  Category: {p.category}, Brand: {p.brand}")
    # U+20B9 (Indian rupee sign) is written as an escape so the symbol cannot
    # be corrupted when the notebook source is re-encoded.
    print(f"  Price: \u20b9{p.price}, In stock: {p.in_stock}")
    print(f"  Tags: {', '.join(p.tags)}")

def print_result(result: Dict[str, Any]):
    """
    Print a find_alternatives() result dict as a plain-text report.

    Sections appear in a fixed order: message, requested product (if any),
    exact match (or "None"), then the ranked alternatives (or "None").
    """
    print("MESSAGE:", result["message"])
    print()

    requested = result["requested"]
    if requested:
        print("Requested product:")
        print_product(requested)
        print()

    exact = result["exact_match"]
    if not exact:
        print("Exact match: None")
    else:
        print("Exact match:")
        print_product(exact["product"])
        print("Explanation:", exact["explanation"])
        print("Rule tags:", exact["rule_tags"])
    print()

    alternatives = result["alternatives"]
    if not alternatives:
        print("Alternatives: None")
        return

    print("Alternatives:")
    for rank, rec in enumerate(alternatives, start=1):
        print(f"#{rank}: (score={rec.score:.2f})")
        print_product(rec.product)
        print("Explanation:", rec.explanation)
        print("Rule tags:", rec.rule_tags)
        print()


In [66]:
# Cell 18: Test 1 – Exact match scenario
# P009 is in stock at price 45 (<= 50) and carries both required tags, so the
# exact-match branch is expected and no alternatives are computed.

result1 = find_alternatives(
    requested_product_name="Britannia Whole Wheat Bread 400g",
    max_price=50,
    required_tags=["veg_only", "high_fiber"],
    preferred_brand=None,
)

print_result(result1)


MESSAGE: Exact product is available.

Requested product:
Britannia Whole Wheat Bread 400g [P009]
  Category: Bakery, Brand: Britannia
  Price: ₹45, In stock: True
  Tags: veg_only, high_fiber

Exact match:
Britannia Whole Wheat Bread 400g [P009]
  Category: Bakery, Brand: Britannia
  Price: ₹45, In stock: True
  Tags: veg_only, high_fiber
Explanation: The exact product you requested is in stock and satisfies your constraints. Matches all of your required tags. Required tags satisfied: veg_only, high_fiber.
Rule tags: ['exact_match_available', 'all_required_tags_matched']

Alternatives: None


In [67]:
# Cell 19: Test 2 – Out of stock, find alternatives
# With a preferred brand set, score_candidate adds +3.0 to candidates of that
# brand and subtracts 0.5 from all others.

# Amul Toned Milk is out of stock in our data
result2 = find_alternatives(
    requested_product_name="Amul Toned Milk 1L",
    max_price=100,
    required_tags=["veg_only"],
    preferred_brand="Amul",  # try to keep same brand
)

print_result(result2)


MESSAGE: Requested product not suitable / out of stock. Showing alternative suggestions.

Requested product:
Amul Toned Milk 1L [P001]
  Category: Dairy, Brand: Amul
  Price: ₹65, In stock: False
  Tags: veg_only

Exact match: None

Alternatives:
#1: (score=8.10)
Amul Low Fat Curd 400g [P006]
  Category: Dairy, Brand: Amul
  Price: ₹60, In stock: True
  Tags: veg_only, low_fat
Explanation: Belongs to the same category as your requested product. Matches your preferred brand. Matches all of your required tags. This option is cheaper than your requested product. Required tags satisfied: veg_only.
Rule tags: ['same_category', 'preferred_brand_respected', 'all_required_tags_matched', 'cheaper_option']

#2: (score=6.90)
Amul Lactose Free Milk 1L [P002]
  Category: Dairy, Brand: Amul
  Price: ₹90, In stock: True
  Tags: veg_only, lactose_free
Explanation: Belongs to the same category as your requested product. Matches your preferred brand. Matches all of your required tags. This option 

In [68]:
# Cell 20: Test 3 – Strict tag requirements
# P001 is out of stock and does not carry "lactose_free", so only in-stock
# lactose-free products priced at or below 150 can be suggested.

result3 = find_alternatives(
    requested_product_name="Amul Toned Milk 1L",
    max_price=150,
    required_tags=["lactose_free"],
    preferred_brand=None,
)

print_result(result3)


MESSAGE: Requested product not suitable / out of stock. Showing alternative suggestions.

Requested product:
Amul Toned Milk 1L [P001]
  Category: Dairy, Brand: Amul
  Price: ₹65, In stock: False
  Tags: veg_only

Exact match: None

Alternatives:
#1: (score=5.90)
Amul Lactose Free Milk 1L [P002]
  Category: Dairy, Brand: Amul
  Price: ₹90, In stock: True
  Tags: veg_only, lactose_free
Explanation: Belongs to the same category as your requested product. Has the same brand as your requested product. Matches all of your required tags. This option is slightly more expensive than your requested product. Required tags satisfied: lactose_free.
Rule tags: ['same_category', 'same_brand_as_requested', 'all_required_tags_matched', 'slightly_more_expensive']

#2: (score=3.90)
Lactose Free Yogurt 200g [P026]
  Category: Health, Brand: HealthLife
  Price: ₹55, In stock: True
  Tags: veg_only, lactose_free
Explanation: Belongs to a related category. Brand is different from your requested produc

In [69]:
# Cell 21: Test 4 – Very strict constraints -> likely no alternative
# P015 is in stock but costs 20 (> max_price 10) and lacks both required tags.
# The cheapest catalog items also cost 20, so no candidate passes the price filter.

result4 = find_alternatives(
    requested_product_name="Lays Classic Salted 52g",
    max_price=10,
    required_tags=["gluten_free", "high_protein"],
    preferred_brand="Lays",
)

print_result(result4)


MESSAGE: Requested product is not suitable or out of stock, and no alternatives were found.

Requested product:
Lays Classic Salted 52g [P015]
  Category: Snacks, Brand: Lays
  Price: ₹20, In stock: True
  Tags: veg_only

Exact match: None

Alternatives: None


In [70]:
# Cell 22: Inspect KG structure (sanity checks)

# Totals cover category, brand, attribute and product nodes, and every
# IS_A / HAS_BRAND / HAS_ATTRIBUTE / SIMILAR_TO edge added so far.
print("Total nodes:", KG.number_of_nodes())
print("Total edges:", KG.number_of_edges())

print("\nSample product node attributes:")
sample_node = product_to_node_id("P001")
# Cell output: the attribute dict stored on the P001 product node.
KG.nodes[sample_node]


Total nodes: 57
Total edges: 122

Sample product node attributes:


{'node_type': 'product',
 'product_id': 'P001',
 'name': 'Amul Toned Milk 1L',
 'category': 'Dairy',
 'brand': 'Amul',
 'price': 65,
 'in_stock': False,
 'tags': ['veg_only']}

In [71]:
# Cell 23: Export KG as JSON-like dictionaries for later saving to file

import json

def _write_json(payload: Any, path: str, label: str) -> None:
    """Write payload to path as indented UTF-8 JSON and print a confirmation line."""
    with open(path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(payload, f, indent=2, ensure_ascii=False)
    print(f"Saved {label} to {path}")

def export_products_to_json(path: str = "products.json"):
    """Write the catalog as a JSON list of product field dicts."""
    _write_json([p.__dict__ for p in products_data], path, "products")

def export_categories_to_json(path: str = "categories.json"):
    """Write each category with its list of related categories."""
    data = [{"name": cat, "similar_to": similar_categories.get(cat, [])} for cat in categories]
    _write_json(data, path, "categories")

def export_attributes_to_json(path: str = "attributes.json"):
    """Write the sorted attribute (tag) names as a JSON list of {"name": ...} objects."""
    _write_json([{"name": attr} for attr in attributes], path, "attributes")

# These calls run on every execution of this cell and write the three files
# next to the notebook. Comment them out to skip writing files.
export_products_to_json()
export_categories_to_json()
export_attributes_to_json()


Saved products to products.json
Saved categories to categories.json
Saved attributes to attributes.json


In [72]:
streamlit_app_code = r'''
import streamlit as st
import json
from typing import List, Optional, Dict, Any
from dataclasses import dataclass, field

# -----------------------------
# Data models
# -----------------------------

@dataclass
class Product:
    product_id: str
    name: str
    category: str
    brand: str
    price: float
    in_stock: bool
    tags: List[str] = field(default_factory=list)

# -----------------------------
# Load product data
# -----------------------------

@st.cache_data
def load_products() -> List[Product]:
    with open("products.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    return [Product(**item) for item in data]

products_data = load_products()
id_to_product = {p.product_id: p for p in products_data}
name_to_product_id = {p.name: p.product_id for p in products_data}

def get_product_by_name(name: str) -> Optional[Product]:
    pid = name_to_product_id.get(name)
    if pid is None:
        return None
    return id_to_product[pid]

# -----------------------------
# Rule-based reasoning (simplified version)
# -----------------------------

def exact_match_check(
    requested: Product,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str],
):
    if not requested.in_stock:
        return None, []

    if max_price is not None and requested.price > max_price:
        return None, []

    if not set(required_tags).issubset(set(requested.tags)):
        return None, []

    if preferred_brand is not None and requested.brand != preferred_brand:
        return None, []

    rule_tags = ["exact_match_available"]
    if preferred_brand is not None:
        rule_tags.append("preferred_brand_respected")
    if required_tags:
        rule_tags.append("all_required_tags_matched")

    return requested, rule_tags

RULE_EXPLANATIONS = {
    "exact_match_available": "The exact item you searched for is in stock and fits your filters.",
    "preferred_brand_respected": "Matches your preferred brand.",
    "same_brand_as_requested": "Same brand as the product you searched.",
    "different_brand_than_requested": "Different brand than the product you searched.",
    "all_required_tags_matched": "Matches all filter tags you selected.",
    "cheaper_option": "Cheaper than the product you searched.",
    "same_price_as_requested": "Same price as the product you searched.",
    "slightly_more_expensive": "Slightly more expensive than the product you searched.",
    "same_category": "Same category as the product you searched.",
    "similar_category": "Related category.",
}

def build_explanation(rule_tags: List[str], required_tags: List[str]) -> str:
    parts = []
    for tag in rule_tags:
        if tag in RULE_EXPLANATIONS:
            parts.append(RULE_EXPLANATIONS[tag])
    if required_tags and "all_required_tags_matched" in rule_tags:
        parts.append("Filter tags satisfied: " + ", ".join(required_tags) + ".")
    return " ".join(parts).strip()

def _score_candidate(
    candidate: "Product",
    requested: "Product",
    required_tags: List[str],
    preferred_brand: Optional[str],
):
    """Score one eligible candidate against the requested product.

    Returns:
        ``(score, rule_tags)`` where ``rule_tags`` records, in evaluation
        order, which scoring rules fired for this candidate.
    """
    score = 0.0
    rule_tags = []

    # Category carries the largest weight.
    if candidate.category == requested.category:
        score += 4.0
        rule_tags.append("same_category")
    else:
        score += 1.0
        rule_tags.append("similar_category")

    # Brand of the searched product, then the shopper's preferred brand;
    # the two bonuses stack when both point at the candidate's brand.
    if candidate.brand == requested.brand:
        score += 2.0
        rule_tags.append("same_brand_as_requested")
    else:
        rule_tags.append("different_brand_than_requested")
    if preferred_brand is not None and candidate.brand == preferred_brand:
        score += 3.0
        rule_tags.append("preferred_brand_respected")

    # Candidates reaching this point already passed the tag filter.
    if required_tags:
        rule_tags.append("all_required_tags_matched")

    # NOTE(review): any higher price is tagged "slightly_more_expensive",
    # however large the gap; confirm whether a threshold is wanted.
    if candidate.price < requested.price:
        score += 1.0
        rule_tags.append("cheaper_option")
    elif candidate.price == requested.price:
        score += 0.5
        rule_tags.append("same_price_as_requested")
    else:
        score -= 0.2
        rule_tags.append("slightly_more_expensive")

    return score, rule_tags


def find_alternatives_catalog(
    requested_product_name: str,
    max_price: Optional[float],
    required_tags: List[str],
    preferred_brand: Optional[str],
    top_n: int = 3,
):
    """Look up a product and, if it cannot be offered, rank substitutes.

    Args:
        requested_product_name: Catalog name of the product being searched.
        max_price: Upper price bound (inclusive), or ``None`` for no bound.
        required_tags: Tags every result must carry; ``None`` or empty means
            no tag filter.
        preferred_brand: Brand that earns a score bonus (and that the exact
            match must have), or ``None`` for no preference.
        top_n: Maximum number of alternatives to return (default 3).

    Returns:
        A 4-tuple ``(requested, exact_info, alternatives, message)``:

        * ``requested`` is the looked-up product, or ``None`` if the name is
          unknown;
        * ``exact_info`` is ``(product, rule_tags)`` when the requested
          product itself passes every filter, else ``None``;
        * ``alternatives`` is a list of ``(product, rule_tags)`` ordered by
          descending score (ties keep catalog order);
        * ``message`` is a status line for the shopper.
    """
    requested = get_product_by_name(requested_product_name)
    if requested is None:
        return None, None, [], "Product not found in catalog."

    # Treat a missing tag filter the same as an empty one.
    required_tags = required_tags or []

    # Try exact match first
    exact, exact_rule_tags = exact_match_check(
        requested, max_price, required_tags, preferred_brand
    )
    if exact is not None:
        return requested, (exact, exact_rule_tags), [], "Exact product is available."

    # Candidate pool: in stock, within budget, carrying every required tag,
    # and not the requested product itself.
    wanted_tags = set(required_tags)
    scored = []
    for candidate in products_data:
        if not candidate.in_stock:
            continue
        if max_price is not None and not candidate.price <= max_price:
            continue
        if not wanted_tags.issubset(candidate.tags):
            continue
        if candidate.product_id == requested.product_id:
            continue

        score, rule_tags = _score_candidate(
            candidate, requested, required_tags, preferred_brand
        )
        scored.append((candidate.product_id, score, rule_tags))

    # sorted() is stable, so equally scored candidates keep catalog order.
    ranked = sorted(scored, key=lambda entry: entry[1], reverse=True)
    top = ranked[: max(top_n, 0)]
    alternatives = [(id_to_product[pid], rule_tags) for pid, _, rule_tags in top]

    if not alternatives:
        return requested, None, [], "Product is out of stock or filtered out, and no alternatives were found."

    return requested, None, alternatives, "Product is out of stock or does not meet filters. Showing alternatives."

# -----------------------------
# UI Layout (store-like)
# -----------------------------

st.set_page_config(page_title="General Store - Product Finder", layout="wide")

# Stylesheet for the product cards, injected once so the results read like
# a store listing rather than a form.
PRODUCT_CARD_CSS = """
    <style>
    .product-card {
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        padding: 12px;
        margin-bottom: 10px;
        background-color: #ffffff;
        box-shadow: 0 1px 3px rgba(0,0,0,0.05);
    }
    .product-title {
        font-weight: 600;
        font-size: 16px;
        margin-bottom: 4px;
    }
    .product-meta {
        font-size: 13px;
        color: #555555;
    }
    .product-price {
        font-size: 14px;
        font-weight: 600;
        color: #1a7f37;
    }
    </style>
    """
st.markdown(PRODUCT_CARD_CSS, unsafe_allow_html=True)

st.title("🛒 General Store Catalog")

# Sidebar filters (like Amazon filters)
with st.sidebar:
    st.header("Filters")

    # The slider cannot be left empty, so 0 doubles as "no price cap" and
    # is translated to None for the matching code.
    max_price_input = st.slider(
        "Maximum price (₹)",
        min_value=0,
        max_value=200,
        value=0,
        step=5,
        help="Set 0 for no maximum limit.",
    )
    max_price_val = max_price_input if max_price_input > 0 else None

    # Offer every tag that appears anywhere in the catalog.
    all_tags = sorted({tag for p in products_data for tag in p.tags})
    required_tags = st.multiselect(
        "Filter by tags (optional)",
        options=all_tags,
        default=[],
        help="Tags are used internally for filtering; they are not shown in the product name.",
    )

    # The first entry is a placeholder meaning "any brand"; it maps to None.
    brands = sorted({p.brand for p in products_data})
    preferred_brand = st.selectbox(
        "Preferred brand (optional)",
        options=["(no preference)"] + brands,
    )
    preferred_brand_val = None if preferred_brand == "(no preference)" else preferred_brand

# Main search area: narrow column for the query, wider one for results
col_left, col_right = st.columns([2, 3])

with col_left:
    st.subheader("Search product")
    product_names = sorted(name_to_product_id)

    # Simulate Amazon-type search: the typed text narrows the dropdown to
    # names containing it (case-insensitive); empty text lists everything.
    search_text = st.text_input("Type product name", "")

    if not search_text:
        matched_names = product_names
    else:
        needle = search_text.lower()
        matched_names = [
            candidate for candidate in product_names if needle in candidate.lower()
        ]

    # Pre-select the first suggestion when there is one.
    default_index = 0 if matched_names else None
    selected_name = st.selectbox(
        "Select from catalog",
        matched_names,
        index=default_index,
        key="product_select",
    )

    search_button = st.button("Search")

def _render_product_card(product, explanation=None):
    """Render one product as a styled HTML card.

    Args:
        product: Object exposing ``name``, ``category``, ``brand``, ``price``
            and ``in_stock``.
        explanation: Optional text for a "Why suggested" line; the line is
            omitted when this is ``None``.
    """
    # Built as a single line of HTML so the markup does not depend on how
    # surrounding indentation is handled by the markdown renderer.
    rows = [
        '<div class="product-card">',
        f'<div class="product-title">{product.name}</div>',
        f'<div class="product-meta">Category: {product.category} | Brand: {product.brand}</div>',
        f'<div class="product-price">₹{product.price}</div>',
        f'<div class="product-meta">In stock: {product.in_stock}</div>',
    ]
    if explanation is not None:
        rows.append(
            f'<div class="product-meta"><b>Why suggested:</b> {explanation}</div>'
        )
    rows.append("</div>")
    st.markdown("".join(rows), unsafe_allow_html=True)


with col_right:
    st.subheader("Product & alternatives")

    # Results are computed only on the run triggered by the Search button.
    if search_button and selected_name:
        requested, exact_info, alternatives, message = find_alternatives_catalog(
            requested_product_name=selected_name,
            max_price=max_price_val,
            required_tags=required_tags,
            preferred_brand=preferred_brand_val,
        )

        st.write(message)

        if requested:
            st.markdown("**Searched product**")
            with st.container():
                _render_product_card(requested)

        if exact_info:
            exact, rule_tags = exact_info
            st.success("Exact item available")
            st.markdown("**You can buy this item:**")
            _render_product_card(exact, build_explanation(rule_tags, required_tags))
        elif alternatives:
            st.markdown("**Alternative options**")
            for alt, rule_tags in alternatives:
                _render_product_card(alt, build_explanation(rule_tags, required_tags))
        else:
            # Neither the exact item nor any substitute qualified.
            st.warning("No suitable alternatives found with current filters.")
'''
# Write the source text held in `streamlit_app_code` to app.py as UTF-8;
# mode "w" replaces any existing file of that name.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(streamlit_app_code)