In [20]:
!pip install fastapi uvicorn colabcode sentence-transformers faiss-cpu


Collecting colabcode
  Downloading colabcode-0.3.0-py3-none-any.whl.metadata (2.5 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting pyngrok>=5.0.0 (from colabcode)
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Collecting nest-asyncio==1.4.3 (from colabcode)
  Downloading nest_asyncio-1.4.3-py3-none-any.whl.metadata (2.6 kB)
Collecting uvicorn
  Downloading uvicorn-0.13.1-py3-none-any.whl.metadata (4.6 kB)
Requested uvicorn from https://files.pythonhosted.org/packages/ef/67/546c35e9fffb585ea0608ba3bdcafe17ae402e304367203d0b08d6c23051/uvicorn-0.13.1-py3-none-any.whl has invalid metadata: .* suffix can only be used with `==` or `!=` operators
    python-dotenv (>=0.13.*) ; extra == 'standard'
                   ~~~~~~~^
Please use pip<24.1 if you need to use this version.[0m[33m
[0mINFO: pip is looking at multiple versions of colabcode to determine which version is compatible

In [21]:
import csv, random, json
import numpy as np
import faiss
from datetime import datetime, timedelta
from sentence_transformers import SentenceTransformer

In [22]:
NUM = 100  # keep small for demo (can increase to 200+)
OUT = "dummy_products.csv"

IMAGE_BASE = "https://example.com/media/"
PRODUCT_TEMPLATES = [
    ("Portable Bluetooth Speaker","Compact speaker with 12h battery","image","speaker{n}.jpg", ['Electronics','Audio'], ['bluetooth','portable','bass']),
    ("Noise-canceling Headphones","Over-ear headphones with ANC","image","headphones{n}.jpg", ['Electronics','Audio'], ['ANC','wireless','comfort']),
    ("Travel Drone 4K","Lightweight drone with 4K camera","video","drone_demo{n}.mp4", ['Electronics','Drones'], ['4K','camera','gps']),
    ("Ergonomic Office Chair","Comfortable chair with lumbar support","image","chair{n}.jpg", ['Furniture','Office'], ['ergonomic','mesh','adjustable']),
    ("Smartwatch Pro","Fitness-tracking smartwatch","image","watch{n}.jpg", ['Electronics','Wearables'], ['heart-rate','gps','waterproof']),
    ("Coffee Maker 12-cup","Programmable drip coffee maker","image","coffee{n}.jpg", ['Home','Kitchen'], ['coffee','programmable','12cup']),
    ("4K Action Camera","Waterproof action camera 4K video","video","actioncam_demo{n}.mp4", ['Electronics','Cameras'], ['4K','waterproof','wide-angle']),
    ("Kids Educational Tablet","Tablet with child-safe apps","image","tablet{n}.jpg", ['Electronics','Kids'], ['education','parental-control','toddler']),
]

start_date = datetime(2025, 1, 1)

with open(OUT, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(['id','title','description','media_type','media_url','price','categories','tags','created_at'])
    for i in range(1, NUM+1):
        tid = random.choice(PRODUCT_TEMPLATES)
        title, desc, mtype, media_tmpl, cats, tags = tid
        media = media_tmpl.format(n=(i%10)+1)
        price = round(random.uniform(19.99, 499.99), 2)
        categories = ';'.join(cats)
        tags = ';'.join(tags)
        created = (start_date + timedelta(days=random.randint(0,90))).date().isoformat()
        pid = f"p_{i:04d}"
        writer.writerow([pid, title, desc, mtype, IMAGE_BASE+media, price, categories, tags, created])

print("✅ Dummy dataset created:", OUT)


✅ Dummy dataset created: dummy_products.csv


In [23]:
CSV = "dummy_products.csv"
INDEX_FILE = "products.faiss"
META_FILE = "products_meta.jsonl"

model = SentenceTransformer("all-MiniLM-L6-v2")

rows, texts = [], []
with open(CSV, newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    for r in reader:
        rows.append(r)
        text = f"{r['title']}. {r['description']}. Tags: {r['tags']}. Categories: {r['categories']}"
        texts.append(text)

print("Encoding", len(texts), "items...")
embs = model.encode(texts, show_progress_bar=True, convert_to_numpy=True)
d = embs.shape[1]

# normalize to unit length for cosine similarity
faiss.normalize_L2(embs)
index = faiss.IndexFlatIP(d)
index.add(embs)
faiss.write_index(index, INDEX_FILE)

with open(META_FILE, "w", encoding="utf-8") as f:
    for r in rows:
        f.write(json.dumps(r, ensure_ascii=False)+"\n")

print("✅ Embeddings & index ready")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Encoding 100 items...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

✅ Embeddings & index ready


In [24]:
# 4) Query inside Colab (no API)
# ------------------------------
def search(query, k=5):
    q_emb = model.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(q_emb)
    D, I = index.search(q_emb, k)
    results = []
    for score, idx in zip(D[0].tolist(), I[0].tolist()):
        if idx < 0: continue
        m = rows[idx]
        results.append({
            "id": m["id"],
            "title": m["title"],
            "description": m["description"],
            "price": float(m["price"]),
            "tags": m["tags"],
            "score": float(score)
        })
    return results


In [25]:
print("🔎 Query: 'wireless headphones with ANC and comfort'")
print(search("wireless headphones with ANC and comfort", k=3))


🔎 Query: 'wireless headphones with ANC and comfort'
[{'id': 'p_0016', 'title': 'Noise-canceling Headphones', 'description': 'Over-ear headphones with ANC', 'price': 122.67, 'tags': 'ANC;wireless;comfort', 'score': 0.6815690398216248}, {'id': 'p_0015', 'title': 'Noise-canceling Headphones', 'description': 'Over-ear headphones with ANC', 'price': 124.57, 'tags': 'ANC;wireless;comfort', 'score': 0.6815690398216248}, {'id': 'p_0002', 'title': 'Noise-canceling Headphones', 'description': 'Over-ear headphones with ANC', 'price': 64.29, 'tags': 'ANC;wireless;comfort', 'score': 0.6815690398216248}]


In [26]:
print("🔎 Query: 'compact waterproof camera for biking'")
print(search("compact waterproof camera for biking", k=3))


🔎 Query: 'compact waterproof camera for biking'
[{'id': 'p_0049', 'title': '4K Action Camera', 'description': 'Waterproof action camera 4K video', 'price': 160.21, 'tags': '4K;waterproof;wide-angle', 'score': 0.5252586603164673}, {'id': 'p_0046', 'title': '4K Action Camera', 'description': 'Waterproof action camera 4K video', 'price': 275.83, 'tags': '4K;waterproof;wide-angle', 'score': 0.5252586603164673}, {'id': 'p_0017', 'title': '4K Action Camera', 'description': 'Waterproof action camera 4K video', 'price': 25.58, 'tags': '4K;waterproof;wide-angle', 'score': 0.5252586603164673}]


In [31]:
import gradio as gr

def search_ui(query):
    results = search(query, k=5)
    display = []
    for r in results:
        display.append([
            r.get("title", ""),
            r.get("description", ""),
            f"${r.get('price', 0.0)}",
            r.get("tags", ""),
            r.get("media_url", "N/A"),
            r.get("score", 0.0)
        ])
    return display


with gr.Blocks() as demo:
    gr.Markdown("## 🔎 AI-Powered Product Recommendation Engine")
    with gr.Row():
        query = gr.Textbox(label="Enter your product query", placeholder="e.g., waterproof camera for biking")
    btn = gr.Button("Search")
    results = gr.Dataframe(headers=["Title","Description","Price","Tags","Media URL","Score"], row_count=5)
    btn.click(fn=search_ui, inputs=query, outputs=results)

demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://860d5aeb20629a4ba4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7862 <> https://860d5aeb20629a4ba4.gradio.live


