# Product Recommendation — Intro Notebook

A minimal notebook demonstrating a simple content-based product recommender for an e-commerce site: it builds TF-IDF vectors from product descriptions and ranks products by cosine similarity.

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Example product catalog (replace with your real data load).
# Each tuple is one product: (id, title, description).
catalog_rows = [
    (1, "Wireless Bluetooth Headphones", "Over-ear, noise-cancelling, 20h battery"),
    (2, "USB-C Fast Charger", "65W charger, compact design"),
    (3, "Smart Fitness Watch", "Heart rate, GPS, sleep tracking"),
    (4, "Wireless Earbuds", "True wireless, charging case, water resistant"),
    (5, "Portable Bluetooth Speaker", "Waterproof, 12h playtime, deep bass"),
    (6, "Noise Cancelling Headphones", "Over-ear, premium sound, long battery life"),
]
products = pd.DataFrame(catalog_rows, columns=["id", "title", "description"])
# Bare last expression so the notebook renders the catalog as a table.
products

In [None]:
# Content-based features: TF-IDF vectors built from the product descriptions only.
# Missing descriptions are replaced with '' so the vectorizer always receives strings.
descs = products['description'].fillna('')
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(descs)
# Pairwise cosine similarity between every pair of description vectors;
# row/column order follows the row order of `products`.
sim_matrix = cosine_similarity(tfidf_matrix)


In [None]:
def recommend(product_id, top_n=3):
    """Return the ids of the products most similar to ``product_id``.

    Reads the module-level ``products`` DataFrame (must have an ``id`` column)
    and ``sim_matrix``, where ``sim_matrix[i][j]`` is taken to be the
    similarity between the i-th and j-th *rows* of ``products`` (positional
    order, independent of the DataFrame's index labels).

    Parameters
    ----------
    product_id
        Value to look up in ``products['id']``. If several rows share the id,
        the first one is used.
    top_n : int or None, default 3
        Maximum number of recommendations to return. ``None`` returns every
        other product, ranked.

    Returns
    -------
    list
        Up to ``top_n`` product ids as plain Python scalars, ordered by
        descending similarity. Ties keep catalog order. The queried row
        itself is never included.

    Raises
    ------
    ValueError
        If ``product_id`` is not present in ``products['id']`` or ``top_n``
        is negative.
    """
    if top_n is not None and top_n < 0:
        raise ValueError('top_n must be non-negative')

    # Resolve the row by *position*: sim_matrix is indexed positionally, so
    # using index labels here would break for any non-default DataFrame index.
    matches = np.flatnonzero((products['id'] == product_id).to_numpy())
    if matches.size == 0:
        raise ValueError('product_id not found')
    pos = int(matches[0])

    scores = np.asarray(sim_matrix[pos])
    # Stable sort on negated scores: highest first, ties in catalog order.
    ranked = np.argsort(-scores, kind='stable')
    # Skip the item itself.
    ranked = ranked[ranked != pos]

    # .tolist() converts numpy scalars to native Python values.
    return products['id'].to_numpy()[ranked[:top_n]].tolist()


In [None]:
# Example: fetch the three products most similar to product id 1 and print their ids.
example_recs = recommend(1, top_n=3)
print('Recommendations for product 1:', example_recs)


Notes:
- Replace the sample DataFrame with your real product catalog (CSV, DB, etc.).
- Consider richer features (title, category, tags) and preprocessing for better results.
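- For large catalogs, use approximate nearest-neighbor search instead of the full pairwise similarity matrix, whose size grows quadratically with the number of products.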