In [7]:
!pip install numpy==1.26.4 --force-reinstall
!pip install scikit-surprise
!pip install pandas scikit-learn


Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Using cached numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.6
    Uninstalling numpy-2.2.6:
      Successfully uninstalled numpy-2.2.6
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.2.3 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.[0m[31m
[0mSuccessfully installed numpy-1.26.4


In [1]:
import pandas as pd

# Load the raw dataset
df = pd.read_csv('/content/amazon.csv')  # Or re-upload and use /content/ path if needed

# Clean the `rating` column to extract valid floats
df['rating'] = df['rating'].astype(str).str.extract(r'(\d+\.?\d*)')
df.dropna(subset=['rating'], inplace=True)
df['rating'] = df['rating'].astype(float)

# Use review_title (shorter and easier to split)
df['user_id'] = df['user_id'].apply(lambda x: str(x).split(','))
df['review_title'] = df['review_title'].apply(lambda x: str(x).split(','))

# Filter out mismatched rows
df = df[df['user_id'].apply(len) == df['review_title'].apply(len)]

# Explode
df = df.explode(['user_id', 'review_title']).reset_index(drop=True)

# Rename
df = df.rename(columns={'review_title': 'review_text'})

# Clean and strip
df = df[['user_id', 'product_id', 'review_text', 'rating']].copy()
df['user_id'] = df['user_id'].str.strip()
df['product_id'] = df['product_id'].str.strip()
df['review_text'] = df['review_text'].str.strip()

# Final clean
df.dropna(inplace=True)
df = df[df['review_text'] != '']

# Save cleaned file
df.to_csv('/content/clean_amazon_reviews.csv', index=False)
print("✅ Cleaned file saved as: clean_amazon_reviews.csv")


✅ Cleaned file saved as: clean_amazon_reviews.csv


In [2]:
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse

# Load cleaned dataset
df = pd.read_csv('/content/clean_amazon_reviews.csv')

# Setup for Surprise library
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user_id', 'product_id', 'rating']], reader)

# Split into train/test
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Train SVD
svd_model = SVD()
svd_model.fit(trainset)

# Predict and evaluate
predictions = svd_model.test(testset)
print("✅ SVD RMSE:")
rmse(predictions)


✅ SVD RMSE:
RMSE: 0.1767


0.17674777911788875

In [3]:
!pip install -U sentence-transformers




In [4]:
from sentence_transformers import SentenceTransformer
import numpy as np
from tqdm import tqdm

# Load BERT model (small but powerful variant)
bert_model = SentenceTransformer('all-MiniLM-L6-v2')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
# Load cleaned dataset
df = pd.read_csv('/content/clean_amazon_reviews.csv')

# Remove duplicates just in case
df.drop_duplicates(subset=['user_id', 'product_id'], inplace=True)

# Generate BERT embeddings
tqdm.pandas()
df['bert_vector'] = df['review_text'].progress_apply(lambda x: bert_model.encode(x))

# Save for fusion
df.to_pickle('/content/bert_embeddings.pkl')
print("✅ BERT embeddings generated and saved.")


100%|██████████| 7099/7099 [02:24<00:00, 49.26it/s]

✅ BERT embeddings generated and saved.





In [6]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

# Reload data with BERT vectors
df = pd.read_pickle('/content/bert_embeddings.pkl')

# Map user and product IDs to internal SVD indices
user_map = {uid: i for i, uid in enumerate(df['user_id'].unique())}
item_map = {pid: i for i, pid in enumerate(df['product_id'].unique())}

df['user_idx'] = df['user_id'].map(user_map)
df['item_idx'] = df['product_id'].map(item_map)

# Get latent factors
def get_user_item_vectors(row):
    u_vec = svd_model.pu[row['user_idx']]
    i_vec = svd_model.qi[row['item_idx']]
    return u_vec, i_vec

df['svd_user_vec'], df['svd_item_vec'] = zip(*df.apply(get_user_item_vectors, axis=1))


IndexError: index 5018 is out of bounds for axis 0 with size 5018

In [7]:
# Helper: safely map to internal SVD indices
def get_user_item_vectors(row):
    try:
        uid = svd_model.trainset.to_inner_uid(row['user_id'])
        iid = svd_model.trainset.to_inner_iid(row['product_id'])
        u_vec = svd_model.pu[uid]
        i_vec = svd_model.qi[iid]
        return u_vec, i_vec
    except ValueError:
        return np.nan, np.nan  # If user/item was unseen during training

# Apply the mapping
df['svd_user_vec'], df['svd_item_vec'] = zip(*df.apply(get_user_item_vectors, axis=1))

# Drop rows where user/item vector wasn't found
df.dropna(subset=['svd_user_vec', 'svd_item_vec'], inplace=True)


In [8]:
import numpy as np
import pandas as pd

# Load cleaned and BERT-enhanced data
df = pd.read_pickle('/content/bert_embeddings.pkl')

# Helper to get SVD latent vectors using Surprise internal mappings
def get_user_item_vectors(row):
    try:
        # Use Surprise's internal index mapping
        uid = svd_model.trainset.to_inner_uid(row['user_id'])
        iid = svd_model.trainset.to_inner_iid(row['product_id'])
        u_vec = svd_model.pu[uid]
        i_vec = svd_model.qi[iid]
        return u_vec, i_vec
    except ValueError:
        # Happens when user/item wasn't in the trainset
        return np.nan, np.nan

# Apply to all rows
user_item_vecs = df.apply(get_user_item_vectors, axis=1)

# Unpack results
df['svd_user_vec'] = user_item_vecs.apply(lambda x: x[0])
df['svd_item_vec'] = user_item_vecs.apply(lambda x: x[1])

# Drop any rows where either vector is missing
df.dropna(subset=['svd_user_vec', 'svd_item_vec'], inplace=True)

# Confirm it worked
print("✅ Vector extraction complete.")
print(df[['user_id', 'product_id', 'rating']].head(3))
print("User vector shape:", df['svd_user_vec'].iloc[0].shape)
print("Item vector shape:", df['svd_item_vec'].iloc[0].shape)


✅ Vector extraction complete.
                        user_id  product_id  rating
0  AG3D6O4STAQKAY2UVGEUV46KN35Q  B07JW9H4J1     4.2
1  AHMY5CWJMMK5BJRBBSNLYT3ONILA  B07JW9H4J1     4.2
2  AHCTC6ULH4XB6YHDY6PCH2R772LQ  B07JW9H4J1     4.2
User vector shape: (100,)
Item vector shape: (100,)


In [9]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

class HybridDataset(Dataset):
    def __init__(self, df):
        self.svd_user = df['svd_user_vec'].tolist()
        self.svd_item = df['svd_item_vec'].tolist()
        self.bert_vecs = df['bert_vector'].tolist()
        self.ratings = df['rating'].tolist()

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        user = torch.tensor(self.svd_user[idx], dtype=torch.float32)
        item = torch.tensor(self.svd_item[idx], dtype=torch.float32)
        bert = torch.tensor(self.bert_vecs[idx], dtype=torch.float32)
        rating = torch.tensor(self.ratings[idx], dtype=torch.float32)
        return user, item, bert, rating

# Create DataLoader
dataset = HybridDataset(df)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

print("✅ Hybrid dataset ready. Total samples:", len(dataset))


✅ Hybrid dataset ready. Total samples: 5995


In [10]:
# Save as pickle for structured storage
df.to_pickle('/content/final_hybrid_dataset.pkl')
print("✅ Saved: /content/final_hybrid_dataset.pkl")


✅ Saved: /content/final_hybrid_dataset.pkl


In [11]:
df = pd.read_pickle('/content/final_hybrid_dataset.pkl')


In [12]:
class CrossAttentionRecommender(nn.Module):
    def __init__(self, svd_dim=100, bert_dim=384, hidden_dim=128):
        super().__init__()
        self.user_proj = nn.Linear(svd_dim, hidden_dim)
        self.item_proj = nn.Linear(svd_dim, hidden_dim)
        self.bert_proj = nn.Linear(bert_dim, hidden_dim)

        self.attn = nn.MultiheadAttention(embed_dim=hidden_dim, num_heads=4, batch_first=True)

        self.mlp = nn.Sequential(
            nn.Linear(hidden_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, user_vec, item_vec, bert_vec):
        u = self.user_proj(user_vec)
        i = self.item_proj(item_vec)
        b = self.bert_proj(bert_vec)

        x = torch.stack([u, i, b], dim=1)  # Shape: (batch, 3, hidden)
        attn_out, _ = self.attn(x, x, x)

        fused = attn_out.mean(dim=1)  # Mean-pool across sequence
        return self.mlp(fused).squeeze()


In [13]:
model = CrossAttentionRecommender()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

# Training loop
epochs = 5  # You can increase this for better performance
for epoch in range(epochs):
    total_loss = 0
    model.train()
    for user, item, bert, rating in dataloader:
        pred = model(user, item, bert)
        loss = loss_fn(pred, rating)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")


Epoch 1/5, Loss: 1.5041
Epoch 2/5, Loss: 0.0830
Epoch 3/5, Loss: 0.0749
Epoch 4/5, Loss: 0.0730
Epoch 5/5, Loss: 0.0746


In [14]:
def recommend_for_user(user_id, df, model, top_n=5):
    # Ensure user exists
    if user_id not in df['user_id'].values:
        print("User not found in dataset.")
        return []

    # Get all products the user hasn't rated yet
    user_df = df[df['user_id'] == user_id]
    rated_products = set(user_df['product_id'])

    all_products = df['product_id'].unique()
    unrated_products = [pid for pid in all_products if pid not in rated_products]

    recommendations = []

    user_vec = user_df['svd_user_vec'].iloc[0]
    user_bert_vec = user_df['bert_vector'].iloc[0]  # Use avg review BERT for simplicity

    for pid in unrated_products:
        prod_df = df[df['product_id'] == pid]
        if len(prod_df) == 0:
            continue
        item_vec = prod_df['svd_item_vec'].iloc[0]

        # Prepare input tensors
        u = torch.tensor(user_vec, dtype=torch.float32).unsqueeze(0)
        i = torch.tensor(item_vec, dtype=torch.float32).unsqueeze(0)
        b = torch.tensor(user_bert_vec, dtype=torch.float32).unsqueeze(0)

        # Predict
        model.eval()
        with torch.no_grad():
            pred_rating = model(u, i, b).item()

        recommendations.append((pid, pred_rating))

    # Sort and return top-N
    recommendations.sort(key=lambda x: x[1], reverse=True)
    return recommendations[:top_n]


In [15]:
user_to_recommend = df['user_id'].iloc[0]  # You can change this to any user_id from your data

top_recs = recommend_for_user(user_to_recommend, df, model, top_n=5)

print(f"\nTop 5 recommendations for user `{user_to_recommend}`:\n")
for pid, score in top_recs:
    product_name = df[df['product_id'] == pid]['review_text'].iloc[0]
    print(f"→ Product ID: {pid} | Predicted Rating: {score:.2f} | Sample Review: {product_name}")



Top 5 recommendations for user `AG3D6O4STAQKAY2UVGEUV46KN35Q`:

→ Product ID: B009P2LK08 | Predicted Rating: 4.71 | Sample Review: Poor packaging
→ Product ID: B08RDWBYCQ | Predicted Rating: 4.60 | Sample Review: बढिया है।वजन कम होने की वजह से जादा देर तक चला सकते है।
→ Product ID: B0B6BLTGTT | Predicted Rating: 4.59 | Sample Review: Noise pulse2max smart watch is awesome and looks good
→ Product ID: B01LONQBDG | Predicted Rating: 4.57 | Sample Review: Very good.
→ Product ID: B0B1YZ9CB8 | Predicted Rating: 4.57 | Sample Review: Wonderful TV and Awful installation service from amazon


In [23]:
def search_and_fuse_recommendations(query, user_id, df, model, bert_model, top_n=5, alpha=0.5):
    """
    query: Natural language text input from user
    user_id: ID of user to fetch SVD predictions
    df: DataFrame with bert_vector, svd vectors, etc.
    model: Trained attention fusion model
    bert_model: SentenceTransformer instance
    alpha: fusion weight between BERT similarity and SVD prediction
    """

    # 1. Encode query using BERT
    query_vec = bert_model.encode(query)

    # 2. Prepare grouped product vectors and product names
    grouped = df.groupby('product_id')
    product_names = grouped['review_text'].first()
    product_titles = grouped['product_id'].first()

    product_bert_vecs = grouped['bert_vector'].apply(lambda x: np.mean(x.tolist(), axis=0))
    product_svd_vecs = grouped['svd_item_vec'].apply(lambda x: np.mean(x.tolist(), axis=0))

    # 3. Compute cosine similarities
    similarities = cosine_similarity([query_vec], product_bert_vecs.tolist())[0]

    # 4. Get user SVD + BERT vector
    user_df = df[df['user_id'] == user_id]
    if user_df.empty:
        print("⚠️ User ID not found in training data.")
        return []

    user_svd_vec = np.mean(user_df['svd_user_vec'].tolist(), axis=0)
    user_bert_vec = np.mean(user_df['bert_vector'].tolist(), axis=0)

    recommendations = []
    for pid in product_bert_vecs.index:
        if pid in set(user_df['product_id']):
             continue

        item_svd_vec = product_svd_vecs[pid]
        item_bert_vec = product_bert_vecs[pid]

    # Predict rating
        u = torch.tensor(user_svd_vec, dtype=torch.float32).unsqueeze(0)
        i = torch.tensor(item_svd_vec, dtype=torch.float32).unsqueeze(0)
        b = torch.tensor(user_bert_vec, dtype=torch.float32).unsqueeze(0)

        with torch.no_grad():
             rating_pred = model(u, i, b).item()

    # Get similarity index correctly
        sim_score = cosine_similarity([query_vec], [item_bert_vec])[0][0]

        fusion_score = alpha * sim_score + (1 - alpha) * rating_pred
        recommendations.append((pid, product_names[pid], fusion_score, sim_score, rating_pred))




    # Sort by fused score
    recommendations.sort(key=lambda x: x[2], reverse=True)

    # Print top-N
    print(f"\n🔍 Top {top_n} results for: \"{query}\"")
    for pid, name, fused, sim, pred in recommendations[:top_n]:
        print(f"→ {pid} | {name[:60]}... | Sim: {sim:.3f} | Pred: {pred:.2f} | Fused: {fused:.3f}")


In [31]:
# Load original CSV with product_name
metadata = pd.read_csv('/content/amazon.csv')[['product_id', 'product_name', 'img_link', 'discounted_price', 'product_link']]
metadata = metadata.drop_duplicates(subset='product_id')


In [33]:
# Merge into your working df
df = pd.merge(df, metadata, on='product_id', how='left')


In [34]:
from sklearn.metrics.pairwise import cosine_similarity

def search_and_fuse_recommendations(query, user_id, df, model, bert_model, top_n=5, alpha=0.5):
    # Encode query with BERT
    query_vec = bert_model.encode(query)

    # Prepare grouped product vectors
    grouped = df.groupby('product_id')
    product_bert_vecs = grouped['bert_vector'].apply(lambda x: np.mean(x.tolist(), axis=0))
    product_svd_vecs = grouped['svd_item_vec'].apply(lambda x: np.mean(x.tolist(), axis=0))
    product_names = df.groupby('product_id')['product_name'].first()


    # Get user latent + BERT vector
    user_df = df[df['user_id'] == user_id]
    if user_df.empty:
        print("⚠️ User not found.")
        return []

    user_svd_vec = np.mean(user_df['svd_user_vec'].tolist(), axis=0)
    user_bert_vec = np.mean(user_df['bert_vector'].tolist(), axis=0)

    recommendations = []

    for pid in product_bert_vecs.index:
        if pid in set(user_df['product_id']):
            continue

        item_bert_vec = product_bert_vecs[pid]
        item_svd_vec = product_svd_vecs[pid]

        # Similarity
        sim_score = cosine_similarity([query_vec], [item_bert_vec])[0][0]

        # Predict rating
        u = torch.tensor(user_svd_vec, dtype=torch.float32).unsqueeze(0)
        i = torch.tensor(item_svd_vec, dtype=torch.float32).unsqueeze(0)
        b = torch.tensor(user_bert_vec, dtype=torch.float32).unsqueeze(0)

        with torch.no_grad():
            rating_pred = model(u, i, b).item()

        # Fused score
        fusion_score = alpha * sim_score + (1 - alpha) * rating_pred

        # Safe name lookup
        product_name = product_names.get(pid, "No Name Available")

        recommendations.append((pid, product_name, fusion_score, sim_score, rating_pred))

    # Sort and display
    recommendations.sort(key=lambda x: x[2], reverse=True)

    print(f"\n🔍 Top {top_n} matches for: \"{query}\"")
    for pid, name, fused, sim, pred in recommendations[:top_n]:
        print(f"→ {pid} | {name[:60]}... | Sim: {sim:.3f} | Pred: {pred:.2f} | Fused: {fused:.3f}")


In [24]:
import random

random_user = random.choice(df['user_id'].unique().tolist())
print("🔀 Random User ID:", random_user)


🔀 Random User ID: AFX6NQOSMDSQWMBRDX6NUHNLZEYA


In [37]:
from sklearn.metrics.pairwise import cosine_similarity
search_and_fuse_recommendations(
    query="I need a charging cable",
    user_id="AFX6NQOSMDSQWMBRDX6NUHNLZEYA",  # Replace with valid user_id
    df=df,
    model=model,
    bert_model=bert_model,
    top_n=5,
    alpha=0.5
)



🔍 Top 5 matches for: "I need a charging cable"
→ B0B4T6MR8N | pTron Solero M241 2.4A Micro USB Data & Charging Cable, Made... | Sim: 0.634 | Pred: 4.07 | Fused: 2.353
→ B09Q8WQ5QJ | Portronics Konnect L 60W PD Type C to Type C Mobile Charging... | Sim: 0.569 | Pred: 4.13 | Fused: 2.351
→ B01LONQBDG | AmazonBasics USB Type-C to Micro-B 2.0 Cable - 6 Inches (15.... | Sim: 0.358 | Pred: 4.27 | Fused: 2.315
→ B0B4HKH19N | pTron Solero 331 3.4Amps Multifunction Fast Charging Cable, ... | Sim: 0.634 | Pred: 3.98 | Fused: 2.308
→ B0B21C4BMX | Portronics Konnect Spydr 31 3-in-1 Multi Functional Cable wi... | Sim: 0.445 | Pred: 4.15 | Fused: 2.300


In [38]:
from sklearn.metrics.pairwise import cosine_similarity
search_and_fuse_recommendations(
    query="I need a smart phone",
    user_id="AFX6NQOSMDSQWMBRDX6NUHNLZEYA",  # Replace with valid user_id
    df=df,
    model=model,
    bert_model=bert_model,
    top_n=5,
    alpha=0.5
)


🔍 Top 5 matches for: "I need a smart phone"
→ B09TWHTBKQ | Samsung Galaxy M33 5G (Mystique Green, 8GB, 128GB Storage) |... | Sim: 0.516 | Pred: 4.14 | Fused: 2.329
→ B0B14MR9L1 | Samsung Galaxy M33 5G (Emerald Brown, 6GB, 128GB Storage) | ... | Sim: 0.516 | Pred: 4.12 | Fused: 2.318
→ B0B6BLTGTT | Noise Pulse 2 Max Advanced Bluetooth Calling Smart Watch wit... | Sim: 0.306 | Pred: 4.29 | Fused: 2.297
→ B09YDFDVNS | Nokia 105 Plus Single SIM, Keypad Mobile Phone with Wireless... | Sim: 0.449 | Pred: 4.14 | Fused: 2.296
→ B009P2LK08 | Bajaj Minor 1000 Watts Radiant Room Heater (Steel, ISI Appro... | Sim: 0.180 | Pred: 4.41 | Fused: 2.296


In [41]:
from sklearn.metrics.pairwise import cosine_similarity
search_and_fuse_recommendations(
    query=" Need a water heater",
    user_id="AFX6NQOSMDSQWMBRDX6NUHNLZEYA",  # Replace with valid user_id
    df=df,
    model=model,
    bert_model=bert_model,
    top_n=5,
    alpha=0.5
)


🔍 Top 5 matches for: " Need a water heater"
→ B009P2LK08 | Bajaj Minor 1000 Watts Radiant Room Heater (Steel, ISI Appro... | Sim: 0.268 | Pred: 4.41 | Fused: 2.340
→ B09N3BFP4M | Bajaj New Shakti Neo Plus 15 Litre 4 Star Rated Storage Wate... | Sim: 0.594 | Pred: 3.98 | Fused: 2.286
→ B09MQ9PDHR | SaiEllin Room Heater For Home 2000 Watts Room Heater For Bed... | Sim: 0.453 | Pred: 4.04 | Fused: 2.248
→ B08RDWBYCQ | T TOPLINE 180 W Electric Hand Mixer,Hand Blender , Egg Beate... | Sim: 0.168 | Pred: 4.30 | Fused: 2.232
→ B06XR9PR5X | Amazon Basics HDMI Coupler,Black... | Sim: 0.167 | Pred: 4.27 | Fused: 2.218


In [42]:
def recommend_for_new_user(query, df, bert_model, top_n=5):
    query_vec = bert_model.encode(query)

    # Group product BERT vectors and metadata
    product_bert_vecs = df.groupby('product_id')['bert_vector'].apply(lambda x: np.mean(x.tolist(), axis=0))
    product_names = df.groupby('product_id')['product_name'].first()
    product_links = df.groupby('product_id')['product_link'].first()
    product_prices = df.groupby('product_id')['discounted_price'].first()

    # Compute cosine similarity
    similarities = cosine_similarity([query_vec], product_bert_vecs.tolist())[0]

    # Collect top results
    top_indices = np.argsort(similarities)[::-1][:top_n]
    top_pids = product_bert_vecs.index[top_indices]

    print(f"\n🔍 Top {top_n} results for new user query: \"{query}\"")
    for pid in top_pids:
        name = product_names.get(pid, "No Name Available")
        link = product_links.get(pid, "")
        price = product_prices.get(pid, "N/A")
        sim = similarities[list(product_bert_vecs.index).index(pid)]
        print(f"→ {name[:60]}... | ₹{price} | Sim: {sim:.3f} | [🔗 Product]({link})")


In [43]:
recommend_for_new_user(
    query="Looking for a fast-charging cable for iPhone",
    df=df,
    bert_model=bert_model,
    top_n=5
)



🔍 Top 5 results for new user query: "Looking for a fast-charging cable for iPhone"
→ pTron Solero T241 2.4A Type-C Data & Charging USB Cable, Mad... | ₹₹99 | Sim: 0.605 | [🔗 Product](https://www.amazon.in/pTron-Charging-480Mbps-Durable-Smartphone/dp/B0B4T8RSJ1/ref=sr_1_448?qid=1672909146&s=electronics&sr=1-448)
→ pTron Solero M241 2.4A Micro USB Data & Charging Cable, Made... | ₹₹89 | Sim: 0.605 | [🔗 Product](https://www.amazon.in/pTron-Charging-480Mbps-Durable-1-Meter/dp/B0B4T6MR8N/ref=sr_1_287?qid=1672909138&s=electronics&sr=1-287)
→ pTron Solero T351 3.5Amps Fast Charging Type-C to Type-C PD ... | ₹₹199 | Sim: 0.605 | [🔗 Product](https://www.amazon.in/pTron-3-5Amps-Charging-480Mbps-Smartphones/dp/B0B4HJNPV4/ref=sr_1_38?qid=1672909125&s=electronics&sr=1-38)
→ pTron Solero 331 3.4Amps Multifunction Fast Charging Cable, ... | ₹₹249 | Sim: 0.605 | [🔗 Product](https://www.amazon.in/pTron-3-4Amps-Multifunction-Charging-Tangle-free/dp/B0B4HKH19N/ref=sr_1_155?qid=1672909131&s=electronics&s

In [44]:
recommend_for_new_user(
    query="Looking for a phone with good camera",
    df=df,
    bert_model=bert_model,
    top_n=5
)



🔍 Top 5 results for new user query: "Looking for a phone with good camera"
→ Tecno Spark 9 (Sky Mirror, 6GB RAM,128GB Storage) | 11GB Exp... | ₹₹8,999 | Sim: 0.625 | [🔗 Product](https://www.amazon.in/Tecno-Spark-Storage-Expandable-Processor/dp/B0B56YRBNT/ref=sr_1_153?qid=1672895791&s=electronics&sr=1-153)
→ iQOO vivo Z6 5G (Chromatic Blue, 6GB RAM, 128GB Storage) | S... | ₹₹16,499 | Sim: 0.588 | [🔗 Product](https://www.amazon.in/iQOO-Chromatic-Storage-Snapdragon-Processor/dp/B07WGMMQGP/ref=sr_1_36?qid=1672895755&s=electronics&sr=1-36)
→ iQOO vivo Z6 5G (Dynamo Black, 6GB RAM, 128GB Storage) | Sna... | ₹₹16,499 | Sim: 0.588 | [🔗 Product](https://www.amazon.in/iQOO-Storage-Snapdragon-695-6nm-Processor/dp/B07WJWRNVK/ref=sr_1_136?qid=1672895784&s=electronics&sr=1-136)
→ iQOO vivo Z6 5G (Chromatic Blue, 8GB RAM, 128GB Storage) | S... | ₹₹17,999 | Sim: 0.588 | [🔗 Product](https://www.amazon.in/iQOO-Chromatic-Storage-Snapdragon-Processor/dp/B07WHQBZLS/ref=sr_1_123?qid=1672895784&s=electronic

In [46]:
recommend_for_new_user(
    query="looking for a geyser",
    df=df,
    bert_model=bert_model,
    top_n=5
)



🔍 Top 5 results for new user query: "looking for a geyser"
→ Racold Pronto Pro 3Litres 3KW Vertical Instant Water Heater ... | ₹₹2,949 | Sim: 0.360 | [🔗 Product](https://www.amazon.in/Racold-Pronto-3Litres-Vertical-Instant/dp/B097MKZHNV/ref=sr_1_391?qid=1672923612&s=kitchen&sr=1-391)
→ AMERICAN MICRONIC- Imported Wet & Dry Vacuum Cleaner, 21 Lit... | ₹₹8,886 | Sim: 0.355 | [🔗 Product](https://www.amazon.in/American-Micronic-AMI-VCD21-1600WDx-Wet-1600Watts-21-litres-Stainless/dp/B072NCN9M4/ref=sr_1_317?qid=1672923607&s=kitchen&sr=1-317)
→ Havells Immersion HB15 1500 Watt (White Blue)... | ₹₹719 | Sim: 0.333 | [🔗 Product](https://www.amazon.in/Havells-Immersion-HB15-1500-White/dp/B088ZTJT2R/ref=sr_1_48_mod_primary_new?qid=1672923592&s=kitchen&sbo=RZvfv%2F%2FHxDF%2BO5021pAnSA%3D%3D&sr=1-48)
→ Campfire Spring Chef Prolix Instant Portable Water Heater Ge... | ₹₹1,499 | Sim: 0.333 | [🔗 Product](https://www.amazon.in/Spring-Chef-Stainless-Restaurant-Installation/dp/B0BP89YBC1/ref=sr_1_419?qi

In [47]:
recommend_for_new_user(
    query="in search of a geyser",
    df=df,
    bert_model=bert_model,
    top_n=5
)



🔍 Top 5 results for new user query: "in search of a geyser"
→ Racold Pronto Pro 3Litres 3KW Vertical Instant Water Heater ... | ₹₹2,949 | Sim: 0.339 | [🔗 Product](https://www.amazon.in/Racold-Pronto-3Litres-Vertical-Instant/dp/B097MKZHNV/ref=sr_1_391?qid=1672923612&s=kitchen&sr=1-391)
→ AMERICAN MICRONIC- Imported Wet & Dry Vacuum Cleaner, 21 Lit... | ₹₹8,886 | Sim: 0.325 | [🔗 Product](https://www.amazon.in/American-Micronic-AMI-VCD21-1600WDx-Wet-1600Watts-21-litres-Stainless/dp/B072NCN9M4/ref=sr_1_317?qid=1672923607&s=kitchen&sr=1-317)
→ SUJATA Powermatic Plus, Juicer Mixer Grinder, 900 Watts, 2 J... | ₹₹5,865 | Sim: 0.311 | [🔗 Product](https://www.amazon.in/Powermatic-Plus-CH-900-Watt-Grinder/dp/B07D8VBYB4/ref=sr_1_432?qid=1672923613&s=kitchen&sr=1-432)
→ Havells Instanio 1-Litre 3KW Instant Water Heater (Geyser), ... | ₹₹2,599 | Sim: 0.310 | [🔗 Product](https://www.amazon.in/Havells-Instanio-1-Litre-Instant-Geyser/dp/B078JBK4GX/ref=sr_1_244?qid=1672923605&s=kitchen&sr=1-244)
→ Baja

In [48]:
recommend_for_new_user(
    query="in search of air fryer",
    df=df,
    bert_model=bert_model,
    top_n=5
)


🔍 Top 5 results for new user query: "in search of air fryer"
→ PHILIPS Digital Air Fryer HD9252/90 with Touch Panel, uses u... | ₹₹8,799 | Sim: 0.630 | [🔗 Product](https://www.amazon.in/PHILIPS-Digital-HD9252-90-Technology/dp/B097RJ867P/ref=sr_1_107?qid=1672923596&s=kitchen&sr=1-107)
→ Pigeon Healthifry Digital Air Fryer, 360° High Speed Air Cir... | ₹₹3,599 | Sim: 0.466 | [🔗 Product](https://www.amazon.in/Pigeon-Healthifry-Circulation-Technology-Non-Stick/dp/B0B8XNPQPN/ref=sr_1_44?qid=1672923592&s=kitchen&sr=1-44)
→ SaiEllin Room Heater For Home 2000 Watts Room Heater For Bed... | ₹₹979 | Sim: 0.440 | [🔗 Product](https://www.amazon.in/SaiEllin-Heater-Portable-Bedroom-Compact/dp/B09MQ9PDHR/ref=sr_1_364?qid=1672923611&s=kitchen&sr=1-364)
→ Bajaj New Shakti Neo Plus 15 Litre 4 Star Rated Storage Wate... | ₹₹5,499 | Sim: 0.412 | [🔗 Product](https://www.amazon.in/Bajaj-New-Shakti-Neo-Storage/dp/B09N3BFP4M/ref=sr_1_291?qid=1672923606&s=kitchen&sr=1-291)
→ Candes Gloster All in One Silent B

In [49]:
recommend_for_new_user(
    query="coffee machine",
    df=df,
    bert_model=bert_model,
    top_n=5
)


🔍 Top 5 results for new user query: "coffee machine"
→ PHILIPS Drip Coffee Maker HD7432/20, 0.6 L, Ideal for 2-7 cu... | ₹₹2,999 | Sim: 0.733 | [🔗 Product](https://www.amazon.in/PHILIPS-Coffee-HD7432-20-Medium/dp/B09H7JDJCW/ref=sr_1_483?qid=1672923615&s=kitchen&sr=1-483)
→ InstaCuppa Milk Frother for Coffee - Handheld Battery-Operat... | ₹₹1,099 | Sim: 0.595 | [🔗 Product](https://www.amazon.in/InstaCuppa-Handheld-Operated-Electric-Stainless/dp/B0763K5HLQ/ref=sr_1_350?qid=1672923610&s=kitchen&sr=1-350)
→ Saiyam Stainless Steel Espresso Maker Stovetop Coffee Percol... | ₹₹599 | Sim: 0.494 | [🔗 Product](https://www.amazon.in/Saiyam-Stainless-Espresso-Maker-Percolator/dp/B095K14P86/ref=sr_1_394?qid=1672923612&s=kitchen&sr=1-394)
→ SUJATA Powermatic Plus, Juicer Mixer Grinder, 900 Watts, 2 J... | ₹₹5,865 | Sim: 0.491 | [🔗 Product](https://www.amazon.in/Powermatic-Plus-CH-900-Watt-Grinder/dp/B07D8VBYB4/ref=sr_1_432?qid=1672923613&s=kitchen&sr=1-432)
→ Cafe JEI French Press Coffee and Tea Ma