# How It Works

Preprocessing: Combines the product name, rating, sub-category, and specifications of each item into a single lowercase text column (clean_text).

Embedding Generation: Encodes each product's clean_text into a vector representation using a pre-trained Sentence-BERT model (all-MiniLM-L6-v2).

Cosine Similarity: Matches the query to the most similar product embeddings.

Recommendation: Returns the top-k recommendations based on similarity scores.

# Load Dataset

In [1]:
import pandas as pd

# Hard-coded absolute Windows path to the cleaned product CSV; adjust it when
# running the notebook from a different location.
DATA_PATH = "C:\\Users\\Ganesh k\\OneDrive\\Desktop\\Amazon project\\Amazon_cleaned.csv"

df = pd.read_csv(DATA_PATH)
# Preview the first five rows to check that the columns loaded as expected.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,name,main_category,sub_category,image,link,ratings,no_of_ratings,discount_price,actual_price,product,specifications
0,0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB St...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81eM15lVcJ...,https://www.amazon.in/Redmi-Power-Black-128GB-...,4.0,965,10999,18999,Redmi 10 Power,"(Power Black, 8GB RAM, 128GB Storage)"
1,1,"OnePlus Nord CE 2 Lite 5G (Blue Tide, 6GB RAM,...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/71AvQd3Vzq...,https://www.amazon.in/OnePlus-Nord-Lite-128GB-...,4.3,113956,18999,19999,OnePlus Nord CE 2 Lite 5G,"(Blue Tide, 6GB RAM, 128GB Storage)"
2,2,OnePlus Bullets Z2 Bluetooth Wireless in Ear E...,"tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/51UhwaQXCp...,https://www.amazon.in/Oneplus-Bluetooth-Wirele...,4.2,90304,1999,2299,OnePlus Bullets Z2 Bluetooth Wireless in Ear E...,
3,3,"Samsung Galaxy M33 5G (Mystique Green, 6GB, 12...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81I3w4J6yj...,https://www.amazon.in/Samsung-Mystique-Storage...,4.1,24863,15999,24999,"Samsung Galaxy M33 5G (Mystique Green, 6GB, 12...",
4,4,"OnePlus Nord CE 2 Lite 5G (Black Dusk, 6GB RAM...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/71V--WZVUI...,https://www.amazon.in/OnePlus-Nord-Black-128GB...,4.3,113956,18999,19999,OnePlus Nord CE 2 Lite 5G,"(Black Dusk, 6GB RAM, 128GB Storage)"


# Preprocessing & Cleaning

In [2]:
# Show the first five rows of the frame before any preprocessing is applied.
df.head(n=5)


Unnamed: 0.1,Unnamed: 0,name,main_category,sub_category,image,link,ratings,no_of_ratings,discount_price,actual_price,product,specifications
0,0,"Redmi 10 Power (Power Black, 8GB RAM, 128GB St...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81eM15lVcJ...,https://www.amazon.in/Redmi-Power-Black-128GB-...,4.0,965,10999,18999,Redmi 10 Power,"(Power Black, 8GB RAM, 128GB Storage)"
1,1,"OnePlus Nord CE 2 Lite 5G (Blue Tide, 6GB RAM,...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/71AvQd3Vzq...,https://www.amazon.in/OnePlus-Nord-Lite-128GB-...,4.3,113956,18999,19999,OnePlus Nord CE 2 Lite 5G,"(Blue Tide, 6GB RAM, 128GB Storage)"
2,2,OnePlus Bullets Z2 Bluetooth Wireless in Ear E...,"tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/51UhwaQXCp...,https://www.amazon.in/Oneplus-Bluetooth-Wirele...,4.2,90304,1999,2299,OnePlus Bullets Z2 Bluetooth Wireless in Ear E...,
3,3,"Samsung Galaxy M33 5G (Mystique Green, 6GB, 12...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/81I3w4J6yj...,https://www.amazon.in/Samsung-Mystique-Storage...,4.1,24863,15999,24999,"Samsung Galaxy M33 5G (Mystique Green, 6GB, 12...",
4,4,"OnePlus Nord CE 2 Lite 5G (Black Dusk, 6GB RAM...","tv, audio & cameras",All Electronics,https://m.media-amazon.com/images/I/71V--WZVUI...,https://www.amazon.in/OnePlus-Nord-Black-128GB...,4.3,113956,18999,19999,OnePlus Nord CE 2 Lite 5G,"(Black Dusk, 6GB RAM, 128GB Storage)"


In [3]:
# Combine relevant features into a single text column
def clean_text(row):
    """Build the lowercase text that represents one product row.

    The ``product``, ``ratings``, ``sub_category`` and ``specifications``
    fields are joined, in that order, with single spaces. Missing values
    (NaN/None) and blank fields are skipped, so a missing field never leaves
    a leading, trailing or doubled space in the result. Surrounding
    parentheses are removed from ``specifications``.

    Args:
        row: Mapping-like row (for example a ``pandas.Series`` produced by
            ``DataFrame.apply(..., axis=1)``) that provides the four keys
            listed above.

    Returns:
        The combined, lowercased text; an empty string when every field is
        missing or blank.
    """
    parts = []
    for column in ('product', 'ratings', 'sub_category', 'specifications'):
        value = row[column]
        if pd.isna(value):
            continue
        # str() keeps numeric ratings usable and guards against a
        # non-string specifications value, which has no .strip().
        text = str(value).strip()
        if column == 'specifications':
            # Specifications are stored wrapped in parentheses, e.g. "(8GB RAM)".
            text = text.strip("()").strip()
        if text:
            parts.append(text)
    return " ".join(parts).lower()

# Build the combined text for every row.
df['clean_text'] = df.apply(clean_text, axis=1)
# clean_text always returns a string, so dropna() on this column can never
# remove a row. Drop the rows whose combined text is blank instead, since they
# carry nothing to embed. .copy() gives an independent frame, so adding columns
# later does not trigger pandas' SettingWithCopyWarning.
df = df[df['clean_text'].str.strip() != ""].copy()


# Create Embeddings

In [None]:
from sentence_transformers import SentenceTransformer

# Load a pre-trained Sentence-BERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode all product texts in a single call. encode() accepts a list of
# sentences and batches them internally, which avoids the overhead of one
# model invocation per row that a row-wise .apply() incurs.
product_embeddings = model.encode(df['clean_text'].tolist())

# Store one embedding vector per row, so each cell of the column holds the
# vector for that row's product.
df['embeddings'] = list(product_embeddings)


  from .autonotebook import tqdm as notebook_tqdm


# Save and Load Embeddings

In [None]:
# Write the whole frame, including any computed embeddings column, to disk.
EMBEDDINGS_FILE = 'product_embeddings.pkl'
df.to_pickle(EMBEDDINGS_FILE)

# Read the frame back from the same file.
# NOTE: unpickling executes arbitrary code, so only load pickle files that
# come from a trusted source.
df = pd.read_pickle(EMBEDDINGS_FILE)

In [None]:
# Show the first five rows of the frame that was just reloaded from disk.
df.head(n=5)


# Recommendation Engine

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Function to get recommendations based on a user query
def recommend_products(query, top_k=5):
    """Return the ``top_k`` products most similar to a free-text query.

    The query is lowercased, embedded with the module-level ``model`` and
    compared against every vector in the module-level ``df['embeddings']``
    using cosine similarity.

    Side effect: the scores for this query are written to
    ``df['similarity']``, overwriting the scores of any previous call.

    Args:
        query: Search text describing the desired product.
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns ``name``, ``product``, ``actual_price``,
        ``ratings`` and ``image``, ordered from most to least similar.
    """
    result_columns = ['name', 'product', 'actual_price', 'ratings', 'image']

    # With no products there is nothing to stack or score.
    if df.empty:
        return df[result_columns]

    # Lowercase the query before embedding it.
    query_embedding = model.encode(query.lower())

    # Stack the per-row vectors into one 2-D matrix so similarity is computed
    # in a single call instead of one cosine_similarity call per product.
    # Assumes every embedding is a 1-D vector of the same length.
    product_matrix = np.vstack(df['embeddings'].to_numpy())
    scores = cosine_similarity([query_embedding], product_matrix)[0]

    # Keep the scores on the shared frame, as earlier versions of this
    # function did, so code that reads df['similarity'] keeps working.
    df['similarity'] = scores

    # Rank by score and keep only the best matches.
    recommendations = df.sort_values(by='similarity', ascending=False).head(top_k)
    return recommendations[result_columns]

# Demo: look up the five closest matches for a RAM-focused phone query.
query = "8GB RAM smartphone"
recommendations = recommend_products(query=query, top_k=5)
recommendations

In [None]:
# Demo: look up the five closest matches for a brand-focused phone query.
query = "Oppo Smart phone"
recommendations = recommend_products(query=query, top_k=5)
recommendations