In [27]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity  

In [28]:

# Read the product catalogue and blank out missing text in the three columns
# that feed the similarity model, so string concatenation never produces NaN.
df = pd.read_csv('amazon.csv').fillna(
    {'product_name': '', 'about_product': '', 'category': ''}
)

# One space-separated text field per product: name, description, category.
df['combined'] = df['product_name'].str.cat(
    [df['about_product'], df['category']], sep=' '
)


In [29]:
# Fit a TF-IDF vocabulary on the combined product text (English stop words
# removed) and keep the resulting document-term matrix for later queries.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['combined'])

# Pairwise product-to-product kernel; with Y omitted, linear_kernel compares
# the matrix against itself.
# NOTE(review): cosine_sim is not referenced by any code visible in this
# section — confirm a later cell needs it before keeping the full N x N result.
cosine_sim = linear_kernel(tfidf_matrix)


In [30]:

def get_recommendations(query, top_n=5):
    """Return the catalogue rows whose combined text best matches ``query``.

    The query is vectorised with the module-level ``tfidf`` vectorizer and
    compared against every row of the module-level ``tfidf_matrix`` using
    cosine similarity; the matching rows are looked up positionally in the
    module-level ``df``.

    Parameters
    ----------
    query : str
        Free-text search string.
    top_n : int, default 5
        Maximum number of rows to return. Values less than or equal to zero
        yield an empty result. If ``top_n`` exceeds the number of scored
        rows, all rows are returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``Title``, ``Similarity`` (rounded to 3 decimals), ``Image``,
        ``Price`` and ``Rating``, ordered from highest to lowest similarity
        with a fresh 0..n-1 index. No minimum-score filter is applied, so
        low- or zero-similarity rows can appear in the result.
    """
    columns = ['Title', 'Similarity', 'Image', 'Price', 'Rating']

    # Guard before slicing: ``[-0:]`` selects the *whole* array and a negative
    # top_n selects everything except the lowest-scoring rows, so without this
    # check a request for "no results" would return (almost) the full catalogue.
    if top_n <= 0:
        return pd.DataFrame(columns=columns)

    query_vec = tfidf.transform([query])

    # One similarity score per catalogue row.
    sim_scores = cosine_similarity(query_vec, tfidf_matrix).flatten()

    # argsort is ascending: take the last top_n positions, then reverse them
    # so the best match comes first.
    top_indices = sim_scores.argsort()[-top_n:][::-1]

    # Single positional lookup instead of one df.iloc[...] call per field.
    top_rows = df.iloc[top_indices]

    return pd.DataFrame(
        {
            'Title': top_rows['product_name'].to_numpy(),
            'Similarity': sim_scores[top_indices].round(3),
            'Image': top_rows['img_link'].to_numpy(),
            'Price': top_rows['discounted_price'].to_numpy(),
            'Rating': top_rows['rating'].to_numpy(),
        },
        columns=columns,
    )


In [31]:

# Example lookup: rank the catalogue against a brand-name query.
product_to_search = "boAt"
recommendations = get_recommendations(query=product_to_search, top_n=5)

# Bare last expression so the notebook renders the result table.
recommendations


Unnamed: 0,Title,Similarity,Image,Price,Rating
0,boAt Rugged V3 Braided Micro USB Cable (Pearl ...,0.319,https://m.media-amazon.com/images/W/WEBP_40237...,₹299,4.2
1,boAt Rugged v3 Extra Tough Unbreakable Braided...,0.309,https://m.media-amazon.com/images/I/41SDfuK7L2...,₹299,4.2
2,boAt Rugged v3 Extra Tough Unbreakable Braided...,0.309,https://m.media-amazon.com/images/I/41SDfuK7L2...,₹299,4.2
3,Sounce Protective Case Cover Compatible Boat X...,0.286,https://m.media-amazon.com/images/I/4155YhLwDi...,₹199,4.0
4,"boAt Type C A750 Stress Resistant, Tangle-free...",0.24,https://m.media-amazon.com/images/I/41jlh3c7Ub...,₹399,4.1


In [32]:
import pickle

# Persist the fitted vectorizer, the TF-IDF matrix and the product DataFrame
# (the DataFrame is optional, but handy for metadata such as product titles).
# Files are written in the order listed.
for _artifact_path, _artifact in (
    ('tfidf_vectorizer.pkl', tfidf),
    ('tfidf_matrix.pkl', tfidf_matrix),
    ('products_df.pkl', df),
):
    with open(_artifact_path, 'wb') as _handle:
        pickle.dump(_artifact, _handle)
