In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [2]:
data = pd.read_csv('amazon.csv')

In [3]:
# Convert data types and handle missing values
data['actual_price'] = pd.to_numeric(data['actual_price'].replace({'₹': ''}, regex=True), errors='coerce')
data['rating'] = pd.to_numeric(data['rating'], errors='coerce')
data['rating_count'] = pd.to_numeric(data['rating_count'], errors='coerce')

In [4]:
data['actual_price'] = pd.to_numeric(data['actual_price'].replace({'₹': ''}, regex=True), errors='coerce')
data['rating'] = pd.to_numeric(data['rating'], errors='coerce')
data['rating_count'] = pd.to_numeric(data['rating_count'], errors='coerce')

In [5]:
# Define columns for collaborative filtering
data_columns = ['rating', 'rating_count']
column_data = data[data_columns].fillna(0)

In [6]:
# Compute similarity matrix using cosine similarity
similarity_matrix = cosine_similarity(column_data)

In [7]:
# Initialize a dictionary to store collaborative filtering results
collaborative_filtering_dict = {}

In [8]:
# Function to calculate top similar products for each product
def collaborative_filtering(product_name):
    # Get the index of the product
    product_index = data[data['product_name'] == product_name].index[0]
    
    # Calculate similarity scores between the product and all other products
    similar_products = list(enumerate(similarity_matrix[product_index]))
    
    # Sort similar products based on similarity scores (descending order)
    similar_products = sorted(similar_products, key=lambda x: x[1], reverse=True)
    
    # Get the top similar products (excluding the product itself)
    top_similar_products = similar_products[1:6]  # Top 5 similar products
    
    return top_similar_products

In [9]:
# Calculate top similar products for each product and store in dictionary
for index, row in data.iterrows():
    product_name = row['product_name']
    similar_products = collaborative_filtering(product_name)
    collaborative_filtering_dict[product_name] = similar_products

In [10]:
# Save the collaborative filtering dictionary as a .pkl file
with open('collaborative_filtering.pkl', 'wb') as f:
    pickle.dump(collaborative_filtering_dict, f)

In [11]:
def present_top_similar_products(product_name):
    # Check if the product exists in the data
    if product_name not in data['product_name'].values:
        print(f"Product '{product_name}' not found in the data.")
        return

    # Calculate top similar products using the collaborative_filtering function
    top_similar_products = collaborative_filtering(product_name)
    
    # Display the top similar products
    print(f"Top similar products to '{product_name}':")
    
    # Iterate through the top similar products and display their details
    for i, (product_index, similarity_score) in enumerate(top_similar_products):
        # Retrieve the details of the similar product from the data
        similar_product = data.loc[product_index]
        
        # Display the similar product's details
        print(f"{i + 1}. Product Name: {similar_product['product_name']}, Similarity Score: {similarity_score:.4f}")
        print(f"   Actual Price: ₹{similar_product['actual_price']:.2f}, Rating: {similar_product['rating']:.1f}, Rating Count: {similar_product['rating_count']}")
        print("-" * 50)

# Example usage
product_name_to_query = 'Wayona Nylon Braided USB to Lightning Fast Charging and Data Sync Cable Compatible for iPhone 13, 12,11, X, 8, 7, 6, 5, iPad Air, Pro, Mini (3 FT Pack of 1, Grey)'  # Replace with the product name you want to query
present_top_similar_products(product_name_to_query)

Top similar products to 'Wayona Nylon Braided USB to Lightning Fast Charging and Data Sync Cable Compatible for iPhone 13, 12,11, X, 8, 7, 6, 5, iPad Air, Pro, Mini (3 FT Pack of 1, Grey)':
1. Product Name: Ambrane Unbreakable 60W / 3A Fast Charging 1.5m Braided Type C Cable for Smartphones, Tablets, Laptops & other Type C devices, PD Technology, 480Mbps Data Sync, Quick Charge 3.0 (RCT15A, Black), Similarity Score: 1.0000
   Actual Price: ₹349.00, Rating: 4.0, Rating Count: nan
--------------------------------------------------
2. Product Name: Sounce Fast Phone Charging Cable & Data Sync USB Cable Compatible for iPhone 13, 12,11, X, 8, 7, 6, 5, iPad Air, Pro, Mini & iOS Devices, Similarity Score: 1.0000
   Actual Price: ₹nan, Rating: 3.9, Rating Count: nan
--------------------------------------------------
3. Product Name: boAt Deuce USB 300 2 in 1 Type-C & Micro USB Stress Resistant, Tangle-Free, Sturdy Cable with 3A Fast Charging & 480mbps Data Transmission, 10000+ Bends Lifespan a