In [1]:
# 📦 Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import joblib

# 📂 Step 2: Load Cleaned Dataset
# The file must provide the CustomerID, Description and Quantity columns used below.
df = pd.read_csv("../data/cleaned_data.csv")

# 📋 Step 3: Build Customer-Item Matrix
# Rows are customers, columns are product descriptions, and each cell holds the
# summed Quantity for that pair (0 where the pair never occurs in the data).
pivot_table = df.pivot_table(
    values='Quantity',
    index='CustomerID',
    columns='Description',
    aggfunc='sum',
    fill_value=0,
)

# 🧠 Step 4: Compute Cosine Similarity Between Products
# cosine_similarity compares rows, so the matrix is transposed to put one
# product per row and obtain a product × product result.
product_similarity = cosine_similarity(pivot_table.transpose())
similarity_df = pd.DataFrame(
    product_similarity,
    index=pivot_table.columns,
    columns=pivot_table.columns,
)

# 🔍 Step 5: Function to Recommend Top 5 Similar Products
def recommend_products(product_name, similarity_matrix=None, top_n=5):
    """Return the ``top_n`` products most similar to ``product_name``.

    Args:
        product_name: Label of the product to look up; it must be one of the
            matrix's column labels.
        similarity_matrix: Product × product DataFrame of similarity scores
            (expected to carry the same labels on both axes). ``None`` (the
            default) means the module-level ``similarity_df``, resolved at
            call time so a rebuilt matrix is picked up without redefining
            this function.
        top_n: Maximum number of recommendations to return; values below 1
            yield an empty result.

    Returns:
        A Series of similarity scores indexed by product name, sorted from
        most to least similar and never containing ``product_name`` itself.
        If ``product_name`` is not a column of the matrix, a human-readable
        error string is returned instead (kept for existing callers).
    """
    if similarity_matrix is None:
        similarity_matrix = similarity_df

    if product_name not in similarity_matrix.columns:
        return f"❌ '{product_name}' not found in the product list."

    scores = similarity_matrix[product_name]
    # Exclude the queried product by label, not by position: when another
    # product ties with it at the top score, the sort does not guarantee the
    # queried product comes first, so slicing off element 0 could return the
    # product as its own recommendation and drop a genuine match.
    candidates = scores.drop(labels=product_name, errors="ignore")
    return candidates.sort_values(ascending=False).head(max(top_n, 0))

# 🧪 Example Usage:
# Look up one product against the default similarity matrix and show the result.
print("Example Recommendations for 'WHITE HANGING HEART T-LIGHT HOLDER':\n")
example_recommendations = recommend_products('WHITE HANGING HEART T-LIGHT HOLDER')
print(example_recommendations)

# 💾 Step 6: Save Similarity Matrix for Streamlit Use
# The DataFrame is pickled as-is, so the product labels on both axes are
# stored together with the scores.
similarity_df.to_pickle("../models/product_similarity.pkl")

print("\n✅ Product similarity matrix saved.")


Example Recommendations for 'WHITE HANGING HEART T-LIGHT HOLDER':

Description
GIN + TONIC DIET METAL SIGN         0.750410
RED HANGING HEART T-LIGHT HOLDER    0.658719
WASHROOM METAL SIGN                 0.643500
LAUNDRY 15C METAL SIGN              0.642206
GREEN VINTAGE SPOT BEAKER           0.631461
Name: WHITE HANGING HEART T-LIGHT HOLDER, dtype: float64

✅ Product similarity matrix saved.
