In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
user_product = pd.read_csv('../data/customer_segments.csv')
user_product.set_index('customer_id',inplace=True)

In [5]:
cluster = user_product['cluster']
product_matrix = user_product.drop(columns=['cluster'])

product_matrix.shape

(2172, 51)

In [7]:
similarity_matrix = cosine_similarity(product_matrix)

In [9]:
similarity_df = pd.DataFrame(similarity_matrix,index=product_matrix.index,columns=product_matrix.index)

In [21]:
def recommend_products_for_customer(customer_id,top_n_similar_users = 5,top_n_products=5):
    if customer_id not in similarity_df.index:
        print("Customer Id not found")
        return[]

    similar_users = similarity_df[customer_id].sort_values(ascending=False)[1:top_n_similar_users+1] #Find top similar users
    similar_users_products = product_matrix.loc[similar_users.index] #Get products bought by similar users
    #Sum product counts and remove already purchased items
    product_scores = similar_users_products.sum(axis=0)
    already_bought = product_matrix.loc[customer_id]
    product_scores = product_scores[already_bought==0]
    #Recommend top N products
    recommended = product_scores.sort_values(ascending=False).head(top_n_products)
    return recommended

In [23]:
sample_customer = product_matrix.index[0]
recommendations = recommend_products_for_customer(sample_customer)

In [25]:
print(f'product recommendations for customer {sample_customer}:\n')
print(recommendations)

product recommendations for customer 31813:

Chips          2
Baby Food      0
Potatoes       0
Lotion         0
Mango Drink    0
dtype: int64


In [35]:
#recommendations within clusters
def recommend_within_cluster(customer_id, top_n_users=5, top_n_products=5):
    if customer_id not in cluster.index:
        print("❌ Customer ID not found.")
        return []

    customer_cluster = cluster[customer_id]

    # Filter users from same cluster
    same_cluster_users = cluster[cluster == customer_cluster].index
    cluster_product_matrix = product_matrix.loc[same_cluster_users]
    
    # Compute similarity within cluster
    cluster_similarity = cosine_similarity(cluster_product_matrix)
    cluster_sim_df = pd.DataFrame(cluster_similarity,
                                  index=same_cluster_users,
                                  columns=same_cluster_users)
    
    # Repeat same steps
    similar_users = cluster_sim_df[customer_id].sort_values(ascending=False)[1:top_n_users+1]
    similar_users_products = cluster_product_matrix.loc[similar_users.index]
    product_scores = similar_users_products.sum(axis=0)
    already_bought = cluster_product_matrix.loc[customer_id]
    product_scores = product_scores[already_bought == 0]
    
    recommended = product_scores.sort_values(ascending=False).head(top_n_products)
    return recommended

# 🧪 Try within-cluster recommendation
print(f"\n🧠 Within-Cluster Recommendations for Customer {sample_customer}:\n")
print(recommend_within_cluster(sample_customer))


🧠 Within-Cluster Recommendations for Customer 31813:

Chips          2
Baby Food      0
Potatoes       0
Lotion         0
Mango Drink    0
dtype: int64
