Use the Customer ID as the user identifier and the Product ID as the item identifier. Use Product Adoption as the rating, assuming it reflects the level of adoption of, or satisfaction with, the product.

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# --- Data loading -----------------------------------------------------------
file_path = "C:\\Users\\AksharaVenkatesh\\OneDrive - ConceptVines\\KPMG\\wholesale_banking_synthetic_data.csv"
data = pd.read_csv(file_path)

# Keep only the (user, item, rating) triple used by the recommender.
user_item_interactions = data[['Customer ID', 'Product ID', 'Product Adoption']]

# --- Train / test split -----------------------------------------------------
# Rows are split at random (80/20, fixed seed), so a given customer may end
# up in only one of the two sets.
train_data, test_data = train_test_split(
    user_item_interactions,
    test_size=0.2,
    random_state=42,
)


def _build_interaction_matrix(interactions):
    """Pivot interaction rows into a customer x product matrix (missing -> 0)."""
    # NOTE: pivot_table aggregates repeated (customer, product) pairs with its
    # default aggfunc (mean, per the pandas documentation).
    return interactions.pivot_table(
        index='Customer ID',
        columns='Product ID',
        values='Product Adoption',
        fill_value=0,
    )


train_interaction_matrix = _build_interaction_matrix(train_data)
test_interaction_matrix = _build_interaction_matrix(test_data)

# --- Align both matrices on the same users and items ------------------------
all_users = sorted(
    set(train_interaction_matrix.index).union(test_interaction_matrix.index)
)
all_items = sorted(
    set(train_interaction_matrix.columns).union(test_interaction_matrix.columns)
)

# Users/items missing from one split become all-zero rows/columns there.
train_interaction_matrix = train_interaction_matrix.reindex(
    index=all_users, columns=all_items, fill_value=0
)
test_interaction_matrix = test_interaction_matrix.reindex(
    index=all_users, columns=all_items, fill_value=0
)

# --- Neighbour model --------------------------------------------------------
# Sparse copy of the training matrix; row order matches
# train_interaction_matrix.index.
sparse_matrix = csr_matrix(train_interaction_matrix.to_numpy())

# Brute-force cosine nearest neighbours over customer rows; fit() returns the
# estimator itself, so the call can be chained.
model = NearestNeighbors(metric='cosine', algorithm='brute').fit(sparse_matrix)

# Function to recommend products for a given user
def recommend_products(user_id, num_recommendations=5):
    """Recommend products adopted by the user's nearest neighbours.

    Looks the user up in the module-level ``train_interaction_matrix``, asks
    the fitted module-level ``model`` for the closest rows of
    ``sparse_matrix``, and collects the products those neighbours have a
    positive value for, skipping anything the user already has.

    Args:
        user_id: Label of the user in ``train_interaction_matrix.index``.
        num_recommendations: Maximum number of products to return. Up to
            this many neighbours are consulted as well.

    Returns:
        A list of at most ``num_recommendations`` distinct product labels,
        ordered by neighbour rank and then by column order. Empty when the
        user is unknown or ``num_recommendations`` is not positive.
    """
    if user_id not in train_interaction_matrix.index:
        return []
    if num_recommendations <= 0:
        return []

    # Get the user's row position in the interaction matrix
    user_index = train_interaction_matrix.index.get_loc(user_id)

    # kneighbors rejects requests for more neighbours than fitted samples,
    # so clamp the request (+1 leaves room for the user's own row).
    n_samples = sparse_matrix.shape[0]
    n_neighbors = min(num_recommendations + 1, n_samples)
    _, indices = model.kneighbors(sparse_matrix[user_index], n_neighbors=n_neighbors)

    # Drop the user's own row by position rather than assuming it is the
    # first result: with tied distances (e.g. identical or all-zero rows)
    # the query row is not guaranteed to come back first.
    neighbor_positions = [
        position for position in indices.flatten() if position != user_index
    ][:num_recommendations]

    # Products already adopted by the user; built once and reused as the
    # "seen" set so results are both new to the user and free of duplicates.
    user_interactions = train_interaction_matrix.loc[user_id]
    seen = set(user_interactions[user_interactions > 0].index)

    recommended_products = []
    for position in neighbor_positions:
        neighbor_row = train_interaction_matrix.iloc[position]
        for product in neighbor_row[neighbor_row > 0].index:
            if product in seen:
                continue
            seen.add(product)
            recommended_products.append(product)
            if len(recommended_products) == num_recommendations:
                return recommended_products

    return recommended_products

# Functions to calculate Precision@K and Recall@K
def precision_at_k(recommended_products, user_id, k):
    """Return Precision@k of a recommendation list for one user.

    Relevant items are the columns of the module-level
    ``test_interaction_matrix`` with a positive value in the user's row.

    Args:
        recommended_products: Ranked iterable of recommended product labels.
        user_id: Label of the user in ``test_interaction_matrix.index``.
        k: Cut-off rank. Only the first ``k`` recommendations are scored.

    Returns:
        ``hits / k`` as a float in ``[0, 1]``, where ``hits`` is the number
        of distinct relevant products among the first ``k`` recommendations.
        ``0.0`` when the user is unknown, has no relevant items, or ``k`` is
        missing, zero or negative.
    """
    if user_id not in test_interaction_matrix.index:
        return 0.0

    user_row = test_interaction_matrix.loc[user_id]
    relevant_items = set(user_row[user_row > 0].index)
    if not relevant_items:
        print(f"No relevant items for user {user_id} in the test set.")
        return 0.0

    if not k or k < 0:
        return 0.0

    # Score only the top-k positions, and count a repeated product once so
    # the result can never exceed 1.
    top_k = dict.fromkeys(list(recommended_products)[:k])
    hits = sum(item in relevant_items for item in top_k)
    return hits / k

def recall_at_k(recommended_products, user_id, k):
    """Return Recall@k of a recommendation list for one user.

    Relevant items are the columns of the module-level
    ``test_interaction_matrix`` with a positive value in the user's row.

    Args:
        recommended_products: Ranked iterable of recommended product labels.
        user_id: Label of the user in ``test_interaction_matrix.index``.
        k: Cut-off rank. Only the first ``k`` recommendations are scored;
            ``None`` scores the whole list.

    Returns:
        The fraction of the user's relevant products that appear among the
        first ``k`` recommendations, as a float in ``[0, 1]``. ``0.0`` when
        the user is unknown or has no relevant items.
    """
    if user_id not in test_interaction_matrix.index:
        return 0.0

    user_row = test_interaction_matrix.loc[user_id]
    relevant_items = set(user_row[user_row > 0].index)
    if not relevant_items:
        print(f"No relevant items for user {user_id} in the test set.")
        return 0.0

    ranked = list(recommended_products)
    if k is not None:
        # A non-positive cut-off selects nothing rather than slicing from
        # the end of the list.
        ranked = ranked[:max(k, 0)]

    # Count each product once so repeated recommendations cannot push the
    # result above 1.
    hits = sum(item in relevant_items for item in dict.fromkeys(ranked))
    return hits / len(relevant_items)

# Evaluate Precision@5 and Recall@5 for a specific user
example_user_id = 'CUS-283'
recommended_products = recommend_products(example_user_id, num_recommendations=5)

# Check if the user has any relevant items in the test set.
# The lookup is guarded because .loc raises KeyError for a label that is not
# in the index; an unknown user is reported as having no relevant items,
# matching how the metric functions treat that case.
if example_user_id in test_interaction_matrix.index:
    example_user_row = test_interaction_matrix.loc[example_user_id]
    relevant_items_example_user = example_user_row[example_user_row > 0].index
else:
    relevant_items_example_user = pd.Index([])
print(f"Relevant items for user {example_user_id}: {list(relevant_items_example_user)}")

precision_5 = precision_at_k(recommended_products, example_user_id, 5)
recall_5 = recall_at_k(recommended_products, example_user_id, 5)

print(f'Precision@5: {precision_5}')
print(f'Recall@5: {recall_5}')


Relevant items for user CUS-283: []
No relevant items for user CUS-283 in the test set.
No relevant items for user CUS-283 in the test set.
Precision@5: 0.0
Recall@5: 0.0
