# In [5]:
#import necessary libraries
import pandas as pd
from scipy.spatial.distance import cosine

# Read the three input tables (customers, products, transactions) from CSV
# files in the current working directory, in that order.
customers_df, products_df, transactions_df = map(
    pd.read_csv,
    ('Customers.csv', 'Products.csv', 'Transactions.csv'),
)

# In [6]:
# Merge customer and transaction data.
# customers_df / transactions_df are the frames loaded from the CSV files.
customer_data = customers_df.merge(transactions_df, on='CustomerID')

# Build a user-item matrix: one row per customer, one column per product.
# aggfunc='sum' makes each cell the total quantity the customer bought of that
# product; pivot_table's default ('mean') would average repeat purchases.
user_item_matrix = customer_data.pivot_table(
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='sum',
    fill_value=0,
)

# Compute cosine similarity between users.
# matrix.dot(matrix.T) takes dot products between customer rows, giving a
# customer x customer frame (matrix.T.dot(matrix) would be product x product).
dot_products = user_item_matrix.dot(user_item_matrix.T)
# Euclidean length of each customer's purchase vector: sqrt of sum of squares.
norm = user_item_matrix.pow(2).sum(axis=1).pow(0.5)
# A Series operand aligns on labels: axis=0 scales row i by norm[i],
# axis=1 scales column j by norm[j].
user_similarities = dot_products.divide(norm, axis=0).divide(norm, axis=1)
# An all-zero purchase vector gives 0/0 = NaN; treat that as zero similarity.
user_similarities = user_similarities.fillna(0)

# Function to get lookalike customers
def recommend_lookalike_customers(customer_id, user_item_matrix, similarities, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customer_id: Row label to look up in ``similarities``.
        user_item_matrix: Not used by this function; kept so existing
            callers do not need to change.
        similarities: DataFrame of numeric similarity scores. Row labels are
            the customers that can be queried; column labels are the
            candidate lookalikes.
        top_n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(other_customer_id, score)`` tuples ordered by descending
        score. Empty when ``customer_id`` is not a row label.
    """
    if customer_id not in similarities.index:
        return []
    # errors='ignore' removes the customer's own column when it exists and
    # avoids a KeyError when the columns do not include the queried id.
    candidate_scores = similarities.loc[customer_id].drop(customer_id, errors='ignore')
    best_matches = candidate_scores.nlargest(top_n)
    return list(zip(best_matches.index, best_matches.values))

# Example usage: collect lookalikes for the first 20 customer IDs.
# customers_df is the frame loaded from Customers.csv.
customer_ids_to_recommend = customers_df['CustomerID'].head(20)

# Map each customer ID to its list of (lookalike_id, score) tuples.
lookalike_data = {
    cust_id: recommend_lookalike_customers(cust_id, user_item_matrix, user_similarities)
    for cust_id in customer_ids_to_recommend
}

# Create a DataFrame for better CSV formatting: one row per customer,
# one column per recommendation, each cell holding an (id, score) tuple.
lookalike_df = pd.DataFrame.from_dict(lookalike_data, orient='index')
lookalike_df.index.name = 'cust_id'  # Name the index column (not written while header=False)
lookalike_df.to_csv('Lookalike.csv', header=False)  # Save without header