Task 2: Lookalike Model

Build a Lookalike Model that takes a user's information as input and recommends 3 similar
customers based on their profile and transaction history. 
The model should:

● Use both customer and product information.

● Assign a similarity score to each recommended customer.

In [3]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors

In [12]:
# Load the three input tables; the relative paths are resolved against the
# current working directory. Files are read in the order listed.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [13]:
def build_lookalike_model():
    """Join the module-level transactions, customers and products tables.

    Early draft: the joined frame is built but not returned, so calling this
    yields ``None``. A later cell redefines ``build_lookalike_model`` with a
    three-parameter signature, which replaces this version.
    """
    # Attach customer attributes first, then product attributes.
    with_customers = transactions.merge(customers, on='CustomerID')
    customer_transactions = with_customers.merge(products, on='ProductID')

In [17]:
def build_lookalike_model(transactions, customers, products):
    """Build the customer-by-product spend matrix used for similarity search.

    The transactions are joined to ``customers`` on ``CustomerID`` and to
    ``products`` on ``ProductID``, then pivoted so that each row is a
    customer, each column is a product, and each cell holds the summed
    ``TotalValue`` for that pair. Pairs with no transactions are set to 0.

    Returns:
        DataFrame indexed by ``CustomerID`` with one column per ``ProductID``.
    """
    # merge() defaults to an inner join, so transactions without a matching
    # customer or product row do not reach the pivot.
    enriched = transactions.merge(customers, on='CustomerID').merge(products, on='ProductID')

    spend_by_product = enriched.pivot_table(
        index='CustomerID',
        columns='ProductID',
        values='TotalValue',
        aggfunc='sum',
    )

    # Missing customer/product combinations come out of the pivot as NaN.
    return spend_by_product.fillna(0)


In [18]:
# Customer x product spend matrix that the scaler and neighbour model consume.
user_product_matrix = build_lookalike_model(
    transactions=transactions,
    customers=customers,
    products=products,
)


In [20]:
# Standardize the matrix: fit the scaler on it, then transform the same data.
scaler = StandardScaler().fit(user_product_matrix)
user_product_matrix_scaled = scaler.transform(user_product_matrix)

In [22]:
# Nearest-neighbour model over the scaled matrix, using cosine distance.
# Four neighbours are requested by default; the lookup code further down
# drops the query customer from the results, leaving three lookalikes.
model = NearestNeighbors(metric='cosine', n_neighbors=4).fit(user_product_matrix_scaled)

In [26]:
# Build {customer_id: [(lookalike_id, similarity), ...]} for the first 20
# customers listed in the customers table.
lookalikes = {}

# The neighbour query below needs positional row access; asarray is a no-op
# for an ndarray and also covers the case where the scaled data is a DataFrame.
user_product_matrix_scaled = np.asarray(user_product_matrix_scaled)

target_ids = customers['CustomerID'][:20]

# Row position of each target customer in the matrix. get_indexer returns -1
# for IDs that are not in the index, which happens for customers that have no
# transactions (the inner joins in build_lookalike_model leave them out).
row_positions = user_product_matrix.index.get_indexer(target_ids)

for customer_id, customer_idx in zip(target_ids, row_positions):
    if customer_idx < 0:
        # No purchase vector to compare: record an empty result instead of
        # failing with an IndexError on the row lookup.
        lookalikes[customer_id] = []
        continue

    # Ask for 4 neighbours because the query row is itself part of the fitted
    # data and is normally returned as its own closest match.
    distances, indices = model.kneighbors(
        user_product_matrix_scaled[customer_idx].reshape(1, -1),
        n_neighbors=4,
    )

    # Cosine similarity = 1 - cosine distance. Scores are stored as plain
    # floats so their printed/saved text does not depend on numpy's scalar
    # repr. The slice caps the list at 3 even when the query row was not
    # among its own neighbours (e.g. another customer has an identical vector).
    similar_customers = [
        (user_product_matrix.index[i], float(1 - dist))
        for dist, i in zip(distances[0], indices[0])
        if i != customer_idx
    ][:3]

    lookalikes[customer_id] = similar_customers

# Output the lookalikes dictionary
print(lookalikes)


{'C0001': [('C0194', 0.40492753118932323), ('C0104', 0.3740015051203954), ('C0020', 0.3666086563453339)], 'C0002': [('C0030', 0.40461685378594114), ('C0091', 0.3837780302090952), ('C0071', 0.320157981058087)], 'C0003': [('C0181', 0.47757179800393024), ('C0134', 0.47101615387801), ('C0144', 0.42379990716450333)], 'C0004': [('C0070', 0.3519014889798192), ('C0175', 0.3160978979266085), ('C0132', 0.2795985542449837)], 'C0005': [('C0096', 0.4874561392926372), ('C0023', 0.47025182492905504), ('C0055', 0.38209962414485577)], 'C0006': [('C0040', 0.48690905811550356), ('C0178', 0.39781090490669013), ('C0058', 0.3141630233621908)], 'C0007': [('C0079', 0.6174415040345875), ('C0118', 0.4782769742317402), ('C0020', 0.4566150346666942)], 'C0008': [('C0144', 0.3267505488339224), ('C0169', 0.2788847602830611), ('C0091', 0.26056033455199956)], 'C0009': [('C0140', 0.5334414848517184), ('C0083', 0.5308424299367157), ('C0162', 0.4988926977967797)], 'C0010': [('C0094', 0.5150640624213298), ('C0092', 0.4055

In [27]:
 # Save Lookalike.csv
# One row per customer; the list of (id, score) pairs is stored as its string form.
lookalike_rows = [
    {'cust_id': cust, 'lookalikes': str(matches)}
    for cust, matches in lookalikes.items()
]
lookalike_df = pd.DataFrame(lookalike_rows)
lookalike_df.to_csv('Lookalike.csv', index=False)


In [30]:
# Rebuild the matrix; as the cell's final expression, the notebook displays it.
build_lookalike_model(transactions=transactions, customers=customers, products=products)

ProductID,P001,P002,P003,P004,P005,P006,P007,P008,P009,P010,...,P091,P092,P093,P094,P095,P096,P097,P098,P099,P100
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C0001,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,614.94,0.00,0.0,0.0,0.0
C0002,0.0,0.0,0.0,382.76,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,454.52,0.00,0.00,0.0,0.0,0.0
C0003,0.0,1385.2,0.0,0.00,0.0,363.96,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0
C0004,0.0,0.0,0.0,0.00,0.0,0.00,0.0,293.7,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.00,958.02,0.0,0.0,0.0
C0005,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C0196,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0
C0197,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0
C0198,0.0,0.0,0.0,0.00,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0
C0199,0.0,0.0,0.0,0.00,0.0,0.00,0.0,293.7,0.0,0.0,...,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0.0,0.0
