In [8]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# Read the three source tables from the CSV files under /content/drive/MyDrive.
customers = pd.read_csv('/content/drive/MyDrive/Customers.csv')
products = pd.read_csv('/content/drive/MyDrive/Products.csv')
transactions = pd.read_csv('/content/drive/MyDrive/Transactions.csv')

# Attach the matching customer record to every transaction
# (pd.merge defaults to an inner join on the given key).
customer_transactions = pd.merge(transactions, customers, on='CustomerID')

# Preview the first rows of the customer table, then of the transaction table.
print(customers.head(), transactions.head(), sep='\n')

  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue   Price  
0      300.68  300.68  
1      300.68  300.68  
2      300.68  300.68  
3      601.36  300.68  
4      902.04  300.68  


In this segment I created a customer profile by aggregating the transaction data: the transactions are grouped by CustomerID, and the total transaction value (TotalValue) and the total quantity of products purchased (Quantity) are summed for each customer. These two totals summarize each customer's overall spending and purchasing behavior, and they are the features used to identify similar customers.

In [9]:

# One row per customer: TotalValue and Quantity summed over all of that
# customer's transactions, with CustomerID restored as a regular column.
customer_profile = (
    customer_transactions
    .groupby('CustomerID')
    .agg(TotalValue=('TotalValue', 'sum'), Quantity=('Quantity', 'sum'))
    .reset_index()
)

print(customer_profile.head())

  CustomerID  TotalValue  Quantity
0      C0001     3354.52        12
1      C0002     1862.74        10
2      C0003     2725.38        14
3      C0004     5354.88        23
4      C0005     2034.24         7


In this segment I used the NearestNeighbors model with n_neighbors=4, which returns the top 3 most similar customers plus the customer itself (each customer is its own nearest neighbor, so it is removed from the result). The model was fitted on the TotalValue and Quantity features from the customer profiles, so "similar" here means close in overall spending and in number of items purchased. Lookalikes are computed for the first 20 customers in the profile table.

In [10]:

# Tunable settings for the lookalike search.
N_LOOKALIKES = 3          # similar customers to keep per target customer
N_TARGET_CUSTOMERS = 20   # only the first N rows of customer_profile are scored
FEATURE_COLUMNS = ['TotalValue', 'Quantity']

features = customer_profile[FEATURE_COLUMNS]

# Ask for one extra neighbor because each customer is normally returned as its
# own nearest neighbor (distance 0). Cap at the number of customers so that a
# very small dataset does not make kneighbors() fail.
# NOTE(review): the features are used unscaled, so with the default Euclidean
# metric the larger-magnitude TotalValue will dominate Quantity -- consider
# standardizing the features before fitting; confirm whether that is intended.
model = NearestNeighbors(n_neighbors=min(N_LOOKALIKES + 1, len(features)))
model.fit(features)

# Query every target customer in one call instead of once per loop iteration.
# Rows of features.head(N) line up positionally with target_ids.
target_ids = customer_profile['CustomerID'].head(N_TARGET_CUSTOMERS)
_, neighbor_indices = model.kneighbors(features.head(N_TARGET_CUSTOMERS))

# Maps each target CustomerID to a list of up to N_LOOKALIKES similar CustomerIDs,
# ordered from most to least similar.
lookalikes = {}

for customer_id, row_positions in zip(target_ids, neighbor_indices):
    neighbor_ids = customer_profile['CustomerID'].iloc[row_positions].tolist()

    # Drop the customer itself by filtering rather than list.remove(): when
    # several customers share identical feature values, the customer can be
    # missing from its own neighbor list and remove() would raise ValueError.
    lookalikes[customer_id] = [
        neighbor_id for neighbor_id in neighbor_ids if neighbor_id != customer_id
    ][:N_LOOKALIKES]
print(lookalikes)

{'C0001': ['C0181', 'C0137', 'C0152'], 'C0002': ['C0132', 'C0157', 'C0036'], 'C0003': ['C0178', 'C0038', 'C0086'], 'C0004': ['C0155', 'C0053', 'C0093'], 'C0005': ['C0073', 'C0031', 'C0159'], 'C0006': ['C0039', 'C0079', 'C0037'], 'C0007': ['C0035', 'C0146', 'C0027'], 'C0008': ['C0124', 'C0037', 'C0079'], 'C0009': ['C0020', 'C0198', 'C0110'], 'C0010': ['C0186', 'C0029', 'C0062'], 'C0011': ['C0158', 'C0139', 'C0154'], 'C0012': ['C0093', 'C0023', 'C0053'], 'C0013': ['C0045', 'C0143', 'C0068'], 'C0014': ['C0058', 'C0151', 'C0097'], 'C0015': ['C0094', 'C0043', 'C0119'], 'C0016': ['C0194', 'C0048', 'C0183'], 'C0017': ['C0200', 'C0153', 'C0018'], 'C0018': ['C0162', 'C0200', 'C0105'], 'C0019': ['C0172', 'C0061', 'C0088'], 'C0020': ['C0198', 'C0110', 'C0009']}


In [11]:

# One row per target customer: its ID and the list of lookalike customer IDs.
lookalike_rows = [(customer_id, matches) for customer_id, matches in lookalikes.items()]
lookalike_df = pd.DataFrame(lookalike_rows, columns=['CustomerID', 'Lookalikes'])

# Save to the working directory without the index column, then show the table.
# The Lookalikes column holds Python lists, which to_csv writes as their string form.
lookalike_df.to_csv('Lookalike.csv', index=False)
print(lookalike_df)

   CustomerID             Lookalikes
0       C0001  [C0181, C0137, C0152]
1       C0002  [C0132, C0157, C0036]
2       C0003  [C0178, C0038, C0086]
3       C0004  [C0155, C0053, C0093]
4       C0005  [C0073, C0031, C0159]
5       C0006  [C0039, C0079, C0037]
6       C0007  [C0035, C0146, C0027]
7       C0008  [C0124, C0037, C0079]
8       C0009  [C0020, C0198, C0110]
9       C0010  [C0186, C0029, C0062]
10      C0011  [C0158, C0139, C0154]
11      C0012  [C0093, C0023, C0053]
12      C0013  [C0045, C0143, C0068]
13      C0014  [C0058, C0151, C0097]
14      C0015  [C0094, C0043, C0119]
15      C0016  [C0194, C0048, C0183]
16      C0017  [C0200, C0153, C0018]
17      C0018  [C0162, C0200, C0105]
18      C0019  [C0172, C0061, C0088]
19      C0020  [C0198, C0110, C0009]
