In [5]:
# Step 1: Load libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Step 2: Load data
# Read the three raw tables from CSV files in the working directory.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Step 3: Merge datasets
# Enrich every transaction row with its customer's columns, then with its
# product's columns. Both joins are pandas' default inner join, so rows with
# no matching CustomerID / ProductID are dropped. Columns that exist on both
# sides of a join get the default '_x' (left) / '_y' (right) suffixes.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
transactions = pd.merge(transactions_with_customers, products, on='ProductID')

# Step 4: Feature engineering
# Example features: transaction frequency, avg transaction value, product preferences
def _most_frequent_category(categories):
    """Return the most frequent value of ``categories``.

    ``Series.mode`` returns the mode(s) in sorted order, so ties resolve to
    the first of them. When the group holds no non-missing values the mode
    is empty; ``None`` is returned in that case instead of raising.
    """
    modes = categories.mode()
    # Positional access: do not depend on the mode Series being labelled 0.
    return modes.iloc[0] if not modes.empty else None


# One row per customer; reset_index() turns CustomerID back into a column.
customer_features = transactions.groupby('CustomerID').agg({
    'TotalValue': 'mean',        # Avg transaction value
    'TransactionID': 'count',   # Transaction frequency
    'Quantity': 'sum',          # Total quantity purchased
    'Price_y': 'mean',            # Avg price paid
    'Category': _most_frequent_category  # Most frequent product category
}).reset_index()

# Normalize numeric features
# Min-max scaling rescales each numeric column to the scaler's default
# [0, 1] range so the columns are on a comparable scale.
numeric_cols = ['TotalValue', 'TransactionID', 'Quantity', 'Price_y']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_features[numeric_cols])
# Replace the raw numeric columns with their scaled counterparts.
customer_features[numeric_cols] = scaled_values

# Step 5: Compute similarity
# Take the numeric feature columns as one row vector per customer and compare
# every pair of rows: entry [i, j] is the cosine similarity between the
# customers in rows i and j of customer_features.
# NOTE(review): cosine similarity compares direction only, so customers whose
# feature vectors differ just by overall magnitude still score close to 1.
customer_vectors = customer_features[numeric_cols].to_numpy()
similarity_matrix = cosine_similarity(customer_vectors)

# Step 6: Generate recommendations
# Find top 3 most similar customers for each target customer
top_n = 3
# Positional array of IDs: row/column i of similarity_matrix belongs to
# customer_ids[i], independent of the DataFrame's index labels.
customer_ids = customer_features['CustomerID'].to_numpy()
lookalikes = {}
for idx, customer_id in enumerate(customer_ids):
    # Exclude the customer itself by index rather than by dropping the first
    # sorted entry: with tied scores or floating-point rounding another
    # customer can sort ahead of the self-match, which would otherwise leave
    # the customer in its own lookalike list.
    candidates = [
        (other_idx, score)
        for other_idx, score in enumerate(similarity_matrix[idx])
        if other_idx != idx
    ]
    # sorted() is stable, so equal scores keep their original row order.
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]
    # Store plain Python floats so the (CustomerID, score) tuples print and
    # serialise cleanly regardless of the NumPy version.
    lookalikes[customer_id] = [
        (customer_ids[other_idx], round(float(score), 2))
        for other_idx, score in ranked
    ]

# Step 7: Create Lookalike.csv
# One row per customer (the dict keys become the index); column k holds the
# k-th ranked (CustomerID, score) tuple. The column names are derived from
# top_n so they stay in sync with the number of lookalikes kept in Step 6.
lookalike_columns = [f'Lookalike{rank}' for rank in range(1, top_n + 1)]
lookalike_df = pd.DataFrame.from_dict(lookalikes, orient='index', columns=lookalike_columns)
# The index (customer IDs) is written as the first CSV column, named CustomerID.
lookalike_df.to_csv('Lookalike.csv', index_label='CustomerID')

# Step 8: Output for first 20 customers
print(lookalike_df.head(20))


          Lookalike1     Lookalike2     Lookalike3
C0001   (C0100, 1.0)   (C0011, 1.0)   (C0137, 1.0)
C0002   (C0029, 1.0)   (C0086, 1.0)   (C0155, 1.0)
C0003   (C0018, 1.0)   (C0163, 1.0)   (C0053, 1.0)
C0004   (C0034, 1.0)   (C0065, 1.0)   (C0113, 1.0)
C0005   (C0158, 1.0)   (C0061, 1.0)   (C0007, 1.0)
C0006   (C0129, 1.0)   (C0079, 1.0)   (C0117, 1.0)
C0007   (C0085, 1.0)   (C0050, 1.0)   (C0061, 1.0)
C0008   (C0084, 1.0)   (C0121, 1.0)   (C0049, 1.0)
C0009  (C0128, 0.98)  (C0083, 0.97)  (C0103, 0.96)
C0010   (C0028, 1.0)   (C0045, 1.0)   (C0039, 1.0)
C0011   (C0166, 1.0)   (C0052, 1.0)   (C0064, 1.0)
C0012   (C0141, 1.0)   (C0038, 1.0)   (C0099, 1.0)
C0013   (C0174, 1.0)   (C0055, 1.0)   (C0022, 1.0)
C0014   (C0150, 1.0)   (C0130, 1.0)   (C0078, 1.0)
C0015  (C0198, 0.99)  (C0042, 0.99)  (C0128, 0.99)
C0016   (C0146, 1.0)   (C0192, 1.0)   (C0187, 1.0)
C0017   (C0065, 1.0)   (C0113, 1.0)   (C0004, 1.0)
C0018   (C0003, 1.0)   (C0182, 1.0)   (C0163, 1.0)
C0019   (C0047, 1.0)   (C0139, 

In [4]:
# Inspect the merged transactions table (customer and product columns joined in).
transactions


Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price_x,CustomerName,Region,SignupDate,ProductName,Category,Price_y
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,Andrea Jenkins,Europe,2022-12-03,ComfortLiving Bluetooth Speaker,Electronics,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,Brittany Harvey,Asia,2024-09-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,Kathryn Stevens,Europe,2024-04-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,Travis Campbell,South America,2024-04-11,ComfortLiving Bluetooth Speaker,Electronics,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,Timothy Perez,Europe,2022-03-15,ComfortLiving Bluetooth Speaker,Electronics,300.68
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,T00496,C0118,P037,2024-10-24 08:30:27,1,459.86,459.86,Jacob Holt,South America,2022-01-22,SoundWave Smartwatch,Electronics,459.86
996,T00759,C0059,P037,2024-06-04 02:15:24,3,1379.58,459.86,Mrs. Kimberly Wright,North America,2024-04-07,SoundWave Smartwatch,Electronics,459.86
997,T00922,C0018,P037,2024-04-05 13:05:32,4,1839.44,459.86,Tyler Haynes,North America,2024-09-21,SoundWave Smartwatch,Electronics,459.86
998,T00959,C0115,P037,2024-09-29 10:16:02,2,919.72,459.86,Joshua Hamilton,Asia,2024-11-11,SoundWave Smartwatch,Electronics,459.86
