In [5]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import numpy as np

import csv

# Number of lookalike customers reported for each customer.
TOP_N_LOOKALIKES = 3
# Number of customers (in the order of `customer_features`) printed and exported.
N_CUSTOMERS_TO_EXPORT = 20

# Load datasets
customers_url = 'https://drive.google.com/uc?id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE'
products_url = 'https://drive.google.com/uc?id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0'
transactions_url = 'https://drive.google.com/uc?id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF'

customers = pd.read_csv(customers_url)
products = pd.read_csv(products_url)
transactions = pd.read_csv(transactions_url)

# Merge transactions with products to get product details in transactions.
# NOTE(review): the merged frame shows Price_x / Price_y, so both inputs appear
# to carry a 'Price' column; the default suffixes are kept so the schema of
# `full_data` stays the same for any later cells.
transactions_products = pd.merge(transactions, products, on='ProductID')

# Merge with customers to get customer details in transactions
full_data = pd.merge(transactions_products, customers, on='CustomerID')

# Display the combined data
print(full_data.head())

# Example feature: Total number of transactions per customer
customer_transactions = full_data.groupby('CustomerID').size().reset_index(name='TotalTransactions')

# Example feature: Average transaction value per customer
customer_avg_value = full_data.groupby('CustomerID')['TotalValue'].mean().reset_index(name='AvgTransactionValue')

# Combine these features into one DataFrame
customer_features = pd.merge(customer_transactions, customer_avg_value, on='CustomerID')

# Show the customer features
print(customer_features.head())

# Pull the IDs out once instead of doing a row lookup per neighbour later on.
customer_ids = customer_features['CustomerID'].to_numpy()

# Convert features to numerical format for modeling
features = customer_features.drop('CustomerID', axis=1).to_numpy(dtype=float)

# Standardize each column (zero mean, unit variance) so that a feature measured
# on a larger numeric scale does not dominate the Euclidean distance.
feature_std = features.std(axis=0)
feature_std[feature_std == 0] = 1.0  # constant column: avoid division by zero
scaled_features = (features - features.mean(axis=0)) / feature_std

# A customer can have at most (number of customers - 1) lookalikes.
n_neighbors = min(TOP_N_LOOKALIKES, len(customer_ids) - 1)

# Initialize the Nearest Neighbors model
nn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto')

# Fit the model
nn.fit(scaled_features)

# Find the top lookalike customers for each customer. Calling kneighbors()
# without a query matrix returns the neighbours of every fitted point and does
# not count a point as its own neighbour, so no positional "drop the first
# hit" step is needed (that step is unreliable when customers tie exactly).
distances, indices = nn.kneighbors()

# Store the results in a dictionary: CustomerID -> [(lookalike ID, distance), ...]
# Distances are cast to plain floats so the printed / exported text does not
# depend on how the installed NumPy version renders its scalar types.
lookalikes = {
    customer_id: [
        (customer_ids[j], float(dist))
        for j, dist in zip(neighbor_idx, neighbor_dist)
    ]
    for customer_id, neighbor_idx, neighbor_dist in zip(customer_ids, indices, distances)
}

# Display the lookalike results for the first 20 customers
result_dict = {k: lookalikes[k] for k in list(lookalikes)[:N_CUSTOMERS_TO_EXPORT]}
print(result_dict)

# Save the results to a CSV file
with open('Lookalike.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['CustomerID', 'Lookalikes'])
    # Use a distinct loop name so the full `lookalikes` dict is not overwritten.
    for customer_id, similar_customers in result_dict.items():
        writer.writerow([customer_id, similar_customers])

print('Lookalike results saved to Lookalike.csv!')


  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue  Price_x                      ProductName     Category  Price_y  \
0      300.68   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68   
1      300.68   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68   
2      300.68   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68   
3      601.36   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68   
4      902.04   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68   

      CustomerName         Region  SignupDate  
0   Andrea Jenkins         Europe  202