# **eCommerce Transactions**

# Lookalike Model


# In [1]:


# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Read the three raw tables from the current working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Attach the customer columns, then the product columns, to each transaction.
# NOTE(review): the 'Price_y' column used below only exists if both
# Transactions.csv and Products.csv carry a 'Price' column, so that pandas
# applies its default _x/_y suffixes -- confirm against the data files.
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

# Collapse to one row per customer: summed TotalValue, summed Quantity and
# mean Price_y. reset_index() turns CustomerID back into an ordinary column.
customer_features = (
    merged_data
    .groupby('CustomerID')
    .agg({'TotalValue': 'sum', 'Quantity': 'sum', 'Price_y': 'mean'})
    .reset_index()
)

# Pairwise cosine similarity between customers, computed on the aggregated
# columns only (the ID column is dropped first). Row/column i of the result
# corresponds to row i of customer_features.
# NOTE(review): the features are passed in unscaled -- consider whether they
# should be standardised before comparing.
similarity_matrix = cosine_similarity(customer_features.drop(columns='CustomerID'))

# Function to find top 3 similar customers
def find_similar_customers(customer_id, top_n=3):
    """Return the customers most similar to ``customer_id``.

    Reads the module-level ``customer_features`` table and
    ``similarity_matrix``; row ``i`` of the matrix is taken to describe row
    ``i`` of the table.

    Args:
        customer_id: Value to look up in ``customer_features['CustomerID']``.
        top_n: Maximum number of lookalikes to return. Values below 1 give
            an empty list.

    Returns:
        A list of ``(CustomerID, score)`` tuples ordered from highest to
        lowest score, with ``score`` as a plain ``float``. The queried
        customer is never part of the result. The list is shorter than
        ``top_n`` when fewer other customers exist.

    Raises:
        IndexError: If ``customer_id`` has no row in ``customer_features``.
    """
    id_column = customer_features['CustomerID']
    is_target = (id_column == customer_id).to_numpy()
    if not is_target.any():
        raise IndexError(
            f"CustomerID {customer_id!r} not found in customer_features"
        )
    # Use the row *position* (not the index label) so the lookup stays aligned
    # with similarity_matrix whatever the table's index looks like.
    target_pos = int(is_target.argmax())

    # Exclude the customer by position instead of assuming it sorts first:
    # another customer can tie at the top score and, being earlier in the
    # table, would push the queried customer into its own result list.
    candidates = [
        (pos, score)
        for pos, score in enumerate(similarity_matrix[target_pos])
        if pos != target_pos
    ]
    # list.sort is stable, so equal scores keep their table order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    return [
        (id_column.iloc[pos], float(score))
        for pos, score in candidates[:max(top_n, 0)]
    ]

# Generate Lookalike.csv for first 20 customers
import csv

# customer_features is built from the merged transaction rows, so a customer
# listed in Customers.csv with no matching transactions has no feature row and
# find_similar_customers cannot look it up. Give such customers an empty
# lookalike list instead of letting the whole export crash.
known_ids = set(customer_features['CustomerID'])

lookalike_data = {}
for customer_id in customers['CustomerID'][:20]:
    if customer_id not in known_ids:
        lookalike_data[customer_id] = []
        continue
    # Convert scores to plain floats: the list is written via str(), and
    # NumPy 2.x scalars would otherwise appear as "np.float64(...)" in the cell.
    lookalike_data[customer_id] = [
        (similar_id, float(score))
        for similar_id, score in find_similar_customers(customer_id)
    ]

# Save to CSV: one row per customer, with the lookalike list in a single cell
with open('Lookalike.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['CustomerID', 'SimilarCustomers'])
    writer.writerows(
        [cust_id, similar] for cust_id, similar in lookalike_data.items()
    )