In [27]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the three input tables (paths are relative to the working directory).
# NOTE(review): `products` is not referenced by any cell visible here.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [22]:
# Step 1: Data Preparation
# Collapse the transaction log to one row per CustomerID: summed TotalValue,
# summed Quantity, and the count of TransactionID entries.
per_customer = transactions.groupby('CustomerID')
customer_transactions = per_customer.agg(
    total_spent=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
    total_quantity=pd.NamedAgg(column='Quantity', aggfunc='sum'),
    transaction_count=pd.NamedAgg(column='TransactionID', aggfunc='count'),
)
# Bring CustomerID back as an ordinary column so it can be used as a merge key.
customer_transactions = customer_transactions.reset_index()

In [24]:
# Merge with customer data for a full profile.
# The left join keeps customers that have no transactions; their aggregate
# columns come back as NaN and are zero-filled below.
customer_data = pd.merge(customers, customer_transactions, on='CustomerID', how='left')

# Zero-fill numeric columns only. A frame-wide fillna(0) would also overwrite
# missing text fields (e.g. Region) with the number 0, which would then be
# one-hot encoded as if it were a real category.
numeric_columns = customer_data.select_dtypes(include='number').columns
customer_data[numeric_columns] = customer_data[numeric_columns].fillna(0)

In [25]:
# Encode categorical data (Region) as one indicator column per region.
# drop_first is left off on purpose: dropping the reference level would turn
# that region into an all-zero vector, so two customers from it would share no
# region signal under cosine similarity while every other region would.
# dtype=float keeps the indicator columns numeric alongside the scaled features.
# The guard makes re-running this cell a no-op once 'Region' has already been
# encoded, instead of raising a KeyError.
if 'Region' in customer_data.columns:
    customer_data = pd.get_dummies(
        customer_data,
        columns=['Region'],
        drop_first=False,
        dtype=float,
    )

In [28]:
# Standardise the behavioural features so that no single one dominates the
# similarity computation purely because of its scale.
numeric_features = ['total_spent', 'total_quantity', 'transaction_count']
scaler = StandardScaler()
scaler.fit(customer_data[numeric_features])
customer_data[numeric_features] = scaler.transform(customer_data[numeric_features])

In [29]:
# Step 2: Compute Similarity
# Keep the IDs aside for labelling results, then build the feature matrix by
# removing the identifier and descriptive columns that are not model inputs.
customer_ids = customer_data['CustomerID']
non_feature_columns = ['CustomerID', 'CustomerName', 'SignupDate']
customer_profiles = customer_data.drop(columns=non_feature_columns)

In [30]:
# Pairwise cosine similarity between all customer rows:
# entry [i, j] compares row i of customer_profiles with row j.
similarity_matrix = cosine_similarity(X=customer_profiles)

In [31]:
# Step 3: Find Top 3 Lookalikes for Customers C0001 to C0020
# Maps each target CustomerID to its list of (lookalike CustomerID, score) pairs.
lookalike_results = dict()

In [32]:
# Rank every other customer by cosine similarity for each of the first 20 rows
# and keep the three best matches.
# IDs are materialised as a plain list so every lookup is positional and lines
# up with the rows of similarity_matrix; label-based `customer_ids[i]` is only
# correct while the Series carries a default 0..n-1 index.
id_list = list(customer_ids)
for row_pos, customer_id in enumerate(id_list[:20]):  # First 20 customers
    candidates = [
        # Plain Python floats keep the saved CSV free of `np.float64(...)` reprs.
        (other_id, float(score))
        for other_id, score in zip(id_list, similarity_matrix[row_pos])
        if other_id != customer_id  # exclude self-comparison
    ]
    # sorted() is stable, so ties keep their original row order.
    top_3 = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:3]
    lookalike_results[customer_id] = top_3

In [34]:
# Step 4: Save results to Lookalike.csv
# One output row per target customer; the Lookalikes cell holds that customer's
# list of (CustomerID, score) pairs as produced by the ranking step.
lookalike_rows = []
for cust_id, lookalikes in lookalike_results.items():
    lookalike_rows.append({'CustomerID': cust_id, 'Lookalikes': lookalikes})

lookalike_df = pd.DataFrame(lookalike_rows)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike Model completed. Results saved to Lookalike.csv.")

Lookalike Model completed. Results saved to Lookalike.csv.
