In [1]:
# Lookalike Model for eCommerce Transactions Dataset

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [3]:
# Load the three source tables (customer profiles, product catalogue, transactions)
# from CSV files in the current working directory.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [5]:
# Attach customer profile columns, then product columns, to every transaction.
# Both merges use pandas' default inner join, so a transaction whose CustomerID
# or ProductID has no match in the lookup table is dropped.
merged_df = (
    pd.merge(transactions_df, customers_df, on='CustomerID')
    .pipe(pd.merge, products_df, on='ProductID')
)

In [7]:
# Build the customer x product matrix: one row per CustomerID, one column per
# ProductID, each cell holding the total Quantity bought (0 when never bought).
customer_product_matrix = pd.pivot_table(
    merged_df,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0,
)

In [9]:
# Enrich the purchase matrix with customer profile information:
# one-hot encode 'Region' and append the indicator columns to each customer's row.
# (No scaling happens here; this cell only adds features.)
customer_profiles = pd.get_dummies(customers_df.set_index('CustomerID')['Region'], prefix='Region')

# This cell reassigns customer_product_matrix from itself, so a second run would
# otherwise fail with "columns overlap but no suffix specified". Dropping any
# region columns left over from a previous run keeps the cell safe to re-execute.
customer_product_matrix = (
    customer_product_matrix
    .drop(columns=customer_profiles.columns, errors='ignore')
    .join(customer_profiles, how='left')
    .fillna(0)  # customers without a profile row get 0 in every region indicator
)

In [11]:
# Standardize every feature column: fit the scaler on the customer matrix first,
# then apply the fitted scaler to that same matrix.
scaler = StandardScaler().fit(customer_product_matrix)
normalized_matrix = scaler.transform(customer_product_matrix)

In [13]:
# Compute pairwise cosine similarity between customers.
# Input is the standardized feature matrix; the result is indexed by row position,
# so entry [a][b] compares the a-th and b-th rows of normalized_matrix.
similarity_matrix = cosine_similarity(normalized_matrix)

In [15]:
# Generate lookalike recommendations for the first N_TARGET_CUSTOMERS customers
# (in the row order of customer_product_matrix), keeping the TOP_K most similar
# other customers for each one.
N_TARGET_CUSTOMERS = 20  # number of customers that receive recommendations
TOP_K = 3                # number of lookalikes reported per customer

customer_ids = customer_product_matrix.index.tolist()
lookalikes = {}

for i, customer in enumerate(customer_ids[:N_TARGET_CUSTOMERS]):
    # Pair every *other* customer's row position with its similarity score.
    # The customer itself is excluded by position (j != i) rather than by
    # assuming it sorts first: another customer with an identical feature
    # vector (or floating-point noise) can tie with or exceed the self-score,
    # in which case slicing off element 0 would drop a real match and keep
    # the customer as its own lookalike.
    # float() converts NumPy scalars to plain Python floats so that the scores
    # render as e.g. 0.1234 (not np.float64(0.1234)) when the list is
    # stringified for CSV output.
    similarity_scores = [
        (j, float(score))
        for j, score in enumerate(similarity_matrix[i])
        if j != i
    ]
    # Highest similarity first; the sort is stable, so ties keep matrix order.
    similarity_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Map row positions back to CustomerIDs and round scores to 4 decimals.
    top_similar = [
        (customer_ids[j], round(score, 4))
        for j, score in similarity_scores[:TOP_K]
    ]
    lookalikes[customer] = top_similar



In [17]:
# Write the recommendations to disk: one CSV row per customer, with that
# customer's list of (lookalike id, score) pairs stored in a single column.
lookalike_df = pd.DataFrame(
    [dict(zip(("CustomerID", "Lookalikes"), entry)) for entry in lookalikes.items()]
)
lookalike_df.to_csv('Lookalike_Model_Output.csv', index=False)

In [19]:
# Display output: print a confirmation message naming the CSV file that holds
# the lookalike recommendations.
print("Lookalike recommendations saved to Lookalike_Model_Output.csv")

Lookalike recommendations saved to Lookalike_Model_Output.csv
