In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [8]:
# Read the three raw CSV exports (looked up relative to the working directory)
# into DataFrames, in the order customers -> products -> transactions.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

In [9]:
# Attach each customer's attributes to every one of their transactions.
# This is an inner join on CustomerID (pandas' default), so rows whose
# CustomerID is absent from either table are dropped.
customer_transactions = pd.merge(
    transactions_df,
    customers_df,
    how='inner',
    on='CustomerID',
)

In [12]:
# Step 4: Collapse the transaction rows into one profile row per customer
transactions_by_customer = customer_transactions.groupby('CustomerID')
customer_profiles = transactions_by_customer.agg(
    # number of non-null TransactionID values for the customer
    total_transactions=pd.NamedAgg(column='TransactionID', aggfunc='count'),
    # sum of TotalValue over the customer's transactions
    total_spent=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
    # number of distinct ProductID values the customer bought
    unique_products=pd.NamedAgg(column='ProductID', aggfunc='nunique'),
)
# Turn the CustomerID group key back into an ordinary column.
customer_profiles = customer_profiles.reset_index()

In [13]:
# Step 5: Standardise the profile features before measuring similarity (optional)
# NOTE: the scaled values overwrite the raw aggregates in customer_profiles,
# so after this cell those three columns no longer hold counts / amounts.
scaler = StandardScaler()
feature_columns = ['total_transactions', 'total_spent', 'unique_products']
scaled_features = scaler.fit_transform(customer_profiles[feature_columns])
customer_profiles[feature_columns] = scaled_features

In [14]:
# Step 6: Pairwise cosine similarity between customer profiles
# cos_sim[a][b] is the similarity between the customers stored in rows a and b
# of customer_profiles (same row order as the DataFrame).
profile_features = customer_profiles[['total_transactions', 'total_spent', 'unique_products']]
cos_sim = cosine_similarity(profile_features)

In [15]:
# Step 7: Find top 3 lookalikes for each of the first 20 customers
# "First 20" means the first 20 rows of customer_profiles. These are expected
# to be C0001 to C0020 provided each of those customers has a profile row
# -- TODO confirm against the data.
N_TARGET_CUSTOMERS = 20  # how many leading profile rows get recommendations
TOP_K = 3                # lookalikes reported per customer

lookalike_results = []

# Clamp the range so a dataset with fewer than N_TARGET_CUSTOMERS profiles
# does not index past the end of customer_profiles / cos_sim.
for i in range(min(N_TARGET_CUSTOMERS, len(customer_profiles))):
    # .iloc: i is a row position, not an index label.
    customer_id = customer_profiles['CustomerID'].iloc[i]

    # Remove the customer's own entry by position instead of assuming it sorts
    # first: another customer can tie with (or, through float rounding, exceed)
    # the self-similarity, in which case slicing [1:4] would keep the customer
    # as their own lookalike and discard a genuine match.
    sim_scores = [(j, score) for j, score in enumerate(cos_sim[i]) if j != i]

    # Highest similarity first; sorted() is stable, so ties keep row order.
    sorted_sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[:TOP_K]

    lookalike_results.append({
        'CustomerID': customer_id,
        # float(): store plain Python floats so the text written to CSV does
        # not depend on how the installed NumPy version prints its scalars.
        'Lookalikes': [
            (customer_profiles['CustomerID'].iloc[j], float(score))
            for j, score in sorted_sim_scores
        ],
    })

In [16]:
# Step 8: Build a table from the per-customer result records
# (one row per record; the dict keys 'CustomerID' and 'Lookalikes' become the columns).
lookalike_df = pd.DataFrame(data=lookalike_results)

In [17]:
# Step 9: Write the lookalike table to disk, then confirm on stdout
output_csv = 'Lookalike.csv'
lookalike_df.to_csv(output_csv, index=False)  # index=False: leave the row index out of the file

print("Lookalike model has been built and results saved to 'Lookalike.csv'.")

Lookalike model has been built and results saved to 'Lookalike.csv'.
