# Lookalike Analysis

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables from the working directory, in the same order
# as before: customers, then products, then transactions.
customers, products, transactions = map(
    pd.read_csv,
    ('./Customers.csv', './Products.csv', './Transactions.csv'),
)

# Summarise each customer's purchases: the sum of TotalValue and the number
# of TransactionID entries, one row per CustomerID.
per_customer = transactions.groupby('CustomerID')
customer_transactions = per_customer.agg(
    total_spent=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
    total_transactions=pd.NamedAgg(column='TransactionID', aggfunc='count'),
).reset_index()

# Attach the purchase summary to the customer profiles. The default (inner)
# join keeps only CustomerIDs that appear in both tables.
customer_data = customers.merge(customer_transactions, on='CustomerID')

# Standardise the two purchase features before comparing customers.
feature_columns = ['total_spent', 'total_transactions']
customer_features = customer_data[feature_columns]
scaler = StandardScaler()
scaled_data = scaler.fit_transform(customer_features)

# Pairwise cosine similarity: entry [i, j] compares the customers in
# rows i and j of customer_data.
similarity_matrix = cosine_similarity(scaled_data)

# Prepare Lookalike model data: maps each target CustomerID to a list of
# (lookalike CustomerID, similarity score) tuples, best match first.
lookalike_data = {}

# Get top 3 lookalikes for the first 20 customers in customer_data.
# NOTE(review): this is positional, so it corresponds to C0001..C0020 only if
# Customers.csv is ordered by CustomerID and each of those customers has at
# least one transaction (the merge drops customers without any) - confirm.
customer_ids = customer_data['CustomerID'].to_numpy()

# Bound the loop so a table with fewer than 20 customers does not raise.
for i in range(min(20, len(customer_ids))):
    customer_id = customer_ids[i]
    similarities = similarity_matrix[i]

    # Rank every customer from most to least similar. The stable sort keeps
    # tied scores in row order so the output is deterministic.
    ranked_indices = (-similarities).argsort(kind='stable')

    # Drop the customer itself BEFORE truncating to three. The customer's own
    # score is not guaranteed to be the strict maximum (other rows can tie at
    # 1.0, and an all-zero feature row has self-similarity 0), so slicing off
    # "the last element" can discard a real neighbour and return fewer than 3.
    top_3_indices = [
        idx for idx in ranked_indices if customer_ids[idx] != customer_id
    ][:3]

    top_3_customers = [
        (customer_ids[idx], similarities[idx]) for idx in top_3_indices
    ]
    lookalike_data[customer_id] = top_3_customers

# Flatten the lookalike mapping into one record per customer. Each record's
# 'Lookalike' cell is the string form of a list of "<id> : <score>" entries.
lookalike_records = []
for cust_id, lookalikes in lookalike_data.items():
    formatted_matches = [f"{cust_id_} : {score}" for cust_id_, score in lookalikes]
    lookalike_records.append(
        {'CustomerID': cust_id, 'Lookalike': str(formatted_matches)}
    )
lookalike_df = pd.DataFrame(lookalike_records)

# Write the result to disk without the DataFrame index column.
lookalike_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)

# Report completion to the user.
print('Lookalike model created and saved as FirstName_LastName_Lookalike.csv.')