In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# 1. Data Loading
# Read the three input tables from the working directory, in the same order
# as before: customers, products, transactions.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# 2. Feature Engineering
def create_customer_profile(customers_df, transactions_df, products_df):
    """
    Build one numeric feature row per customer that appears in transactions_df.

    Columns, in order:
    - Transaction behavior: TransactionID_count, TotalValue_sum,
      TotalValue_mean, Quantity_sum, Quantity_mean
    - Category preferences: one column per product category holding the
      share of the customer's transaction rows in that category
    - Regional information: one-hot Region_<name> columns (as floats)

    Args:
        customers_df: DataFrame with 'CustomerID' and 'Region' columns.
        transactions_df: DataFrame with 'TransactionID', 'CustomerID',
            'ProductID', 'Quantity' and 'TotalValue' columns.
        products_df: DataFrame with 'ProductID' and 'Category' columns.

    Returns:
        DataFrame indexed by CustomerID. Customers without any transaction
        are not included. Category/region features that cannot be resolved
        (product missing from products_df, customer missing from
        customers_df) are 0.0 rather than NaN.
    """
    # Transaction metrics; the aggregation yields two-level column labels
    # which are flattened to e.g. 'TotalValue_sum'.
    transaction_metrics = transactions_df.groupby('CustomerID').agg({
        'TransactionID': 'count',
        'TotalValue': ['sum', 'mean'],
        'Quantity': ['sum', 'mean']
    })
    transaction_metrics.columns = ['_'.join(col) for col in transaction_metrics.columns]

    # Category preferences (percentage of purchases in each category)
    category_data = pd.merge(transactions_df, products_df[['ProductID', 'Category']], on='ProductID')
    category_prefs = pd.crosstab(category_data['CustomerID'], category_data['Category'], normalize='index')

    # Regional one-hot encoding. Floats (not bool) so that filling gaps
    # below keeps the columns numeric.
    region_dummies = pd.get_dummies(
        customers_df.set_index('CustomerID')['Region'], prefix='Region', dtype=float
    )

    # Merge all features onto the customers that have transactions
    customer_profile = pd.merge(transaction_metrics, category_prefs,
                                left_index=True, right_index=True, how='left')
    customer_profile = pd.merge(customer_profile, region_dummies,
                                left_index=True, right_index=True, how='left')

    # The left merges leave NaN where a customer has no category or region
    # match. NaN would pass through scaling and turn every similarity that
    # involves the customer into NaN, so treat "no information" as 0.
    optional_cols = customer_profile.columns[transaction_metrics.shape[1]:]
    return customer_profile.fillna(dict.fromkeys(optional_cols, 0.0))

# 3. Create and normalize features
# Build the raw per-customer feature table, then standardize every column
# while keeping the original row and column labels.
customer_profiles = create_customer_profile(customers_df, transactions_df, products_df)
scaler = StandardScaler().fit(customer_profiles)
scaled_profiles = pd.DataFrame(
    scaler.transform(customer_profiles),
    columns=customer_profiles.columns,
    index=customer_profiles.index,
)

# 4. Similarity Calculation
def calculate_similarity(customer_id, scaled_profiles, n_recommendations=3):
    """
    Return the customers most similar to customer_id by cosine similarity.

    Args:
        customer_id: Index label of the target customer in scaled_profiles.
        scaled_profiles: DataFrame of numeric features, one row per customer.
        n_recommendations: Maximum number of similar customers to return.

    Returns:
        List of (other_customer_id, similarity) tuples sorted by similarity,
        highest first; ties keep the row order of scaled_profiles. The target
        customer is never included. When cosine similarity is undefined
        (either profile is all zeros or contains NaN) the score is 0.0
        instead of NaN, so the ranking stays well defined.

    Raises:
        KeyError: If customer_id is not in scaled_profiles.index.
    """
    target_profile = scaled_profiles.loc[customer_id].to_numpy(dtype=float)

    # Compare against every row except the target itself.
    is_other = scaled_profiles.index != customer_id
    other_ids = scaled_profiles.index[is_other].tolist()
    other_profiles = scaled_profiles.to_numpy(dtype=float)[is_other]

    # Cosine similarity for all candidates at once.
    dot_products = other_profiles @ target_profile
    norm_products = np.linalg.norm(other_profiles, axis=1) * np.linalg.norm(target_profile)
    # Only divide where the denominator is a positive number; everything
    # else (zero or NaN norm) keeps the 0.0 placeholder.
    similarities = np.divide(
        dot_products,
        norm_products,
        out=np.zeros_like(dot_products),
        where=norm_products > 0,
    )

    # Stable descending order so equal scores keep their original order.
    top_positions = np.argsort(-similarities, kind='stable')[:n_recommendations]
    return [(other_ids[pos], float(similarities[pos])) for pos in top_positions]

# 5. Generate recommendations for first 20 customers
lookalike_map = {}
for customer_id in customers_df['CustomerID'].iloc[:20]:
    if customer_id not in scaled_profiles.index:
        # Profiles only exist for customers with transactions. Record an
        # empty result instead of aborting the whole run with a KeyError.
        lookalike_map[customer_id] = []
        continue
    similar_customers = calculate_similarity(customer_id, scaled_profiles)
    lookalike_map[customer_id] = [
        {'customer_id': cust_id, 'similarity_score': round(score, 4)}
        for cust_id, score in similar_customers
    ]

# 6. Save results in required format
# Fixed column layout: customer_id, then (similar_customer_k, score_k) for
# k = 1..3, so the CSV header is the same even when some customers have
# fewer than three lookalikes.
result_columns = ['customer_id']
for rank in (1, 2, 3):
    result_columns += [f'similar_customer_{rank}', f'score_{rank}']

result_rows = []
for customer_id, recommendations in lookalike_map.items():
    row = {'customer_id': customer_id}
    # Fill only the ranks that exist; missing ranks become empty cells
    # rather than raising IndexError.
    for rank, recommendation in enumerate(recommendations, start=1):
        row[f'similar_customer_{rank}'] = recommendation['customer_id']
        row[f'score_{rank}'] = recommendation['similarity_score']
    result_rows.append(row)

# Save to CSV
lookalike_df = pd.DataFrame(result_rows, columns=result_columns)
lookalike_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)

# Display sample results
print("\nSample Lookalike Recommendations:")
print(lookalike_df.head())


Sample Lookalike Recommendations:
  customer_id similar_customer_1  score_1 similar_customer_2  score_2  \
0       C0001              C0048   0.8629              C0112   0.8508   
1       C0002              C0106   0.9063              C0159   0.9025   
2       C0003              C0129   0.8464              C0195   0.8029   
3       C0004              C0113   0.9601              C0012   0.9106   
4       C0005              C0007   0.9599              C0140   0.9053   

  similar_customer_3  score_3  
0              C0120   0.8391  
1              C0134   0.8628  
2              C0151   0.7457  
3              C0104   0.8936  
4              C0146   0.8940  
