In [49]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [50]:
# Directory holding the three input CSVs. '/content' is the location the
# notebook originally read from; change it here when running elsewhere.
DATA_DIR = '/content'

customers_df = pd.read_csv(f'{DATA_DIR}/Customers.csv')
products_df = pd.read_csv(f'{DATA_DIR}/Products.csv')
transactions_df = pd.read_csv(f'{DATA_DIR}/Transactions.csv')

**Feature Engineering**

In [51]:
def create_customer_features(customers_df, products_df, transactions_df):
    """Build one feature row per customer from profile and purchase data.

    The result combines three groups of columns:

    * one-hot ``Region_*`` indicators taken from ``customers_df``;
    * per-customer transaction aggregates (``transaction_count``,
      ``total_spend``, ``avg_transaction_value``, ``total_items``);
    * ``category_pref_<category>`` columns holding the share of the
      customer's spend that went to each product category.

    Parameters
    ----------
    customers_df : pandas.DataFrame
        Must contain ``CustomerID`` and ``Region``.
    products_df : pandas.DataFrame
        Must contain ``ProductID`` and ``Category``.
    transactions_df : pandas.DataFrame
        Must contain ``TransactionID``, ``CustomerID``, ``ProductID``,
        ``Quantity`` and ``TotalValue``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``CustomerID`` with one row per row of ``customers_df``.
        Customers without matching transactions get 0 in every
        transaction and category column.
    """
    # Region indicators, keyed by customer so later merges align on the index.
    region_dummies = pd.get_dummies(
        customers_df[['CustomerID', 'Region']],
        columns=['Region'],
    ).set_index('CustomerID')

    # Purchase volume and value per customer.
    purchase_stats = (
        transactions_df
        .groupby('CustomerID')
        .agg(
            transaction_count=('TransactionID', 'count'),
            total_spend=('TotalValue', 'sum'),
            avg_transaction_value=('TotalValue', 'mean'),
            total_items=('Quantity', 'sum'),
        )
        .fillna(0)
    )

    # Spend per (customer, category); the default inner merge keeps only
    # transactions whose ProductID appears in products_df.
    spend_by_category = (
        transactions_df
        .merge(products_df[['ProductID', 'Category']], on='ProductID')
        .pivot_table(
            values='TotalValue',
            index='CustomerID',
            columns='Category',
            aggfunc='sum',
            fill_value=0,
        )
    )

    # Turn absolute spend into each category's share of the customer's total.
    # A zero total produces NaN here; the final fillna(0) below absorbs it.
    row_totals = spend_by_category.sum(axis=1)
    category_share = spend_by_category.div(row_totals, axis=0)
    category_share.columns = [
        f'category_pref_{name.lower()}' for name in category_share.columns
    ]

    # Left-merge so every customer is retained even without transactions.
    features = region_dummies
    for block in (purchase_stats, category_share):
        features = features.merge(
            block,
            left_index=True,
            right_index=True,
            how='left',
        )

    return features.fillna(0)

**Creating Feature Matrix**

In [52]:
# One row per customer, indexed by CustomerID.
feature_matrix = create_customer_features(
    customers_df=customers_df,
    products_df=products_df,
    transactions_df=transactions_df,
)

**Scaling Features**

In [53]:
# Fit the scaler on the full feature matrix, then transform that same matrix.
scaler = StandardScaler().fit(feature_matrix)
scaled_features = scaler.transform(feature_matrix)

# Re-attach the customer index and feature names to the scaled array.
scaled_features_df = pd.DataFrame(
    data=scaled_features,
    columns=feature_matrix.columns,
    index=feature_matrix.index,
)

**Calculating Similarity Scores**

In [54]:
# Pairwise cosine similarity between the customers' scaled feature rows.
similarity_matrix = cosine_similarity(scaled_features_df)

# Label both axes with customer IDs so scores can be looked up by ID.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    columns=feature_matrix.index,
    index=feature_matrix.index,
)

**Getting the Top 3 Similar Customers for a Given Customer ID**

In [55]:
def get_top_lookalikes(customer_id, similarity_df, n=3):
    """Return the ``n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id
        Column label of ``similarity_df`` to look up.
    similarity_df : pandas.DataFrame
        Similarity scores with customer IDs as both index and columns.
    n : int, default 3
        Maximum number of lookalikes to return.

    Returns
    -------
    list of tuple
        ``(other_customer_id, score)`` pairs in descending score order,
        excluding ``customer_id`` itself, with each score converted to a
        Python float rounded to 4 decimal places.
    """
    # Rank every customer by similarity to the target, best first.
    ranked = similarity_df[customer_id].sort_values(ascending=False)

    # The target always matches itself, so drop it before truncating.
    others = ranked.loc[ranked.index != customer_id]

    pairs = []
    for other_id, score in others.iloc[:n].items():
        pairs.append((other_id, round(float(score), 4)))
    return pairs

**Generating Recommendations**

In [56]:
# Lookalikes for customers C0001 through C0020, keyed by customer ID.
lookalike_results = {
    customer_id: get_top_lookalikes(customer_id, similarity_df)
    for customer_id in (f'C{i:04d}' for i in range(1, 21))
}

**Saving Results**

In [57]:
print("Saving results...")

# One CSV row per customer; the list of (id, score) pairs is stored as its
# string representation in a single column.
flattened_results = [
    {'customer_id': cust_id, 'recommendations': str(lookalikes)}
    for cust_id, lookalikes in lookalike_results.items()
]

output_df = pd.DataFrame(flattened_results)
output_df.to_csv('FirstName_LastName_Lookalike.csv', index=False)

Saving results...


In [47]:
# Show the first five entries of the results dict in insertion order.
print("\nSample Results (First 5 customers):")
for cust_id, matches in list(lookalike_results.items())[:5]:
    print(f"\nCustomer {cust_id}:")
    for similar_id, score in matches:
        print(f"Similar customer: {similar_id}, Similarity score: {score}")


Sample Results (First 5 customers):

Customer C0001:
Similar customer: C0120, Similarity score: 0.9383
Similar customer: C0192, Similarity score: 0.9031
Similar customer: C0181, Similarity score: 0.8957

Customer C0002:
Similar customer: C0159, Similarity score: 0.9376
Similar customer: C0178, Similarity score: 0.9308
Similar customer: C0106, Similarity score: 0.9306

Customer C0003:
Similar customer: C0152, Similarity score: 0.8922
Similar customer: C0031, Similarity score: 0.8809
Similar customer: C0085, Similarity score: 0.8499

Customer C0004:
Similar customer: C0113, Similarity score: 0.9197
Similar customer: C0104, Similarity score: 0.8905
Similar customer: C0165, Similarity score: 0.8471

Customer C0005:
Similar customer: C0007, Similarity score: 0.9643
Similar customer: C0140, Similarity score: 0.8651
Similar customer: C0186, Similarity score: 0.7886
