In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Folder holding the three input CSV files. Keeping it in one constant means
# the notebook can be pointed at another location with a single edit.
DATA_DIR = "/content/drive/MyDrive"

# Raw input tables; the cells below read these module-level names directly.
customers = pd.read_csv(f"{DATA_DIR}/Customers.csv")
products = pd.read_csv(f"{DATA_DIR}/Products.csv")
transactions = pd.read_csv(f"{DATA_DIR}/Transactions.csv")

In [16]:
# Step 1: Data Merging and Feature Engineering
def _most_frequent(values):
    """Return the most common non-null entry of ``values``, or NaN if there is none."""
    counts = values.value_counts()
    # value_counts() ignores NaN by default, so a group made up only of
    # missing values gives an empty Series, on which idxmax() would raise.
    return counts.idxmax() if not counts.empty else np.nan


def prepare_data(transactions_df=None, customers_df=None, products_df=None):
    """Build one feature row per customer that appears in the transactions.

    Parameters
    ----------
    transactions_df, customers_df, products_df : pandas.DataFrame, optional
        Input tables. Any argument left as ``None`` falls back to the
        notebook-level ``transactions`` / ``customers`` / ``products`` frame,
        so the original zero-argument call keeps working.
        The code reads ``TransactionID``, ``CustomerID``, ``ProductID`` and
        ``TotalValue`` from the transactions, ``Region`` from the customers
        and ``Category`` from the products.

    Returns
    -------
    pandas.DataFrame
        Columns ``CustomerID``, ``TotalSpend`` (sum of ``TotalValue``),
        ``AvgSpend`` (mean of ``TotalValue``), ``PurchaseFrequency``
        (count of ``TransactionID``), followed by the dummy columns produced
        for ``TopCategory`` and ``Region`` (first level of each dropped).

    Notes
    -----
    The aggregation groups the rows of the transaction table, so a customer
    without any transaction does not get a row in the result.
    """
    tx = transactions if transactions_df is None else transactions_df
    cust = customers if customers_df is None else customers_df
    prod = products if products_df is None else products_df

    # Left joins keep every transaction even when the customer or product
    # lookup has no match; the unmatched attributes simply come back as NaN.
    combined = tx.merge(cust, on='CustomerID', how='left')
    combined = combined.merge(prod, on='ProductID', how='left')

    # Named aggregation yields flat, already-labelled columns.
    customer_features = combined.groupby('CustomerID').agg(
        TotalSpend=('TotalValue', 'sum'),
        AvgSpend=('TotalValue', 'mean'),
        PurchaseFrequency=('TransactionID', 'count'),
        TopCategory=('Category', _most_frequent),  # most purchased category
        Region=('Region', 'first'),  # region as a categorical feature
    ).reset_index()

    # One-hot encode the categorical features. A NaN TopCategory/Region is
    # encoded as all-zero dummies rather than raising.
    customer_features = pd.get_dummies(
        customer_features, columns=['TopCategory', 'Region'], drop_first=True
    )

    return customer_features

In [17]:
# Step 2: Calculate Similarity Scores
def calculate_similarity(features):
    """Return the customer-by-customer cosine similarity table.

    Every column of ``features`` other than ``CustomerID`` is standardised
    with ``StandardScaler`` and the scaled rows are compared pairwise with
    ``cosine_similarity``. The resulting square DataFrame uses the
    ``CustomerID`` column for both its row index and its column labels.
    """
    ids = features['CustomerID']
    raw_values = features.drop(columns=['CustomerID'])

    # Standardise each feature column before comparing customers.
    scaled_values = StandardScaler().fit_transform(raw_values)

    # Pairwise cosine similarity between the scaled customer rows.
    pairwise = cosine_similarity(scaled_values)

    return pd.DataFrame(pairwise, index=ids, columns=ids)

In [18]:
# Step 3: Recommend Lookalikes
def recommend_lookalikes(similarity_df, customer_ids, top_n=3):
    """Pick the ``top_n`` most similar customers for each requested customer.

    Parameters
    ----------
    similarity_df : pandas.DataFrame
        Square similarity table whose row index and column labels are
        customer ids.
    customer_ids : iterable
        Customer ids to produce recommendations for.
    top_n : int, default 3
        Number of lookalikes to keep per customer.

    Returns
    -------
    dict
        Maps each requested id to a list of ``{"cust_id": ..., "score": ...}``
        dicts ordered from most to least similar, scores rounded to 4
        decimals. An id that has no row in ``similarity_df`` maps to an empty
        list instead of raising ``KeyError``.
    """
    recommendations = {}
    for cust_id in customer_ids:
        # A requested customer may be missing from the matrix (for example
        # when no features could be built for them); report no lookalikes.
        if cust_id not in similarity_df.index:
            recommendations[cust_id] = []
            continue

        scores = similarity_df.loc[cust_id]
        # Exclude the customer itself (similarity score = 1).
        scores = scores[scores.index != cust_id]
        # A stable sort keeps tied scores in their original column order, so
        # the output is reproducible from run to run.
        top_similar = scores.sort_values(ascending=False, kind="mergesort").head(top_n)

        # float() turns the NumPy scalar into a plain Python float so that the
        # value prints as a bare number when the list is later stringified.
        recommendations[cust_id] = [
            {"cust_id": lookalike_id, "score": round(float(score), 4)}
            for lookalike_id, score in top_similar.items()
        ]

    return recommendations

In [19]:
# Step 4: Export Lookalike Map to CSV
def save_lookalikes(recommendations, output_file="Lookalike.csv"):
    """Write the lookalike map to ``output_file`` as a two-column CSV.

    Parameters
    ----------
    recommendations : dict
        Maps a customer id to its list of lookalike entries.
    output_file : str, default "Lookalike.csv"
        Destination path of the CSV file.

    The file has one row per customer with the columns ``cust_id`` and
    ``lookalikes``; the list in ``lookalikes`` is stored as its string form.
    The header row is written even when ``recommendations`` is empty.
    """
    rows = [
        {"cust_id": cust_id, "lookalikes": lookalikes}
        for cust_id, lookalikes in recommendations.items()
    ]

    # Naming the columns explicitly fixes their order and keeps the header
    # present when there are no rows at all.
    lookalike_df = pd.DataFrame(rows, columns=["cust_id", "lookalikes"])
    lookalike_df.to_csv(output_file, index=False)

In [20]:
# Main Function
def main(n_customers=20, output_file="Lookalike.csv"):
    """Run the lookalike pipeline end to end and write the result to disk.

    Parameters
    ----------
    n_customers : int, default 20
        How many rows, taken from the top of the ``customers`` table, get
        lookalike recommendations.
    output_file : str, default "Lookalike.csv"
        Path of the CSV file to write.
    """
    # Prepare the data
    customer_features = prepare_data()

    # Calculate similarity scores
    similarity_df = calculate_similarity(customer_features)

    # Recommend lookalikes for the first n_customers rows of the customer
    # table (presumably C0001 - C0020 with the defaults; this relies on the
    # row order of the input file).
    customer_ids = customers['CustomerID'].head(n_customers).tolist()
    recommendations = recommend_lookalikes(similarity_df, customer_ids)

    # Save recommendations to CSV
    save_lookalikes(recommendations, output_file)

    print(f"Lookalike recommendations saved to {output_file}.")

if __name__ == "__main__":
    main()

Lookalike recommendations saved to Lookalike.csv.
