# Sanket Aasabe Lookalike Model

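A lookalike recommendation model: for each customer, it finds the most similar other customers based on their region, purchased product categories, total transaction value, and total quantity.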

In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

## Load and Merge Data

In [None]:
# Read the three raw tables from CSV files in the working directory.
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Start from the transactions and left-join customer attributes (on
# CustomerID) and then product attributes (on ProductID), so every
# transaction row is kept even when a lookup has no match.
merged_df = (
    transactions_df
    .merge(customers_df, how='left', on='CustomerID')
    .merge(products_df, how='left', on='ProductID')
)

## Feature Engineering

In [None]:
# Collapse the merged transactions into one row per customer: the first
# Region value, a label of the distinct categories bought, and the summed
# TotalValue and Quantity.
customer_summary = merged_df.groupby('CustomerID').agg({
    'Region': 'first',
    # Sort the distinct categories so the joined label does not depend on
    # the order in which the purchases appear; otherwise the same set of
    # categories ("A, B" vs "B, A") would be one-hot encoded as different
    # features. NaN categories (products with no match in the left merge)
    # are dropped first because str.join raises TypeError on non-strings.
    'Category': lambda x: ', '.join(sorted(x.dropna().unique())),
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

# One-hot encode the two categorical columns, then append the numeric
# totals unchanged. Rows stay aligned with customer_summary because both
# frames share the index produced by reset_index() above.
encoded_df = pd.get_dummies(customer_summary[['Region', 'Category']])
encoded_df['TotalValue'] = customer_summary['TotalValue']
encoded_df['Quantity'] = customer_summary['Quantity']

## Similarity Calculation

In [None]:
# Standardise every feature column (StandardScaler's default: remove the
# mean and scale to unit variance) before measuring similarity.
scaler = StandardScaler()
scaler.fit(encoded_df)
normalized_data = scaler.transform(encoded_df)

# Square matrix of pairwise cosine similarities between the customer rows;
# entry [i, j] compares row i with row j of encoded_df.
similarity_matrix = cosine_similarity(normalized_data)

def recommend_similar(customers, similarity_matrix, customer_summary, top_n=3):
    """Return the most similar other customers for each requested customer.

    Args:
        customers: Iterable of customer IDs to produce recommendations for.
            Every ID must appear in ``customer_summary['CustomerID']``.
        similarity_matrix: Square, row-indexable matrix in which row/column
            ``i`` corresponds to the ``i``-th row (by position) of
            ``customer_summary``.
        customer_summary: DataFrame with a ``'CustomerID'`` column whose row
            order matches ``similarity_matrix``.
        top_n: Maximum number of lookalikes to return per customer
            (defaults to 3).

    Returns:
        A dict mapping each requested customer ID to a list of
        ``(other_customer_id, score)`` tuples ordered from most to least
        similar. The customer itself is never included.

    Raises:
        KeyError: If a requested customer ID is not in ``customer_summary``.
    """
    customer_ids = customer_summary['CustomerID'].tolist()
    # Resolve each customer to its own row of the matrix by ID rather than by
    # its position in `customers`, so any subset or ordering of IDs works.
    position_of = {cid: pos for pos, cid in enumerate(customer_ids)}

    recommendations = {}
    for customer_id in customers:
        row_pos = position_of[customer_id]
        scores = similarity_matrix[row_pos]
        # Exclude the customer explicitly instead of dropping the top-ranked
        # entry: with tied scores (e.g. customers with identical features)
        # the customer is not guaranteed to sort first.
        candidates = [pos for pos in range(len(customer_ids)) if pos != row_pos]
        candidates.sort(key=lambda pos: scores[pos], reverse=True)
        recommendations[customer_id] = [
            (customer_ids[pos], scores[pos]) for pos in candidates[:top_n]
        ]
    return recommendations

# Take the first 20 customer IDs of the summary table and look up their
# closest matches.
top_customers = customer_summary['CustomerID'].head(20)
lookalike_results = recommend_similar(
    top_customers,
    similarity_matrix,
    customer_summary,
)
# Bare expression so the notebook cell displays the resulting dict.
lookalike_results