<a href="https://colab.research.google.com/github/Dharanidharan2813/Zeotap_Assignement/blob/main/Dharanidharan_S_Lookalike.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

def find_lookalikes(similarity_df: pd.DataFrame, top_n: int = 3) -> dict:
    """Return the ``top_n`` most similar *other* customers for each customer.

    Args:
        similarity_df: Square similarity table whose index and columns carry
            the same customer identifiers.
        top_n: Maximum number of lookalikes to keep per customer.

    Returns:
        A dict mapping each customer id to ``{similar_id: score}``, ordered
        from most to least similar. A customer has fewer than ``top_n``
        entries when the table holds fewer than ``top_n`` other customers.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    lookalikes = {}
    for customer in similarity_df.columns:
        # Remove the customer's own entry by label. Skipping the first sorted
        # row is not reliable: another customer with an identical profile
        # ties at the top score and may sort ahead of the self entry.
        scores = similarity_df[customer].drop(labels=customer)
        # A stable sort keeps tied scores in table order, so repeated runs
        # produce the same result.
        ranked = scores.sort_values(ascending=False, kind="mergesort")
        lookalikes[customer] = ranked.head(top_n).to_dict()
    return lookalikes


# The pipeline below touches the filesystem, so it runs only when the file is
# executed (notebook cell or script), not when it is imported. The assignments
# stay at module level so later cells can still use these names.
if __name__ == "__main__":
    customers = pd.read_csv('/content/Customers.csv')
    transactions = pd.read_csv('/content/Transactions.csv')

    # Merge Data
    # pd.merge defaults to an inner join, so customers without any
    # transaction are absent from the profiles and from the output.
    merged_data = pd.merge(customers, transactions, on='CustomerID')

    # Create Customer Profiles
    # NOTE(review): 'Price' is summed as total spend. If Transactions.csv has
    # a per-line total (price x quantity), that column is probably what was
    # intended for TotalSpent -- confirm against the data schema.
    customer_profiles = merged_data.groupby('CustomerID').agg({
        'Price': ['sum', 'mean'],
        'ProductID': 'count'
    }).reset_index()
    # Flatten the two-level column index produced by the multi-function agg.
    customer_profiles.columns = ['CustomerID', 'TotalSpent', 'AvgSpent', 'TotalTransactions']

    # Normalize Data
    scaler = StandardScaler()
    features = scaler.fit_transform(customer_profiles[['TotalSpent', 'AvgSpent', 'TotalTransactions']])

    # Calculate Similarities
    similarity_matrix = cosine_similarity(features)
    similarity_df = pd.DataFrame(similarity_matrix, index=customer_profiles['CustomerID'], columns=customer_profiles['CustomerID'])

    # Find Top 3 Similar Customers
    lookalike_results = find_lookalikes(similarity_df, top_n=3)

    # Save Lookalike Results
    # Explicit columns keep the CSV header even when there are no rows.
    lookalike_df = pd.DataFrame(
        [
            {'cust_id': k, 'similar_cust_id': sim_id, 'score': score}
            for k, v in lookalike_results.items() for sim_id, score in v.items()
        ],
        columns=['cust_id', 'similar_cust_id', 'score'],
    )
    lookalike_df.to_csv('Lookalike.csv', index=False)