In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Lookalike model: summarise each customer's transactions, encode and scale
# the resulting profile, and rank customers against each other by cosine
# similarity.


def most_frequent(values):
    """Return the most frequent non-null value in *values*.

    Args:
        values: a pandas Series (here, the ProductIDs of one customer).

    Returns:
        The first mode of the Series, or ``None`` when the Series has no
        non-null values (``Series.mode()`` is empty in that case, so
        indexing it directly would raise).
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else None


def top_lookalikes(similarity, customer_id, top_n=3):
    """Return the *top_n* customers most similar to *customer_id*.

    Args:
        similarity: square DataFrame of pairwise similarity scores whose
            index and columns are both customer IDs.
        customer_id: the customer to find lookalikes for.
        top_n: how many lookalikes to return.

    Returns:
        A list of ``(other_customer_id, score)`` tuples in descending score
        order, with scores rounded to 4 decimals as built-in floats.

    Raises:
        KeyError: if *customer_id* is not a label of *similarity*.
    """
    # Remove the customer by label rather than by position: when another
    # customer has the same similarity score as the self-match, sort order
    # does not guarantee that the self-match comes first.
    scores = similarity[customer_id].drop(labels=customer_id)
    best = scores.nlargest(top_n)
    # float() keeps NumPy scalar reprs (e.g. "np.float64(0.9)") out of the
    # stringified output written to the CSV.
    return [(other_id, round(float(score), 4)) for other_id, score in best.items()]


if __name__ == "__main__":
    # Load data
    customers = pd.read_csv('Customers.csv')
    transactions = pd.read_csv('Transactions.csv')

    # Data Preparation
    # Per customer: total spend, average spend per transaction, and the most
    # frequently purchased product.
    transaction_summary = transactions.groupby('CustomerID').agg(
        TotalSpend=('TotalValue', 'sum'),
        AvgSpend=('TotalValue', 'mean'),
        FavoriteProduct=('ProductID', most_frequent),
    ).reset_index()

    # Merge with customer data; customers without transactions get zero
    # spend and an 'Unknown' favorite product.
    customer_profiles = pd.merge(customers, transaction_summary, on='CustomerID', how='left')
    customer_profiles = customer_profiles.fillna(
        {'TotalSpend': 0, 'AvgSpend': 0, 'FavoriteProduct': 'Unknown'}
    )

    # Encoding categorical variables
    # NOTE(review): integer category codes give these nominal columns an
    # arbitrary numeric order once scaled; consider one-hot encoding if the
    # rankings look off.
    customer_profiles['Region'] = customer_profiles['Region'].astype('category').cat.codes
    customer_profiles['FavoriteProduct'] = (
        customer_profiles['FavoriteProduct'].astype('category').cat.codes
    )

    # Feature selection
    features = customer_profiles[['Region', 'TotalSpend', 'AvgSpend', 'FavoriteProduct']]
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(features)

    # Similarity Calculation
    similarity_matrix = cosine_similarity(scaled_features)
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=customer_profiles['CustomerID'],
        columns=customer_profiles['CustomerID'],
    )

    # Lookalike Recommendation: top 3 matches for the first 20 customers in
    # file order.
    lookalike_results = {
        customer_id: top_lookalikes(similarity_df, customer_id)
        for customer_id in customer_profiles['CustomerID'][:20]
    }

    # Save results
    output_path = 'Panmoni_Hansda_Lookalike.csv'
    lookalike_df = pd.DataFrame({
        'cust_id': list(lookalike_results),
        'lookalikes': [str(val) for val in lookalike_results.values()],
    })
    lookalike_df.to_csv(output_path, index=False)

    print(f"Lookalike model executed successfully and results saved to {output_path}.")


Lookalike model executed successfully and results saved to Lookalike.csv.
