# Task 2: Lookalike Model

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score

In [5]:
# Load datasets: read the customers, products and transactions CSV files.
# NOTE(review): the paths are absolute under /content — presumably a hosted
# notebook upload directory; adjust them when running elsewhere.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/Customers.csv',
        '/content/Products.csv',
        '/content/Transactions.csv',
    )
)

In [8]:
# Convert the date columns to pandas datetimes, overwriting each column in place.
for frame, date_column in (
    (customers_df, 'SignupDate'),
    (transactions_df, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])

In [9]:
# Aggregate transaction data by customer: one row per CustomerID holding the
# summed TotalValue and summed Quantity over all of that customer's transactions.
customer_transactions = transactions_df.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

# Left join so that customers without any transactions are kept; their
# aggregated columns come out of the merge as NaN.
customer_profiles = pd.merge(customers_df, customer_transactions, on='CustomerID', how='left')

# Zero-fill ONLY the aggregated columns. A frame-wide fillna(0) would also
# write the integer 0 into any missing customer attribute (names, regions,
# signup dates), silently corrupting those columns.
spend_columns = ['TotalValue', 'Quantity']
customer_profiles[spend_columns] = customer_profiles[spend_columns].fillna(0)

In [10]:
# Normalize features: standardise the two spend columns with StandardScaler so
# that neither dominates the similarity computation purely because of its scale.
spend_features = customer_profiles[['TotalValue', 'Quantity']]
scaler = StandardScaler().fit(spend_features)
normalized_features = scaler.transform(spend_features)

In [11]:
# Compute similarity
# Pairwise cosine similarity between all rows of normalized_features.
# Row/column i of the result corresponds to row i of the feature matrix, which
# was built from customer_profiles, so positions in this matrix line up with
# positional (iloc) rows of customer_profiles as long as its order is unchanged.
similarity_matrix = cosine_similarity(normalized_features)

In [12]:
# Generate lookalikes for the first N_TARGET_CUSTOMERS customers.
# Result: {CustomerID: [(lookalike CustomerID, similarity score), ...]} with up
# to TOP_K entries per customer, ordered from most to least similar.
N_TARGET_CUSTOMERS = 20  # how many customers (from the top of customer_profiles) to process
TOP_K = 3                # how many lookalikes to keep per customer

lookalike_results = {}
for i, cust_id in enumerate(customer_profiles['CustomerID'][:N_TARGET_CUSTOMERS]):
    # Rank every customer by descending similarity to customer i. A stable sort
    # keeps tied scores in row order, so the output is reproducible.
    ranked_indices = np.argsort(-similarity_matrix[i], kind='stable')
    # Drop the customer itself explicitly instead of assuming it sorts first:
    # with only two features many rows share a cosine similarity of ~1.0, so a
    # tie (or rounding) can place another row ahead of the customer's own row.
    similar_indices = ranked_indices[ranked_indices != i][:TOP_K]
    similar_customers = customer_profiles.iloc[similar_indices]
    scores = similarity_matrix[i, similar_indices]
    lookalike_results[cust_id] = list(zip(similar_customers['CustomerID'], scores))

In [13]:
# Save to CSV
def _as_builtin(value):
    """Return the plain-Python equivalent of a NumPy scalar; pass other values through."""
    return value.item() if isinstance(value, np.generic) else value


# One row per target customer. The 'Lookalikes' cell is the string form of a
# list of (CustomerID, score) tuples. NumPy scalars are converted to builtins
# first because str() of a list uses each element's repr, which under
# NumPy >= 2 renders as "np.float64(0.98...)" and would pollute the CSV.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_results.keys()),
    'Lookalikes': [
        str([(_as_builtin(match_id), _as_builtin(score)) for match_id, score in matches])
        for matches in lookalike_results.values()
    ]
})
lookalike_df.to_csv('Harsh_kalburgi_Lookalike.csv', index=False)