In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import KMeans

# Load datasets
customers = pd.read_csv(r"C:\Users\dhanu\Downloads\Customers.csv")
products = pd.read_csv(r"C:\Users\dhanu\Downloads\Products.csv")
transactions = pd.read_csv(r"C:\Users\dhanu\Downloads\Transactions.csv")

# Convert dates to datetime
customers['SignupDate'] = pd.to_datetime(customers['SignupDate'])
transactions['TransactionDate'] = pd.to_datetime(transactions['TransactionDate'])
# --- Task 2: Lookalike Model --- #

# Merge data for feature engineering
customer_transactions = transactions.groupby('CustomerID').agg({'TotalValue': 'sum', 'Quantity': 'sum'}).reset_index()
customer_data = pd.merge(customers, customer_transactions, on='CustomerID', how='left').fillna(0)

# Normalize data
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_data[['TotalValue', 'Quantity']])

# Compute cosine similarity
similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_data['CustomerID'], columns=customer_data['CustomerID'])

# Generate lookalike recommendations
def get_lookalikes(similarity_df, top_n=3):
    lookalike_dict = {}

    for customer in similarity_df.index[:20]:  # First 20 customers
        similar_customers = similarity_df.loc[customer].sort_values(ascending=False).iloc[1:top_n+1]
        lookalike_dict[customer] = list(similar_customers.index) + list(similar_customers.values)

    return lookalike_dict

lookalike_results = get_lookalikes(similarity_df)

# Save lookalikes to CSV
lookalike_df = pd.DataFrame.from_dict(lookalike_results, orient='index', columns=['Lookalike1', 'Score1', 'Lookalike2', 'Score2', 'Lookalike3', 'Score3'])
lookalike_df.to_csv('Lookalike.csv', index_label='CustomerID')
df = pd.read_csv('Lookalike.csv')
print(df.head()) 

  CustomerID Lookalike1 Score1 Lookalike2    Score2  Lookalike3    Score3
0      C0001      C0164  C0085      C0127  0.999925    0.999596  0.999569
1      C0002      C0157  C0094      C0029  0.999994    0.999828  0.999825
2      C0003      C0111  C0160      C0147  0.995616    0.989199  0.986027
3      C0004      C0162  C0165      C0175  1.000000    0.999964  0.998590
4      C0005      C0080  C0167      C0177  0.999981    0.999974  0.999929
