In [5]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables. The paths are relative, so they resolve against
# the notebook's current working directory.
# NOTE(review): `products` is not referenced anywhere in the visible part of
# this cell - confirm whether a later cell needs it.
customers, transactions, products = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Downloads/Customers.csv',
        'Downloads/Transactions.csv',
        'Downloads/Products.csv',
    )
)

# Preprocessing Customers
# Tenure feature: number of days between each signup and the most recent
# signup present in the customers table.
customers['SignupDate'] = pd.to_datetime(customers['SignupDate'])
customers['SignupDays'] = (customers['SignupDate'].max() - customers['SignupDate']).dt.days
customer_features = customers[['CustomerID', 'Region', 'SignupDays']]

# The similarity table built below is labelled by CustomerID and is looked up
# by label, so a repeated ID would make those lookups ambiguous. Fail early
# with a clear message instead.
assert customer_features['CustomerID'].is_unique, "CustomerID values must be unique"

# One-hot encode region for profile representation.
# Every region keeps its own indicator column (drop_first=False). Dropping a
# reference level is a regression-oriented encoding; in a similarity
# computation it would leave the dropped region without a column of its own,
# so that one region would be weighted differently from all the others.
customer_features = pd.get_dummies(customer_features, columns=['Region'], drop_first=False)

# Aggregate Transaction Data: total spend and total quantity per customer.
transactions_agg = transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

# Combine Profile and Transaction Data.
# The left join keeps every customer; fillna(0) is applied to the whole frame,
# so customers without transactions get zero totals (any other missing value
# in the frame is also replaced by 0).
customer_data = pd.merge(customer_features, transactions_agg, on='CustomerID', how='left').fillna(0)

# Normalize Data: standardize every feature column. The ID column is removed
# by name rather than by position so the selection does not depend on column
# order.
feature_frame = customer_data.drop(columns=['CustomerID'])
scaler = StandardScaler()
customer_data_scaled = pd.DataFrame(
    scaler.fit_transform(feature_frame),
    columns=feature_frame.columns,
    index=customer_data['CustomerID']
)

# Compute Cosine Similarity between every pair of customers; rows and columns
# of the resulting table are both labelled by CustomerID.
similarity_matrix = cosine_similarity(customer_data_scaled)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_data['CustomerID'], columns=customer_data['CustomerID'])

# Build the lookalike map: for each of the first 20 customers (in the order
# they appear in the customers table), keep the 3 other customers with the
# highest cosine similarity, as (CustomerID, score rounded to 4 decimals).
lookalike_map = {}
for customer_id in customers['CustomerID'].iloc[:20]:
    # Column of similarities for this customer, with the self-match removed
    # so a customer is never recommended as their own lookalike.
    ranked = similarity_df[customer_id].drop(customer_id).sort_values(ascending=False)
    top_matches = ranked.iloc[:3]
    lookalike_map[customer_id] = [
        (other_id, round(sim, 4)) for other_id, sim in top_matches.items()
    ]

# Flatten the lookalike map into one record per customer: the customer's ID
# and the list of (lookalike ID, score) pairs.
lookalike_list = []
for key, value in lookalike_map.items():
    lookalike_list.append({'CustomerID': key, 'Lookalikes': value})
lookalike_df = pd.DataFrame(lookalike_list)

# Write the result to the user's Downloads folder, without the row index.
lookalike_df.to_csv('~/Downloads/FirstName_LastName_Lookalike.csv', index=False)

print("Lookalike model completed. File saved as 'FirstName_LastName_Lookalike.csv' in Downloads.")


Lookalike model completed. File saved as 'FirstName_LastName_Lookalike.csv' in Downloads.
