In [2]:
import pandas as pd

# Read the three source tables, in the order customers, products, transactions.
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Enrich each transaction row with the matching product's columns
# (joined on ProductID using pandas' default join type).
transactions_df = transactions_df.merge(products_df, on='ProductID')

# Enrich the result with the matching customer's columns, joined on CustomerID.
customer_transactions_df = transactions_df.merge(customers_df, on='CustomerID')

from datetime import datetime

# Parse SignupDate into datetimes, then derive how many whole days have
# elapsed between each signup and the moment this code runs.
signup_dates = pd.to_datetime(customer_transactions_df['SignupDate'])
customer_transactions_df['SignupDate'] = signup_dates
customer_transactions_df['DaysSinceSignup'] = (datetime.now() - signup_dates).dt.days

# Collapse the transaction-level rows into one feature row per customer.
# Named aggregation yields the final flat column names directly, in the order:
# CustomerID, TotalSpending, AvgSpending, FavoriteCategory, DaysSinceSignup, Region.
customer_features = (
    customer_transactions_df
    .groupby('CustomerID')
    .agg(
        TotalSpending=('TotalValue', 'sum'),
        AvgSpending=('TotalValue', 'mean'),
        # Most frequent category; mode() may return several values, the first is kept.
        FavoriteCategory=('Category', lambda categories: categories.mode()[0]),
        DaysSinceSignup=('DaysSinceSignup', 'first'),
        Region=('Region', 'first'),
    )
    .reset_index()
)

# Replace the two categorical columns with one indicator column per distinct value.
customer_features = pd.get_dummies(customer_features, columns=['Region', 'FavoriteCategory'])

from sklearn.metrics.pairwise import cosine_similarity

# Normalize the features
from sklearn.preprocessing import StandardScaler

# Standardise every feature column; CustomerID is the key, not a feature,
# so it is left out of the matrix handed to the scaler.
feature_columns = customer_features.drop(columns='CustomerID')
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(feature_columns)

# Pairwise cosine similarity between customers; row i / column j follow the
# row order of customer_features.
similarity_matrix = cosine_similarity(customer_features_scaled)

# Build, for each of the first 20 customers (in customer_features row order),
# the three most similar *other* customers together with their cosine scores,
# then write the result to Lookalike.csv.

# Native Python values, positionally aligned with the rows of similarity_matrix.
# tolist() avoids NumPy scalar types leaking into the CSV text.
customer_ids = customer_features['CustomerID'].tolist()

# Dictionary mapping each customer ID to its list of (lookalike ID, score) pairs
lookalike_recommendations = {}

# The first 20 customers, by position
first_20_customers = customer_features['CustomerID'].iloc[:20]

for i, cust_id in enumerate(first_20_customers):
    # Similarity of the current customer against every customer (itself included)
    similarity_scores = similarity_matrix[i]

    # Rank from most to least similar. Negating and using a stable sort makes
    # the order deterministic: tied scores keep ascending row order.
    sorted_indices = (-similarity_scores).argsort(kind='stable')

    # Remove the customer's own row explicitly. Relying on it being ranked
    # first is unsafe: another customer with an identical feature vector (or
    # floating-point round-off) can tie at the top, which would otherwise list
    # the customer as its own lookalike and drop a genuine neighbour.
    sorted_indices = sorted_indices[sorted_indices != i]

    # Top 3 similar customers (fewer if there are not enough other customers)
    top_3_indices = sorted_indices[:3]

    # Convert to plain str/int IDs and plain floats so the stringified list in
    # the CSV reads "('C0002', 0.98)" rather than "np.float64(0.98)".
    top_3_customers = [customer_ids[j] for j in top_3_indices]
    top_3_scores = [float(similarity_scores[j]) for j in top_3_indices]

    # Store the recommendations
    lookalike_recommendations[cust_id] = list(zip(top_3_customers, top_3_scores))

# One row per customer: the ID and its list of (lookalike ID, score) tuples
lookalike_df = pd.DataFrame(list(lookalike_recommendations.items()), columns=['CustomerID', 'Lookalikes'])

# Save the DataFrame to a CSV file
lookalike_df.to_csv('Lookalike.csv', index=False)