In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load datasets
# The three source tables are read from CSV files addressed by relative path,
# so they must be present in the notebook's working directory.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


In [2]:

# Data Preparation
# Attach the customer record and the product record to every transaction.
# Both merges use pandas' default inner join, so a transaction whose CustomerID
# or ProductID has no match in the lookup table is dropped.
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

# Build one row per customer holding two features:
#   TotalValue      - sum of TotalValue over the customer's transactions
#   NumTransactions - count of the customer's TransactionID values
customer_profile = (
    merged_data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        NumTransactions=('TransactionID', 'count'),
    )
    .reset_index()
)

# Standardize both features (zero mean, unit variance) so that neither one
# dominates the similarity computation purely because of its scale.
scaler = StandardScaler()
scaled_profiles = scaler.fit_transform(
    customer_profile[['TotalValue', 'NumTransactions']]
)

# Pairwise cosine similarity between every pair of customers (square matrix).
similarity_matrix = cosine_similarity(scaled_profiles)

# Label rows and columns with CustomerID so scores can be looked up by ID.
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_profile['CustomerID'],
    columns=customer_profile['CustomerID'],
)

# Function to get top N lookalikes for a given customer ID
def get_top_lookalikes(customer_id, n=3, similarity=None):
    """Return the ``n`` customers most similar to ``customer_id``.

    The customer itself is removed from the candidates *before* ranking.
    Ranking first and then discarding the top entry is not safe: when another
    customer ties with the self-similarity score (or edges past it through
    floating-point error), the customer can end up listed as its own
    lookalike while a genuine match is dropped.

    Parameters
    ----------
    customer_id : hashable
        Label of the customer to find lookalikes for.
    n : int, default 3
        Maximum number of lookalikes to return.
    similarity : pandas.DataFrame, optional
        Square similarity table whose index and columns are customer IDs.
        Defaults to the module-level ``similarity_df``.

    Returns
    -------
    tuple
        ``(ids, scores)`` ordered from most to least similar: a pandas Index
        of customer IDs and a NumPy array of the matching scores. If
        ``customer_id`` is not present in the table, ``([], [])`` is returned.
    """
    if similarity is None:
        similarity = similarity_df

    if customer_id not in similarity.index or customer_id not in similarity.columns:
        return [], []

    # Exclude the customer's own entry by label, not by assumed rank position.
    candidates = similarity[customer_id].drop(labels=customer_id)
    top_matches = candidates.nlargest(n)
    return top_matches.index, top_matches.values

# Generate lookalikes for the first 20 rows of customer_profile.
# NOTE(review): these are presumably C0001 - C0020, but that only holds if each
# of those customers has at least one transaction - confirm against the data.
# Each row stores the matches as "ID(score); ID(score); ..." with 4-decimal scores.
lookalikes_data = [
    {
        'CustomerID': cust_id,
        'Lookalikes': '; '.join(
            f"{look}({score:.4f})"
            for look, score in zip(*get_top_lookalikes(cust_id))
        ),
    }
    for cust_id in customer_profile['CustomerID'].head(20)
]

# Create DataFrame and save to CSV (no index column is written)
lookalike_df = pd.DataFrame(lookalikes_data)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model generated successfully and saved to Lookalike.csv.")

Lookalike model generated successfully and saved to Lookalike.csv.
