In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# Read the three input tables: customers, transactions and the product catalogue
customers_df = pd.read_csv('Customers.csv')
transactions_df = pd.read_csv('Transactions.csv')
products_df = pd.read_csv('Products.csv')

# Step 1: Attach product columns to every transaction.
# A left join keeps every transaction row even when its ProductID has no
# match in the product table.
merged_data = transactions_df.merge(products_df, on="ProductID", how="left")

# Step 2: Sum quantity and spend for each (CustomerID, ProductID) pair
customer_product_data = (
    merged_data
    .groupby(['CustomerID', 'ProductID'])
    .agg(
        quantity=('Quantity', 'sum'),
        total_spent=('TotalValue', 'sum'),
    )
    .reset_index()
)

# Step 3: Build the customer x product spend matrix.
# Rows are customers, columns are products; pairs with no purchase are
# filled with 0. Step 2 already left one row per pair, so pivot_table's
# default aggregation does not change the values.
pivot_data = pd.pivot_table(
    customer_product_data,
    index='CustomerID',
    columns='ProductID',
    values='total_spent',
    fill_value=0,
)

# Step 4: Standardize each column of the matrix before comparing customers
scaler = StandardScaler()
scaled_data = scaler.fit_transform(pivot_data)

# Step 5: Pairwise cosine similarity between customer rows; row/column order
# follows pivot_data.index
similarity_matrix = cosine_similarity(scaled_data)

# Step 6: Create a function to find the most similar customers for a given customer
def get_top_3_similar_customers(customer_id, similarity_matrix, pivot_data, top_n=3):
    """Return the customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to look up. It must be present in
            ``pivot_data.index``.
        similarity_matrix: Indexable by row position; row ``i`` holds the
            similarity scores of the customer at position ``i`` of
            ``pivot_data.index`` against every customer, in index order.
        pivot_data: DataFrame whose index lists the customer IDs in the same
            order as the rows of ``similarity_matrix``.
        top_n: Maximum number of lookalikes to return. Defaults to 3, which
            reproduces the original behaviour.

    Returns:
        A list of at most ``top_n`` ``(customer_id, similarity_score)`` tuples
        sorted by score, highest first. The queried customer itself is
        excluded. Customers with equal scores keep their index order.

    Raises:
        KeyError: If ``customer_id`` is not in ``pivot_data.index``.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        # A negative slice bound would silently drop items from the end
        # instead of limiting the result, so reject it explicitly.
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    customer_ids = pivot_data.index
    customer_index = customer_ids.get_loc(customer_id)  # Row position of the customer
    similarity_scores = similarity_matrix[customer_index]

    # Pair every other customer with its score, skipping the customer itself
    candidates = [
        (customer_ids[position], score)
        for position, score in enumerate(similarity_scores)
        if position != customer_index
    ]

    # sorted() is stable, so ties stay in index order
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]

# Step 7: Generate the Lookalike Model for the first 20 customers
lookalike_map = {}

for customer_id in customers_df['CustomerID'][:20]:  # Limit to first 20 customers
    # pivot_data is built from transactions only, so a customer listed in
    # Customers.csv without any transaction has no row in it and the lookup
    # would raise KeyError. Record such customers with no lookalikes rather
    # than aborting the whole run.
    if customer_id in pivot_data.index:
        lookalike_map[customer_id] = get_top_3_similar_customers(customer_id, similarity_matrix, pivot_data)
    else:
        lookalike_map[customer_id] = []

# Step 8: Prepare the data for saving to CSV
# Fixed output layout: the customer followed by three (lookalike, score) pairs
columns = ['CustomerID', 'Lookalike_1', 'Similarity_1', 'Lookalike_2', 'Similarity_2', 'Lookalike_3', 'Similarity_3']

# Flatten the lookalike map into one row per customer
flattened_data = []

for customer_id, similar_customers in lookalike_map.items():
    row = [customer_id]
    for similar_customer, similarity_score in similar_customers:
        row.append(similar_customer)
        row.append(similarity_score)
    # Pad rows that have fewer than three lookalikes so every row matches
    # the column layout; the padded cells are written as empty CSV fields.
    row.extend([None] * (len(columns) - len(row)))
    flattened_data.append(row)

# Convert the flattened data into a DataFrame
lookalike_df = pd.DataFrame(flattened_data, columns=columns)

# Step 9: Save the lookalike model to a CSV file
# The path is held in one variable so the success message always names the
# file that was actually written.
output_path = 'FirstName_LastName_Lookalike.csv'
lookalike_df.to_csv(output_path, index=False)

print(f"Lookalike model has been created and saved to {output_path}.")

Lookalike model has been created and saved to Lookalike.csv.
