In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# --- Load the raw tables ---------------------------------------------------
# All three CSVs are read relative to the current working directory; point
# these at the real file locations if they live elsewhere.
customers_df = pd.read_csv("Customers.csv")
products_df = pd.read_csv("Products.csv")
transactions_df = pd.read_csv("Transactions.csv")

# --- Customer tenure -------------------------------------------------------
# Parse the signup date, then count the whole days elapsed up to the moment
# this cell runs (so the value shifts from one run to the next).
customers_df["SignupDate"] = pd.to_datetime(customers_df["SignupDate"])
customers_df["CustomerTenure"] = (
    pd.to_datetime("today") - customers_df["SignupDate"]
).dt.days

# --- Per-entity feature tables ---------------------------------------------
# NOTE: the similarity computed at the end of this section is built from the
# transaction matrix only; these two tables do not feed into it.
customer_features = customers_df[["CustomerID", "Region", "CustomerTenure"]]
product_features = products_df[["ProductID", "Category"]]

# --- Customer x product interaction matrix ---------------------------------
# One row per CustomerID, one column per ProductID. Each cell aggregates the
# Quantity values of that (customer, product) pair with the mean (pandas'
# default aggregation, spelled out here); pairs with no transaction get 0.
# NOTE(review): mean ignores how often a product was bought -- confirm that
# this, rather than a sum, is the intended interaction strength.
customer_product_matrix = transactions_df.pivot_table(
    index="CustomerID",
    columns="ProductID",
    values="Quantity",
    aggfunc="mean",
    fill_value=0,
)

# --- Pairwise customer similarity ------------------------------------------
# Square array; entry [i, j] is the cosine similarity between the purchase
# vectors in rows i and j of customer_product_matrix.
customer_similarities = cosine_similarity(customer_product_matrix)

# Find the top lookalikes for the first customers listed in customers_df and
# write them to Lookalike.csv.
N_TARGET_CUSTOMERS = 20  # number of customers (from the top of customers_df) to score
N_LOOKALIKES = 3         # number of lookalikes reported per customer

# Output schema: CustomerID followed by (LookalikeK, LookalikeK_Score) pairs.
lookalike_columns = ['CustomerID']
for rank in range(1, N_LOOKALIKES + 1):
    lookalike_columns += [f'Lookalike{rank}', f'Lookalike{rank}_Score']

# Ensure consistent data types so IDs from customers_df can be looked up in
# the matrix index (row order, and therefore customer_similarities, is
# unaffected by the cast).
customers_df['CustomerID'] = customers_df['CustomerID'].astype(str)
customer_product_matrix.index = customer_product_matrix.index.astype(str)

# Initialize results
results = []

for customer_id in customers_df['CustomerID'][:N_TARGET_CUSTOMERS]:
    if customer_id not in customer_product_matrix.index:
        print(f"CustomerID {customer_id} not found in customer_product_matrix. Skipping...")
        continue  # Skip if not found

    customer_index = customer_product_matrix.index.get_loc(customer_id)
    similarity_scores = customer_similarities[customer_index]

    # Rank all customers by descending similarity. The stable sort breaks
    # ties by matrix order, which keeps the output reproducible.
    ranked_indices = (-similarity_scores).argsort(kind='stable')

    # Drop the customer's own row by position rather than assuming it sorts
    # first: another customer with an identical purchase profile ties at the
    # same score, and the customer would otherwise be reported as its own
    # lookalike while a genuine match is dropped.
    top_k_indices = ranked_indices[ranked_indices != customer_index][:N_LOOKALIKES]
    top_k_ids = customer_product_matrix.index[top_k_indices]
    top_k_scores = similarity_scores[top_k_indices]

    # Build the output row; slots with no candidate (fewer than
    # N_LOOKALIKES other customers in the matrix) are left empty instead of
    # raising IndexError.
    row = {'CustomerID': customer_id}
    for rank in range(N_LOOKALIKES):
        has_match = rank < len(top_k_indices)
        row[f'Lookalike{rank + 1}'] = top_k_ids[rank] if has_match else None
        row[f'Lookalike{rank + 1}_Score'] = top_k_scores[rank] if has_match else None
    results.append(row)

# Passing the column list keeps the header (and column order) even when no
# customer could be scored.
lookalike_df = pd.DataFrame(results, columns=lookalike_columns)

# Save results
lookalike_df.to_csv("Lookalike.csv", index=False)
print("Lookalike Model generated and saved to Lookalike.csv")

Lookalike Model generated and saved to Lookalike.csv
