In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import json

# Read the three input tables from the working directory.
customers = pd.read_csv("Customers.csv.csv")
products = pd.read_csv("Products.csv.csv")
transactions = pd.read_csv("Transactions.csv.csv")

# Step 1: Left-join product attributes onto the transactions so every
# transaction row is kept even when its ProductID has no match in `products`.
transactions = pd.merge(transactions, products, how="left", on="ProductID")
print("Merged transactions and products:")
print(transactions.head())

# Step 2: Build the customer x product table used for similarity.
# Each cell holds the summed Quantity a customer bought of a product;
# customer/product pairs with no purchases are filled with 0.
customer_product_matrix = pd.pivot_table(
    transactions,
    values="Quantity",
    index="CustomerID",
    columns="ProductID",
    aggfunc="sum",
    fill_value=0,
)
print("\nCustomer-product matrix:")
print(customer_product_matrix.head())

# Step 3: Pairwise cosine similarity between customer rows, wrapped in a
# DataFrame labelled by CustomerID on both axes so it can be looked up by ID.
similarity_matrix = cosine_similarity(customer_product_matrix)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)
print("\nSimilarity matrix:")
print(similarity_df.head())

# Step 4: Generate lookalike recommendations
def generate_lookalikes(top_n=3, n_customers=20, customer_ids=None, similarity=None):
    """
    Generate lookalike recommendations for a set of customers.

    Called with no arguments, this produces the top 3 lookalikes for the
    first 20 rows of the module-level ``customers`` table, using the
    module-level ``similarity_df`` matrix.

    Parameters:
    - top_n: number of most-similar customers to keep per customer.
    - n_customers: how many leading rows of ``customers`` to process when
      ``customer_ids`` is not given.
    - customer_ids: optional iterable of customer IDs to process instead of
      the leading rows of ``customers``.
    - similarity: optional square DataFrame of similarity scores whose index
      and columns are customer IDs; defaults to ``similarity_df``.

    Returns:
    - A dictionary mapping CustomerID to a list of
      ``{"cust_id": ..., "score": ...}`` entries, most similar first. A
      customer that does not appear in the similarity matrix (for example,
      one with no transactions) maps to an empty list.
    """
    if similarity is None:
        similarity = similarity_df
    if customer_ids is None:
        # Positional slice: takes the first rows regardless of how the
        # customers table happens to be indexed.
        customer_ids = customers["CustomerID"].iloc[:n_customers]

    lookalikes = {}

    for customer_id in customer_ids:
        # Customers absent from the matrix have no purchase vector to compare,
        # so report no lookalikes instead of failing with a KeyError.
        if customer_id not in similarity.index:
            lookalikes[customer_id] = []
            continue

        # Rank every other customer by similarity (the customer itself is
        # dropped because its self-similarity would always rank first).
        similar_customers = (
            similarity.loc[customer_id]
            .drop(index=customer_id)
            .sort_values(ascending=False)
            .head(top_n)
        )

        # Convert scores to built-in floats so the result is JSON-serialisable
        # whatever numeric dtype the similarity matrix uses.
        lookalikes[customer_id] = [
            {"cust_id": sim_customer_id, "score": round(float(score), 3)}
            for sim_customer_id, score in similar_customers.items()
        ]

    return lookalikes

# Generate lookalike recommendations
lookalikes = generate_lookalikes()
print("\nGenerated lookalike recommendations:")
print(json.dumps(lookalikes, indent=4))

# Step 5: Save lookalikes to a CSV file
# The output path is defined once so the confirmation message below always
# names the file that was actually written.
lookalike_output_path = "Senthuran_Subramani_Lookalike.csv"

# One row per customer; the list of lookalikes is stored as a JSON string so
# it fits in a single CSV cell.
lookalike_csv_data = {
    "CustomerID": list(lookalikes.keys()),
    "Lookalikes": [json.dumps(value) for value in lookalikes.values()],
}

lookalike_df = pd.DataFrame(lookalike_csv_data)
lookalike_df.to_csv(lookalike_output_path, index=False)

print(f"\nLookalike recommendations saved to '{lookalike_output_path}'.")


Merged transactions and products:
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3        T00272      C0087      P067  2024-03-26 22:55:37         2   
4        T00363      C0070      P067  2024-03-21 15:10:10         3   

   TotalValue  Price_x                      ProductName     Category  Price_y  
0      300.68   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68  
1      300.68   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68  
2      300.68   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68  
3      601.36   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68  
4      902.04   300.68  ComfortLiving Bluetooth Speaker  Electronics   300.68  

Customer-product matrix:
ProductID   P001  P002  P003  P00