In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the three source tables from CSV files in the working directory
customers = pd.read_csv("Customers.csv")
transactions = pd.read_csv("Transactions.csv")
products = pd.read_csv("Products.csv")

# Attach customer attributes, then product attributes, to every transaction.
# Both joins use pandas' default inner join, so transactions without a matching
# customer or product row are dropped.
transactions_with_customers = pd.merge(transactions, customers, on="CustomerID")
data = pd.merge(transactions_with_customers, products, on="ProductID")

In [None]:

# Data preparation: build one feature row per customer

# Spend aggregates per customer: total value, mean value per transaction,
# and number of transactions
customer_features = data.groupby("CustomerID", as_index=False).agg(
    TotalSpent=("TotalValue", "sum"),
    AvgSpend=("TotalValue", "mean"),
    TransactionCount=("TransactionID", "count"),
)

# Product preferences: CustomerID x ProductName matrix of Quantity.
# aggfunc="mean" is the pivot_table default made explicit, so each cell is the
# average quantity per transaction; products never bought are filled with 0.
# NOTE(review): confirm that mean (rather than sum) is the intended preference signal.
product_pivot = pd.pivot_table(
    data,
    index="CustomerID",
    columns="ProductName",
    values="Quantity",
    aggfunc="mean",
    fill_value=0,
)

# Join spend aggregates with product preferences and key the result by CustomerID
combined_features = customer_features.merge(product_pivot, on="CustomerID").set_index("CustomerID")

# Standardize every column so that no single feature dominates the similarity
scaler = StandardScaler().fit(combined_features)
normalized_features = scaler.transform(combined_features)

# Pairwise cosine similarity between customers; rows and columns follow the
# row order of combined_features
similarity_matrix = cosine_similarity(normalized_features)

In [None]:

# Function to get the top lookalike customers (top 3 by default)
def get_top_3_similar(customers_list, similarity_matrix, customer_ids, top_n=3):
    """Return the most similar customers for each requested customer.

    Parameters
    ----------
    customers_list : iterable of int
        Positional indices (rows of ``similarity_matrix``) of the customers to
        look up. Negative indices count from the end, as in normal Python
        indexing.
    similarity_matrix : 2-D indexable
        ``similarity_matrix[i][j]`` is the similarity score between the
        customers at positions ``i`` and ``j``.
    customer_ids : sequence
        Customer identifier for each position of ``similarity_matrix``.
    top_n : int, default 3
        How many lookalikes to return per customer.

    Returns
    -------
    dict
        Maps each requested customer ID to a list of up to ``top_n``
        ``(customer_id, score)`` tuples ordered by descending score. The
        customer itself is never included in its own list.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    IndexError
        If an index in ``customers_list`` is out of range for ``customer_ids``.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    valid_positions = range(len(customer_ids))
    lookalike_dict = {}
    for idx in customers_list:
        # Normalize negative indices (and reject out-of-range ones) so that the
        # self-exclusion test below compares like with like; otherwise idx=-1
        # would return the customer as its own best match.
        position = valid_positions[idx]
        similarities = similarity_matrix[position]
        similar_customers = sorted(
            [
                (customer_ids[i], score)
                for i, score in enumerate(similarities)
                if i != position
            ],
            # Negated score gives descending order; the sort is stable, so ties
            # keep their original positional order.
            key=lambda pair: -pair[1],
        )[:top_n]
        lookalike_dict[customer_ids[position]] = similar_customers
    return lookalike_dict

# Get top 3 lookalikes for the first 20 customers (or for all of them when the
# dataset holds fewer than 20, instead of failing with an IndexError)
customer_ids = combined_features.index.tolist()
lookalikes = get_top_3_similar(
    range(min(20, len(customer_ids))), similarity_matrix, customer_ids
)

# Convert lookalikes to a CSV-compatible format: one row per customer, with the
# lookalikes serialized as the string form of a list of (customer_id, score) tuples.
# Scores are cast to built-in float before rounding: rounding a NumPy scalar
# keeps it a NumPy scalar, and NumPy >= 2 renders those as "np.float64(0.93)"
# inside str(list), which would end up verbatim in the CSV.
lookalike_list = [
    {
        "CustomerID": customer,
        "Lookalikes": str(
            [(similar_id, round(float(score), 2)) for similar_id, score in similar_customers]
        ),
    }
    for customer, similar_customers in lookalikes.items()
]


In [None]:
# Write the lookalike table to disk: one row per customer, no index column
lookalike_df = pd.DataFrame(data=lookalike_list)
lookalike_df.to_csv(path_or_buf="SaiRishisri_Vadluri_Lookalike.csv", index=False)

# Report where the results were written
print("Lookalike results saved to SaiRishisri_Vadluri_Lookalike.csv.")

Lookalike results saved to SaiRishisri_Vadluri_Lookalike.csv.
