In [5]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three source tables from the working directory.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Attach customer and product attributes to every transaction. Both merges use
# pandas' default join, keyed on CustomerID and then ProductID.
data = transactions.merge(customers, on="CustomerID").merge(products, on="ProductID")


def _most_common(values):
    """Return the value that occurs most often in ``values``."""
    return values.value_counts().idxmax()


# Collapse the transaction-level rows into one feature row per customer:
#   total_spent            sum of TotalValue
#   avg_transaction_value  mean of TotalValue
#   total_transactions     number of TransactionID entries
#   preferred_category     most frequently occurring Category
per_customer = data.groupby("CustomerID")
customer_features = per_customer.agg(
    **{
        "total_spent": pd.NamedAgg(column="TotalValue", aggfunc="sum"),
        "avg_transaction_value": pd.NamedAgg(column="TotalValue", aggfunc="mean"),
        "total_transactions": pd.NamedAgg(column="TransactionID", aggfunc="count"),
        "preferred_category": pd.NamedAgg(column="Category", aggfunc=_most_common),
    }
).reset_index()



In [6]:
# One-hot encode the categorical feature (preferred_category).
# The encoded frame is written back to `customer_features`, which removes the
# original column; the guard keeps this cell idempotent so that re-running it
# does not fail with a KeyError on the already-encoded frame.
if "preferred_category" in customer_features.columns:
    customer_features = pd.get_dummies(customer_features, columns=["preferred_category"])

# Standardise every feature column (everything except the CustomerID key) so
# that no single feature dominates the similarity purely because of its scale.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(customer_features.drop(columns="CustomerID"))

# Pairwise cosine similarity between customers. Row/column i corresponds to
# row i of `customer_features`, since the rows are passed through in order.
similarity_matrix = cosine_similarity(features_scaled)


In [7]:
# Create lookalike recommendations for the first NUM_TARGET_CUSTOMERS customers,
# keeping the TOP_N most similar other customers for each.
NUM_TARGET_CUSTOMERS = 20
TOP_N = 3

# Row i of `similarity_matrix` lines up with row i of `customer_features`, so a
# positional index maps straight back to a customer ID. Indexing the ID column
# directly avoids building a mixed-dtype row Series for every lookup.
all_customer_ids = customer_features["CustomerID"].values
customer_ids = all_customer_ids[:NUM_TARGET_CUSTOMERS]
lookalike_results = {}

for idx, cust_id in enumerate(customer_ids):
    similarities = similarity_matrix[idx]

    # Rank all customers from most to least similar, then remove the query
    # customer by index. Skipping "position 0" is not safe: a customer with
    # identical features (or a floating-point tie) can sort ahead of the query
    # customer, which would return the customer as its own lookalike.
    ranked = similarities.argsort()[::-1]
    similar_indices = ranked[ranked != idx][:TOP_N]

    # Each entry is a (customer_id, similarity_score) pair, best match first.
    lookalike_results[cust_id] = [
        (all_customer_ids[i], similarities[i]) for i in similar_indices
    ]

In [8]:
# Save results to Lookalike.csv: one row per target customer, with a
# `lookalikes` column holding a list of {"cust_id", "score"} records.
#
# The list is stringified when written to CSV, which uses repr() on each
# element. Scores are cast to built-in floats so the file contains plain
# numbers; NumPy >= 2 would otherwise write them as `np.float64(...)`.
lookalike_df = pd.DataFrame(
    {
        # Materialise the keys as a list rather than passing a dict view.
        "cust_id": list(lookalike_results),
        "lookalikes": [
            [{"cust_id": match_id, "score": float(score)} for match_id, score in matches]
            for matches in lookalike_results.values()
        ],
    }
)
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike model results saved to Lookalike.csv")


Lookalike model results saved to Lookalike.csv
