In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Read the three input tables from the current working directory.
customers = pd.read_csv("Customers.csv")
products = pd.read_csv("Products.csv")
transactions = pd.read_csv("Transactions.csv")

# Attach customer columns, then product columns, to every transaction row.
# Both joins use pandas' default inner merge, so a transaction whose
# CustomerID or ProductID has no match in the lookup table is dropped.
merged_data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)

# Build one feature row per customer from the merged transaction table:
#   TotalSpending     - sum of TotalValue
#   Quantity          - mean Quantity per transaction row
#   TotalTransactions - number of transaction rows (count of ProductID)
#   Category          - most frequent Category; mode() returns its results
#                       sorted, so a tie resolves to the first sorted value
# The Category label is then expanded into one-hot "Category_*" columns so
# that every feature is numeric.
customer_features = pd.get_dummies(
    merged_data.groupby("CustomerID").agg(
        TotalSpending=("TotalValue", "sum"),
        Quantity=("Quantity", "mean"),
        TotalTransactions=("ProductID", "count"),
        Category=("Category", lambda x: x.mode()[0]),
    ),
    columns=["Category"],
)

# Compute similarity matrix
# Cosine similarity depends on the direction of each feature vector, so a
# column with a much larger magnitude (TotalSpending versus the 0/1 category
# flags) would dominate the angle and push every pair towards a score of 1.
# Standardizing each column to zero mean and unit variance first lets all
# features contribute comparably. After centering, scores range over [-1, 1].
feature_matrix = customer_features.astype(float)  # one-hot columns may be bool
feature_std = feature_matrix.std(ddof=0).replace(0, 1)  # avoid 0/0 on constant columns
scaled_features = (feature_matrix - feature_matrix.mean()) / feature_std

# similarity_matrix[i][j] is the similarity between the i-th and j-th rows of
# customer_features (same order as customer_features.index).
similarity_matrix = cosine_similarity(scaled_features)

# Generate recommendations for the first 20 customers
# recommendations maps each customer ID to a list of up to three
# (lookalike customer ID, similarity score) pairs, best match first.
customer_ids = customer_features.index
recommendations = {}

for idx, customer_id in enumerate(customer_ids[:20]):
    row_scores = similarity_matrix[idx]
    # Rank all customers from most to least similar. The stable sort keeps
    # tied scores in index order, so reruns produce the same ranking.
    ranked = np.argsort(-row_scores, kind="stable")
    # Remove the customer's own row by position instead of assuming it sorts
    # first: with tied scores or floating-point rounding another customer can
    # rank ahead of it, which would list the customer as their own lookalike.
    similar_indices = ranked[ranked != idx][:3]  # Top 3 similar customers
    similar_customers = customer_ids[similar_indices]
    similarity_scores = row_scores[similar_indices]
    recommendations[customer_id] = list(zip(similar_customers, similarity_scores))

# Write the lookalike table to disk.
# Each customer's (lookalike, score) pairs are flattened into one wide row:
# [customer, lookalike 1, score 1, lookalike 2, score 2, lookalike 3, score 3].
output_data = [
    [customer_id] + [value for rec in recs for value in rec]
    for customer_id, recs in recommendations.items()
]

# The header assumes exactly three lookalikes per customer.
output_df = pd.DataFrame(
    data=output_data,
    columns=[
        "CustomerID",
        "Lookalike1",
        "Score1",
        "Lookalike2",
        "Score2",
        "Lookalike3",
        "Score3",
    ],
)
output_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)

print("Lookalike model recommendations saved!")


Lookalike model recommendations saved!
