In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the two input tables from the current working directory
customers = pd.read_csv("Customers.csv")
transactions = pd.read_csv("Transactions.csv")

# Attach the customer attributes to every transaction row.
# The join is an inner join on CustomerID, so rows whose CustomerID does not
# appear in both tables are not carried into merged_data.
merged_data = transactions.merge(
    customers,
    how="inner",
    on="CustomerID",
)

# Feature Engineering
# Collapse the transaction-level rows into one row per customer and then
# expand the region label into indicator columns, all in a single chain.
customer_summary = (
    merged_data
    .groupby("CustomerID")
    .agg(
        # Spend totals and averages are taken over the TotalValue column
        total_spent=pd.NamedAgg(column="TotalValue", aggfunc="sum"),
        avg_spent=pd.NamedAgg(column="TotalValue", aggfunc="mean"),
        # Number of TransactionID entries recorded for the customer
        total_transactions=pd.NamedAgg(column="TransactionID", aggfunc="count"),
        # First Region value seen in the group; presumably the region is the
        # same on every row of a given customer -- TODO confirm
        region=pd.NamedAgg(column="Region", aggfunc="first"),
    )
    .reset_index()
    # One-hot encode region; drop_first=True omits the indicator column of
    # the first category.
    # NOTE(review): with drop_first the omitted region is encoded differently
    # from the others, which may skew similarity scores -- confirm intended.
    .pipe(pd.get_dummies, columns=["region"], drop_first=True)
)

# Normalize features
# Every column other than the CustomerID identifier is used as a model
# feature and standardized with StandardScaler before comparison.
features = customer_summary.drop(columns=["CustomerID"])
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Pairwise cosine similarity between all customers: entry [i, j] compares the
# scaled feature rows of customer i and customer j (row order follows
# customer_summary).
similarity_matrix = cosine_similarity(scaled_features)

# Generate recommendations
# For every customer, keep the TOP_N most similar *other* customers together
# with their cosine-similarity scores.
TOP_N = 3

# Row position -> CustomerID, looked up once instead of once per iteration.
customer_ids = customer_summary["CustomerID"].to_numpy()

lookalike_map = {}
for i, similarity_row in enumerate(similarity_matrix):
    # Rank all customers from most to least similar to customer i.
    ranked_indices = similarity_row.argsort()[::-1]

    # Remove customer i by index rather than assuming it is ranked first.
    # A customer with identical (or collinear) features ties with the
    # self-similarity, and argsort gives no guarantee on the order of ties,
    # so slicing by position could drop a real match and list the customer
    # as its own lookalike. The same applies to a row whose self-similarity
    # is not the maximum (e.g. an all-zero scaled feature row).
    similar_indices = ranked_indices[ranked_indices != i][:TOP_N]
    similar_customers = customer_ids[similar_indices]
    scores = similarity_row[similar_indices]

    # Map the customer ID to its (lookalike ID, score) pairs, best match first
    lookalike_map[customer_ids[i]] = list(zip(similar_customers, scores))

# Flatten the {customer: [(lookalike, score), ...]} mapping into one row per
# (customer, lookalike) pair so it can be written out as a table.
lookalike_rows = []
for cust_id, similar_list in lookalike_map.items():
    for similar_cust, score in similar_list:
        lookalike_rows.append((cust_id, similar_cust, score))

lookalike_df = pd.DataFrame(
    lookalike_rows,
    columns=["CustomerID", "LookalikeID", "SimilarityScore"],
)

# Keep only the rows belonging to the first 20 customers of customer_summary.
# These are expected to be C0001 - C0020; that holds only if each of those
# IDs is present in customer_summary -- TODO confirm against the data.
first_20_ids = customer_summary["CustomerID"].iloc[:20]
keep_mask = lookalike_df["CustomerID"].isin(first_20_ids)
lookalike_df_filtered = lookalike_df[keep_mask]

# Write the filtered table to disk without the DataFrame index column
lookalike_df_filtered.to_csv("Lookalike.csv", index=False)

print("Lookalike model completed and results saved to Lookalike.csv.")


Lookalike model completed and results saved to Lookalike.csv.
