# In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Number of customers (taken from the top of the profile table) to report on,
# and how many lookalikes to list for each of them.
NUM_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3


def build_customer_profiles(data):
    """Build one numeric feature row per customer from merged transactions.

    Args:
        data: DataFrame with one row per transaction and at least the columns
            "CustomerID", "Region", "Category", "TotalValue" and "Quantity".

    Returns:
        DataFrame with one row per customer containing "CustomerID", the
        summed "TotalValue" and "Quantity", one "Category_<name>" column per
        product category holding the number of transactions in that category,
        and one-hot "Region_<name>" columns. All feature columns are numeric;
        none of them are scaled here.
    """
    profiles = data.groupby("CustomerID").agg({
        "Region": "first",    # Region is a customer attribute; any row will do
        "TotalValue": "sum",  # Total spending
        "Quantity": "sum",    # Total quantity purchased
    })

    # Count purchases per category. Joining the category names into one string
    # and one-hot encoding that string would create a column per distinct
    # purchase sequence (nearly one per customer) instead of one per category.
    category_counts = (
        pd.crosstab(data["CustomerID"], data["Category"])
        .reindex(profiles.index, fill_value=0)  # customers with no category
        .add_prefix("Category_")
        .astype(float)
    )
    profiles = profiles.join(category_counts).reset_index()

    # dtype=float keeps the whole feature matrix numeric (newer pandas
    # versions would otherwise emit boolean dummy columns).
    return pd.get_dummies(
        profiles, columns=["Region"], prefix="Region", dtype=float
    )


def top_lookalikes(similarity_df, customer_ids, top_n=3):
    """Pick the most similar other customers for each requested customer.

    Args:
        similarity_df: Square DataFrame of pairwise similarity scores whose
            index and columns are both customer IDs.
        customer_ids: Iterable of customer IDs to find lookalikes for.
        top_n: Number of lookalikes to return per customer.

    Returns:
        Dict mapping each requested customer ID to a list of
        ``(other_customer_id, score)`` tuples ordered from most to least
        similar. Scores are built-in floats rounded to 4 decimals so that
        they serialize as plain numbers.

    Raises:
        KeyError: If a requested ID is not present in ``similarity_df``.
    """
    lookalikes = {}
    for cust_id in customer_ids:
        # Drop the customer's own entry so it cannot be its own lookalike.
        scores = similarity_df[cust_id].drop(cust_id)
        best = scores.sort_values(ascending=False).head(top_n)
        lookalikes[cust_id] = [
            (other_id, round(float(score), 4))
            for other_id, score in best.items()
        ]
    return lookalikes


# The pipeline runs when this file is executed as a script or notebook cell.
# The statements sit directly under the guard (not inside a function) so the
# intermediate results remain available as module-level variables.
if __name__ == "__main__":
    # Load datasets
    customers = pd.read_csv("Customers.csv")
    products = pd.read_csv("Products.csv")
    transactions = pd.read_csv("Transactions.csv")

    # Merge datasets
    data = transactions.merge(customers, on="CustomerID").merge(
        products, on="ProductID"
    )

    # Aggregate and encode transaction data for each customer
    customer_profiles = build_customer_profiles(data)

    # Standardize numerical features (totals and per-category counts) so that
    # no single large-valued column dominates the cosine similarity.
    scaler = StandardScaler()
    numerical_cols = ["TotalValue", "Quantity"] + [
        col for col in customer_profiles.columns if col.startswith("Category_")
    ]
    customer_profiles[numerical_cols] = scaler.fit_transform(
        customer_profiles[numerical_cols]
    )

    # Compute similarity
    similarity_matrix = cosine_similarity(
        customer_profiles.drop("CustomerID", axis=1)
    )

    # Store results in a DataFrame
    similarity_df = pd.DataFrame(
        similarity_matrix,
        index=customer_profiles["CustomerID"],
        columns=customer_profiles["CustomerID"],
    )

    # Create a mapping of lookalikes for the first customers in the table
    lookalike_map = top_lookalikes(
        similarity_df,
        customer_profiles["CustomerID"][:NUM_TARGET_CUSTOMERS],
        top_n=TOP_N_LOOKALIKES,
    )

    # Convert to DataFrame for Lookalike.csv
    lookalike_df = pd.DataFrame({
        "CustomerID": list(lookalike_map),
        "Lookalikes": [str(pairs) for pairs in lookalike_map.values()],
    })

    # Save the results to Lookalike.csv
    lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)

    print("Lookalike model completed. Results saved to 'FirstName_LastName_Lookalike.csv'.")
