### Lookalike Model

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

### Load datasets


In [7]:
# Read the customer master data and the transaction log from the working directory.
customers, transactions = (
    pd.read_csv(csv_name) for csv_name in ('Customers.csv', 'Transactions.csv')
)


### Data Preparation (summarize transaction data for each customer)


In [10]:
# One row per CustomerID with two behavioural aggregates:
#   TotalSpend       - sum of TotalValue over the customer's transactions
#   TransactionCount - number of non-null TransactionID values for the customer
tx_summary = transactions.groupby("CustomerID").agg(
    TotalSpend=("TotalValue", "sum"),
    TransactionCount=("TransactionID", "count"),
)


### Merge customer data with transaction summary
  



In [13]:
# Left join keeps every customer, including those with no transactions at all.
customer_data = customers.merge(tx_summary, on="CustomerID", how="left")
# Customers without transactions get NaN aggregates from the join; treat them as
# zero spend / zero transactions. Only the aggregated columns are filled so that
# a missing value in any other customer column is not silently replaced by 0.
customer_data = customer_data.fillna({column: 0 for column in tx_summary.columns})

###  Feature Scaling

In [20]:
# Columns used to describe each customer's purchasing behaviour.
features = [ "TotalSpend", "TransactionCount"]
# Standardize the feature columns so neither dominates the similarity purely
# because of its scale; the scaler is fitted on the same rows it transforms.
scaler = StandardScaler()
scaler.fit(customer_data[features])
customer_data_scaled = scaler.transform(customer_data[features])



###  Compute Similarities

In [23]:
# Pairwise cosine similarity between all rows of the scaled feature matrix.
# Row i is read below as customer i's similarity to every customer, so both
# axes follow the row order of customer_data.
similarity_matrix = cosine_similarity(customer_data_scaled)

### Top Lookalikes


In [32]:
# Customer IDs as an array, in the same row order used for the similarity matrix.
customer_ids = customer_data.loc[:, "CustomerID"].values
# Will map each CustomerID to its list of (lookalike CustomerID, score) pairs.
lookalikes = dict()

In [36]:
# For every customer, record the three most similar *other* customers.
for i, cust_id in enumerate(customer_ids):
    # Exclude the customer's own entry by position instead of assuming it sorts
    # first: tied scores, floating-point self-similarity slightly below 1, or an
    # all-zero scaled row (self-similarity 0) can all put another customer ahead.
    scores = [(j, score) for j, score in enumerate(similarity_matrix[i]) if j != i]
    # Highest similarity first; sorted() is stable, so ties keep row order.
    sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)[:3]  # Top 3 similar customers
    # Map customer IDs to similarity scores. float() converts NumPy scalars to
    # plain Python floats so that str() later renders 0.97, not np.float64(0.97).
    lookalikes[cust_id] = [
        (customer_ids[j], round(float(score), 2)) for j, score in sorted_scores
    ]


### Results for the first 20 customers (C0001 - C0020)




In [39]:
# Keep the lookalikes of the first 20 customers in customer_data row order.
# Iterating the slice directly avoids re-scanning those 20 IDs once per customer.
# NOTE(review): this matches C0001 - C0020 only if Customers.csv is ordered by
# CustomerID - confirm.
lookalike_results = {
    cust_id: lookalikes[cust_id]
    for cust_id in customer_ids[:20]
}


### Convert to a DataFrame for saving


In [48]:
# One row per selected customer; the list of (CustomerID, score) pairs is stored
# as its string representation so it fits in a single CSV cell.
lookalike_df = pd.DataFrame(
    [(cust_id, str(matches)) for cust_id, matches in lookalike_results.items()],
    columns=["CustomerID", "Lookalikes"],
)
# Write without the positional index so the file has only the two named columns.
lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=False)
print("Lookalike recommendations saved to FirstName_LastName_Lookalike.csv.")


Lookalike recommendations saved to FirstName_LastName_Lookalike.csv.
