# **Lookalike Model**

In [1]:
import pandas as pd


In [2]:
# Read the three source tables (customers, products, transactions) from CSV
# files located relative to the current working directory.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [3]:
# Attach customer and product attributes to every transaction row.
# Both joins are inner joins (the pandas default, spelled out here), so a
# transaction whose CustomerID or ProductID has no match is dropped.
merged_data = (
    transactions
    .merge(customers, on="CustomerID", how="inner")
    .merge(products, on="ProductID", how="inner")
)


In [4]:
# Collapse the transaction-level table into one row per customer.
#   TotalTransactionValue - sum of TotalValue over the customer's transactions
#   TotalQuantity         - sum of Quantity
#   TransactionCount      - number of non-null TransactionID values
#   PreferredCategory     - most frequent Category; on a tie, the first value
#                           returned by Series.mode() is used
customer_features = (
    merged_data
    .groupby("CustomerID")
    .agg(
        TotalTransactionValue=("TotalValue", "sum"),
        TotalQuantity=("Quantity", "sum"),
        TransactionCount=("TransactionID", "count"),
        PreferredCategory=("Category", lambda categories: categories.mode()[0]),
    )
    .reset_index()  # bring CustomerID back as a regular column
)

In [5]:
# Sanity check: show the first five rows of the per-customer feature table.
print(customer_features.head(n=5))

  CustomerID  TotalTransactionValue  TotalQuantity  TransactionCount  \
0      C0001                3354.52             12                 5   
1      C0002                1862.74             10                 4   
2      C0003                2725.38             14                 4   
3      C0004                5354.88             23                 8   
4      C0005                2034.24              7                 3   

  PreferredCategory  
0       Electronics  
1          Clothing  
2        Home Decor  
3             Books  
4       Electronics  


# **Build the Lookalike Model**

In [6]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder, MinMaxScaler


In [7]:
# Replace the PreferredCategory strings with integer codes.
# The column is overwritten in place, so from here on customer_features holds
# the codes rather than the original category names; label_encoder keeps the
# fitted mapping.
label_encoder = LabelEncoder()
encoded_categories = label_encoder.fit_transform(customer_features["PreferredCategory"])
customer_features["PreferredCategory"] = encoded_categories


In [8]:
# Build the feature matrix used for the similarity computation.
#
# Only the three genuinely numeric totals are min-max scaled. PreferredCategory
# is a nominal label: its integer code carries no order or distance, so scaling
# it like a number would make categories with adjacent codes look more similar
# than categories with distant codes. It is one-hot encoded instead, giving
# every category its own 0/1 column.
NUMERIC_FEATURES = ["TotalTransactionValue", "TotalQuantity", "TransactionCount"]

scaler = MinMaxScaler()
numeric_scaled = pd.DataFrame(
    scaler.fit_transform(customer_features[NUMERIC_FEATURES]),
    columns=NUMERIC_FEATURES,
    index=customer_features.index,  # keep row alignment for the concat below
)
category_onehot = pd.get_dummies(
    customer_features["PreferredCategory"],
    prefix="PreferredCategory",
    dtype=float,
)

# One row per customer, in the same order as customer_features.
scaled_features = pd.concat([numeric_scaled, category_onehot], axis=1).to_numpy()


In [9]:
# Row/column i of the similarity matrix corresponds to customer_ids[i]:
# both are derived from customer_features in its current row order.
customer_ids = customer_features["CustomerID"].to_list()

# Pairwise cosine similarity between every pair of customer feature vectors.
similarity_matrix = cosine_similarity(scaled_features)


In [10]:
# Generate lookalikes for the first 20 customers.
#
# lookalike_results maps each target CustomerID to a list of
# (lookalike CustomerID, similarity score rounded to 2 decimals) tuples,
# ordered from most to least similar.
N_TARGET_CUSTOMERS = 20  # how many customers (in row order) get recommendations
N_LOOKALIKES = 3         # how many similar customers to keep per target

lookalike_results = {}
# min(...) guards against datasets with fewer customers than requested.
for customer_index in range(min(N_TARGET_CUSTOMERS, len(customer_ids))):
    # Drop the customer's own entry explicitly. Slicing off the first sorted
    # element is not reliable: the self-score is not guaranteed to rank first
    # (e.g. an all-zero feature vector has cosine similarity 0 with itself,
    # and an exact tie with an earlier customer sorts ahead of it).
    candidates = [
        (other_index, score)
        for other_index, score in enumerate(similarity_matrix[customer_index])
        if other_index != customer_index
    ]
    # Stable sort: equally similar customers keep their original row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # float(...) converts the NumPy scalar to a plain Python float so the
    # tuple renders as ('C0001', 0.99) when written out, independent of the
    # installed NumPy version's scalar repr.
    lookalike_results[customer_ids[customer_index]] = [
        (customer_ids[other_index], round(float(score), 2))
        for other_index, score in candidates[:N_LOOKALIKES]
    ]


In [11]:
# Persist the recommendations: one row per target customer (the dict keys
# become the index, written as the first CSV column) and one column per
# ranked lookalike entry.
lookalike_columns = ["Lookalike1", "Lookalike2", "Lookalike3"]
lookalike_df = pd.DataFrame.from_dict(
    lookalike_results,
    orient="index",
    columns=lookalike_columns,
)
lookalike_df.to_csv("FirstName_LastName_Lookalike.csv", index=True)

print("Lookalike recommendations saved to CSV.")


Lookalike recommendations saved to CSV.
