# **Building a lookalike model**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import davies_bouldin_score
from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import cosine


In [None]:
# Read the three raw input tables (customers, products, transactions) from
# CSV files in the current working directory.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

# **Feature engineering and computing similarity**

In [None]:
# Aggregate each customer's transaction history into two behavioural features:
# total spend (sum of TotalValue) and number of transactions.
customer_transactions = (
    transactions.groupby("CustomerID")
    .agg({"TotalValue": "sum", "TransactionID": "count"})
    .reset_index()
    .rename(columns={"TotalValue": "TotalSpend", "TransactionID": "TransactionCount"})
)

# Left-merge so customers without any transaction are kept. Only the two
# aggregate columns are zero-filled; a blanket fillna(0) would also write a
# numeric 0 into any missing value of the other customer columns.
feature_columns = ["TotalSpend", "TransactionCount"]
customer_data = customers.merge(customer_transactions, on="CustomerID", how="left")
customer_data[feature_columns] = customer_data[feature_columns].fillna(0)

# Standardise both features so neither dominates the similarity measure.
scaler = StandardScaler()
features = scaler.fit_transform(customer_data[feature_columns])

# Using Nearest Neighbors for similarity
# Querying the fitted data returns each point among its own neighbours, so one
# more than the 3 wanted lookalikes is requested. The count is capped at the
# number of customers because kneighbors raises when n_neighbors > n_samples.
nbrs = NearestNeighbors(n_neighbors=min(4, len(features)), metric="cosine").fit(features)
distances, indices = nbrs.kneighbors(features)


# **Generating Recommendations**

In [None]:
# Build the lookalike table: for every customer, up to TOP_N most similar
# *other* customers together with their similarity scores
# (score = 1 - cosine distance reported by the neighbour search).
TOP_N = 3
customer_ids = customer_data["CustomerID"].to_numpy()

lookalike_dict = {}

for i, customer_id in enumerate(customer_ids):
    # Remove the customer's own row by position instead of assuming it is
    # always the first neighbour: customers with identical (or collinear)
    # feature vectors tie at cosine distance 0, and tie order is not
    # guaranteed, so blindly skipping column 0 can list a customer as their
    # own lookalike while dropping a genuine neighbour.
    neighbours = [
        (j, dist) for j, dist in zip(indices[i], distances[i]) if j != i
    ][:TOP_N]

    similar_customers = [customer_ids[j] for j, _ in neighbours]
    scores = [1 - dist for _, dist in neighbours]

    # Pad with None when fewer than TOP_N neighbours are available so every
    # row has the same six columns.
    padding = [None] * (TOP_N - len(neighbours))
    lookalike_dict[customer_id] = similar_customers + padding + scores + padding


# One row per customer; the dict keys become the CustomerID column.
lookalike_df = (
    pd.DataFrame.from_dict(
        lookalike_dict,
        orient="index",
        columns=["Lookalike1", "Lookalike2", "Lookalike3", "Score1", "Score2", "Score3"],
    )
    .rename_axis("CustomerID")
    .reset_index()
)

# Saving CSV file
lookalike_df.to_csv("Darshika_Pundir_Lookalike.csv", index=False)

print("CSV file saved successfully!")


CSV file saved successfully!
