In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

In [8]:
# Read the three input tables from the working directory, in the same order
# as the names on the left-hand side.
customers, transactions, products = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Transactions.csv", "Products.csv")
)

In [9]:
# Attach the product columns to every transaction row. how="left" keeps
# transactions whose ProductID has no match in `products` (their product
# columns become NaN).
# Note: this cell rebinds `transactions`, so running it twice merges the
# product columns a second time; re-run the loading cell first.
transactions = pd.merge(transactions, products, how="left", on="ProductID")

In [10]:
def _most_frequent(values):
    """Return the most common non-null value of a Series, or None if there is none.

    `Series.mode()` ignores NaN and returns an empty Series when every value
    is missing, so indexing its first element unconditionally would raise.
    Ties are resolved by taking the first entry of the (sorted) mode result.
    """
    modes = values.mode()
    if modes.empty:
        return None
    return modes.iloc[0]


# One row per customer: spend totals/averages, quantity, product variety and
# the category the customer bought most often.
customer_transactions = (
    transactions.groupby("CustomerID")
    .agg(
        total_spent=("TotalValue", "sum"),
        avg_spent=("TotalValue", "mean"),
        total_quantity=("Quantity", "sum"),
        unique_products=("ProductID", "nunique"),
        # Category can be entirely NaN for a customer after the left merge
        # with products, hence the guarded helper instead of mode()[0].
        most_frequent_category=("Category", _most_frequent),
    )
    .reset_index()
)

In [11]:
# Start from the full customer list so customers without any transaction are
# kept; their aggregate columns are NaN after this left merge.
customer_profiles = pd.merge(
    customers,
    customer_transactions,
    how="left",
    on="CustomerID",
)

In [12]:
# Customers with no matching aggregates have NaN in these columns after the
# left merge: treat them as zero activity and give them an explicit "None"
# category. Reassigning (rather than inplace=True) keeps the step a plain
# expression that produces a new frame.
customer_profiles = customer_profiles.fillna(
    {
        "total_spent": 0,
        "avg_spent": 0,
        "total_quantity": 0,
        "unique_products": 0,
        "most_frequent_category": "None",
    }
)

In [13]:
# One-hot encode the categorical columns, keeping EVERY level.
# drop_first=True would encode the first Region / category as all zeros; under
# cosine similarity two customers sharing that baseline level would then get
# no credit for the match while every other level does, biasing the ranking.
# Dropping a level only matters for models sensitive to collinearity.
customer_profiles_encoded = pd.get_dummies(
    customer_profiles[["Region", "most_frequent_category"]], drop_first=False
)

In [14]:
# Feature matrix = numeric spending aggregates followed by the one-hot
# encoded categorical columns, aligned side by side on the row index.
numeric_columns = ["total_spent", "avg_spent", "total_quantity", "unique_products"]
numeric_features = customer_profiles[numeric_columns]
features = pd.concat(
    objs=[numeric_features, customer_profiles_encoded],
    axis="columns",
)

In [15]:
# Rescale every feature column with a min-max scaler so large-valued columns
# (e.g. total_spent) do not dominate the similarity computation.
scaler = MinMaxScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

In [16]:
# Pairwise cosine similarity between all rows of the scaled feature matrix;
# with Y=None the rows of X are compared against each other, so entry [i, j]
# is the similarity between row i and row j.
similarity_matrix = cosine_similarity(X=features_scaled, Y=None)

In [17]:
# Number of lookalikes kept per customer.
N_LOOKALIKES = 3

# Row i of similarity_matrix belongs to customer_ids[i]; a plain list avoids
# building a row Series for every single ID lookup.
customer_ids = customer_profiles["CustomerID"].tolist()

# Maps each CustomerID to its N_LOOKALIKES most similar customers as
# (CustomerID, similarity score) tuples, best match first.
lookalike_results = {}
for i, customer_id in enumerate(customer_ids):
    # Exclude the customer itself by position rather than assuming it sorts
    # first: with tied scores (e.g. identical feature vectors) the stable
    # sort can place another customer ahead of it, which would drop that
    # true match and list the customer as its own lookalike.
    candidates = [
        (idx, score) for idx, score in enumerate(similarity_matrix[i]) if idx != i
    ]
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    # Store plain Python floats so the scores serialise as bare numbers
    # instead of NumPy scalar reprs when the lists are written to CSV.
    lookalike_results[customer_id] = [
        (customer_ids[idx], float(score)) for idx, score in candidates[:N_LOOKALIKES]
    ]

In [18]:
# Only the first 20 customers (in profile order) are exported, each paired
# with its precomputed lookalike list.
first_customer_ids = customer_profiles["CustomerID"].head(20).tolist()
output_data = {
    "CustomerID": first_customer_ids,
    "Lookalikes": [lookalike_results[cid] for cid in first_customer_ids],
}

In [19]:
# Single source of truth for the output path, so the confirmation message
# always names the file that was actually written.
output_path = "Koushik_Akula_Lookalike.csv"

lookalike_df = pd.DataFrame(output_data)
lookalike_df.to_csv(output_path, index=False)
print(f"Lookalike model completed successfully and saved as {output_path}.")

Lookalike model completed successfully and saved as Lookalike.csv.
