In [1]:
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Load the three raw input tables from the working directory, in the same
# order as before: customers, then products, then transactions.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [3]:
# Enrich every transaction row with its customer attributes and then its
# product attributes. Both joins are left joins, so no transaction row is
# dropped even if its CustomerID / ProductID has no match.
merged_data = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)

In [4]:
def _most_common_category(categories):
    """Return the most frequent value in ``categories``, or None if there is none.

    ``Series.mode()`` ignores missing values by default, so a group whose
    values are all missing yields an empty result; indexing that result with
    ``[0]`` would raise and abort the whole aggregation. Returning None keeps
    the customer in the feature table with a missing category instead.
    When several values tie, the first entry returned by ``mode()`` is used.
    """
    modes = categories.mode()
    return modes.iloc[0] if not modes.empty else None


# Build one feature row per customer:
#   TotalSpending     - sum of TotalValue over the customer's rows
#   TotalTransactions - number of non-null ProductID entries for the customer
#   Category          - the customer's most frequent product category
customer_features = merged_data.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'ProductID': 'count',
    'Category': _most_common_category,
}).rename(columns={'TotalValue': 'TotalSpending', 'ProductID': 'TotalTransactions'})

In [5]:
# One-hot encode the Category column so every feature is numeric.
# NOTE: this replaces customer_features in place, so the cell cannot be re-run
# on its own once the Category column has been consumed.
customer_features = pd.get_dummies(
    data=customer_features,
    columns=["Category"],
)

In [6]:
# Rescale the two numeric features with a min-max scaler fitted on this data,
# so they live on the same scale as the one-hot category indicators.
numerical_cols = ['TotalSpending', 'TotalTransactions']
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_features[numerical_cols])
customer_features[numerical_cols] = scaled_values

In [7]:
# Pairwise cosine similarity between all customer feature rows, wrapped in a
# DataFrame labelled by CustomerID on both axes so scores can be looked up by id.
similarity_matrix = cosine_similarity(customer_features)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_features.index,
    columns=customer_features.index,
)

In [8]:
# Tunables for the recommendation step.
N_TARGET_CUSTOMERS = 20  # recommendations are produced for the first N rows of `customers`
TOP_K = 3                # number of lookalikes kept per customer

# Maps each target CustomerID to a list of (lookalike CustomerID, similarity score)
# pairs, ordered from most to least similar.
lookalike_map = {}
for customer_id in customers['CustomerID'].iloc[:N_TARGET_CUSTOMERS]:
    # Customers that have no row in the similarity matrix are skipped.
    if customer_id not in similarity_df.index:
        continue
    similar_customers = (
        similarity_df[customer_id]
        .sort_values(ascending=False)
        .drop(customer_id)  # a customer is never its own lookalike
        .head(TOP_K)
    )
    # Store scores as built-in floats rather than NumPy scalars: the map is later
    # serialised with str(), and NumPy >= 2 renders its scalars as
    # "np.float64(...)", which would leak into the exported file.
    lookalike_map[customer_id] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

In [9]:
# Flatten the lookalike map into a two-column table: one row per target
# customer, with that customer's list of (id, score) pairs rendered as text.
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalike_map),
    "Lookalikes": list(map(str, lookalike_map.values())),
})

# Persist the result without the positional index, then confirm on stdout.
lookalike_df.to_csv("Potnuru_Sathvik_Lookalike.csv", index=False)
print("Potnuru_Sathvik_Lookalike.csv has been created.")

Potnuru_Sathvik_Lookalike.csv has been created.
