In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Load the three input tables from CSV files in the working directory.
# The generator is unpacked in order, so each name lines up with its file.
transactions, customers, products = (
    pd.read_csv(csv_name)
    for csv_name in ("Transactions.csv", "Customers.csv", "Products.csv")
)

In [None]:
# Enrich every transaction with its customer's attributes and then with its
# product's attributes. Both joins use pandas' default inner join, so a
# transaction without a matching customer or product row is dropped.
# `Price` exists in both the transactions and products tables, which is why
# the merged frame carries `Price_x` (transactions) and `Price_y` (products).
data = pd.merge(
    pd.merge(transactions, customers, on="CustomerID"),
    products,
    on="ProductID",
)
data.head()

Unnamed: 0,TransactionID,CustomerID,ProductID,TransactionDate,Quantity,TotalValue,Price_x,CustomerName,Region,SignupDate,ProductName,Category,Price_y
0,T00001,C0199,P067,2024-08-25 12:38:23,1,300.68,300.68,Andrea Jenkins,Europe,2022-12-03,ComfortLiving Bluetooth Speaker,Electronics,300.68
1,T00112,C0146,P067,2024-05-27 22:23:54,1,300.68,300.68,Brittany Harvey,Asia,2024-09-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
2,T00166,C0127,P067,2024-04-25 07:38:55,1,300.68,300.68,Kathryn Stevens,Europe,2024-04-04,ComfortLiving Bluetooth Speaker,Electronics,300.68
3,T00272,C0087,P067,2024-03-26 22:55:37,2,601.36,300.68,Travis Campbell,South America,2024-04-11,ComfortLiving Bluetooth Speaker,Electronics,300.68
4,T00363,C0070,P067,2024-03-21 15:10:10,3,902.04,300.68,Timothy Perez,Europe,2022-03-15,ComfortLiving Bluetooth Speaker,Electronics,300.68


In [None]:
# One row per customer: total spend and total units across all transactions.
# `as_index=False` keeps CustomerID as a regular column on a fresh 0..n-1 index.
customer_features = data.groupby('CustomerID', as_index=False)[['TotalValue', 'Quantity']].sum()

# Keep the IDs and the numeric feature matrix separately; both share the same
# row order, so position i in one corresponds to position i in the other.
features_only = customer_features.drop(columns='CustomerID')
customer_ids = customer_features['CustomerID']

In [None]:
# Pairwise cosine similarity between customer feature rows; entry [i][j] is
# read later as the similarity between the customers at positions i and j.
# Cosine similarity compares only the direction of each row vector, not its
# magnitude.
# NOTE(review): the features are raw sums (TotalValue, Quantity) with no
# scaling applied in this notebook - confirm that is intended.
similarity_matrix = cosine_similarity(X=features_only)

In [None]:
# Build the lookalike map: for each of the first N_TARGET_CUSTOMERS customers
# (in `customer_ids` order), the N_LOOKALIKES most similar *other* customers
# together with their similarity scores rounded to 4 decimals.
N_TARGET_CUSTOMERS = 20  # how many leading customers receive recommendations
N_LOOKALIKES = 3         # lookalikes kept per customer

lookalike_results = {}

# Clamp the range so a dataset with fewer customers than the target count
# does not raise IndexError.
for i in range(min(N_TARGET_CUSTOMERS, len(customer_ids))):
    # Exclude the customer by position instead of assuming their own entry
    # sorts first: another customer can tie at the top score, in which case
    # slicing off element 0 could drop a real match and keep the customer as
    # their own lookalike.
    candidates = [
        (j, score) for j, score in enumerate(similarity_matrix[i]) if j != i
    ]
    # sorted() is stable, so tied scores keep their original customer order.
    top_matches = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:N_LOOKALIKES]
    # Cast to a plain float so the stringified result stays "0.9876" rather
    # than "np.float64(0.9876)" under NumPy >= 2.
    lookalike_results[customer_ids.iloc[i]] = [
        (customer_ids.iloc[j], round(float(score), 4)) for j, score in top_matches
    ]

In [None]:
# Flatten the lookalike map into two parallel columns: the source customer
# and the string form of their (lookalike_id, score) list.
source_customer_ids = list(lookalike_results)
serialized_matches = list(map(str, lookalike_results.values()))

lookalike_df = pd.DataFrame(
    {"CustomerID": source_customer_ids, "TopLookalikes": serialized_matches}
)

# Persist without the positional index so the CSV holds only the two columns.
lookalike_df.to_csv("Lookalike.csv", index=False)

print("Lookalike model completed. Results saved to 'Lookalike.csv'.")


Lookalike model completed. Results saved to 'Lookalike.csv'.
