In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the customer master table and the transaction log from the working directory.
customers = pd.read_csv("Customers.csv")
transactions = pd.read_csv("Transactions.csv")

# Collapse the transaction log to one row per customer: summed value and summed quantity.
customer_transactions = (
    transactions.groupby("CustomerID")
    .agg({"TotalValue": "sum", "Quantity": "sum"})
    .reset_index()
)

# Left merge keeps customers that have no transactions; their aggregates come back as NaN.
# Only those two aggregate columns are zero-filled: a frame-wide fillna(0) would also
# write a literal 0 into any other customer column that happens to be missing.
customer_data = customers.merge(
    customer_transactions, on="CustomerID", how="left"
).fillna({"TotalValue": 0, "Quantity": 0})

# One-hot encode Region, dropping the first category.
# NOTE(review): the dummy columns are not listed in numeric_cols, so they do not
# take part in the similarity computed below -- confirm whether that is intended.
customer_data = pd.get_dummies(customer_data, columns=["Region"], drop_first=True)

# Put the two transaction aggregates on a common scale before comparing customers.
numeric_cols = ["TotalValue", "Quantity"]
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_data[numeric_cols])
customer_data[numeric_cols] = scaled_features

# Pairwise cosine similarity between customers, labelled by CustomerID on both axes
# so a customer's scores can be looked up by ID.
similarity_matrix = cosine_similarity(customer_data[numeric_cols])
customer_ids = customer_data["CustomerID"]
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)

# Build one output row per customer for the first 20 customers:
# [CustomerID, Lookalike_1, Score_1, Lookalike_2, Score_2, Lookalike_3, Score_3].
lookalike_results = []
for cust_id in customers["CustomerID"].head(20):
    # Remove the customer's own entry by label before ranking. Taking the top 4 and
    # discarding the first position assumes self always ranks first, which fails when
    # another customer ties at (or rounds above) the self-similarity score -- the
    # customer would then be reported as its own lookalike.
    similar_customers = similarity_df[cust_id].drop(labels=cust_id).nlargest(3)

    # Flatten the (id, score) pairs into the row in rank order.
    row = [cust_id]
    for other_id, score in similar_customers.items():
        row.extend((other_id, score))
    lookalike_results.append(row)

columns = ["CustomerID", "Lookalike_1", "Score_1", "Lookalike_2", "Score_2", "Lookalike_3", "Score_3"]
lookalike_df = pd.DataFrame(lookalike_results, columns=columns)

# Persist the result without the positional index column.
lookalike_df.to_csv("Lookalike.csv", index=False)
