In [31]:
# Import necessary libraries
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import csv

In [33]:
# Load the raw customer and transaction tables.
# NOTE(review): these are absolute paths (they look like a Colab upload
# directory -- confirm); adjust them when running elsewhere.
CUSTOMERS_CSV = "/content/Customers.csv"
TRANSACTIONS_CSV = "/content/Transactions.csv"

customers = pd.read_csv(CUSTOMERS_CSV)
transactions = pd.read_csv(TRANSACTIONS_CSV)

In [34]:
# Attach every transaction to its customer record.
# This is an inner join on CustomerID, so customers without any transaction
# (and transactions whose CustomerID is unknown) are absent from the result.
customer_transactions = customers.merge(transactions, how='inner', on='CustomerID')

In [35]:
# Build one profile row per customer.
# The output columns keep their source names: 'TotalValue' holds the summed
# spend, 'TransactionID' holds the number of transactions, and 'ProductID'
# holds the product the customer bought most often.
def _most_frequent(values):
    """Return the first entry of ``values.mode()``, i.e. a most common value."""
    return values.mode()[0]


profile_aggregations = {
    'TotalValue': 'sum',
    'TransactionID': 'count',
    'ProductID': _most_frequent,
}
per_customer = customer_transactions.groupby('CustomerID')
customer_profile = per_customer.agg(profile_aggregations).reset_index()

In [36]:
# Standardize the two numeric profile features before computing similarity.
# Only total spend and transaction count are used; the 'ProductID' column of
# customer_profile is not part of the feature matrix.
similarity_features = customer_profile[['TotalValue', 'TransactionID']]
scaler = StandardScaler()
customer_profile_scaled = scaler.fit_transform(similarity_features)

In [37]:
# Pairwise cosine similarity between all rows of the scaled feature matrix;
# entry [i, j] compares the customers in rows i and j of customer_profile.
similarity_matrix = cosine_similarity(X=customer_profile_scaled)

In [38]:
# Get the top lookalikes for the leading customers in customer_profile.
NUM_TARGET_CUSTOMERS = 20  # how many leading profile rows get lookalikes
TOP_K = 3  # lookalikes kept per customer

customer_ids = customer_profile['CustomerID'].tolist()
lookalike_map = {}
# Cap at the number of available profiles so a small dataset does not raise.
for i in range(min(NUM_TARGET_CUSTOMERS, len(customer_ids))):
    similarities = similarity_matrix[i]
    # Rank every customer from most to least similar, then drop the customer's
    # own row by index. Slicing off "the single highest score" is not a safe
    # way to exclude self: other customers can tie at 1.0 (or edge past it
    # through floating-point rounding), and a customer whose scaled vector is
    # all zeros would not rank first against itself.
    ranked = [j for j in similarities.argsort()[::-1] if j != i]
    top_indices = ranked[:TOP_K]
    # Map CustomerID -> [(lookalike CustomerID, similarity score), ...],
    # ordered from most to least similar.
    lookalike_map[customer_ids[i]] = [
        (customer_ids[j], float(similarities[j])) for j in top_indices
    ]

In [39]:
# Flatten the lookalike map into one row per (customer, lookalike) pair and
# write it to disk under a fixed header.
output_rows = [
    [cust_id, lookalike_id, score]
    for cust_id, lookalikes in lookalike_map.items()
    for lookalike_id, score in lookalikes
]
with open('Lookalike.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['CustomerID', 'LookalikeID', 'SimilarityScore'])
    writer.writerows(output_rows)

print("Lookalike model results saved to 'Lookalike.csv'.")

Lookalike model results saved to 'Lookalike.csv'.
