In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

# Example customer data (you need to use the actual customer_features from your analysis)
customer_features = pd.DataFrame({
    'CustomerID': ['C0001', 'C0002', 'C0003', 'C0004'],
    'TotalSpent': [500, 300, 700, 200],
    'TransactionCount': [5, 3, 7, 2]
})

# Normalize the features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(customer_features[['TotalSpent', 'TransactionCount']])

# Compute cosine similarity
similarity_matrix = cosine_similarity(scaled_features)

# Generate lookalikes for the first 20 customers (adjust if fewer customers exist)
lookalikes = {}
for idx, customer_id in enumerate(customer_features["CustomerID"]):
    sim_scores = list(enumerate(similarity_matrix[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:4]  # Get top 3 similar customers
    lookalikes[customer_id] = [(customer_features["CustomerID"][i], round(score, 2)) for i, score in sim_scores]

print("Lookalike Recommendations:", lookalikes)


Lookalike Recommendations: {'C0001': [('C0001', np.float64(1.0)), ('C0003', np.float64(1.0)), ('C0004', np.float64(0.0))], 'C0002': [('C0002', np.float64(1.0)), ('C0003', np.float64(1.0)), ('C0004', np.float64(0.0))], 'C0003': [('C0001', np.float64(1.0)), ('C0003', np.float64(1.0)), ('C0004', np.float64(0.0))], 'C0004': [('C0002', np.float64(0.0)), ('C0003', np.float64(0.0)), ('C0004', np.float64(0.0))]}


In [3]:
import csv

# Save Lookalike results to a CSV file
with open("Lookalike.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["CustomerID", "SimilarCustomers"])
    for key, value in lookalikes.items():
        writer.writerow([key, value])
print("Lookalike.csv has been created.")


Lookalike.csv has been created.
