In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')
# Load the three input tables from the current working directory.
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')


# Attach product and customer columns to every transaction row. Both merges
# use pandas' default inner join, so rows without a match are dropped.
transactions_with_products = transactions.merge(products, on='ProductID')
merged = transactions_with_products.merge(customers, on='CustomerID')


# Collapse the merged rows into one row of numeric features per customer.
# NOTE(review): 'Price' is summed per merged row without being weighted by
# 'Quantity' -- confirm this column really represents spend per transaction.
feature_aggregations = {
    'Price': 'sum',          # sum of Price over the customer's rows
    'Quantity': 'sum',       # total quantity purchased
    'ProductID': 'nunique',  # number of distinct products purchased
}
customer_features = (
    merged
    .groupby('CustomerID')
    .agg(feature_aggregations)
    .reset_index()  # turn the CustomerID index back into the first column
)


# Standardize every feature column (everything except the ID column) before
# comparing customers, so the features are on a comparable scale.
feature_columns = customer_features.drop(columns='CustomerID')
scaler = StandardScaler()
scaled_features = scaler.fit_transform(feature_columns)


# Pairwise cosine similarity between the scaled feature rows; rows and
# columns follow the row order of customer_features.
similarity_matrix = cosine_similarity(scaled_features)


# Maps each target customer ID to a list of up to three
# (other_customer_id, similarity_score) tuples, best match first.
lookalike_map = {}
customer_ids = customer_features['CustomerID'].tolist()

# Only the first 20 customers (in customer_features row order) are processed;
# their list positions line up with the rows of similarity_matrix.
for row_pos, target_id in enumerate(customer_ids[:20]):
    # Pair every customer ID with its similarity to the target, highest first.
    ranked = sorted(
        zip(customer_ids, similarity_matrix[row_pos]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # A customer must not be recommended as their own lookalike.
    candidates = [
        (other_id, score)
        for other_id, score in ranked
        if other_id != target_id
    ]

    # Keep the three highest-scoring remaining customers.
    lookalike_map[target_id] = candidates[:3]


# Render each customer's matches as one string: "(ID, score), (ID, score), ..."
# with scores rounded to two decimals.
formatted_lookalikes = []
for lookalikes in lookalike_map.values():
    rendered = [f"({cust}, {score:.2f})" for cust, score in lookalikes]
    formatted_lookalikes.append(', '.join(rendered))

# One output row per target customer, in the order they were added to the map.
lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_map),
    'Lookalikes': formatted_lookalikes,
})
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike.csv has been generated successfully.")