In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
# Load the three input tables from CSV.
customers = pd.read_csv('/content/Customers.csv')
products = pd.read_csv('/content/Products.csv')
transactions = pd.read_csv('/content/Transactions.csv')

# Attach customer and product columns to every transaction row. Both joins use
# the pandas default (inner), so a transaction whose CustomerID or ProductID
# has no match in the lookup tables is dropped.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
merged_data = pd.merge(transactions_with_customers, products, on='ProductID')

# Build one feature row per customer from the merged transaction table:
#   TotalTransactions   - count of non-null TransactionID values
#   AvgTransactionValue - mean of TotalValue
#   UniqueProducts      - number of distinct ProductID values
feature_columns = ['TotalTransactions', 'AvgTransactionValue', 'UniqueProducts']
per_customer = merged_data.groupby('CustomerID')
customer_features = per_customer.agg(
    TotalTransactions=('TransactionID', 'count'),
    AvgTransactionValue=('TotalValue', 'mean'),
    UniqueProducts=('ProductID', lambda product_ids: len(set(product_ids))),
).reset_index()

# Standardize each feature (zero mean, unit variance) so that no single
# feature dominates the similarity computation because of its scale.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(customer_features[feature_columns])

# Pairwise cosine similarity between the scaled feature vectors, labelled by
# CustomerID on both axes so a column can be looked up by customer.
similarity_matrix = cosine_similarity(features_scaled)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_features['CustomerID'], columns=customer_features['CustomerID'])

# For each of the first 20 customers, keep the 3 most similar *other*
# customers as a list of (CustomerID, similarity score) pairs.
lookalike_results = {}
for customer in customer_features['CustomerID'][:20]:
    # Remove the customer's own entry by label instead of assuming it is the
    # top-ranked one: with tied scores, floating-point rounding, or an
    # all-zero scaled vector (self-similarity 0) the self entry is not
    # guaranteed to come first, and slicing off position 0 would then discard
    # a real lookalike and could report the customer as its own match.
    candidate_scores = similarity_df[customer].drop(labels=customer)
    similar_customers = candidate_scores.nlargest(3)
    lookalike_results[customer] = list(zip(similar_customers.index, similar_customers.values))

# Flatten {customer: [(lookalike, score), ...]} into one record per
# (customer, lookalike) pair.
lookalike_data = [
    {'CustomerID': source_id, 'LookalikeID': match_id, 'SimilarityScore': match_score}
    for source_id, matches in lookalike_results.items()
    for match_id, match_score in matches
]

# Write the long-format table to disk without the positional index column.
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv('Lookalike.csv', index=False)