# In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
import os

# Resolve the three input CSVs relative to the current working directory.
data_dir = os.getcwd()
customers_path = os.path.join(data_dir, 'Customers.csv')
products_path = os.path.join(data_dir, 'Products.csv')
transactions_path = os.path.join(data_dir, 'Transactions.csv')

# Explicit dtypes keep ID and label columns as pandas strings instead of
# letting read_csv infer them.
customers = pd.read_csv(
    customers_path,
    dtype={'CustomerID': 'string', 'Region': 'string', 'CustomerName': 'string'},
)
products = pd.read_csv(
    products_path,
    dtype={'ProductID': 'string', 'Category': 'string', 'ProductName': 'string', 'Price': 'float'},
)
transactions = pd.read_csv(
    transactions_path,
    dtype={'TransactionID': 'string', 'CustomerID': 'string', 'ProductID': 'string', 'Quantity': 'int', 'TotalValue': 'float'},
    parse_dates=['TransactionDate'],
)

# Inner joins: a transaction survives only if both its customer and its
# product are present in the lookup tables.
transactions_with_customers = transactions.merge(customers, on="CustomerID", how="inner")
merged_data = transactions_with_customers.merge(products, on="ProductID", how="inner")

# Collapse the merged transactions to one row per customer: total spend,
# total quantity, the first Region value seen for that customer, and the
# number of distinct products purchased.
aggregations = {
    'TotalValue': 'sum',
    'Quantity': 'sum',
    'Region': 'first',
    'ProductID': 'nunique',
}
feature_names = {
    'TotalValue': 'TotalSpending',
    'Quantity': 'TotalQuantity',
    'ProductID': 'UniqueProducts',
}
per_customer = merged_data.groupby('CustomerID').agg(aggregations)
customer_features = per_customer.rename(columns=feature_names).reset_index()

# One-hot encode Region.
# NOTE(review): drop_first=True leaves the first region encoded as all zeros,
# so that region contributes nothing to the similarity computed downstream --
# confirm this is intended.
customer_features = pd.get_dummies(customer_features, columns=['Region'], drop_first=True)

# Standardize the numeric aggregates so no single one dominates by scale.
scaler = StandardScaler()
numeric_features = ['TotalSpending', 'TotalQuantity', 'UniqueProducts']
scaled_values = scaler.fit_transform(customer_features[numeric_features])
customer_features[numeric_features] = scaled_values

# Pairwise cosine similarity over every feature column (the ID column is
# excluded), then labelled on both axes by CustomerID so scores can be
# looked up by customer.
feature_matrix = customer_features.drop(columns='CustomerID')
similarity_matrix = cosine_similarity(feature_matrix)

customer_ids = customer_features['CustomerID']
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

# Build {customer_id: [(lookalike_id, score), ...]} for the first
# NUM_TARGET_CUSTOMERS rows of customer_features, keeping the TOP_K most
# similar *other* customers for each.
NUM_TARGET_CUSTOMERS = 20
TOP_K = 3

top_lookalikes = {}
for customer in customer_features['CustomerID'][:NUM_TARGET_CUSTOMERS]:
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie with (or, through floating-point
    # rounding, edge past) the self-similarity score, and skipping position 0
    # would then discard a genuine match and may list the customer as its own
    # lookalike.
    candidates = similarity_df[customer].drop(labels=customer)
    # nlargest returns the TOP_K highest scores in descending order and
    # resolves ties by first occurrence, so the output is deterministic.
    similar_customers = candidates.nlargest(TOP_K)
    top_lookalikes[customer] = list(zip(similar_customers.index, similar_customers.values))

# Flatten the per-customer lookalike lists into one record per
# (customer, lookalike) pair, then write them out as a CSV without the index.
lookalike_results = [
    {'CustomerID': customer, 'SimilarCustomerID': similar_customer, 'SimilarityScore': score}
    for customer, lookalikes in top_lookalikes.items()
    for similar_customer, score in lookalikes
]

lookalike_df = pd.DataFrame(lookalike_results)
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model results saved to 'Lookalike.csv'")
