# Lookalike Model

# In [None]:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets: the customer master table and the transaction log.
customers = pd.read_csv('Customers.csv')
transactions = pd.read_csv('Transactions.csv')

# Aggregate transaction data to one row per customer: total spend
# (TotalValue) and total units (Quantity) summed over all transactions.
customer_transactions = transactions.groupby('CustomerID').agg({
    'TotalValue': 'sum',
    'Quantity': 'sum'
}).reset_index()

# Merge with customer profile. The left join keeps customers that have no
# transactions at all; their aggregated columns come back as NaN.
customer_profile = pd.merge(customers, customer_transactions, on='CustomerID', how='left')
# Zero-fill ONLY the aggregated numeric columns ("no transactions" == 0).
# A blanket fillna(0) would also write the integer 0 into text columns such
# as Region, which get_dummies would then treat as a real category.
customer_profile = customer_profile.fillna({'TotalValue': 0, 'Quantity': 0})
# One-hot encode Region; drop_first removes one level to avoid a redundant
# column. A row with a missing Region is encoded as all zeros.
customer_profile_encoded = pd.get_dummies(customer_profile[['Region']], drop_first=True)

# Assemble the feature table: the two aggregated spend columns followed by
# the one-hot Region columns, side by side.
spend_columns = customer_profile[['TotalValue', 'Quantity']]
features = pd.concat([spend_columns, customer_profile_encoded], axis='columns')

# Standardize every feature column (StandardScaler: zero mean, unit
# variance) so that no single column dominates the similarity.
scaler = StandardScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)

# Pairwise cosine similarity between all customers' scaled feature rows;
# entry [i][j] compares row i with row j.
similarity_matrix = cosine_similarity(features_scaled)

# Generate recommendations: for each of the first NUM_TARGET_CUSTOMERS rows
# of customer_profile, keep the TOP_K most similar *other* customers as
# (customer id, similarity score) pairs.
NUM_TARGET_CUSTOMERS = 20
TOP_K = 3

customer_ids = customer_profile['CustomerID'].values
# Native-Python copies of the IDs, so the saved output holds plain values
# rather than NumPy scalar reprs.
customer_id_labels = customer_profile['CustomerID'].tolist()
lookalike_results = {}
# Clamp the loop so a customer table with fewer rows than the target count
# does not raise IndexError.
for i in range(min(NUM_TARGET_CUSTOMERS, len(customer_id_labels))):
    similarities = similarity_matrix[i]
    # Rank all customers from most to least similar, then drop the customer
    # itself by index. Slicing off the top-ranked entry is not safe: another
    # customer with an identical (or proportional) feature vector also
    # scores 1.0 and may be ranked above the customer's own entry.
    ranked_indices = similarities.argsort()[::-1]
    similar_indices = [idx for idx in ranked_indices if idx != i][:TOP_K]
    # float(...) converts the NumPy scalar to a built-in float so the list
    # is written as e.g. 0.9123 instead of np.float64(0.9123) on NumPy 2.
    similar_customers = [
        (customer_id_labels[idx], round(float(similarities[idx]), 4))
        for idx in similar_indices
    ]
    lookalike_results[customer_id_labels[i]] = similar_customers

# Save to CSV: one row per target customer. 'cust_id' holds the customer's
# ID and 'lookalikes' holds that customer's whole list of matches in a
# single cell.
lookalike_rows = []
for target_id, matches in lookalike_results.items():
    lookalike_rows.append({'cust_id': target_id, 'lookalikes': matches})
lookalike_df = pd.DataFrame(lookalike_rows)
lookalike_df.to_csv('Lookalike.csv', index=False)
