In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the three raw input tables. The paths are relative, so the CSV files
# are expected to sit in the notebook's working directory.
customers = pd.read_csv(filepath_or_buffer='Customers.csv')
products = pd.read_csv(filepath_or_buffer='Products.csv')
transactions = pd.read_csv(filepath_or_buffer='Transactions.csv')

In [2]:
# Build one wide table: every transaction row enriched with its customer's
# columns (joined on CustomerID) and then its product's columns (joined on
# ProductID). Both joins are inner joins (the pandas default, spelled out
# here), so transactions without a matching customer or product are dropped.
data = (
    transactions
    .merge(customers, how='inner', on='CustomerID')
    .merge(products, how='inner', on='ProductID')
)

In [3]:
# Collapse the merged transaction table to one feature row per customer.
# Output columns keep the source column names, in this order:
#   TotalValue - total spending (sum over the customer's rows)
#   Quantity   - total quantity purchased (sum)
#   ProductID  - number of distinct products purchased
#   Category   - most frequent category among the customer's rows; mode() can
#                return several values on a tie, and [0] keeps the first one
customer_features = (
    data
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        ProductID=('ProductID', 'nunique'),
        Category=('Category', lambda categories: categories.mode()[0]),
    )
    .reset_index()
)

In [4]:
# One-hot encode the dominant-category column so every feature is numeric.
# drop_first=True omits the indicator for the first category level, so that
# level is represented by all remaining Category_* indicators being zero.
# Note: this rebinds customer_features and consumes the 'Category' column,
# so the cell cannot be re-run without re-running the aggregation cell first.
customer_features = pd.get_dummies(
    data=customer_features,
    columns=['Category'],
    drop_first=True,
)



In [5]:

# Standardize every feature column with StandardScaler so that no single
# feature dominates the similarity computation purely because of its scale.
# CustomerID is an identifier, not a feature, so it is removed before scaling;
# the scaled rows stay in the same order as customer_features.
scaler = StandardScaler()
customer_features_scaled = scaler.fit_transform(
    customer_features.drop('CustomerID', axis=1)
)



In [6]:
# Pairwise cosine similarity between all customers' scaled feature vectors.
# Entry [i][j] is the similarity between the customers in rows i and j of
# customer_features_scaled.
similarity_matrix = cosine_similarity(X=customer_features_scaled)


In [7]:
# Create lookalike recommendations: for each of the first 20 customers, keep
# the 3 most similar *other* customers together with their similarity score.
# Result shape: {customer_id: [(other_customer_id, score), ...]}.
lookalike_dict = {}
customer_ids = customer_features['CustomerID'].values

for idx, customer_id in enumerate(customer_ids[:20]):  # First 20 customers
    # Rank every customer by similarity to this one, most similar first.
    ranked_indices = similarity_matrix[idx].argsort()[::-1]
    # Remove the customer's own row by index rather than assuming it sorts to
    # position 0. That assumption breaks when another customer has identical
    # scaled features (a tie at 1.0, where argsort order is not guaranteed) or
    # when this customer's scaled vector is all zeros (self-similarity 0);
    # in both cases the customer could be recommended to themselves.
    similar_indices = ranked_indices[ranked_indices != idx][:3]  # Top 3 similar
    # float() stores a plain Python number, so the tuple's text form stays
    # e.g. 0.93 instead of a NumPy scalar repr such as np.float64(0.93).
    lookalike_dict[customer_id] = [
        (customer_ids[i], float(similarity_matrix[idx][i]))
        for i in similar_indices
    ]



In [8]:


# Turn the lookalike mapping into a table with one row per customer: the
# dict keys become the index and each (customer_id, score) tuple in the
# value list becomes one cell. The table is then written to Lookalike.csv
# without a header row (the index, i.e. the customer ID, is still written).
lookalike_df = pd.DataFrame.from_dict(data=lookalike_dict, orient='index')
lookalike_df.to_csv(path_or_buf='Lookalike.csv', header=False)