In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity


# Read the three raw tables (customers, products, transactions) from the
# Colab working directory in one pass.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/Customers.csv',
        '/content/Products.csv',
        '/content/Transactions.csv',
    )
)

# One row per transaction, enriched with the buyer's and the product's columns.
# Both joins use pandas' default inner join, so transactions without a matching
# customer or product are dropped.
data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

In [4]:
# Feature engineering
# Collapse the transaction-level table to one row per customer using named
# aggregation, so each output column is labelled where it is computed.
customer_features = (
    data
    .groupby('CustomerID')
    .agg(
        TotalSpending=('TotalValue', 'sum'),
        AvgSpending=('TotalValue', 'mean'),
        TransactionCount=('TransactionID', 'count'),
        # Most frequent category; mode() is sorted, so ties resolve to the first value.
        PreferredCategory=('Category', lambda purchased: purchased.mode()[0]),
    )
    .reset_index()
)

In [5]:
# Normalize features
# Standardise the numeric per-customer columns so no single one dominates the
# similarity computation purely because of its scale.
numeric_columns = ['TotalSpending', 'AvgSpending', 'TransactionCount']
numeric_block = customer_features[numeric_columns]

scaler = StandardScaler()
scaled_features = scaler.fit_transform(numeric_block)

In [6]:
# Add one-hot encoding for categorical features
# Wrap the scaled array in a labelled frame that shares customer_features'
# index, so the concat below aligns rows explicitly instead of relying on two
# default RangeIndexes happening to match, and so `features` does not end up
# with a mix of integer (0, 1, 2) and string column labels.
scaled_df = pd.DataFrame(
    scaled_features,
    index=customer_features.index,
    columns=['TotalSpending', 'AvgSpending', 'TransactionCount'],
)

# dtype=float keeps the whole feature matrix numeric; the default dummy dtype
# is bool in recent pandas, which would give a mixed bool/float frame.
categories = pd.get_dummies(customer_features['PreferredCategory'], prefix='Category', dtype=float)
features = pd.concat([scaled_df, categories], axis=1)

# Calculate similarity
# Pairwise cosine similarity between customers; rows and columns are both
# labelled by CustomerID so a customer's neighbours can be looked up by id.
similarity_matrix = cosine_similarity(features)
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_features['CustomerID'],
    columns=customer_features['CustomerID'],
)


In [7]:
# Generate recommendations
# For each of the first N_TARGET_CUSTOMERS customers listed in the customers
# table, keep the N_LOOKALIKES most similar *other* customers as a list of
# (customer_id, similarity_score) tuples.
N_TARGET_CUSTOMERS = 20
N_LOOKALIKES = 3

lookalike_dict = {}
for customer in customers['CustomerID'][:N_TARGET_CUSTOMERS]:
    if customer not in similarity_df.columns:
        # similarity_df is built from the merged transaction data, so a customer
        # with no transactions has no feature vector. Record an empty list
        # instead of failing with a KeyError.
        lookalike_dict[customer] = []
        continue

    # Remove the customer's own entry by label before ranking. Taking the top
    # N+1 and discarding the first row is only correct when self is ranked
    # first, which ties or floating-point rounding can break.
    similar_customers = similarity_df[customer].drop(customer).nlargest(N_LOOKALIKES)

    # Convert scores to built-in floats so that str()/repr() of the tuples
    # prints plain numbers rather than NumPy scalar reprs.
    lookalike_dict[customer] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

In [8]:
# Save to CSV
# One output row per target customer; the list of (id, score) tuples is stored
# as its string representation in a single 'Lookalikes' column.
target_ids = list(lookalike_dict.keys())
lookalike_strings = list(map(str, lookalike_dict.values()))

lookalike_df = pd.DataFrame({
    'CustomerID': target_ids,
    'Lookalikes': lookalike_strings,
})
lookalike_df.to_csv('/content/Chetnya_Sharma_Lookalike.csv', index=False)
