In [1]:
pip install pandas numpy scikit-learn


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


In [3]:
# Directory holding the three input CSV files. Defaults to the location the
# notebook was originally written against; set the LOOKALIKE_DATA_DIR
# environment variable to run it on another machine without editing code.
DATA_DIR = Path(os.environ.get('LOOKALIKE_DATA_DIR', r'C:\Users\HP\Downloads'))

# Load the raw input tables used by the rest of the notebook.
customers = pd.read_csv(DATA_DIR / 'Customers.csv')
products = pd.read_csv(DATA_DIR / 'Products.csv')
transactions = pd.read_csv(DATA_DIR / 'Transactions.csv')

In [7]:
# Enrich every transaction with its customer and product attributes.
# Both joins are left joins anchored on the transactions table, so a
# transaction is kept even when its customer or product lookup finds no match.
customer_transactions = (
    transactions
    .merge(customers, on='CustomerID', how='left')
    .merge(products, on='ProductID', how='left')
)


In [8]:
# Collapse the transaction-level table to one row per CustomerID:
#   total_spent        - sum of TotalValue
#   avg_spent          - mean of TotalValue
#   purchase_frequency - count of TransactionID values
#   last_purchase_date - maximum TransactionDate
customer_summary = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        total_spent=pd.NamedAgg(column='TotalValue', aggfunc='sum'),
        avg_spent=pd.NamedAgg(column='TotalValue', aggfunc='mean'),
        purchase_frequency=pd.NamedAgg(column='TransactionID', aggfunc='count'),
        last_purchase_date=pd.NamedAgg(column='TransactionDate', aggfunc='max'),
    )
)


In [9]:
# Replace every missing value in the per-customer summary with 0.
# NOTE(review): the fill applies to all columns, including last_purchase_date;
# confirm that 0 is an acceptable stand-in for a missing date.
customer_summary = customer_summary.fillna(value=0)


In [10]:
# Recency = whole days between the reference date and each customer's most
# recent purchase.
#
# The reference is today's date truncated to midnight. Without the
# normalisation the reference carries the current time of day, so the
# whole-day truncation in `.dt.days` could move individual customers by one
# day depending on the hour the notebook is run, making the downstream
# scaled features (and similarity scores) vary between runs.
customer_summary['recency'] = (
    pd.to_datetime('today').normalize()
    - pd.to_datetime(customer_summary['last_purchase_date'])
).dt.days


In [12]:
# Standardise the behavioural features so that no single column dominates
# the similarity computation purely because of its scale.
# StandardScaler comes from the notebook's top-level imports cell; it is not
# re-imported here.
scaler = StandardScaler()

# Only these four numeric columns feed the similarity model; the raw
# last_purchase_date column is represented by `recency` instead.
customer_features = customer_summary[['total_spent', 'avg_spent', 'purchase_frequency', 'recency']]
customer_features_scaled = scaler.fit_transform(customer_features)


In [13]:
# Pairwise cosine similarity between every pair of customers, computed on the
# standardised feature rows. Row/column i corresponds to row i of
# customer_features_scaled.
# cosine_similarity comes from the notebook's top-level imports cell; it is
# not re-imported here.
similarity_matrix = cosine_similarity(customer_features_scaled)


In [14]:
# Wrap the raw similarity matrix in a DataFrame whose rows and columns are
# both labelled with the customer_summary index, so scores can be looked up
# by label instead of by position.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_summary.index,
    columns=customer_summary.index,
)


In [15]:
# How many customers (taken from the start of the summary index) get
# recommendations, and how many lookalikes each of them receives.
N_TARGET_CUSTOMERS = 20
TOP_N_LOOKALIKES = 3

# Maps each target customer to a list of (lookalike_customer_id, score)
# tuples, ordered from most to least similar.
lookalike_map = {}

for customer_id in customer_summary.index[:N_TARGET_CUSTOMERS]:
    # Remove the customer's own entry by label before ranking. Sorting and
    # skipping position 0 only excludes the customer when its self-similarity
    # sorts strictly first; if another customer ties with it, the customer
    # could be reported as its own lookalike and a genuine match dropped.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .nlargest(TOP_N_LOOKALIKES)
    )
    lookalike_map[customer_id] = list(similar_customers.items())


In [17]:
# Turn the {customer: [(lookalike, score), ...]} mapping into one row per
# (customer, lookalike) pair.
flattened_data = [
    [source_id, match_id, match_score]
    for source_id, matches in lookalike_map.items()
    for match_id, match_score in matches
]

# Tabulate the pairs under the output column names.
lookalike_df = pd.DataFrame(
    flattened_data,
    columns=['CustomerID', 'LookalikeCustomerID', 'SimilarityScore'],
)

# Persist the recommendations; the index is omitted from the CSV.
lookalike_df.to_csv('Lookalike.csv', index=False)
