Import the required Libraries and Load the Datasets

In [18]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the three input tables from the working directory, in the same order
# as before: customers, products, transactions.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

Convert SignupDate and TransactionDate to datetime format

In [4]:
# Parse the date columns in place so they hold datetime values rather than
# whatever read_csv inferred. The two conversions are independent.
transactions['TransactionDate'] = pd.to_datetime(arg=transactions['TransactionDate'])
customers['SignupDate'] = pd.to_datetime(arg=customers['SignupDate'])

Merge transactions with product information

In [5]:
# Inner join on ProductID: each transaction row gains its product's columns;
# transactions whose ProductID is absent from `products` are dropped.
transactions_products = pd.merge(transactions, products, how='inner', on='ProductID')

Aggregate transaction data per customer

In [6]:
# Per-customer totals across all purchases.
customer_totals = transactions_products.groupby('CustomerID').agg(
    TotalSpent=('TotalValue', 'sum'),
    TotalQuantity=('Quantity', 'sum'),
    TransactionCount=('TransactionID', 'count'),
)

# Per-customer spend broken down by product category.
# The previous version aggregated *all* of a customer's TotalValue for every
# category, so each Category_* column was just a copy of TotalSpent. Grouping
# by (CustomerID, Category) gives the actual spend within each category.
category_spend = (
    transactions_products
    .groupby(['CustomerID', 'Category'])['TotalValue']
    .sum()
    .unstack(fill_value=0)  # customers with no purchases in a category get 0
    .reindex(
        index=customer_totals.index,             # same customers, same order as the totals
        columns=products['Category'].unique(),   # same column set/order as before
        fill_value=0,
    )
    .add_prefix('Category_')
)

# One row per customer: totals followed by the Category_* columns.
customer_transactions = customer_totals.join(category_spend).reset_index()

Merge with customer profile data

In [7]:
# Attach the per-customer aggregates to the profile table. The inner join
# keeps only customers that appear in both frames.
data = customers.merge(customer_transactions, how='inner', on='CustomerID')

Encode categorical data (e.g., Region)

In [8]:
# One-hot encode Region into Region_* indicator columns; drop_first=True omits
# the first level so the indicators are not redundant.
# Note: this replaces the Region column, so the cell cannot be re-run on its
# own output without re-creating `data` first.
data = pd.get_dummies(data, drop_first=True, columns=['Region'])

Standardize numerical features

In [10]:
# Columns that feed the similarity model: the three per-customer totals plus
# every Category_* column. The Region_* indicator columns are not selected here.
features = ['TotalSpent', 'TotalQuantity', 'TransactionCount']
features += [name for name in data.columns if name.startswith('Category_')]

# Standardize each feature column and write the scaled values back into `data`.
scaler = StandardScaler()
scaler.fit(data[features])
data[features] = scaler.transform(data[features])

Compute similarity matrix using cosine similarity

In [11]:
# Pairwise cosine similarity between customers: entry [i, j] compares the
# feature rows at positions i and j of `data`.
similarity_matrix = cosine_similarity(X=data.loc[:, features], Y=None)

Generate the top 3 lookalikes for each of the first 20 customers

In [29]:
# Build {customer_id: [(lookalike_id, score), ...]} for the first 20 rows of `data`.
# Looking the ids up once avoids materialising a full row via data.iloc[...] on
# every access inside the loop.
customer_ids = data['CustomerID'].tolist()

lookalikes = {}
for i in range(min(20, len(data))):
    customer_id = customer_ids[i]
    # Rank every *other* customer by similarity, highest first. The customer's
    # own row is excluded by position rather than by assuming it sorts first:
    # with tied scores (e.g. identical feature vectors) or float rounding, self
    # is not guaranteed to be at index 0 of the sorted list.
    similarities = sorted(
        ((j, score) for j, score in enumerate(similarity_matrix[i]) if j != i),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_3 = similarities[:3]
    # float(...) converts numpy scalars to plain Python floats so the values
    # stringify as bare numbers when the tuples are written out.
    lookalikes[customer_id] = [(customer_ids[j], float(score)) for j, score in top_3]

Prepare the output file

In [30]:
# One row per customer: the dict keys become the CustomerID column and the
# three (lookalike_id, score) tuples become the Recommended_* columns.
lookalike_df = (
    pd.DataFrame(lookalikes)
    .transpose()
    .reset_index()
    .set_axis(
        ['CustomerID', 'Recommended_1', 'Recommended_2', 'Recommended_3'],
        axis='columns',
    )
)

Save the results to CSV

In [32]:
# Write the lookalike table to the working directory, without the row index.
lookalike_df.to_csv(path_or_buf='Lookalike.csv', index=False)

Download the CSV file (Google Colab only)

In [35]:
# Offer the CSV as a browser download when running in Google Colab.
# google.colab only exists inside Colab, so the import is guarded: elsewhere
# the file has already been written to the working directory by to_csv and
# there is nothing to download.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; 'Lookalike.csv' was saved to the working directory.")
else:
    files.download('Lookalike.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>