In [2]:
# Step 1: Define Customer-Level Questions

# Before writing any code, ask:

# Who are our top customers?

# What is the average spend per customer?

# Do customers purchase once or repeatedly?

# Which customer segments are most valuable?

In [14]:
# Step 2: Load Data

import pandas as pd

# Read the raw retail transactions from CSV; the path is relative, so it is
# resolved against the notebook's current working directory.
dataset = pd.read_csv(filepath_or_buffer="Datasets/retail_sales_dataset.csv")

In [16]:
# Step 3: Create Customer-Level Table
# Aggregate transactions → customers.

# Collapse the transaction rows to one row per 'Customer ID':
#   total_spend           - sum of 'Total Amount'
#   total_transactions    - number of distinct 'Transaction ID' values
#   avg_transaction_value - mean of 'Total Amount'
# as_index=False keeps 'Customer ID' as an ordinary column.
customer_dataset = dataset.groupby('Customer ID', as_index=False).agg(
    total_spend=pd.NamedAgg(column='Total Amount', aggfunc='sum'),
    total_transactions=pd.NamedAgg(column='Transaction ID', aggfunc='nunique'),
    avg_transaction_value=pd.NamedAgg(column='Total Amount', aggfunc='mean'),
)

customer_dataset.head()

Unnamed: 0,Customer ID,total_spend,total_transactions,avg_transaction_value
0,CUST001,150,1,150.0
1,CUST002,1000,1,1000.0
2,CUST003,30,1,30.0
3,CUST004,500,1,500.0
4,CUST005,100,1,100.0


In [20]:
# Step 4: Identify High-Value Customers

# Order customers by total spend, largest first, and keep the first ten rows.
# NOTE(review): the displayed output shows many customers tied at the top
# value, so which tied rows land in the ten depends on the sort's tie order.
top_customers = (
    customer_dataset
    .sort_values('total_spend', ascending=False)
    .iloc[:10]
)

top_customers

Unnamed: 0,Customer ID,total_spend,total_transactions,avg_transaction_value
487,CUST487,2000,1,2000.0
476,CUST476,2000,1,2000.0
773,CUST773,2000,1,2000.0
503,CUST503,2000,1,2000.0
92,CUST093,2000,1,2000.0
88,CUST089,2000,1,2000.0
946,CUST946,2000,1,2000.0
157,CUST157,2000,1,2000.0
155,CUST155,2000,1,2000.0
420,CUST420,2000,1,2000.0


In [22]:
# Step 5: Repeat vs One-Time Buyers

# Label each customer by purchase frequency: more than one transaction means
# 'Repeat Buyer', otherwise 'One-Time Buyer'. The comparison is vectorised
# instead of calling a Python lambda once per row.
customer_dataset['buyer_type'] = (
    customer_dataset['total_transactions']
    .gt(1)
    .map({True: 'Repeat Buyer', False: 'One-Time Buyer'})
)

# Fraction of customers that falls into each buyer type.
customer_dataset['buyer_type'].value_counts(normalize=True)

buyer_type
One-Time Buyer    1.0
Name: proportion, dtype: float64

In [24]:
# Step 6: Revenue Contribution by Buyer Type

# Sum total_spend within each buyer_type; the result is a Series indexed by
# buyer_type.
customer_revenue_split = customer_dataset.groupby('buyer_type')['total_spend'].agg('sum')

customer_revenue_split

buyer_type
One-Time Buyer    456000
Name: total_spend, dtype: int64

In [26]:
# Step 7: Merge Customer Data Back into the Transactions (Optional, but Good Practice)

# Attach the per-customer metrics to every transaction row (left join on
# 'Customer ID').
# Because this cell reassigns `dataset` from itself, running it twice used to
# merge the customer columns in again and produce *_x / *_y duplicates. Any
# customer-level columns already present are therefore dropped first, which
# makes the cell safe to re-run.
dataset = (
    dataset
    .drop(
        columns=[col for col in customer_dataset.columns if col != 'Customer ID'],
        errors='ignore',  # first run: none of these columns exist yet
    )
    # validate: 'Customer ID' must be unique in customer_dataset, so the join
    # cannot multiply transaction rows.
    .merge(customer_dataset, on='Customer ID', how='left', validate='many_to_one')
)

In [28]:
# Preview the first five rows of the transaction table.
dataset.head(n=5)

Unnamed: 0,Transaction ID,Date,Customer ID,Gender,Age,Product Category,Quantity,Price per Unit,Total Amount,total_spend,total_transactions,avg_transaction_value,buyer_type
0,1,2023-11-24,CUST001,Male,34,Beauty,3,50,150,150,1,150.0,One-Time Buyer
1,2,2023-02-27,CUST002,Female,26,Clothing,2,500,1000,1000,1,1000.0,One-Time Buyer
2,3,2023-01-13,CUST003,Male,50,Electronics,1,30,30,30,1,30.0,One-Time Buyer
3,4,2023-05-21,CUST004,Male,37,Clothing,1,500,500,500,1,500.0,One-Time Buyer
4,5,2023-05-06,CUST005,Male,30,Beauty,2,50,100,100,1,100.0,One-Time Buyer
