In [2]:
# Goal

# Learn how to:
# Define a funnel
# Measure drop-offs at each step
# Find the biggest leakage point
# Dataset: Retail Transactions

In [4]:
# What is a Funnel? (Plain English)
# A funnel is a sequence of steps users go through.
# Example (Retail):
# Customer visits
# Customer buys 1 item
# Customer buys multiple items
# Customer becomes repeat buyer
# At each step, some users drop out.

In [6]:
# Step 1: Define Funnel Steps (VERY IMPORTANT)

# For our dataset, we’ll define a simple funnel:
# 1. All Customers
# 2. Customers with ≥1 transaction
# 3. Customers with Quantity > 1 (bigger purchase)
# 4. Repeat Customers (more than 1 transaction)

In [8]:
import pandas as pd

# Load the retail transactions CSV (the path is relative to the working directory).
df = pd.read_csv('Datasets/retail_sales_dataset.csv')

# Fail fast, with a readable message, if a column the funnel cells depend on is
# absent -- otherwise the problem only surfaces as a bare KeyError cells later.
_required_columns = ['Customer ID', 'Transaction ID', 'Quantity']
_missing_columns = [col for col in _required_columns if col not in df.columns]
if _missing_columns:
    raise KeyError(
        f"retail_sales_dataset.csv is missing expected columns: {_missing_columns}"
    )

In [10]:
# Step 2: Calculate Funnel Counts
# Count 1: Total customers = number of distinct, non-null 'Customer ID' values.
# NOTE(review): this is derived from the same table as the purchase counts, so it
# presumably only includes customers that appear in at least one row -- confirm
# whether a separate customer list should define the top of the funnel.
total_customers = len(df['Customer ID'].dropna().unique())

In [12]:
# Count 2: Customers who purchased -- distinct customers with a row where Quantity >= 1.
# The filter is on 'Quantity' (not on the number of transactions), so a customer
# whose rows all have Quantity < 1 is not counted here.
buyers = df.loc[df['Quantity'].ge(1), 'Customer ID'].nunique()

In [14]:
# Count 3: Customers with Quantity > 1 -- distinct customers having at least one
# row whose 'Quantity' exceeds 1.
multi_item_buyers = df.loc[df['Quantity'].gt(1), 'Customer ID'].nunique()

In [18]:
# Count 4: Repeat buyers -- customers associated with more than one distinct
# 'Transaction ID'.
transactions_per_customer = df.groupby('Customer ID')['Transaction ID'].nunique()
repeat_buyers = (transactions_per_customer > 1).sum()

In [20]:
# Step 3: Build Funnel Table
# One row per funnel stage, in funnel order; 'Users' holds the counts computed
# in the previous cells.
stage_labels = [
    'All Customers',
    'At Least One Purchase',
    'Multi-Item Purchase',
    'Repeat Customers',
]
stage_users = [total_customers, buyers, multi_item_buyers, repeat_buyers]

funnel = pd.DataFrame({'Stage': stage_labels, 'Users': stage_users})

funnel

Unnamed: 0,Stage,Users
0,All Customers,1000
1,At Least One Purchase,1000
2,Multi-Item Purchase,747
3,Repeat Customers,0


In [22]:
# Step 4: Conversion Rates
# Each stage's user count expressed as a percentage of the first row's count
# (the top of the funnel).
top_of_funnel_users = funnel['Users'].iloc[0]
funnel['Conversion %'] = funnel['Users'].div(top_of_funnel_users).mul(100)

funnel

Unnamed: 0,Stage,Users,Conversion %
0,All Customers,1000,100.0
1,At Least One Purchase,1000,100.0
2,Multi-Item Purchase,747,74.7
3,Repeat Customers,0,0.0


In [24]:
# Step 5: Identify Drop-Offs (THIS IS THE POINT)
# Drop-off for a stage = the previous stage's 'Conversion %' minus this stage's,
# i.e. the difference in percentage points of the first stage.
# The first row has no previous stage, so its drop-off is NaN.
# The largest drop-off = biggest problem.
previous_conversion = funnel['Conversion %'].shift(1)
funnel['Drop-off %'] = previous_conversion.sub(funnel['Conversion %'])

funnel

Unnamed: 0,Stage,Users,Conversion %,Drop-off %
0,All Customers,1000,100.0,
1,At Least One Purchase,1000,100.0,0.0
2,Multi-Item Purchase,747,74.7,25.3
3,Repeat Customers,0,0.0,74.7
