# Notebook 04: Promotion strategy sequencer


## Why this notebook ?

1. Assign discounts to each customer segment
2. Use research-validated discount percentages
3. Save results for financial impact calculation

We stick to the methodology used by Kobets and Yashyna on the discounts applied and the sequences chosen. This is a deliberated choice as we don't have enough data to calculate the supply elasticity (no price variation history to define its impact on customers)


In [7]:
## Step 1: Import Libraries

import pandas as pd
import os
from datetime import datetime

print("="*60)
print("NOTEBOOK 05: PROMOTION STRATEGY")
print("="*60)

NOTEBOOK 05: PROMOTION STRATEGY


In [8]:
#STEP 2 : Load rfm segments 

DATA_DIR = '../data/processed'
OUTPUT_DIR = '../data/processed'

print("\n[1/3] Loading data...")

rfm = pd.read_csv(f'{DATA_DIR}/rfm_customer_segments.csv')

print(f"  ✓ RFM segments: {len(rfm):,} customers")
print(f"\nSegment distribution:")
print(rfm['segment'].value_counts())


[1/3] Loading data...
  ✓ RFM segments: 206,209 customers

Segment distribution:
segment
Lost          50282
Premium       45125
Loyal         34570
Sleeping      26772
New           21555
Promising     13952
Frugal         7652
High_Check     6301
Name: count, dtype: int64


In [10]:
# STEP 4: APPLY DISCOUNT RULES (Kobets & Yashyna, 2025, Table 4)


print("\n[2/4] Applying discount rules...")

# Discount table from Kobets & Yashyna (2025), Table 4, p. 41
# These are EXACT values from the research paper - DO NOT MODIFY
discount_table = {
    'Promising': {1: 22.9, 2: 3.6, 3: 1.7},    # After 1st, 2nd, 3rd purchase
    'Frugal': {4: 6.4},                         # After 4th purchase
    'Loyal': {2: 1.6},                          # After 2nd purchase
    'Lost': {1: 15.0},                          # Win-back (1st reactivation)
    'Sleeping': {1: 10.0},                      # Reactivation (1st)
    'New': {2: 10.0},                           # 2nd purchase incentive
    'Premium': {},                              # No discount - experiential focus
    'High_Check': {}                            # No discount - value messaging
}

def calculate_personalized_discount(segment, purchase_sequence):
    """
    Calculate personalized discount per Kobets & Yashyna (2025), Table 4.
    
    Parameters:
    -----------
    segment : str - Customer segment name
    purchase_sequence : int - Which purchase this is for the customer
    
    Returns:
    --------
    float - Recommended discount percentage
    """
    segment_discounts = discount_table.get(segment, {})
    return segment_discounts.get(purchase_sequence, 0.0)

# Calculate purchase sequence for each customer
# This tells us which purchase number each customer is at
rfm['purchase_sequence'] = rfm.groupby('user_id')['num_orders'].rank(
    method='first'
).astype(int)

print(f"  ✓ Purchase sequence calculated")
print(f"    Range: {rfm['purchase_sequence'].min()} to {rfm['purchase_sequence'].max()}")

# Apply discounts
rfm['recommended_discount'] = rfm.apply(
    lambda row: calculate_personalized_discount(
        row['segment'], 
        row['purchase_sequence']
    ),
    axis=1
)

print(f"  ✓ Discounts assigned")
print(f"\n  Discount summary:")
print(f"    Customers with discounts: {(rfm['recommended_discount'] > 0).sum():,}")
print(f"    Average discount: {rfm['recommended_discount'].mean():.2f}%")


[2/4] Applying discount rules...
  ✓ Purchase sequence calculated
    Range: 1 to 1
  ✓ Discounts assigned

  Discount summary:
    Customers with discounts: 91,006
    Average discount: 6.51%


In [11]:

# STEP 5: SAVE RESULTS FOR FINANCIAL IMPACT CALCULATOR


print("\n[3/4] Saving results...")

# Save RFM with discounts (required by Notebook 06)
rfm.to_csv(f'{OUTPUT_DIR}/rfm_with_discounts.csv', index=False)
print(f"  ✓ rfm_with_discounts.csv saved")

# Save simple summary metadata
summary = {
    'notebook': '04_promotion_sequencer.ipynb',
    'date': datetime.now().strftime('%Y-%m-%d'),
    'total_customers': len(rfm),
    'customers_with_discounts': int((rfm['recommended_discount'] > 0).sum()),
    'average_discount': float(rfm['recommended_discount'].mean()),
    'discount_table': discount_table,
    'methodology': 'Kobets & Yashyna (2025), Table 4'
}

with open(f'{OUTPUT_DIR}/metadata_04.json', 'w') as f:
    json.dump(summary, f, indent=2)

print(f"  ✓ metadata_04.json saved")


[3/4] Saving results...
  ✓ rfm_with_discounts.csv saved
  ✓ metadata_04.json saved
