In [14]:
import pandas as pd

# ===== Step 1: Build the toy sales dataset =====
# Each tuple is one sale: (customer id, invoice date as text, amount spent).
sales_records = [
    (101, '2024-07-01', 200),
    (102, '2024-07-05', 150),
    (101, '2024-07-10', 300),
    (103, '2024-06-15', 250),
    (104, '2024-06-20', 100),
    (102, '2024-07-15', 200),
]

# Pivot the row-wise records into the column-oriented mapping the frame is built from.
column_names = ('CustomerID', 'InvoiceDate', 'Amount')
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*sales_records))
}

df = pd.DataFrame(data)

In [15]:
# Parse the text values in InvoiceDate into datetimes and store them back
# in the same column.
parsed_invoice_dates = pd.to_datetime(df['InvoiceDate'])
df['InvoiceDate'] = parsed_invoice_dates

In [16]:
# ===== Step 2: Fix the reference ("today") date for the Recency calculation =====
# The reference date is one day after the most recent invoice in the data.
latest_invoice_date = df['InvoiceDate'].max()
today = latest_invoice_date + pd.Timedelta(days=1)

In [17]:
# ===== Step 3: Compute Recency, Frequency and Monetary per customer =====
def _days_since_last_purchase(invoice_dates):
    """Return the whole days between `today` and the latest date in `invoice_dates`."""
    return (today - invoice_dates.max()).days


purchases_by_customer = df.groupby('CustomerID')
rfm = purchases_by_customer.agg(
    # Days since the customer's last purchase
    Recency=pd.NamedAgg(column='InvoiceDate', aggfunc=_days_since_last_purchase),
    # Number of purchases
    Frequency=pd.NamedAgg(column='InvoiceDate', aggfunc='count'),
    # Total amount spent
    Monetary=pd.NamedAgg(column='Amount', aggfunc='sum'),
)
# Turn CustomerID back into an ordinary column.
rfm = rfm.reset_index()

In [18]:
# ===== Step 4: Assign customer segments =====
def get_segment(row, champion_max_recency=7, champion_min_frequency=2,
                loyal_max_recency=15):
    """Classify one customer's RFM row into a segment label.

    Rules are checked in order and the first match wins:

    1. 'Champion'        - Recency is at most ``champion_max_recency`` AND
                           Frequency is at least ``champion_min_frequency``.
    2. 'Loyal Customer'  - Recency is at most ``loyal_max_recency``
                           (Frequency is not considered for this rule).
    3. 'Needs Attention' - everything else.

    Parameters
    ----------
    row : mapping-like
        Any object supporting ``row['Recency']`` and ``row['Frequency']``.
    champion_max_recency : number, default 7
        Largest Recency value that still qualifies as a Champion.
    champion_min_frequency : number, default 2
        Smallest Frequency value that still qualifies as a Champion.
    loyal_max_recency : number, default 15
        Largest Recency value that still qualifies as a Loyal Customer.

    Returns
    -------
    str
        One of 'Champion', 'Loyal Customer' or 'Needs Attention'.
    """
    recency = row['Recency']

    # Frequency is only looked up when the recency test passes (short-circuit `and`).
    if recency <= champion_max_recency and row['Frequency'] >= champion_min_frequency:
        return 'Champion'
    if recency <= loyal_max_recency:
        return 'Loyal Customer'
    return 'Needs Attention'

# Run get_segment once per row of rfm and store the labels in a new column.
segment_labels = rfm.apply(get_segment, axis='columns')
rfm['Segment'] = segment_labels

In [19]:
# ===== Step 5: Display final RFM table =====
# Prints the plain-text form of rfm: one row per CustomerID with its
# Recency, Frequency, Monetary and Segment columns.
print(rfm)

   CustomerID  Recency  Frequency  Monetary          Segment
0         101        6          2       500         Champion
1         102        1          2       350         Champion
2         103       31          1       250  Needs Attention
3         104       26          1       100  Needs Attention
