In [9]:
import pandas as pd
import datetime as dt
df_cleaned = pd.read_csv('cleaned_online_retail.csv')
df_cleaned['InvoiceDate'] = pd.to_datetime(df_cleaned['InvoiceDate'])
print(df_cleaned.columns)
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# reference date for "Recency" calculation
reference_date = df_cleaned['InvoiceDate'].max() + pd.Timedelta(days=1)
print("Reference Date for Recency:", reference_date)

# 2. caluclation of  RFM metrics
rfm = df_cleaned.groupby('Customer ID').agg({
    'InvoiceDate': lambda x: (reference_date - x.max()).days,  # Recency
    'Invoice': 'nunique',                                      # Frequency
    'TotalPrice': 'sum'                                        # Monetary
})

# Rename columns
rfm.columns = ['Recency', 'Frequency', 'Monetary']
rfm = rfm.reset_index()
print(rfm.head())

# 3. RFM Segmentation - Score each metric from 1 to 4 using quantiles function
rfm['R_score'] = pd.qcut(rfm['Recency'], 4, labels=[4, 3, 2, 1])
rfm['F_score'] = pd.qcut(rfm['Frequency'].rank(method="first"), 4, labels=[1, 2, 3, 4])
rfm['M_score'] = pd.qcut(rfm['Monetary'], 4, labels=[1, 2, 3, 4])

# 4. Combine scores into RFM Segment
rfm['RFM_Segment'] = rfm['R_score'].astype(str) + rfm['F_score'].astype(str) + rfm['M_score'].astype(str)

# 5. Calculate RFM Score
rfm['RFM_Score'] = rfm[['R_score', 'F_score', 'M_score']].astype(int).sum(axis=1)

# Display top RFM customers
print("\nTop 10 Customers by RFM Score:")
print(rfm.sort_values(by='RFM_Score', ascending=False).head(10))


Index(['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'Price', 'Customer ID', 'Country', 'TotalPrice'],
      dtype='object')
Reference Date for Recency: 2010-12-10 20:01:00
   Customer ID  Recency  Frequency  Monetary
0      12346.0      165         11    372.86
1      12347.0        3          2   1323.32
2      12348.0       74          1    222.16
3      12349.0       43          3   2671.14
4      12351.0       11          1    300.93

Top 10 Customers by RFM Score:
      Customer ID  Recency  Frequency  Monetary R_score F_score M_score RFM_Segment  RFM_Score
1687      14709.0       15         12   3965.29       4       4       4         444         12
118       12523.0       10         12   2582.38       4       4       4         444         12
2892      16353.0        5         16   3323.75       4       4       4         444         12
2583      15950.0       11          6   5415.20       4       4       4         444         12
2585      15953.0      