In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the per-customer RFM table from the CSV in the working directory
# and preview its first five rows.
df_RFM = pd.read_csv("RFM_retail.csv")
df_RFM.head(n=5)

Unnamed: 0,Customer ID,Duration,Freq,Tot_Rev,Avg_Rev
0,12346,347 days 13:43:00,48,155177.6,3232.866667
1,12347,61 days 11:35:00,242,5408.5,22.349174
2,12348,97 days 10:47:00,51,2019.4,39.596078
3,12349,40 days 14:09:00,180,4452.84,24.738
4,12350,332 days 07:59:00,17,334.4,19.670588


In [3]:
# Parse the Duration strings (e.g. "347 days 13:43:00") into timedelta
# values so the column can be ordered and split into quantile bins.
df_RFM['Duration'] = pd.to_timedelta(df_RFM['Duration'])
# Confirm the resulting column dtypes and non-null counts.
df_RFM.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5924 entries, 0 to 5923
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype          
---  ------       --------------  -----          
 0   Customer ID  5924 non-null   int64          
 1   Duration     5924 non-null   timedelta64[ns]
 2   Freq         5924 non-null   int64          
 3   Tot_Rev      5924 non-null   float64        
 4   Avg_Rev      5924 non-null   float64        
dtypes: float64(2), int64(2), timedelta64[ns](1)
memory usage: 231.5 KB


### Task 2:

In [4]:
# Assign every customer to one of ten quantile-based Duration bins
# (pd.qcut); the resulting interval labels are stored as a new column.
df_RFM['Recency_Deciles'] = pd.qcut(x=df_RFM['Duration'], q=10)

In [5]:
# Preview the first five rows to check the new Recency_Deciles column.
df_RFM.head(n=5)

Unnamed: 0,Customer ID,Duration,Freq,Tot_Rev,Avg_Rev,Recency_Deciles
0,12346,347 days 13:43:00,48,155177.6,3232.866667,"(341 days 12:02:06, 436 days 09:43:48]"
1,12347,61 days 11:35:00,242,5408.5,22.349174,"(60 days 11:07:24.000000001, 85 days 13:16:36]"
2,12348,97 days 10:47:00,51,2019.4,39.596078,"(85 days 13:16:36, 129 days 07:56:30]"
3,12349,40 days 14:09:00,180,4452.84,24.738,"(34 days 11:39:18, 45 days 09:56:12]"
4,12350,332 days 07:59:00,17,334.4,19.670588,"(220 days 10:59:00.000000004, 341 days 12:02:06]"


In [6]:
# Total revenue per recency decile.
# `observed=False` is passed explicitly: grouping on a categorical column
# (the output of pd.qcut) without it emits a FutureWarning on recent pandas
# because the default is changing. Pinning it keeps one row per decile bin,
# sorted in interval order, regardless of the pandas version in use.
vintage = (
    df_RFM
    .groupby('Recency_Deciles', observed=False)['Tot_Rev']
    .sum()
    .reset_index()
)
vintage

Unnamed: 0,Recency_Deciles,Tot_Rev
0,"(27 days 10:44:59.999999999, 34 days 11:39:18]",6053845.434
1,"(34 days 11:39:18, 45 days 09:56:12]",3344295.921
2,"(45 days 09:56:12, 60 days 11:07:24.000000001]",2272404.261
3,"(60 days 11:07:24.000000001, 85 days 13:16:36]",1627891.291
4,"(85 days 13:16:36, 129 days 07:56:30]",1243729.153
5,"(129 days 07:56:30, 220 days 10:59:00.000000004]",1143205.594
6,"(220 days 10:59:00.000000004, 341 days 12:02:06]",843832.521
7,"(341 days 12:02:06, 436 days 09:43:48]",735116.181
8,"(436 days 09:43:48, 564 days 10:45:00]",597776.63
9,"(564 days 10:45:00, 760 days 14:05:00]",383669.892


In [7]:
# Running total of revenue, accumulated from the first decile row
# down to the last one.
vintage = vintage.assign(cumsum_rev=vintage['Tot_Rev'].cumsum())
vintage

Unnamed: 0,Recency_Deciles,Tot_Rev,cumsum_rev
0,"(27 days 10:44:59.999999999, 34 days 11:39:18]",6053845.434,6053845.0
1,"(34 days 11:39:18, 45 days 09:56:12]",3344295.921,9398141.0
2,"(45 days 09:56:12, 60 days 11:07:24.000000001]",2272404.261,11670550.0
3,"(60 days 11:07:24.000000001, 85 days 13:16:36]",1627891.291,13298440.0
4,"(85 days 13:16:36, 129 days 07:56:30]",1243729.153,14542170.0
5,"(129 days 07:56:30, 220 days 10:59:00.000000004]",1143205.594,15685370.0
6,"(220 days 10:59:00.000000004, 341 days 12:02:06]",843832.521,16529200.0
7,"(341 days 12:02:06, 436 days 09:43:48]",735116.181,17264320.0
8,"(436 days 09:43:48, 564 days 10:45:00]",597776.63,17862100.0
9,"(564 days 10:45:00, 760 days 14:05:00]",383669.892,18245770.0


In [8]:
# Grand total of revenue over all deciles, repeated on every row so it can
# serve as the denominator for the cumulative share.
vintage.loc[:, 'total_rev_across_deciles'] = vintage['Tot_Rev'].sum()

In [9]:
# Cumulative share of total revenue reached by the end of each decile.
# The value is stored as a fraction (e.g. 0.33 rather than 33), despite the
# "Perc" in the column name.
# The division is done column by column (index-aligned) instead of pulling
# the denominator out with a label-based `[0]` lookup, which only works
# while the frame's index happens to contain the label 0.
vintage['Perc_Tot_Rev'] = vintage['cumsum_rev'] / vintage['total_rev_across_deciles']
vintage

Unnamed: 0,Recency_Deciles,Tot_Rev,cumsum_rev,total_rev_across_deciles,Perc_Tot_Rev
0,"(27 days 10:44:59.999999999, 34 days 11:39:18]",6053845.434,6053845.0,18245770.0,0.331795
1,"(34 days 11:39:18, 45 days 09:56:12]",3344295.921,9398141.0,18245770.0,0.515086
2,"(45 days 09:56:12, 60 days 11:07:24.000000001]",2272404.261,11670550.0,18245770.0,0.63963
3,"(60 days 11:07:24.000000001, 85 days 13:16:36]",1627891.291,13298440.0,18245770.0,0.728851
4,"(85 days 13:16:36, 129 days 07:56:30]",1243729.153,14542170.0,18245770.0,0.797016
5,"(129 days 07:56:30, 220 days 10:59:00.000000004]",1143205.594,15685370.0,18245770.0,0.859672
6,"(220 days 10:59:00.000000004, 341 days 12:02:06]",843832.521,16529200.0,18245770.0,0.90592
7,"(341 days 12:02:06, 436 days 09:43:48]",735116.181,17264320.0,18245770.0,0.94621
8,"(436 days 09:43:48, 564 days 10:45:00]",597776.63,17862100.0,18245770.0,0.978972
9,"(564 days 10:45:00, 760 days 14:05:00]",383669.892,18245770.0,18245770.0,1.0


In [10]:
# Select the deciles whose cumulative revenue share, expressed in percent,
# falls in the 70-80 band (both ends inclusive), i.e. "around 75%".
vintage.loc[(vintage['Perc_Tot_Rev'] * 100).between(70, 80)]

Unnamed: 0,Recency_Deciles,Tot_Rev,cumsum_rev,total_rev_across_deciles,Perc_Tot_Rev
3,"(60 days 11:07:24.000000001, 85 days 13:16:36]",1627891.291,13298440.0,18245770.0,0.728851
4,"(85 days 13:16:36, 129 days 07:56:30]",1243729.153,14542170.0,18245770.0,0.797016
