In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the RFM summary table from the CSV in the working directory and
# preview its first rows.
rfm_csv_path = "RFM_retail.csv"
df_RFM = pd.read_csv(rfm_csv_path)
df_RFM.head()

Unnamed: 0,Customer ID,Duration,Freq,Tot_Rev,Avg_Rev
0,12346,347 days 13:43:00,48,155177.6,3232.866667
1,12347,61 days 11:35:00,242,5408.5,22.349174
2,12348,97 days 10:47:00,51,2019.4,39.596078
3,12349,40 days 14:09:00,180,4452.84,24.738
4,12350,332 days 07:59:00,17,334.4,19.670588


In [3]:
# Parse the Duration strings (e.g. "347 days 13:43:00") into timedelta values
# so the column can be ordered and binned (e.g. into deciles) numerically.
df_RFM['Duration'] = pd.to_timedelta(df_RFM['Duration'])
df_RFM.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5924 entries, 0 to 5923
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype          
---  ------       --------------  -----          
 0   Customer ID  5924 non-null   int64          
 1   Duration     5924 non-null   timedelta64[ns]
 2   Freq         5924 non-null   int64          
 3   Tot_Rev      5924 non-null   float64        
 4   Avg_Rev      5924 non-null   float64        
dtypes: float64(2), int64(2), timedelta64[ns](1)
memory usage: 231.5 KB


### Task 3:

In [4]:
# Bin customers into ten equal-sized quantile groups (deciles) by purchase
# frequency; each row is labelled with the interval its Freq value falls in.
freq_decile_bins = pd.qcut(x=df_RFM['Freq'], q=10)
df_RFM['Frequency_Deciles'] = freq_decile_bins
df_RFM.head()

Unnamed: 0,Customer ID,Duration,Freq,Tot_Rev,Avg_Rev,Frequency_Deciles
0,12346,347 days 13:43:00,48,155177.6,3232.866667,"(37.0, 53.0]"
1,12347,61 days 11:35:00,242,5408.5,22.349174,"(180.0, 319.7]"
2,12348,97 days 10:47:00,51,2019.4,39.596078,"(37.0, 53.0]"
3,12349,40 days 14:09:00,180,4452.84,24.738,"(114.0, 180.0]"
4,12350,332 days 07:59:00,17,334.4,19.670588,"(16.0, 25.0]"


In [5]:
# Total revenue per frequency decile, one row per decile.
# `observed=False` is passed explicitly because the grouping key is categorical
# (produced by pd.qcut): it pins the behaviour the results below were computed
# with (every decile category appears in the output) and avoids the
# FutureWarning newer pandas versions emit when `observed` is left unset.
df_freq_rev = (
    df_RFM
    .groupby('Frequency_Deciles', observed=False)['Tot_Rev']
    .sum()
    .reset_index()
)
df_freq_rev.head()

Unnamed: 0,Frequency_Deciles,Tot_Rev
0,"(0.999, 8.0]",404327.92
1,"(8.0, 16.0]",323577.17
2,"(16.0, 25.0]",290291.051
3,"(25.0, 37.0]",435255.361
4,"(37.0, 53.0]",846488.273


In [6]:
# Running total of revenue across the deciles, accumulated in row order
# (lowest-frequency decile first).
running_revenue = df_freq_rev['Tot_Rev'].cumsum()
df_freq_rev['cumsum'] = running_revenue
df_freq_rev.head()

Unnamed: 0,Frequency_Deciles,Tot_Rev,cumsum
0,"(0.999, 8.0]",404327.92,404327.92
1,"(8.0, 16.0]",323577.17,727905.09
2,"(16.0, 25.0]",290291.051,1018196.141
3,"(25.0, 37.0]",435255.361,1453451.502
4,"(37.0, 53.0]",846488.273,2299939.775


In [7]:
# Grand total of revenue over all deciles; the scalar is broadcast so every
# row of the 'Total' column holds the same value.
grand_total_revenue = df_freq_rev['Tot_Rev'].sum()
df_freq_rev['Total'] = grand_total_revenue
df_freq_rev.head()

Unnamed: 0,Frequency_Deciles,Tot_Rev,cumsum,Total
0,"(0.999, 8.0]",404327.92,404327.92,18245770.0
1,"(8.0, 16.0]",323577.17,727905.09,18245770.0
2,"(16.0, 25.0]",290291.051,1018196.141,18245770.0
3,"(25.0, 37.0]",435255.361,1453451.502,18245770.0
4,"(37.0, 53.0]",846488.273,2299939.775,18245770.0


In [8]:
# Cumulative share of total revenue reached by the end of each decile.
# The value is a fraction between 0 and 1 (it is not multiplied by 100 here).
# 'Total' holds the same grand total in every row, so its first entry is used
# as the scalar denominator.
overall_revenue = df_freq_rev['Total'].iloc[0]
df_freq_rev['Perc_Tot_Rev'] = df_freq_rev['cumsum'].div(overall_revenue)
df_freq_rev

Unnamed: 0,Frequency_Deciles,Tot_Rev,cumsum,Total,Perc_Tot_Rev
0,"(0.999, 8.0]",404327.92,404327.9,18245770.0,0.02216
1,"(8.0, 16.0]",323577.17,727905.1,18245770.0,0.039894
2,"(16.0, 25.0]",290291.051,1018196.0,18245770.0,0.055805
3,"(25.0, 37.0]",435255.361,1453452.0,18245770.0,0.07966
4,"(37.0, 53.0]",846488.273,2299940.0,18245770.0,0.126053
5,"(53.0, 76.0]",840613.7,3140553.0,18245770.0,0.172125
6,"(76.0, 114.0]",1134640.972,4275194.0,18245770.0,0.234312
7,"(114.0, 180.0]",1763356.203,6038551.0,18245770.0,0.330956
8,"(180.0, 319.7]",3008457.952,9047009.0,18245770.0,0.495842
9,"(319.7, 12780.0]",9198758.276,18245770.0,18245770.0,1.0


In [9]:
# Looking for the frequency range where ~65% of the revenue is realized:
# keep the deciles whose cumulative revenue share, expressed in percent, lies
# inside the inclusive 45-85 window (i.e. 65 plus or minus 20 percentage points).
cum_rev_pct = df_freq_rev['Perc_Tot_Rev'].mul(100)
df_freq_rev[cum_rev_pct.between(45, 85)]

Unnamed: 0,Frequency_Deciles,Tot_Rev,cumsum,Total,Perc_Tot_Rev
8,"(180.0, 319.7]",3008457.952,9047008.602,18245770.0,0.495842


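#### Only one frequency decile, (180.0, 319.7], has a cumulative revenue share inside the 45–85% window around 65%. In fact, no decile has a cumulative revenue share that is very close to 65%: the closest is 49.6%, after which the cumulative share jumps to 100% in the top decile, (319.7, 12780.0].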