In [1]:
! pip install rfm

Collecting rfm
  Downloading rfm-1.0.9-py3-none-any.whl (7.8 kB)
Installing collected packages: rfm
Successfully installed rfm-1.0.9


In [2]:
import pandas as pd
import plotly.graph_objects as go
from rfm import RFM

In [3]:
df = pd.read_excel("/content/Online Retail.xlsx")

In [4]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 541909 entries, 0 to 541908
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    541909 non-null  object        
 1   StockCode    541909 non-null  object        
 2   Description  540455 non-null  object        
 3   Quantity     541909 non-null  int64         
 4   InvoiceDate  541909 non-null  datetime64[ns]
 5   UnitPrice    541909 non-null  float64       
 6   CustomerID   406829 non-null  float64       
 7   Country      541909 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 33.1+ MB


In [6]:
for column in df.columns:
    if df[column].dtype == 'object':
      df[column] = df[column].astype('category')

In [7]:
df['revenue'] = df['Quantity'] * df['UnitPrice']

In [8]:
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,revenue
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,15.3
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,22.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,20.34


# **RFM**

In [10]:
r = RFM(df,customer_id="CustomerID",transaction_date="InvoiceDate",amount="revenue")
rfm_dataframe = pd.DataFrame(r.rfm_table)

In [11]:
rfm_dataframe.head()

Unnamed: 0,CustomerID,recency,frequency,monetary_value,r,f,m,rfm_score,segment
0,15039,9,1483,19637.76,5,5,5,555,Champions
1,17735,1,690,13109.06,5,5,5,555,Champions
2,16549,9,925,4033.58,5,5,5,555,Champions
3,15218,10,166,5756.81,5,5,5,555,Champions
4,13767,1,399,16942.31,5,5,5,555,Champions


In [12]:
total_customers = len(rfm_dataframe)

freq_more_than_500 = len(rfm_dataframe[rfm_dataframe['frequency'] > 500])
freq_less_than_50 = len(rfm_dataframe[rfm_dataframe['frequency'] < 50])
freq_between_500_and_50 = len(rfm_dataframe[(rfm_dataframe['frequency'] > 50) & (rfm_dataframe['frequency'] < 500)])


percentage_more_than_500 = (freq_more_than_500 / total_customers) * 100
percentage_less_than_50 = (freq_less_than_50 / total_customers) * 100
percentage_between_500_and_50 = (freq_between_500_and_50 / total_customers) * 100

print("Customers with frequency more than 500:", freq_more_than_500, f"({percentage_more_than_500:.2f}%)")
print("Customers with frequency less than 50:", freq_less_than_50, f"({percentage_less_than_50:.2f}%)")
print("Customers with frequency between 500 and 50:", freq_between_500_and_50, f"({percentage_between_500_and_50:.2f}%)")


Customers with frequency more than 500: 81 (1.85%)
Customers with frequency less than 50: 2434 (55.67%)
Customers with frequency between 500 and 50: 1836 (41.99%)


In [26]:
filtered_data = rfm_dataframe[rfm_dataframe['frequency'] > 1]

fig = go.Figure()

fig.add_trace(go.Histogram(x=filtered_data['recency'], nbinsx=20,
                            marker_color='skyblue', opacity=0.7,
                            text=filtered_data['frequency'],
                            textposition="outside",
                            textfont_color='black'))

fig.update_layout(
    title='Distribution of Recency for Returning Customers (More than 1 Purchase)',
    xaxis_title='Recency',
    yaxis_title='Frequency',
    bargap=0.05,
    xaxis=dict(tickfont=dict(color='black'), linecolor='black'),
    yaxis=dict(tickfont=dict(color='black'), linecolor='black'),
    title_font=dict(color='black')
)

fig.show()

In [None]:
rfm_dataframe.to_csv('/content/RFM.csv', index=False)
