In [20]:
import pandas as pd
# Read the cleaned retail export and drop its first column in one step.
# NOTE(review): the dropped column is presumably a row index saved with the
# CSV by an earlier to_csv call -- confirm against the file.
df = pd.read_csv('Data/uci_online_retail_cleaned.csv').iloc[:, 1:]
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,ProductVariations
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,2
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,1
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,1
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,2
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,2


## Segmenting customers with RFM
Before we dive into Customer Lifetime Value, let’s take a look at the foundational elements of analyzing customer value: **recency, frequency, and monetary value (RFM).**


- **Recency:** the last time that a customer made a purchase.

A customer who has made a purchase recently is more likely to make a repeat purchase than a customer who hasn’t made a purchase in a long time.


- **Frequency:** how many times a customer has made a purchase within a given time frame.

A customer who makes purchases often is more likely to continue to come back than a customer who rarely makes purchases.


- **Monetary value:** the amount of money a customer has spent within that same time frame.

A customer who makes larger purchases is more likely to return than a customer who spends less.

Sources:

- https://www.shopify.com/blog/customer-lifetime-value
- https://www.investopedia.com/terms/r/rfm-recency-frequency-monetary-value.asp

In [21]:
def recency(df, reference_date=None):
    """Return ``df`` with a per-customer ``RECENCY`` column (a Timedelta).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``CustomerID`` and ``InvoiceDate`` (datetimes or strings
        that ``pd.to_datetime`` can parse). It is not modified.
    reference_date : datetime-like, optional
        Snapshot date to measure from. When given, ``RECENCY`` is
        ``reference_date`` minus the customer's latest ``InvoiceDate``.
        When omitted, each invoice is measured against 23:59:59 on the last
        day of its own calendar month and the smallest gap per customer is
        kept.

    Returns
    -------
    pandas.DataFrame
        A left merge of ``df`` with the per-customer value. If ``RECENCY``
        already exists, a message is printed and ``df`` itself is returned.

    NOTE(review): the default measures the distance to the end of the
    purchase month, not the time since the customer's last purchase. Pass
    ``reference_date`` for the conventional RFM definition.
    """
    name = 'RECENCY'
    if name in df.columns:
        print('already calculated recency (in ' + name + ' column)')
        return df

    # Parse without an explicit format: the old format="%Y %m" did not match
    # full timestamps and is rejected by pandas versions that enforce formats.
    invoice_ts = pd.to_datetime(df['InvoiceDate'])

    if reference_date is None:
        # Midnight of the invoice day -> last day of that month -> 23:59:59.
        # Datetime arithmetic replaces the earlier string slicing, which
        # assumed the string form always ended in an 8-character time.
        cutoff = (
            invoice_ts.dt.normalize()
            + pd.offsets.MonthEnd(0)
            + pd.Timedelta(hours=23, minutes=59, seconds=59)
        )
    else:
        cutoff = pd.Timestamp(reference_date)

    # Only the two needed columns are materialised, not a copy of the frame.
    gaps = df[['CustomerID']].assign(**{name: cutoff - invoice_ts})
    per_customer = gaps.groupby(by='CustomerID', as_index=False).min()

    return pd.merge(df, per_customer, on='CustomerID', how='left')

In [22]:
def frequency(df, order_col=None):
    """Return ``df`` with a per-customer ``FREQUENCY`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``CustomerID`` and ``InvoiceDate``. It is not modified.
    order_col : str, optional
        Column identifying an order (for example ``'InvoiceNo'``). When
        given, ``FREQUENCY`` is the number of distinct values of that column
        per customer. When omitted, it is the number of rows with a non-null
        ``InvoiceDate`` per customer.

    Returns
    -------
    pandas.DataFrame
        A left merge of ``df`` with the per-customer count. If ``FREQUENCY``
        already exists, a message is printed and ``df`` itself is returned.

    NOTE(review): if each row is one invoice line rather than one purchase,
    the default overstates purchase frequency; pass ``order_col`` then.
    """
    name = 'FREQUENCY'
    if name in df.columns:
        print('already calculated frequency (in ' + name + ' column)')
        return df

    by_customer = df.groupby('CustomerID')
    if order_col is None:
        counts = by_customer['InvoiceDate'].count()
    else:
        counts = by_customer[order_col].nunique()

    # Back to a two-column frame (CustomerID, FREQUENCY) for the merge.
    per_customer = counts.rename(name).reset_index()
    return pd.merge(df, per_customer, on='CustomerID', how='left')

In [23]:
def money(df):
    """Return ``df`` with a per-customer ``MONEY`` column.

    ``MONEY`` is the sum of ``UnitPrice * Quantity`` over all of a customer's
    rows, attached to every row of that customer through a left merge on
    ``CustomerID``. ``df`` is not modified.

    If ``MONEY`` already exists, a message is printed and ``df`` itself is
    returned.
    """
    name = 'MONEY'
    if name in df.columns:
        # This message used to say "frequency" (copy-paste slip).
        print('already calculated money (in ' + name + ' column)')
        return df

    line_totals = df[['CustomerID']].assign(**{name: df['UnitPrice'] * df['Quantity']})
    per_customer = line_totals.groupby(by='CustomerID', as_index=False).sum()

    return pd.merge(df, per_customer, on='CustomerID', how='left')

In [24]:
# Run the three RFM helpers in order: recency, then frequency, then money.
df = df.pipe(recency).pipe(frequency).pipe(money)
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,ProductVariations,RECENCY,FREQUENCY,MONEY
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,2,29 days 08:32:59,297,5391.21
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,1,29 days 08:32:59,297,5391.21
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,1,29 days 08:32:59,297,5391.21
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,2,29 days 08:32:59,297,5391.21
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,2,29 days 08:32:59,297,5391.21
...,...,...,...,...,...,...,...,...,...,...,...,...
394266,548150,84978,HANGING HEART JAR T-LIGHT HOLDER,35,2011-03-29 12:55:00,1.06,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05
394267,550828,84978,HANGING HEART JAR T-LIGHT HOLDER,5,2011-04-21 10:19:00,1.25,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05
394268,552537,84978,HANGING HEART JAR T-LIGHT HOLDER,35,2011-05-10 10:11:00,1.06,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05
394269,566624,84978,HANGING HEART JAR T-LIGHT HOLDER,11,2011-09-14 09:36:00,1.25,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05


In [25]:
import numpy as np
# Store RECENCY (a timedelta) as a plain float number of seconds.
df['NEW RECENCY'] = df['RECENCY'].dt.total_seconds()
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,ProductVariations,RECENCY,FREQUENCY,MONEY,NEW RECENCY
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom,2,29 days 08:32:59,297,5391.21,2536379.0
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,1,29 days 08:32:59,297,5391.21,2536379.0
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom,1,29 days 08:32:59,297,5391.21,2536379.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,2,29 days 08:32:59,297,5391.21,2536379.0
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom,2,29 days 08:32:59,297,5391.21,2536379.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
394266,548150,84978,HANGING HEART JAR T-LIGHT HOLDER,35,2011-03-29 12:55:00,1.06,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05,29459.0
394267,550828,84978,HANGING HEART JAR T-LIGHT HOLDER,5,2011-04-21 10:19:00,1.25,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05,29459.0
394268,552537,84978,HANGING HEART JAR T-LIGHT HOLDER,35,2011-05-10 10:11:00,1.06,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05,29459.0
394269,566624,84978,HANGING HEART JAR T-LIGHT HOLDER,11,2011-09-14 09:36:00,1.25,17315.0,United Kingdom,1,0 days 08:10:59,470,6237.05,29459.0


## Customer lifetime value formula
To calculate the customer lifetime value for each of your customer segments, you’ll need to track down three key pieces of data within your pre-established timeframe: **average order value, purchase frequency, and customer value.**

- **Average order value:** average amount of money a customer spends every time they place an order, i.e. take total revenue and divide it by the total number of orders.

- **Purchase frequency:** average number of orders placed by each customer, i.e. divide the total number of orders by the total number of unique customers.

- **Customer value:** average monetary value each customer brings to your business during the time frame, i.e. multiply your average order value by your purchase frequency. This notebook uses this figure as the customer lifetime value (CLV).

Sources:

- https://stacktome.com/blog/diving-deeper-into-customer-segmentation-rfm-and-clv
- https://www.shopify.com/blog/customer-lifetime-value

In [26]:
def getOrdVal(df, order_col=None):
    """Return the average order value: total revenue / number of orders.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``UnitPrice`` and ``Quantity``. It is not modified.
    order_col : str, optional
        Column identifying an order (for example ``'InvoiceNo'``). When
        given, the number of orders is the count of distinct values in that
        column. When omitted, every row counts as one order.

    Raises
    ------
    ZeroDivisionError
        If there are no orders to divide by.

    NOTE(review): if each row is one invoice line, the default is revenue
    per line, not per order. If you pass ``order_col`` here, pass the same
    value to ``getPurFre`` so the two figures stay consistent.
    """
    # Vectorised sum; no copy of the frame and no Python-level loop.
    # Unlike the builtin sum, Series.sum skips NaN line totals.
    total_revenue = (df['UnitPrice'] * df['Quantity']).sum()
    total_orders = len(df) if order_col is None else df[order_col].nunique()

    if total_orders == 0:
        raise ZeroDivisionError('cannot compute average order value: df has no orders')
    return total_revenue / total_orders

In [27]:
# Average order value for the whole frame (total revenue divided by the number of rows).
getOrdVal(df)

21.221299406948052

In [28]:
def getPurFre(df, order_col=None):
    """Return the purchase frequency: number of orders / number of customers.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``CustomerID``. It is not modified.
    order_col : str, optional
        Column identifying an order (for example ``'InvoiceNo'``). When
        given, the number of orders is the count of distinct values in that
        column. When omitted, every row counts as one order.

    Raises
    ------
    ZeroDivisionError
        If ``df`` contains no customers.

    NOTE(review): if each row is one invoice line, the default is lines per
    customer, not orders per customer. If you pass ``order_col`` here, pass
    the same value to ``getOrdVal``.
    """
    total_orders = len(df) if order_col is None else df[order_col].nunique()
    # dropna=False matches len(Series.unique()): a missing CustomerID counts
    # as one customer.
    total_customers = df['CustomerID'].nunique(dropna=False)

    if total_customers == 0:
        raise ZeroDivisionError('cannot compute purchase frequency: df has no customers')
    return total_orders / total_customers

In [29]:
# Purchase frequency for the whole frame (number of rows divided by the number of distinct CustomerID values).
getPurFre(df)

91.22420175844516

In [30]:
def getCLV(df):
    """Return the customer value of ``df``: average order value times purchase frequency."""
    average_order_value = getOrdVal(df)
    purchase_frequency = getPurFre(df)
    return average_order_value * purchase_frequency

In [31]:
# Customer value for the whole frame: getOrdVal(df) * getPurFre(df).
getCLV(df)

1935.8960986758018

In [32]:
# Save the frame, including the RECENCY / FREQUENCY / MONEY / NEW RECENCY columns.
# to_csv's default index=True also writes the row index as an unnamed first column.
df.to_csv('Data/uci_online_retail_cleaned_CLV.csv')

## Ways to Predict CLV

- historic approach

- regression approach
    
https://www.analyticsvidhya.com/blog/2020/10/a-definitive-guide-for-predicting-customer-lifetime-value-clv/