# Customer Lifetime Value With Buy Till You Die Models

In [5]:
# Import Important Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

In [6]:
#Import and preprocess dataset
df = pd.read_csv("uk_dataset_full.csv")
df.columns = df.columns.str.replace(' ', '')
df.rename(columns={"Price":"UnitPrice"},inplace=True)
df.head(5)

Unnamed: 0,Unnamed:0,Invoice,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085.0,United Kingdom
4,4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom


In [11]:
#Remove return orders and cancelled orders
# Handling Return Orders
# Extracting rows where InvoiceNo starts with "C"
data_raw = df.copy()
cancelled_orders = data_raw[data_raw['Invoice'].astype(str).str.startswith("C")]

# Create a temporary DataFrame with the columns we want to match on, and also negate the 'Quantity' column
cancelled_orders['Quantity'] = -cancelled_orders['Quantity']

# Merge the original DataFrame with the temporary DataFrame on the columns we want to match
merged_data = pd.merge(data_raw, cancelled_orders[['CustomerID', 'StockCode', 'Quantity', 'UnitPrice']], 
                       on=['CustomerID', 'StockCode', 'Quantity', 'UnitPrice'], 
                       how='left', indicator=True)

# Filter out rows where the merge found a match, and also filter out the original return orders
data_raw = merged_data[(merged_data['_merge'] == 'left_only') & (~merged_data['Invoice'].astype(str).str.startswith("C"))]

# Drop the indicator column
data_raw = data_raw.drop(columns=['_merge'])

# Selecting relevant features and calculating total sales
features = ['CustomerID', 'Invoice', 'InvoiceDate', 'StockCode','Quantity', 'UnitPrice', 'Country']
data = data_raw[features]
data['TotalSales'] = data['Quantity'].multiply(data['UnitPrice'])

# Removing transactions with missing customer IDs as they don't contribute to individual customer behavior
data = data[data['CustomerID'].notna()]
data['CustomerID'] = data['CustomerID'].astype(int).astype(str)
data.head()

Unnamed: 0,CustomerID,Invoice,InvoiceDate,StockCode,Quantity,UnitPrice,Country,TotalSales
0,13085,489434,2009-12-01 07:45:00,85048,12,6.95,United Kingdom,83.4
1,13085,489434,2009-12-01 07:45:00,79323P,12,6.75,United Kingdom,81.0
2,13085,489434,2009-12-01 07:45:00,79323W,12,6.75,United Kingdom,81.0
3,13085,489434,2009-12-01 07:45:00,22041,48,2.1,United Kingdom,100.8
4,13085,489434,2009-12-01 07:45:00,21232,24,1.25,United Kingdom,30.0


In [12]:
df2 = data[["Invoice","StockCode","Quantity","InvoiceDate","UnitPrice","CustomerID"]]
df2.head()


Unnamed: 0,Invoice,StockCode,Quantity,InvoiceDate,UnitPrice,CustomerID
0,489434,85048,12,2009-12-01 07:45:00,6.95,13085
1,489434,79323P,12,2009-12-01 07:45:00,6.75,13085
2,489434,79323W,12,2009-12-01 07:45:00,6.75,13085
3,489434,22041,48,2009-12-01 07:45:00,2.1,13085
4,489434,21232,24,2009-12-01 07:45:00,1.25,13085


In [13]:
df.head()

Unnamed: 0,Unnamed:0,Invoice,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085.0,United Kingdom
4,4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom


In [14]:
df2["TotalAmount"] = df2["UnitPrice"].multiply(df2["Quantity"])
df2 = df2[["Invoice","StockCode","InvoiceDate","TotalAmount","CustomerID"]]
df2.head(5)

Unnamed: 0,Invoice,StockCode,InvoiceDate,TotalAmount,CustomerID
0,489434,85048,2009-12-01 07:45:00,83.4,13085
1,489434,79323P,2009-12-01 07:45:00,81.0,13085
2,489434,79323W,2009-12-01 07:45:00,81.0,13085
3,489434,22041,2009-12-01 07:45:00,100.8,13085
4,489434,21232,2009-12-01 07:45:00,30.0,13085


In [15]:
from lifetimes import GammaGammaFitter
from lifetimes import BetaGeoFitter
from lifetimes.plotting import plot_frequency_recency_matrix
from lifetimes.utils import summary_data_from_transaction_data

# create summary data from transaction data
summary1 = summary_data_from_transaction_data(df2, 
                                             customer_id_col = 'CustomerID', 
                                             datetime_col = 'InvoiceDate', 
                                             monetary_value_col='TotalAmount',
                                             observation_period_end = max(df["InvoiceDate"]))

summary1 = summary1[summary1["monetary_value"] > 0]

# fit the BG/NBD model
bgf1 = BetaGeoFitter(penalizer_coef=0.0)
bgf1.fit(summary1['frequency'], summary1['recency'], summary1['T'])

# fit the Gamma-Gamma submodel
ggf1 = GammaGammaFitter(penalizer_coef=0.0)
ggf1.fit(summary1['frequency'], summary1['monetary_value'])


# predict customer lifetime value
summary1['predicted_purchases'] = bgf1.predict(30, summary1['frequency'], summary1['recency'], summary1['T'])
summary1['predicted_clv'] = ggf1.customer_lifetime_value(bgf1, 
                                                       summary1['frequency'],
                                                       summary1['recency'],
                                                       summary1['T'],
                                                       summary1['monetary_value'],
                                                       time=1, #the lifetime expected for the user in months
                                                       freq='D', 
                                                       discount_rate=0.01)
# Calculate the probability that a customer is alive
summary1['probability_alive'] = bgf1.conditional_probability_alive(
    summary1['frequency'], 
    summary1['recency'], 
    summary1['T']
)
summary1["estimated_monetary_value"] = ggf1.conditional_expected_average_profit(
        summary1['frequency'],
        summary1['monetary_value']
    )
summary1.head()

Unnamed: 0_level_0,frequency,recency,T,monetary_value,predicted_purchases,predicted_clv,probability_alive,estimated_monetary_value
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
12346,7.0,400.0,725.0,11066.637143,0.151909,1431.931083,0.493155,9520.529482
12347,7.0,402.0,404.0,717.398571,0.494234,329.199287,0.98498,672.740683
12348,4.0,363.0,438.0,449.31,0.288289,125.652064,0.945105,440.212758
12349,3.0,571.0,589.0,1120.056667,0.188502,171.430749,0.963455,918.531323
12352,8.0,356.0,392.0,338.26125,0.559256,192.407218,0.974185,347.481829


In [None]:
# Results Interpretation:
# predicted_purchases: The expected number of future purchases in a specified period (e.g., next 30 days).
# predicted_clv: The estimated lifetime value of a customer over a specified period (e.g., 1 month).
# probability_alive: The likelihood that the customer is still active and will make future purchases.
# estimated_monetary_value: The expected average profit from each transaction for the customer.

In [None]:
# Provided Metrics:
# Customer ID: 15117\n
# Frequency: 5.0
# Recency: 620.0
# Age (T): 634.0
# Monetary Value: 424.926
# Probability Alive: 0.977284
# Predicted Purchases: 0.2585
# Predicted CLV: 107.585076
# Estimated Monetary Value: 420.352535
# Interpretation:
# Customer ID 15117:
# 
# The unique identifier for the customer in the dataset.
# Frequency (5.0):
# 
# This customer has made 5 purchases during the observation period.
# Recency (620.0):
# 
# The number of days since the customer's last purchase. Given that the age of the customer is 634.0 days, it indicates that this customer made their last purchase relatively recently compared to their entire history with the business.
# Age (T) (634.0):
# 
# The number of days since the customer's first purchase.
# Monetary Value (424.926):
# 
# The average transaction value for this customer is approximately $424.93.
# Probability Alive (0.977284):
# 
# There is a 97.73% probability that this customer is still active and likely to make future purchases. This high probability suggests that the customer is very engaged and loyal.
# Predicted Purchases (0.2585):
# 
# The customer is expected to make approximately 0.2585 purchases in the next 30 days. This value, while less than 1, still indicates some likelihood of future purchases within the next month.
# Predicted CLV (107.585076):
# 
# The expected customer lifetime value over the next month is approximately $107.59. This metric takes into account the frequency of purchases, recency, and monetary value, providing an estimate of the revenue this customer is expected to generate in the near future.
# Estimated Monetary Value (420.352535):
# 
# The expected average profit per transaction for this customer is approximately $420.35. This indicates that when the customer makes a purchase, it is usually of high value.