In [0]:
!pip install lifetimes

In [0]:
####################PACKAGES#######################
import pandas as pd
import numpy as np
from datetime import datetime
from lifetimes.utils import summary_data_from_transaction_data
from lifetimes import ParetoNBDFitter,  GammaGammaFitter
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [0]:
# Read data into pandas dataframe
data = pd.read_csv("clvdata1.csv",sep=',')
data.head(3)

Unnamed: 0,created_at,mandate_id,monetary
0,2016-12-29,890e1a010bc34c97bba55bffcf94d3e6,1.8
1,2016-05-27,80a4749041b043bb9adec2ba9feeb3ab,9.9
2,2016-12-29,38190c91f77b4227a935e97584561dce,37.8


In [0]:
def get_features(data):
#data: dataframe with timestamp, customer id, and amount of payment

    data=data.dropna(axis=1,how='all')
    # Define required variables
    data = data[['created_at','mandate_id','monetary']]
    # pre process date
    data['created_at'] = pd.to_datetime(data.created_at, format = '%Y-%m-%d')
    data['created_at'] = pd.to_datetime(data.created_at.dt.date)
    # Apply lifetimes packages
    data = summary_data_from_transaction_data(data,
                                             'mandate_id', 
                                             'created_at',
                                             monetary_value_col = 'monetary',
                                             freq='W') # Customers in this vertical are likely to do transaction on weekly basis
    # Only check mandate id that is already activated
    data = data.loc[data.frequency > 0, :]
    data = data.loc[data.monetary_value > 0, :]
    return data


In [0]:
data = get_features(data)
data.head(5)

Unnamed: 0_level_0,frequency,recency,T,monetary_value
mandate_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
002dba7356b64f7d835a866eabc22ed3,6.0,27.0,44.0,1329.0
019c07f9629d499db8d6c896945f1a72,8.0,35.0,35.0,26.875
02069745312447e8800281239d96184b,13.0,35.0,35.0,115.261538
0277061e66504c26bb500de41c75dd6f,4.0,18.0,27.0,24.28
02af8475dc9c4fc698335bd3c71d86d8,8.0,29.0,29.0,168.19625


In [0]:
def predicted_transactions(data,t, t_unit):
  """
  The model is estimated with a recency-frequency matrix with n transaction opportunities.
  t: number of predicted period, used in the name unit with frequency of transactions
  """ 
  model = ParetoNBDFitter()
  model.fit(data['frequency'], 
              data['recency'], 
              data['T'])
  data['predicted_transactions'] = model.conditional_expected_number_of_purchases_up_to_time(t,
                                                                                               data['frequency'], 
                                                                                               data['recency'], 
                                                                                               data['T'])
  data.sort_values(by='predicted_transactions', ascending=False)
  print("expected number of transaction in " + str(t) +" "+ t_unit)
  return data

In [0]:
predicted_transactions(data,t=8,t_unit="week")

Unnamed: 0_level_0,frequency,recency,T,monetary_value,predicted_transactions
mandate_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
002dba7356b64f7d835a866eabc22ed3,6.0,27.0,44.0,1329.000000,1.051368
019c07f9629d499db8d6c896945f1a72,8.0,35.0,35.0,26.875000,1.787917
02069745312447e8800281239d96184b,13.0,35.0,35.0,115.261538,1.814474
0277061e66504c26bb500de41c75dd6f,4.0,18.0,27.0,24.280000,1.617440
02af8475dc9c4fc698335bd3c71d86d8,8.0,29.0,29.0,168.196250,1.795148
...,...,...,...,...,...
feb5fb097b8e4ccea6cb2612ff52ee2f,6.0,27.0,30.0,49.166667,1.758375
febc30e9b8e24b95bb5c50a323cbc799,3.0,10.0,35.0,931.853333,0.332807
ff378dfb3b0040b78b82ef20807e2671,5.0,22.0,23.0,18.000000,1.779805
ff667479d44342fc8f0e56cd3f0be16c,6.0,25.0,25.0,36.560000,1.789308


In [0]:
def expected_average_profit(data):
  ggf = GammaGammaFitter(penalizer_coef = 0)
  ggf.fit(data['frequency'],
          data['monetary_value'])
  data = ggf.conditional_expected_average_profit(data['frequency'],
                                        data['monetary_value'])
  data = pd.DataFrame(data)
  return data

In [0]:
expected_average_profit(data)

Unnamed: 0_level_0,0
mandate_id,Unnamed: 1_level_1
002dba7356b64f7d835a866eabc22ed3,1334.463266
019c07f9629d499db8d6c896945f1a72,29.743362
02069745312447e8800281239d96184b,117.077313
0277061e66504c26bb500de41c75dd6f,30.017240
02af8475dc9c4fc698335bd3c71d86d8,171.197867
...,...
feb5fb097b8e4ccea6cb2612ff52ee2f,53.020386
febc30e9b8e24b95bb5c50a323cbc799,941.793445
ff378dfb3b0040b78b82ef20807e2671,22.578580
ff667479d44342fc8f0e56cd3f0be16c,40.397865
