-
Notifications
You must be signed in to change notification settings - Fork 371
/
gamma_gamma_fitter.py
299 lines (250 loc) · 9.94 KB
/
gamma_gamma_fitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# -*- coding: utf-8 -*-
"""Gamma-Gamma Model."""
from __future__ import print_function
from __future__ import division
import warnings
import pandas as pd
from autograd import numpy as np
from pandas import DataFrame
from autograd.scipy.special import gammaln
from . import BaseFitter
from ..utils import _check_inputs, _customer_lifetime_value
class GammaGammaFitter(BaseFitter):
    """
    Gamma-Gamma model of the monetary value of customer transactions.

    The model estimates the average monetary value of a customer's
    transactions. The implementation follows the Excel spreadsheet that
    accompanies [3]_; the derivation and an evaluation are given in [4]_.

    Parameters
    ----------
    penalizer_coef: float
        Coefficient applied to an l2 norm on the parameters.

    Attributes
    ----------
    penalizer_coef: float
        Coefficient applied to an l2 norm on the parameters.
    params_: :obj: Series
        The fitted parameters ``p``, ``q`` and ``v``; set by `fit`.
    data: :obj: DataFrame
        The monetary values, frequencies and weights given to `fit`.
    variance_matrix_: :obj: DataFrame
        Variance matrix of the parameters; set by `fit`.
    confidence_intervals_: :obj: DataFrame
        95% confidence intervals of the parameters; set by `fit`.
    standard_errors_: :obj: Series
        Standard errors of the parameters; set by `fit`.
    summary: :obj: DataFrame
        Information about the fitted parameters (not defined in this class).

    References
    ----------
    .. [3] http://www.brucehardie.com/notes/025/
       The Gamma-Gamma Model of Monetary Value.
    .. [4] Peter S. Fader, Bruce G. S. Hardie, and Ka Lok Lee (2005),
       "RFM and CLV: Using iso-value curves for customer base analysis",
       Journal of Marketing Research, 42 (November), 415-430.
    """

    def __init__(self, penalizer_coef=0.0):
        """
        Store the penalizer coefficient on the instance.

        Parameters
        ----------
        penalizer_coef: float, optional
            Coefficient applied to an l2 norm on the parameters. Default: 0.0
        """
        self.penalizer_coef = penalizer_coef

    @staticmethod
    def _negative_log_likelihood(log_params, frequency, avg_monetary_value, weights, penalizer_coef):
        """
        Penalized, weight-averaged negative log-likelihood of the model.

        The per-observation log-likelihood is equation (1a) of
        http://www.brucehardie.com/notes/025/ (Hardie's own implementation
        is shown on page 8 of that note).

        Parameters
        ----------
        log_params: array_like
            natural logarithms of the parameters, in the order (p, q, v).
        frequency: array_like
            purchase frequencies (x in the note).
        avg_monetary_value: array_like
            average monetary values (m in the note).
        weights: array_like
            multiplier applied to each observation's log-likelihood.
        penalizer_coef: float
            multiplier of the sum of squared (exponentiated) parameters.

        Returns
        -------
        float:
            minus the weighted log-likelihood sum divided by the sum of the
            weights, plus the penalizer term.

        Notes
        -----
        Each call installs an "ignore" filter for ``FutureWarning`` through
        ``warnings.simplefilter``; the filter is not removed afterwards.
        """
        warnings.simplefilter(action="ignore", category=FutureWarning)

        params = np.exp(log_params)
        p, q, v = params
        x, m = frequency, avg_monetary_value

        # The terms are accumulated in the order in which they appear in
        # equation (1a).
        log_likelihood = gammaln(p * x + q) - gammaln(p * x) - gammaln(q)
        log_likelihood = log_likelihood + q * np.log(v)
        log_likelihood = log_likelihood + (p * x - 1) * np.log(m)
        log_likelihood = log_likelihood + (p * x) * np.log(x)
        log_likelihood = log_likelihood - (p * x + q) * np.log(x * m + v)
        weighted_log_likelihood = log_likelihood * weights

        l2_penalty = penalizer_coef * sum(params ** 2)
        averaged_negative_ll = -weighted_log_likelihood.sum() / weights.sum()
        return averaged_negative_ll + l2_penalty

    def conditional_expected_average_profit(self, frequency=None, monetary_value=None):
        """
        Conditional expectation of the average profit per transaction.

        Implements equation (5) of http://www.brucehardie.com/notes/025/
        for a group of one or more customers.

        Parameters
        ----------
        frequency: array_like, optional
            the customers' frequencies. Defaults to the ``"frequency"``
            column of ``self.data``.
        monetary_value: array_like, optional
            the customers' monetary values. Defaults to the
            ``"monetary_value"`` column of ``self.data``.

        Returns
        -------
        array_like:
            The conditional expectation of the average profit per transaction

        Notes
        -----
        The population mean is ``v * p / (q - 1)``, which is negative for
        q < 1 and undefined at q = 1.
        """
        frequency = self.data["frequency"] if frequency is None else frequency
        monetary_value = self.data["monetary_value"] if monetary_value is None else monetary_value

        p, q, v = self._unload_params("p", "q", "v")

        # The result blends each customer's observed value with the
        # population mean; the share given to the observed value grows
        # with the customer's frequency.
        prior_mean = v * p / (q - 1)
        observed_share = p * frequency / (p * frequency + q - 1)
        return (1 - observed_share) * prior_mean + observed_share * monetary_value

    def fit(
        self,
        frequency,
        monetary_value,
        weights=None,
        initial_params=None,
        verbose=False,
        tol=1e-7,
        index=None,
        q_constraint=False,
        **kwargs
    ):
        """
        Fit the Gamma/Gamma model to the data.

        Parameters
        ----------
        frequency: array_like
            the frequency vector of customers' purchases
            (denoted x in literature).
        monetary_value: array_like
            the monetary value vector of customer's purchases
            (denoted m in literature).
        weights: None or array_like
            Number of customers with a given frequency/monetary_value
            pattern; an array of ones is used when not specified. Fader and
            Hardie condense the individual RFM matrix into all observed
            combinations of frequency/monetary_value, so the
            log-likelihood of each pattern is multiplied by the number of
            customers that share it instead of being computed once per
            customer.
        initial_params: array_like, optional
            initial parameters for the fitter.
        verbose: bool, optional
            set to true to print out convergence diagnostics.
        tol: float, optional
            tolerance for termination of the function minimization process.
        index: array_like, optional
            index of the DataFrame stored in ``self.data``.
        q_constraint: bool, optional
            when q < 1 the population mean is negative, which leads to
            negative CLV outputs. If True, the second optimization
            variable is given a lower bound of 0. The optimization
            variables are exponentiated to obtain the parameters, so this
            presumably restricts q to q >= 1 -- confirm against ``_fit``.
        kwargs:
            key word arguments forwarded to ``self._fit``; documented as
            the options dict of scipy.optimize.minimize.

        Returns
        -------
        GammaGammaFitter
            fitted and with parameters estimated
        """
        _check_inputs(frequency, monetary_value=monetary_value)

        frequency = np.asarray(frequency).astype(float)
        monetary_value = np.asarray(monetary_value).astype(float)
        weights = np.ones_like(frequency, dtype=int) if weights is None else np.asarray(weights)

        if q_constraint:
            bounds = ((None, None), (0, None), (None, None))
        else:
            bounds = None

        objective_args = (frequency, monetary_value, weights, self.penalizer_coef)
        # The literal 3 is presumably the number of parameters (p, q, v)
        # -- confirm against ``_fit``.
        fit_result = self._fit(objective_args, initial_params, 3, verbose, tol=tol, bounds=bounds, **kwargs)
        log_params, self._negative_log_likelihood_, self._hessian_ = fit_result

        columns = {"monetary_value": monetary_value, "frequency": frequency, "weights": weights}
        self.data = DataFrame(columns, index=index)

        # The optimizer works on the logarithms of the parameters.
        self.params_ = pd.Series(np.exp(log_params), index=["p", "q", "v"])

        self.variance_matrix_ = self._compute_variance_matrix()
        self.standard_errors_ = self._compute_standard_errors()
        self.confidence_intervals_ = self._compute_confidence_intervals()

        return self

    def customer_lifetime_value(
        self,
        transaction_prediction_model,
        frequency,
        recency,
        T,
        monetary_value,
        time=12,
        discount_rate=0.01,
        freq="D"
    ):
        """
        Return the customer lifetime value.

        Computes the average lifetime value for a group of one or more
        customers, using the Gamma-Gamma estimate of each customer's
        monetary value.

        Parameters
        ----------
        transaction_prediction_model: model
            the model used to predict future transactions; the literature
            uses Pareto/NBD models, but a different model such as a
            beta-geometric one can be used as well.
        frequency: array_like
            the frequency vector of customers' purchases
            (denoted x in literature).
        recency: array_like
            the recency vector of customers' purchases
            (denoted t_x in literature).
        T: array_like
            customers' age (time units since first purchase)
        monetary_value: array_like
            the monetary value vector of customer's purchases
            (denoted m in literature).
        time: float, optional
            the lifetime expected for the user in months. Default: 12
        discount_rate: float, optional
            the monthly adjusted discount rate. Default: 0.01
        freq: string, optional
            {"D", "H", "M", "W"} for day, hour, month, week. The unit of
            time in which T is measured.

        Returns
        -------
        Series:
            the result of ``_customer_lifetime_value``, documented as a
            Series with customer ids as index and the estimated customer
            lifetime values as values.
        """
        # NOTE(review): np.asarray drops any pandas index carried by the
        # inputs -- confirm that ``_customer_lifetime_value`` does not
        # need one.
        frequency = np.asarray(frequency)
        recency = np.asarray(recency)
        T = np.asarray(T)
        monetary_value = np.asarray(monetary_value)

        # Replace the observed monetary values with the Gamma-Gamma estimates.
        expected_profit = self.conditional_expected_average_profit(frequency, monetary_value)

        return _customer_lifetime_value(
            transaction_prediction_model,
            frequency,
            recency,
            T,
            expected_profit,
            time,
            discount_rate,
            freq=freq,
        )