# Bayesian Models

## BG/NBD (Beta-Geometric / Negative Binomial Distribution)

https://juanitorduz.github.io/bg_nbd_pymc/

The BG/NBD model predicts future purchasing patterns, which can then serve as an input to “lifetime value” calculations in the “non-contractual” setting (i.e., where opportunities for transactions are continuous and the time at which a customer becomes inactive is unobserved).

In [1]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from lifetimes.datasets import load_cdnow_summary
from lifetimes import BetaGeoFitter
import pymc3 as pm
from scipy.special import expit
import theano.tensor as tt

# Plot styling: apply the "bmh" style sheet first, then override the
# figure size and resolution on top of it.
plt.style.use("bmh")
plt.rcParams.update({"figure.figsize": [10, 6], "figure.dpi": 100})

# IPython-only setup (these lines are magics, not plain Python).
# Load the autoreload extension and set it to mode 2.
%load_ext autoreload
%autoreload 2
# Use SVG as the inline figure format.
%config InlineBackend.figure_format = "svg"

In [2]:
# Load the CDNOW summary dataset bundled with `lifetimes`; `index_col=[0]`
# is forwarded to the loader (presumably selecting the first column as the
# index -- confirm against the loader's documentation).
data_df = load_cdnow_summary(index_col=[0])
# Print the frame's column dtypes and non-null counts as a sanity check.
data_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2357 entries, 1 to 2357
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   frequency  2357 non-null   int64  
 1   recency    2357 non-null   float64
 2   T          2357 non-null   float64
dtypes: float64(2), int64(1)
memory usage: 73.7 KB


In [3]:
# Preview the first five rows of the summary frame.
data_df.head(5)

Unnamed: 0_level_0,frequency,recency,T
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2,30.43,38.86
2,1,1.71,38.86
3,0,0.0,38.86
4,0,0.0,38.86
5,0,0.0,38.86


In [4]:
# Pull the model inputs out of the summary frame as NumPy arrays.
n = data_df.shape[0]  # number of rows; used as the shape of the per-row latents
x = data_df["frequency"].to_numpy()
t_x = data_df["recency"].to_numpy()
T = data_df["T"].to_numpy()

# Element-wise int() helper. No longer needed to build `x_zero`, but kept
# defined so any other cell that still calls `int_vec` keeps working.
int_vec = np.vectorize(int)

# Convenient indicator: 1 where frequency > 0, else 0. Despite its name,
# `x_zero` flags the rows where `x` is NOT zero.
# `astype` does the conversion in one vectorized step and, unlike
# `np.vectorize(int)`, also works on an empty array.
x_zero = (x > 0).astype(int)

**Model Explanation**

<img src="bg_nbd_model_description.png" alt="drawing" width="400"/>

In [5]:
# Fit the BG/NBD model with the `lifetimes` package on the arrays extracted
# above. The fitted coefficients (r, alpha, a, b) are reused further down as
# reference lines in the trace plot of the Bayesian model.
bgf = BetaGeoFitter()
# Left as the last expression so the notebook displays the fitted model's repr.
bgf.fit(frequency=x, recency=t_x, T=T)

<lifetimes.BetaGeoFitter: fitted with 2357 subjects, a: 0.79, alpha: 4.41, b: 2.43, r: 0.24>

In [6]:
# Table of fitted coefficients with standard errors and 95% bounds.
bgf.summary

Unnamed: 0,coef,se(coef),lower 95% bound,upper 95% bound
r,0.242593,0.012557,0.217981,0.267205
alpha,4.413532,0.378221,3.672218,5.154846
a,0.792886,0.185719,0.428877,1.156895
b,2.425752,0.705345,1.043276,3.808229


### Full Bayesian Model

<img src="full_bayesian_bg-nbd.png" alt="drawing" width="500"/>

In [7]:
# Hierarchical BG/NBD model: population-level hyperparameters (a, b, alpha, r)
# and one (lam, p) pair per row of the data.
with pm.Model() as model_full:

    # hyper priors for the Beta params (a, b parametrize the prior on p)
    a = pm.HalfNormal(name="a", sigma=10)
    b = pm.HalfNormal(name="b", sigma=10)

    # hyper priors for the Gamma params (r, alpha parametrize the prior on lam)
    alpha = pm.HalfNormal(name="alpha", sigma=10)
    r = pm.HalfNormal(name="r", sigma=10)

    # per-row latent parameters: `shape=n` gives one entry per row of the data
    lam = pm.Gamma(name="lam", alpha=r, beta=alpha, shape=n)
    p = pm.Beta(name="p", alpha=a, beta=b, shape=n)

    def logp(x, t_x, T, x_zero):
        """Return the log-likelihood of the observed data, summed over rows.

        Per row the likelihood is written as A * B, with

            log A = x * log(1 - p) + x * log(lam) - lam * t_x
            B     = exp(-lam * (T - t_x)) + [x > 0] * p / (1 - p)

        The function closes over the model variables `lam` and `p`; its
        arguments are the entries of the `observed` dict passed to
        `pm.DensityDist` below.
        """
        log_term_a = x * tt.log(1 - p) + x * tt.log(lam) - t_x * lam
        # log of the first summand of B
        term_b_1 = -lam * (T - t_x)
        # log of p / (1 - p), the second summand of B
        term_b_2 = tt.log(p) - tt.log(1 - p)
        # Rows with x_zero != 0 (i.e. x > 0) take log(exp(b_1) + exp(b_2));
        # all other rows keep only the first summand.
        log_term_b = pm.math.switch(x_zero, pm.math.logaddexp(term_b_1, term_b_2), term_b_1)

        return tt.sum(log_term_a) + tt.sum(log_term_b)

    # Custom likelihood: the keys of `observed` match the parameter names of `logp`.
    likelihood = pm.DensityDist(
        name="likelihood",
        logp=logp,
        observed = {"x":x, "t_x":t_x, "T":T, "x_zero": x_zero}
    )

In [8]:
# We now can run the sampler:
# NOTE(review): the recorded run of this cell ended with
# `ValueError: Not enough samples to build a trace.`, so `trace_full` was not
# produced and the cells that use it were left unexecuted. Possibly the run
# was interrupted early -- confirm before relying on the settings below.

with model_full:
    trace_full = pm.sample(
        tune=3000,  # tuning iterations per chain
        draws=6000,  # draws per chain
        chains=4,
        target_accept=0.95,
        return_inferencedata=True  # the result is passed to the ArviZ calls in later cells
    )

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 2 jobs)
NUTS: [p, lam, r, alpha, b, a]


ValueError: Not enough samples to build a trace.

In [None]:
# Trace plot of the four hyperparameters, with one reference line per
# parameter drawn at the corresponding coefficient from `bgf.summary`.
reference_lines = [
    (param_name, {}, [coef_value])
    for param_name, coef_value in bgf.summary["coef"].items()
]
axes = az.plot_trace(
    data=trace_full,
    var_names=["a", "b", "alpha", "r"],
    lines=reference_lines,
    compact=True,
    backend_kwargs={"figsize": (12, 9), "layout": "constrained"},
)
# All panels share one figure; fetch it from the first axis to set the title.
fig = axes[0][0].get_figure()
fig.suptitle("Full BG/NBD Model Trace");

In [None]:
# Pair plot of the four hyperparameters from the sampled trace.
axes = az.plot_pair(
    data=trace_full,
    var_names=["a", "b", "alpha", "r"],
    figsize=(12, 12),
)
# Fetch the enclosing figure from the first axis to add the title.
fig = axes[0][0].get_figure()
fig.suptitle(
    "Full BG/NBD Model Parameters Pairplot",
    y=0.95,
    fontsize=16,
);

In [None]:
# ArviZ summary table for the four hyperparameters of the full model.
az.summary(data=trace_full, var_names=["a", "b", "alpha", "r"])

## Gamma-Gamma Model of Monetary Value in PyMC

https://juanitorduz.github.io/gamma_gamma_pymc/