# Subscription Covariates Model

Also known as the G2G + Covariates Model. Developed by Fader & Hardie in ["Incorporating Time-Varying Covariates in
a Simple Mixture Model for
Discrete-Time Duration Data"](https://www.brucehardie.com/notes/037/time-varying_covariates_in_BG.pdf).

### Setup Notebook

In [1]:
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
import seaborn as sb
import xarray as xr
from fastprogress.fastprogress import progress_bar
import pymc as pm
import pytensor as pt

from pymc_marketing import clv
from pymc_marketing.prior import Prior

#set flag to fix open issue
pt.config.cxx = '/usr/bin/clang++'

# Plotting configuration
az.style.use("arviz-darkgrid")
plt.rcParams["figure.figsize"] = [12, 7]
plt.rcParams["figure.dpi"] = 100
plt.rcParams["figure.facecolor"] = "white"

%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = "retina"



### Generate Data

In [60]:
# Set random seed
# `pm.draw` seeds its own generators from `random_seed`; seeding NumPy's global
# state alone does not make these draws reproducible, so SEED is passed explicitly below.
SEED = 45

# Still seed NumPy's legacy global state, as the original cell did, in case
# other cells rely on `np.random.*` functions.
np.random.seed(SEED)

# `np.random.seed` returns None, so `rng` was always None (i.e. "use the default
# generator"). The name is kept, still None, in case other cells reference it.
rng = None

# Number of customers
N = 1000

# Offset the geometric draw is subtracted from to get each customer's number of
# observed periods. Geometric draws start at 1, so a customer has at most 12 periods.
PERIOD_OFFSET = 13

# customer IDs: 0, 1, ..., N - 1
customer_id = np.arange(N)

# define probability distributions
demographic_dist = pm.Bernoulli.dist(p=0.5)
t_periods_dist = pm.Geometric.dist(p=0.5)
active_dist = pm.Bernoulli.dist(p=0.3)
n_purchases_dist = pm.Poisson.dist(mu=1)
avg_purchase_price_dist = pm.Gamma.dist(alpha=10, beta=1)

# draws for each customer (a distinct seed per variable keeps the draws independent)
demographic = pm.draw(demographic_dist, draws=N, random_seed=SEED)
# A geometric draw of 13 or more would give a zero or negative period count;
# floor at 1 so every customer has at least one observed period.
t_periods = np.maximum(
    PERIOD_OFFSET - pm.draw(t_periods_dist, draws=N, random_seed=SEED + 1),
    1,
)
active = pm.draw(active_dist, draws=N, random_seed=SEED + 2)

In [61]:
# create time periods for each customer
# Repeat every customer ID once per observed period in a single vectorized call.
# (Growing the array with np.concatenate inside a loop re-copies it on every
# iteration, which is quadratic in the number of rows.)
# The cast keeps the float64 dtype the original loop produced, since it started
# from an empty float array.
customer_id_col = np.repeat(customer_id, t_periods).astype(np.float64)

# Total number of (customer, period) rows
n_time_periods = len(customer_id_col)

In [63]:
# draws for each time period
# Seeds are passed explicitly: `pm.draw` seeds its generators from `random_seed`,
# and without it every run produces different data. Distinct seeds keep the two
# series independent of each other.
n_purchases = pm.draw(n_purchases_dist, draws=n_time_periods, random_seed=4501)
avg_purchase_price = pm.draw(
    avg_purchase_price_dist, draws=n_time_periods, random_seed=4502
)

In [64]:
# Customer-level table: one row per customer ID with its static attributes.
customer_df = pd.DataFrame(
    dict(
        customer_id=customer_id,
        demographic=demographic,
        active=active,
    )
)

# Period-level table: one row per (customer, period) with that period's draws.
time_period_df = pd.DataFrame(
    dict(
        customer_id=customer_id_col,
        monthly_purchases=n_purchases,
        avg_price=avg_purchase_price,
    )
)

In [65]:
# Display the customer-level frame (columns: customer_id, demographic, active).
customer_df

Unnamed: 0,customer_id,demographic,active
0,0,0,1
1,1,1,0
2,2,1,0
3,3,0,0
4,4,1,1
...,...,...,...
995,995,1,0
996,996,0,0
997,997,1,0
998,998,0,0


In [66]:
# Display the period-level frame (columns: customer_id, monthly_purchases, avg_price).
time_period_df

Unnamed: 0,customer_id,monthly_purchases,avg_price
0,0.0,0,8.218666
1,0.0,2,4.667740
2,0.0,3,9.887910
3,0.0,3,11.778152
4,0.0,0,5.806465
...,...,...,...
11025,999.0,2,9.951979
11026,999.0,2,11.650709
11027,999.0,2,8.623908
11028,999.0,1,7.000588
