### IMPORTS

In [1]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az

pd.get_option("display.max_columns")

20

### READ DATA

In [2]:
anon_prices = pd.read_csv("anonymized_transactions.csv")

### DATA PREP

In [3]:
anon_prices[['transaction_date', 'product_level_1', 'product_level_2', 'zproduct_id']].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   transaction_date  999 non-null    object
 1   product_level_1   999 non-null    object
 2   product_level_2   999 non-null    object
 3   zproduct_id       999 non-null    object
dtypes: object(4)
memory usage: 31.3+ KB


In [4]:
anon_prices['transaction_month'] = pd.to_datetime(anon_prices['transaction_date']).dt.month

In [5]:
anon_prices[['transaction_month', 'product_level_1', 'product_level_2', 'zproduct_id']].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   transaction_month  999 non-null    int64 
 1   product_level_1    999 non-null    object
 2   product_level_2    999 non-null    object
 3   zproduct_id        999 non-null    object
dtypes: int64(1), object(3)
memory usage: 31.3+ KB


In [6]:
anon_prices[ 'transaction_month'].value_counts()

3     282
1     271
12    231
2     215
Name: transaction_month, dtype: int64

In [7]:
anon_prices.describe()

Unnamed: 0,sm_transaction_0002_key,anchor_weight,test_field_2,targetpricetag,startpricetag,revenue,quantity,profit,price_unit,price_normalizer,...,lee_spend_bin,test_me_bin,snk_spend_bin,cv_split_1,target_nv,start_nv,floor_nv,cv_split_3,cv_split_2,transaction_month
count,0.0,0.0,0.0,0.0,0.0,999.0,999.0,999.0,999.0,999.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,999.0
mean,,,,,,2754.158627,244.094285,735.029055,11.547703,8.207517,...,,,,,,,,,,4.323323
std,,,,,,5691.460171,406.173816,1524.541181,5.555201,3.790929,...,,,,,,,,,,4.277518
min,,,,,,0.0,2.0,-4268.482,0.0,1.71526,...,,,,,,,,,,1.0
25%,,,,,,607.38115,50.0,134.67155,7.515714,5.439222,...,,,,,,,,,,1.0
50%,,,,,,1369.2189,100.0,326.9655,10.6897,7.396065,...,,,,,,,,,,3.0
75%,,,,,,2900.37925,200.0,824.42235,14.766741,10.336887,...,,,,,,,,,,3.0
max,,,,,,76633.8019,4000.0,19461.298,69.91203,37.31292,...,,,,,,,,,,12.0


In [8]:
anon_prices.head(3)

Unnamed: 0,sm_transaction_0002_key,transaction_id,anchor_weight,transaction_date,test_field_2,targetpricetag,startpricetag,revenue,region_group,quantity,...,lee_spend_bin,test_me_bin,snk_spend_bin,cv_split_1,target_nv,start_nv,floor_nv,cv_split_3,cv_split_2,transaction_month
0,,2-039995466:25,,2019-12-03 00:00:00.000000,,,,2924.7128,B9,300.0,...,,,,,,,,,,12
1,,2-039997979:21,,2019-12-05 00:00:00.000000,,,,26427.2451,SK,3000.0,...,,,,,,,,,,12
2,,2-040000029:11,,2019-12-09 00:00:00.000000,,,,4188.5119,WLS,500.0,...,,,,,,,,,,12


### Data Prep: Bayesian Hierarchical Regression

In [9]:
df_params = {"hbr_b_mu": -0.25,
             "hbr_b_sig": 0.05,
             "Test_Time_Index": [12, 13],
             "Min_Elast": 0.5,
             "Max_Elast": 2.5}

hbr_b_mu = float(df_params['hbr_b_mu'])
hbr_b_sig = float(df_params['hbr_b_sig'])

anon_prices['MCMC_Partition'] = 'Global_Partition'

MCMC_Partition = np.unique(anon_prices['MCMC_Partition'])

bayes_hier = anon_prices[['MCMC_Partition',
                          'product_level_1',
                          'product_level_2',
                          'zproduct_id',
                          'transaction_month',
                          'quantity',
                          'revenue']]

bayes_tree = bayes_hier[['product_level_1',
                         'product_level_2',
                         'zproduct_id']].columns
tree_levels = len(bayes_tree)

### Metrics: Bayesian Hierarchical Regression: Statistics

In [10]:
groupby_list = ['product_level_1', 'product_level_2', 'zproduct_id', 'transaction_month']
quantity_list = ['product_level_1', 'product_level_2', 'zproduct_id', 'transaction_month', 'quantity']
revenue_list = ['product_level_1', 'product_level_2', 'zproduct_id', 'transaction_month', 'revenue']

bayes_quantity = bayes_hier[quantity_list].groupby(by=groupby_list).sum()
bayes_revenue = bayes_hier[revenue_list].groupby(by=groupby_list).sum()
bayes_avg_price = bayes_revenue['revenue'] / bayes_quantity['quantity']

bayesXmean = bayes_avg_price.mean()
bayesXstd = bayes_avg_price.std()

bayesYmean = bayes_quantity['quantity'].mean()
bayesYstd = bayes_quantity['quantity'].std()

bayesX_zscore =  (bayes_avg_price - bayesXmean) / bayesXstd
bayesY_zscore =  (bayes_quantity - bayesYmean) / bayesYstd

bayesX_zscore = bayesX_zscore.to_numpy()
bayesY_zscore = bayesY_zscore['quantity'].to_numpy()

### Metrics: Bayesian Hierarchical Regression: Levels

In [11]:
new_bayes = bayes_hier[['MCMC_Partition', 'product_level_1', 'product_level_2', 'zproduct_id']]
sort_by_list1 = ['product_level_1']
sort_by_list2 = ['product_level_1', 'product_level_2']
sort_by_list3 = ['product_level_1', 'product_level_2', 'zproduct_id']

In [12]:
# level 1
new_bayes['level1']= new_bayes.sort_values(by=sort_by_list1)\
                               .groupby(by=['MCMC_Partition'])\
                               .cumcount() + 1

# level 2
new_bayes['level2'] = new_bayes.sort_values(by=sort_by_list2)\
                               .groupby(by=['MCMC_Partition'])\
                               .cumcount() + 1

# level 3
new_bayes['level3'] = new_bayes.sort_values(by=sort_by_list3)\
                               .groupby(by=['MCMC_Partition'])\
                               .cumcount() + 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_bayes['level1']= new_bayes.sort_values(by=sort_by_list1)\
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_bayes['level2'] = new_bayes.sort_values(by=sort_by_list2)\


In [45]:
level_1 = new_bayes['level1'].to_numpy()
level_1_unique = np.unique(level_1)
level_1_count = len(level_1_unique)

level_2 = new_bayes['level2'].to_numpy()
level_2_link = new_bayes[['level1', 'level2']].copy().drop_duplicates()['level1'].to_numpy()
level_2_unique = np.unique(level_2)
level_2_count = len(level_2_unique)

level_3 = new_bayes['level3'].to_numpy()
level_3_link = new_bayes[['level2', 'level3']].copy().drop_duplicates()['level2'].to_numpy()
level_3_unique = np.unique(level_3)
level_3_count = len(level_3_unique)

### PROGRAM

In [53]:
with pm.Model() as hierarchical_model:

    b_0 = pm.Normal(name='b_0', mu=hbr_b_mu, sigma=hbr_b_sig)

    b_1 = pm.Normal(name='b_1', mu=b_0,  sigma=hbr_b_sig, shape=level_1_count)

    b_2 = pm.Normal(name='b_2', mu=b_1[level_2_link - 1],  sigma=hbr_b_sig, shape=level_2_count)

    b_3 = pm.Normal(name='b_3', mu=b_2[level_3_link - 1],  sigma=hbr_b_sig, shape=level_3_count)

    eps = pm.HalfCauchy(name='eps', beta=1)

    # Likelihood
    Y_est = pm.Deterministic(name='Y_est', var=b_3[level_3[:689] - 1] * bayesX_zscore)
    Y_like = pm.Normal(name='Y_like', mu=Y_est, sigma=eps, observed=bayesY_zscore)

    trace = pm.sample(draws=2000, tune=2000, cores=4, chains=2, random_seed=1, progressbar=0)

In [54]:
az.summary(data=trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
b_0,-0.118,0.031,-0.172,-0.058,0.010,0.007,11.0,18.0,1.17
b_1[0],-0.119,0.059,-0.234,-0.002,0.009,0.006,44.0,220.0,1.04
b_1[1],-0.117,0.057,-0.230,-0.006,0.009,0.006,42.0,318.0,1.05
b_1[2],-0.117,0.059,-0.237,-0.007,0.009,0.006,45.0,351.0,1.04
b_1[3],-0.117,0.059,-0.231,-0.000,0.008,0.006,53.0,184.0,1.03
...,...,...,...,...,...,...,...,...,...
Y_est[684],0.068,0.053,-0.043,0.167,0.004,0.003,188.0,1812.0,1.02
Y_est[685],0.075,0.057,-0.035,0.188,0.004,0.003,164.0,1061.0,1.01
Y_est[686],0.073,0.057,-0.041,0.183,0.004,0.003,192.0,1595.0,1.01
Y_est[687],-0.210,0.153,-0.507,0.099,0.010,0.007,220.0,1547.0,1.01


In [None]:
# b_est = trace["b_3"].mean(axis=0)
x = bayes_avg_price and y = bayes_quantity
a_est_0 = y_mean - (b_est * x_mean * y_std) / x_std
b_est_0 = (b_est * y_std) / x_std
y_est = a_est_0 + b_est_0 * x
mape = avg(abs((y_est - y) / y))
prediction_vs_actual_graph = sum(x) vs sum(y) and sum(y_est)
elasticity = -1 * (b_est_0 * x) / (a_est_0 + b_est_0 * x)