# Computation

In [29]:
#prepare data

import pandas as pd

data = {'visitor':  ['James', 'Mike', 'Tommy', 'Meguy', 'Mia'],
        'variant': ['A', 'B', 'B', 'A', 'B'],
        'conversion': [0, 1, 0, 1, 1],
        'revenue': [0.00, 22.80, 0.00, 12.90, 37.5],
        }

df = pd.DataFrame (data, columns = ['visitor', 'variant', 'conversion', 'revenue'])

df.head()

Unnamed: 0,visitor,variant,conversion,revenue
0,James,A,0,0.0
1,Mike,B,1,22.8
2,Tommy,B,0,0.0
3,Meguy,A,1,12.9
4,Mia,B,1,37.5


In [30]:
#Split the data

import matplotlib.pyplot as plt
import numpy as np
import pandas
import pymc3 as pm

def data_split(df):
    variant = df[df['variant']!='A']
    default = df[df['variant']=='A']

    conv_variant = df[(df['variant']!='A') & (df['conversion']>0 )]
    conv_default = df[(df['variant']=='A') & (df['conversion']>0)]

    default = default['conversion']
    variant = variant['conversion']
    rev_default = conv_default['revenue']
    rev_variant = conv_variant['revenue']
    return [default,variant,rev_default,rev_variant]
      
dataframe = df
observations = data_split(dataframe);

In [31]:

with pm.Model() as ab_model:
  #Conversion a priori
  a_prior=pm.distributions.continuous.Beta('conversionA', alpha=0.1, beta=0.1)
  b_prior=pm.distributions.continuous.Beta('conversionB', alpha=0.1, beta=0.1)

  #Revenues a priori
  rev_a_prior=pm.distributions.continuous.Gamma('revenueA', alpha=0.1, beta=0.1)
  rev_b_prior=pm.distributions.continuous.Gamma('revenueB', alpha=0.1, beta=0.1)
  
  #Compute the likelihood
  pm.Bernoulli('likelihoodA', a_prior, observed=observations[0])
  pm.Bernoulli('likelihoodB', b_prior, observed=observations[1])
  pm.Poisson('likelihoodRevenueA', rev_a_prior, observed=observations[2])
  pm.Poisson('likelihoodRevenueB', rev_b_prior, observed=observations[3])

  #Compute metrics
  conv_a = pm.Deterministic('conversion_A', a_prior)
  conv_b = pm.Deterministic('conversion_B', b_prior)
  
  conv_rev_a = pm.Deterministic('conversionRevenueA', a_prior*rev_a_prior)
  conv_rev_b = pm.Deterministic('conversionRevenueB', b_prior*rev_b_prior)

  pm.Deterministic('lift', b_prior - a_prior)
  pm.Deterministic('revenueLift', conv_rev_b - conv_rev_a)
    
  step = pm.Slice()
  trace = pm.sample(1000, step=step) #Be aware that it will take some times to compute

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>Slice: [revenueB]
>Slice: [revenueA]
>Slice: [conversionB]
>Slice: [conversionA]


Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 83 seconds.


# Visualisation

#Visualize some results

plt.figure(figsize=(20,5))
plt.subplot(1, 2,1)
plt.hist(trace['conversionRevenueA'][:], bins=35, histtype='stepfilled', 
color='#da6d75', label='Posterior of revenue A',alpha=0.5)
plt.legend()
plt.hist(trace['conversionRevenueB'][:], bins=35, histtype='stepfilled',
color='#52c4a8', label='Posterior of revenue B',alpha=0.5)
plt.legend()
plt.subplot(1, 2,2)
plt.hist(trace['conversion_A'][:], bins=35, histtype='stepfilled',
color='#da6d75', label='Posterior of A',alpha=0.5)
plt.legend()
plt.hist(trace['conversion_B'][:], bins=35, histtype='stepfilled',
color='#52c4a8', label='Posterior of B',alpha=0.5)
plt.legend()
plt.show()

# Interpretation

In [32]:
#Interpretation

difference_rev_B_A=trace['conversionRevenueB']-trace['conversionRevenueA']
difference_conversion_B_A=trace['conversionB']-trace['conversionA']
difference_rev_A_B=trace['conversionA']-trace['conversionB']
difference_conversion_A_B=trace['conversionA']-trace['conversionB']

In [33]:
#probability of 14% increase by group B revenue and conversion
print("Probability of 14% increase by group B revenue and conversion: ", 100*len(difference_rev_B_A[difference_rev_B_A>0.14])*1.0/len(difference_rev_B_A))
print("Probability of 100% increase by group B revenue and conversion: ", 100*len(difference_rev_B_A[difference_rev_B_A>1])*1.0/len(difference_rev_B_A))
print("_________________________________________________________________________________________________")
print("Probability of 14% increase by group A revenue and conversion: ", 100*len(difference_rev_A_B[difference_rev_A_B>0.14])*1.0/len(difference_rev_A_B))
print("Probability of 100% increase by group A revenue and conversion: ", 100*len(difference_rev_A_B[difference_rev_A_B>1])*1.0/len(difference_rev_A_B))


Probability of 14% increase by group B revenue and conversion:  93.75
Probability of 100% increase by group B revenue and conversion:  91.975
_________________________________________________________________________________________________
Probability of 14% increase by group A revenue and conversion:  22.625
Probability of 100% increase by group A revenue and conversion:  0.0
