In [1]:
import pandas as pd
import pymc3 as pm
import theano.tensor as tt
import numpy as np
from collections import Counter

import seaborn as sns
import matplotlib.pyplot as plt



In [2]:
from utils.generators import UserGenerator

In [3]:
# Automate the process from part 1

def calculate_posterior(ug, n_samples , initial_rate, lift,user_ratio=0.5 ):
    """Generate one synthetic lift dataset, sample the change-point model and summarise it.

    Parameters
    ----------
    ug : object
        Must expose ``generate_lift_data(n_samples, initial_rate, lift,
        nr_of_days, user_ratio=...)`` returning a DataFrame with a
        ``datetime`` column, a ``purchased`` column and a ``user_id`` that is
        available after ``reset_index()``.
        NOTE(review): presumably a ``UserGenerator`` instance -- confirm.
    n_samples, initial_rate, lift :
        Forwarded unchanged to ``ug.generate_lift_data``.
    user_ratio : float, default 0.5
        Forwarded to the generator and also used to derive the hour at which
        the change is expected: ``int(24 * user_ratio)``.

    Returns
    -------
    lambda_1_trace, lambda_2_trace : array-like
        The sampled ``lambda_1`` / ``lambda_2`` values multiplied by the
        number of rows in the dataset.
    time_accuracy : float
        Share of the first 5000 ``tau`` samples that point at a row whose hour
        equals the expected split hour. ``0`` when no row falls in that hour,
        NaN when no ``tau`` sample matches any row.
    """
    nr_of_days = 1
    hour_split = int((nr_of_days * 24) * user_ratio)

    # Create the dataset and derive the hour of day for every row. ``assign``
    # is used so the frame returned by the generator is not mutated in place.
    raw = ug.generate_lift_data(n_samples, initial_rate, lift, nr_of_days, user_ratio=user_ratio)
    dat = raw.assign(hour=raw.datetime.dt.hour)
    dat = dat.reset_index()[['user_id', 'datetime', 'purchased', 'hour']]

    trace_sampled = run_sampler(dat)

    # Only the first 5000 tau draws are used for the timing summary.
    tau_post = trace_sampled['tau'][:5000]

    # Count how often each row index was drawn as the change point. Counter
    # supplies 0 for rows that were never drawn, so no NaNs appear here.
    tau_counts = Counter(tau_post)
    posterior_split = dat.index.to_series().map(tau_counts)

    # Aggregate the per-row counts into per-hour counts.
    time_split = posterior_split.groupby(dat['hour']).sum()

    # Rescale the rate traces by the number of rows.
    # NOTE(review): this looks like it undoes the n=len(dat) used by the
    # Binomial likelihood in run_sampler -- confirm.
    n_rows = dat.shape[0]
    lambda_1_trace = trace_sampled['lambda_1'] * n_rows
    lambda_2_trace = trace_sampled['lambda_2'] * n_rows

    # Accuracy of the time prediction. ``.get`` avoids the KeyError that
    # plain label indexing raises when no row falls into the expected hour.
    hits = time_split.get(hour_split, 0)
    total = time_split.sum()
    print("timecalc", hour_split, hits, total)

    time_accuracy = hits / total if total else float("nan")

    return lambda_1_trace, lambda_2_trace, time_accuracy
    

def run_sampler(dat, draws=15000, tune=7000, random_seed=None):
    """Sample a two-rate change-point model for the ``purchased`` column of ``dat``.

    The model has two rates, ``lambda_1`` and ``lambda_2`` (both Uniform(0, 1)),
    and a discrete switch point ``tau`` over the row positions of ``dat``.
    Rows with position < ``tau`` use ``lambda_1``; all other rows use
    ``lambda_2``.

    Parameters
    ----------
    dat : pandas.DataFrame
        Must have a ``purchased`` column; row order defines the position that
        ``tau`` refers to.
    draws : int, default 15000
        Number of posterior draws kept per chain.
    tune : int, default 7000
        Number of tuning iterations; they are discarded from the result.
    random_seed : optional
        Forwarded to ``pm.sample`` so a run can be made reproducible.

    Returns
    -------
    The trace produced by ``pm.sample`` with the variables ``lambda_1``,
    ``lambda_2`` and ``tau``; tuning samples are already removed.
    """
    n_obs = dat.shape[0]

    with pm.Model():
        lambda_1 = pm.Uniform("lambda_1", lower=0, upper=1)
        lambda_2 = pm.Uniform("lambda_2", lower=0, upper=1)

        tau = pm.DiscreteUniform("tau", lower=0, upper=n_obs - 1)

        # Row positions, so the switch can compare every row against tau.
        index = np.arange(n_obs)

        # lambda_1 before the change point, lambda_2 from it onwards.
        lambda_ = pm.math.switch(tau > index, lambda_1, lambda_2)

        # NOTE(review): n is the number of rows while each observation looks
        # like a single purchase flag; calculate_posterior rescales the rates
        # by the same factor, so this is left unchanged -- confirm whether a
        # Bernoulli likelihood was intended.
        pm.Binomial("obs", p=lambda_, n=n_obs, observed=dat.purchased)

        # The former pm.find_MAP() call was dropped: its result was never
        # passed to pm.sample, so it only cost time.
        # NUTS covers the continuous rates; per the pm.sample documentation a
        # step method for the remaining (discrete) tau is assigned automatically.
        step = pm.NUTS()
        trace = pm.sample(
            draws,
            tune=tune,
            step=step,
            discard_tuned_samples=True,
            random_seed=random_seed,
        )

    # Tuning draws are already discarded above, so no extra slicing is needed.
    return trace
    

In [None]:
# Sweep over user counts and lift sizes, recording for every combination the
# mean posterior rates and the accuracy of the detected change time.
# NOTE(review): `ug` is not defined in the visible cells; presumably it is a
# UserGenerator instance -- create it before running this cell.
user_numbers = [100, 500, 1000, 5000,10000]
base_p = 0.1  # baseline rate passed to every run

per_user_frames = []
for n_users in user_numbers:
    lift_detection = {}
    for lift in np.linspace(0.05,0.5,num=10):

        lambda_1, lambda_2, time_accuracy = calculate_posterior(ug, n_users, base_p , lift, user_ratio=0.25 )

        lift_detection[f'{n_users}-{lift}'] = {'lift':lift,
                                'l1':lambda_1.mean(),
                                'l2':lambda_2.mean(),
                                "time_accuracy":time_accuracy,
                                'user':n_users}

    dat = pd.DataFrame.from_dict(lift_detection, orient='index')
    per_user_frames.append(dat)

# DataFrame.append was removed in pandas 2.0; concatenate once instead of
# growing the frame inside the loop.
dataframe = pd.concat(per_user_frames)
    