In [30]:
#import statements
import pandas as pd
import numpy as np
import pymc as pm
import arviz as az
from dataclasses import dataclass
from typing import List

#dataclasses
@dataclass
class BetaPrior:
    """Shape parameters of a Beta(alpha, beta) prior distribution.

    Raises:
        ValueError: if either shape parameter is not strictly positive
            (this also rejects NaN), since a Beta distribution is only
            defined for alpha > 0 and beta > 0.
    """
    alpha: float  # first shape parameter, must be > 0
    beta: float  # second shape parameter, must be > 0

    def __post_init__(self) -> None:
        # Fail at construction time rather than deep inside the sampler.
        # Written as `not (x > 0 ...)` so that NaN is rejected as well.
        if not (self.alpha > 0 and self.beta > 0):
            raise ValueError(
                f"Beta prior parameters must be positive, got alpha={self.alpha}, beta={self.beta}"
            )

@dataclass
class BinomialData:
    """Observed binomial counts for a single variant.

    Raises:
        ValueError: if ``trials`` is negative or ``successes`` is not in
            the range ``0..trials``.
    """
    trials: int #total number of players in a variant
    successes: int #number of players who converted

    def __post_init__(self) -> None:
        # Reject impossible counts early; they would otherwise surface as
        # an obscure likelihood error once the model is evaluated.
        if self.trials < 0:
            raise ValueError(f"trials must be non-negative, got {self.trials}")
        if not 0 <= self.successes <= self.trials:
            raise ValueError(
                f"successes must be between 0 and trials={self.trials}, got {self.successes}"
            )

#get data
DATA_URL = "https://github.com/dustywhite7/Econ8310/raw/master/AssignmentData/cookie_cats.csv"

# Order matters: the position of each variant in this list becomes its
# position in the per-variant data lists built below (gate_30 -> 0, gate_40 -> 1).
VARIANTS = ["gate_30", "gate_40"]


def summarize_retention(frame: pd.DataFrame, column: str) -> pd.DataFrame:
    """Count successes and trials of a 0/1 retention column per game version.

    Args:
        frame: player-level data with a ``version`` column and ``column``.
        column: name of the retention indicator column to aggregate.

    Returns:
        DataFrame indexed by version, restricted to and ordered by
        ``VARIANTS``, with columns ``successes`` (sum of the indicator)
        and ``trials`` (number of non-null rows).

    Raises:
        KeyError: if one of ``VARIANTS`` is missing from the data.
    """
    per_version = frame.groupby("version")[column].agg(successes="sum", trials="count")
    return per_version.loc[VARIANTS]


def to_binomial_data(summary: pd.DataFrame) -> List[BinomialData]:
    """Convert a per-version summary table into one BinomialData per row, in row order."""
    return [
        BinomialData(trials=int(row.trials), successes=int(row.successes))
        for row in summary.itertuples()
    ]


df = pd.read_csv(DATA_URL)

# Cast the retention flags to integers so they can be summed as counts.
df["retention_1"] = df["retention_1"].astype(int)
df["retention_7"] = df["retention_7"].astype(int)

# 1-day retention
agg1 = summarize_retention(df, "retention_1")
binom_data_1day = to_binomial_data(agg1)

# 7-day retention
agg7 = summarize_retention(df, "retention_7")
binom_data_7day = to_binomial_data(agg7)

#conversion model (Beta prior, Binomial likelihood, two variants)
class ConversionModelTwoVariant:
    """Builds a Beta-Binomial conversion model comparing exactly two variants.

    Both variants share the same Beta prior on their conversion rate.
    """

    def __init__(self, priors: "BetaPrior"):
        """Store the Beta prior parameters used for every variant's rate."""
        self.priors = priors

    def create_model(self, data: "List[BinomialData]") -> "pm.Model":
        """Create the PyMC model for the given pair of variants.

        Args:
            data: exactly two entries; ``data[0]`` is the baseline that
                ``data[1]`` is compared against in the uplift.

        Returns:
            A model with variables ``p`` (one conversion rate per variant),
            ``y`` (observed successes) and ``reuplift_b`` (relative uplift
            of the second variant over the first, ``p[1] / p[0] - 1``).

        Raises:
            ValueError: if ``data`` does not contain exactly two variants.
        """
        # The uplift below hard-codes indices 0 and 1, so any other length
        # would either fail obscurely or silently ignore extra variants.
        if len(data) != 2:
            raise ValueError(
                f"ConversionModelTwoVariant needs exactly 2 variants, got {len(data)}"
            )

        trials = np.array([d.trials for d in data])
        successes = np.array([d.successes for d in data])
        with pm.Model() as model:
            p = pm.Beta("p", alpha = self.priors.alpha, beta = self.priors.beta, shape = len(data))
            # Variables register themselves on the active model context,
            # so the return values do not need to be kept.
            pm.Binomial("y", n = trials, p = p, observed = successes)
            # NOTE: the name "reuplift_b" is looked up by string when the
            # trace is summarized, so it is kept exactly as is.
            pm.Deterministic("reuplift_b", p[1] / p[0] - 1)
        return model

#binom_data_1day
#binom_data_7day


In [36]:
# Fit the 1-day retention model.
# Both variants get the same Beta(2, 2) prior, which is symmetric around 0.5.
priors = BetaPrior(alpha=2.0, beta=2.0)
conv_model = ConversionModelTwoVariant(priors=priors)

model_1day = conv_model.create_model(binom_data_1day)

with model_1day:
    # Seeded generator created inline so this cell uses the same seed on every run.
    trace_1day = pm.sample(
        draws=2000,
        tune=2000,
        chains=2,
        random_seed=np.random.default_rng(42),
        progressbar="combined+stats",
        target_accept=0.9,
    )

# Posterior summary of the per-variant rates and the relative uplift.
az.summary(trace_1day, var_names=["p", "reuplift_b"])

[?25l                                                                                                                                  
 [1m [0m[1mProgress                                [0m[1m [0m [1m [0m[1mDraws[0m[1m [0m [1m [0m[1mDivergences[0m[1m [0m [1m [0m[1mStep size[0m[1m [0m [1m [0m[1mGrad evals[0m[1m [0m [1m [0m[1mSampling Speed[0m[1m [0m [1m [0m[1mElapsed[0m[1m [0m [1m [0m[1mRemaining[0m[1m [0m 
 ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── 
  [30m----------------------------------------[0m   0       0             0.000       0            0.00 draws/s     0:00:00   -:--:--    
[2K[1A[2K[1A[2K[1A[2K[1A[2K                                                                                                                                  
 [1m [0m[1mProgress                                [0m[1m [0m [1m [0m[1mDraws[0m[1m [0m [

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
p[0],0.448,0.002,0.444,0.453,0.0,0.0,3691.0,2909.0,1.0
p[1],0.442,0.002,0.438,0.447,0.0,0.0,3580.0,2857.0,1.0
reuplift_b,-0.013,0.008,-0.027,0.002,0.0,0.0,3788.0,2698.0,1.0


In [37]:
# Fit the 7-day retention model, reusing the model builder (and its prior)
# created for the 1-day analysis.
model_7day = conv_model.create_model(binom_data_7day)

with model_7day:
    # Same sampler settings and seed value as the 1-day fit.
    trace_7day = pm.sample(
        draws=2000,
        tune=2000,
        chains=2,
        random_seed=np.random.default_rng(42),
        progressbar="combined+stats",
        target_accept=0.9,
    )

# Posterior summary of the per-variant rates and the relative uplift.
az.summary(trace_7day, var_names=["p", "reuplift_b"])

[?25l                                                                                                                                  
 [1m [0m[1mProgress                                [0m[1m [0m [1m [0m[1mDraws[0m[1m [0m [1m [0m[1mDivergences[0m[1m [0m [1m [0m[1mStep size[0m[1m [0m [1m [0m[1mGrad evals[0m[1m [0m [1m [0m[1mSampling Speed[0m[1m [0m [1m [0m[1mElapsed[0m[1m [0m [1m [0m[1mRemaining[0m[1m [0m 
 ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── 
  [30m----------------------------------------[0m   0       0             0.000       0            0.00 draws/s     0:00:00   -:--:--    
[2K[1A[2K[1A[2K[1A[2K[1A[2K                                                                                                                                  
 [1m [0m[1mProgress                                [0m[1m [0m [1m [0m[1mDraws[0m[1m [0m [

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
p[0],0.19,0.002,0.187,0.194,0.0,0.0,3511.0,2674.0,1.0
p[1],0.182,0.002,0.179,0.185,0.0,0.0,4494.0,2991.0,1.0
reuplift_b,-0.043,0.013,-0.067,-0.018,0.0,0.0,3673.0,2784.0,1.0
