## Library imports

In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.power import tt_ind_solve_power

from scipy import stats

import seaborn as sns

## Data check

In [2]:
# Loading data

df = pd.read_csv('ab_testing.csv')

df

Unnamed: 0,uid,country,gender,spent,purchases,date,group,device
0,11115722,MEX,F,1595,5,2016-03-08,GRP B,I
1,11122053,USA,M,498,2,2017-07-14,GRP B,I
2,11128688,USA,F,2394,6,2017-09-17,GRP A,I
3,11130578,USA,F,1197,3,2017-11-30,GRP A,I
4,11130759,ESP,M,1297,3,2018-01-10,GRP B,A
...,...,...,...,...,...,...,...,...
45878,99994149,DEU,F,1897,3,2017-02-05,GRP B,I
45879,99995092,USA,F,899,1,2017-08-18,GRP B,A
45880,99996986,BRA,M,2194,6,2014-12-06,GRP B,A
45881,99997035,USA,M,2196,4,2017-04-05,GRP A,I


In [3]:
# Checking NA's

df.isna().sum()

uid          0
country      0
gender       0
spent        0
purchases    0
date         0
group        0
device       0
dtype: int64

In [4]:
# Checking data types

df.dtypes

uid           int64
country      object
gender       object
spent         int64
purchases     int64
date         object
group        object
device       object
dtype: object

In [5]:
# Checking duplicated

df.duplicated().sum()

0

In [6]:
# Seting 'date' column as datetime

df['date'] = pd.to_datetime(df['date'])

In [7]:
# Checking if the same user id does not belong to more than 1 group

df[['uid','group']].groupby('uid').count().sort_values(by='group', ascending=False)

Unnamed: 0_level_0,group
uid,Unnamed: 1_level_1
11115722,1
70051633,1
70013059,1
70014245,1
70016073,1
...,...
40783010,1
40783265,1
40783344,1
40784302,1


In [8]:
# Checking gender ================================================


print('Gender')

print(df['gender'].value_counts())

print(df['gender'].value_counts(normalize=True))

Gender
M    23116
F    22767
Name: gender, dtype: int64
M    0.503803
F    0.496197
Name: gender, dtype: float64


In [9]:
# Checking Gender proportion - Group A

print('Group A gender proportion:')
print(df.loc[df['group']=='GRP A','gender'].value_counts(normalize=True))

# Checking Gender proportion - Group B

print('\nGroup B gender proportion:')
print(df.loc[df['group']=='GRP B','gender'].value_counts(normalize=True))

Group A gender proportion:
M    0.505802
F    0.494198
Name: gender, dtype: float64

Group B gender proportion:
M    0.501792
F    0.498208
Name: gender, dtype: float64


In [10]:
# Checking country ================================================

print('Country')

print(df['country'].value_counts())

print(df['country'].value_counts(normalize=True))

Country
USA    13998
BRA     9009
MEX     5528
DEU     3696
TUR     3537
FRA     2842
GBR     2809
ESP     1934
CAN     1513
AUS     1017
Name: country, dtype: int64
USA    0.305080
BRA    0.196347
MEX    0.120480
DEU    0.080553
TUR    0.077087
FRA    0.061940
GBR    0.061221
ESP    0.042151
CAN    0.032975
AUS    0.022165
Name: country, dtype: float64


In [11]:
# Checking Country proportion - Group A

print('Group A country proportion:')
print(df.loc[df['group']=='GRP A','country'].value_counts(normalize=True))


# Checking Country proportion - Group B

print('\nGroup B country proportion:')
print(df.loc[df['group']=='GRP B','country'].value_counts(normalize=True))

Group A country proportion:
USA    0.301056
BRA    0.196358
MEX    0.125429
DEU    0.082142
TUR    0.078013
FRA    0.062280
GBR    0.060237
ESP    0.042157
CAN    0.030336
AUS    0.021991
Name: country, dtype: float64

Group B country proportion:
USA    0.309128
BRA    0.196336
MEX    0.115502
DEU    0.078954
TUR    0.076156
GBR    0.062210
FRA    0.061598
ESP    0.042144
CAN    0.035630
AUS    0.022340
Name: country, dtype: float64


## Calculating sample size

In [12]:
# nivel de significancia
alpha = 0.05

# power
power = 0.8

lift = 1.063

country_list = list(df['country'].unique())

df_aux = pd.DataFrame({'Country':np.nan,
                       'Minimum sample number':np.nan,
                       'GRP A required':np.nan,
                       'GRP B required':np.nan,
                       'GRP A available':np.nan,
                       'GRP B available':np.nan,
                       'Actual spent': np.nan,
                       'Expected lift': np.nan,
                       'Test possible?': np.nan}, index=[i for i in range(len(df['country'].unique()))])

#df_aux['Country'] = country_list


for i, it in zip(country_list, df_aux.index):
    
    actual_mean_spent = df.loc[(df['country']==i) & (df['group']=='GRP A'), 'spent'].mean()
    
    expected_mean_spent = actual_mean_spent*lift
    
    
    # effect size
    effect_size = (expected_mean_spent - actual_mean_spent) / df.loc[df['group']=='GRP A', 'spent'].std()
    
    # sample_number
    nobs1 = tt_ind_solve_power(effect_size = effect_size, alpha = alpha, power = power)
    
    # preenchendo o df
    df_aux.loc[it, 'Country'] = i
    
    df_aux.loc[it, 'Minimum sample number'] = nobs1
    
    df_aux.loc[it, 'GRP A required'] = nobs1
    
    df_aux.loc[it, 'GRP B required'] = nobs1
    
    df_aux.loc[it, 'GRP A available'] = ((df['group']=='GRP A') & (df['country']==i)).sum()
    
    df_aux.loc[it, 'GRP B available'] = ((df['group']=='GRP A') & (df['country']==i)).sum()
    
    df_aux.loc[it, 'Actual spent'] = actual_mean_spent
    
    df_aux.loc[it, 'Expected lift'] = expected_mean_spent
    
    df_aux.loc[it, 'Test possible?'] = ['No' if df_aux.loc[it,'GRP B available'] < df_aux.loc[it,'GRP A required'] else 'Yes'][0]
    
    
df_aux[['Minimum sample number',
        'GRP A required',
        'GRP B required',
        'GRP A available',
        'GRP B available']] = np.ceil(df_aux[['Minimum sample number',
                                              'GRP A required',
                                              'GRP B required',
                                              'GRP A available',
                                              'GRP B available']]).astype('int')

df_aux

  return np.clip(_boost._nct_sf(x, df, nc), 0, 1)
  return np.clip(_boost._nct_cdf(x, df, nc), 0, 1)


Unnamed: 0,Country,Minimum sample number,GRP A required,GRP B required,GRP A available,GRP B available,Actual spent,Expected lift,Test possible?
0,MEX,2047,2047,2047,2886,2886,1902.705821,2022.576288,Yes
1,USA,2078,2078,2078,6927,6927,1888.376065,2007.343757,Yes
2,ESP,2302,2302,2302,970,970,1794.191753,1907.225833,No
3,GBR,2236,2236,2236,1386,1386,1820.470418,1935.160055,No
4,TUR,2070,2070,2070,1795,1795,1892.353203,2011.571455,No
5,DEU,2212,2212,2212,1890,1890,1830.453439,1945.772006,No
6,BRA,2053,2053,2053,4518,4518,1899.792165,2019.479071,Yes
7,FRA,2054,2054,2054,1433,1433,1899.752268,2019.436661,No
8,AUS,1930,1930,1930,506,506,1959.517787,2082.967407,No
9,CAN,2205,2205,2205,698,698,1833.154728,1948.643476,No


In [13]:
# Criando uma lista contendo os países que possuem o número mínimo de amostras para o teste

countries_ab_list = df_aux.loc[df_aux['Test possible?']=='Yes','Country'].to_list()

countries_ab_list

['MEX', 'USA', 'BRA']

## Checking data distribution

In [14]:
# Filtrando o dataframe para somente países onde é possível fazer o teste AB 

df_ab = df[df['country'].isin(countries_ab_list)]

df_ab

Unnamed: 0,uid,country,gender,spent,purchases,date,group,device
0,11115722,MEX,F,1595,5,2016-03-08,GRP B,I
1,11122053,USA,M,498,2,2017-07-14,GRP B,I
2,11128688,USA,F,2394,6,2017-09-17,GRP A,I
3,11130578,USA,F,1197,3,2017-11-30,GRP A,I
5,11131070,USA,M,1796,4,2016-08-08,GRP B,I
...,...,...,...,...,...,...,...,...
45874,99986982,USA,F,3394,6,2017-05-03,GRP B,I
45877,99991883,BRA,M,2995,5,2017-06-22,GRP B,A
45879,99995092,USA,F,899,1,2017-08-18,GRP B,A
45880,99996986,BRA,M,2194,6,2014-12-06,GRP B,A


In [15]:
df_GRP_A_sampled = pd.DataFrame(columns=df_ab.columns)

df_GRP_B_sampled = pd.DataFrame(columns=df_ab.columns)

# Separando grupo A e grupo B

df_GRP_A = df_ab[df_ab['group']=='GRP A']

df_GRP_B = df_ab[df_ab['group']=='GRP B']

In [16]:
# Fazendo uma amostragem dos dados por país baseado no cálculo de tamanho amostral

for i in countries_ab_list:
    
    n_sampled_GRP_A = df_aux.loc[df_aux['Country']==i,'GRP A required'].values[0]
    
    df_sampled_A = df_ab[(df_ab['group']=='GRP A') & (df_ab['country']==i)].sample(n=n_sampled_GRP_A, random_state=42)
    
    df_GRP_A_sampled = pd.concat([df_GRP_A_sampled, df_sampled_A])
    
    
    n_sampled_GRP_B = df_aux.loc[df_aux['Country']==i,'GRP B required'].values[0]
    
    df_sampled_B = df_ab[(df_ab['group']=='GRP B') & (df_ab['country']==i)].sample(n=n_sampled_GRP_B, random_state=42)
    
    df_GRP_B_sampled = pd.concat([df_GRP_B_sampled, df_sampled_B])

In [17]:
# Unindo de volta os df grupo A e B amostrados

df_AB = pd.concat([df_GRP_A_sampled, df_GRP_B_sampled])

In [18]:
df_AB

Unnamed: 0,uid,country,gender,spent,purchases,date,group,device
7679,25981952,MEX,F,1497,3,2016-12-13,GRP A,I
39246,86959496,MEX,F,2494,6,2018-01-05,GRP A,I
38251,84978006,MEX,M,497,3,2016-07-27,GRP A,I
26569,62254922,MEX,M,299,1,2016-04-15,GRP A,A
18054,46080590,MEX,F,2096,4,2016-11-25,GRP A,I
...,...,...,...,...,...,...,...,...
38392,85241496,BRA,F,599,1,2016-10-01,GRP B,A
25471,60143852,BRA,F,1797,3,2016-07-15,GRP B,A
43643,95647935,BRA,F,1596,4,2016-04-20,GRP B,A
16113,42331972,BRA,F,3293,7,2018-01-07,GRP B,A


In [34]:
# Mudando o formato de dados das colunas

df_AB['uid'] = df_AB['uid'].astype('int')
df_AB['spent'] = df_AB['spent'].astype('float')

In [19]:
# Definindo um dataframe que conterá as informações do teste de normalidade e variância dos dados

df_summary = pd.DataFrame(columns=['country','Shapiro p-value', 'Normal distribution?','Variance ratio','Equal Variances?'])

df_summary['country'] = df_AB['country'].unique()

df_summary

Unnamed: 0,country,Shapiro p-value,Normal distribution?,Variance ratio,Equal Variances?
0,MEX,,,,
1,USA,,,,
2,BRA,,,,


In [20]:
# Fazer o teste de Shapiro e avaliação de variâncias p/ país

for i in countries_ab_list:
    
    df_summary.loc[df_summary['country']==i, 'Shapiro p-value'] = stats.shapiro(df_AB.loc[df_AB['country']==i,'spent']).pvalue
    
    
    variance_list = [np.var(df_AB.loc[(df_AB['group']=='GRP A') & (df_AB['country']==i),'spent']), np.var(df_AB.loc[(df_AB['group']=='GRP B') & (df_AB['country']==i),'spent'])]
    
    df_summary.loc[df_summary['country']==i, 'Variance ratio'] = np.max(variance_list) / np.min(variance_list)

    
    
df_summary['Normal distribution?'] = ['No' if i<0.05 else 'Yes' for i in df_summary['Shapiro p-value']]

df_summary['Equal Variances?'] = ['No' if i>4 else 'Yes' for i in df_summary['Variance ratio']]

In [21]:
df_summary

Unnamed: 0,country,Shapiro p-value,Normal distribution?,Variance ratio,Equal Variances?
0,MEX,0.0,No,1.049738,Yes
1,USA,0.0,No,1.081985,Yes
2,BRA,0.0,No,1.061109,Yes


## Hypothesis test

In [23]:
df_AB

Unnamed: 0,uid,country,gender,spent,purchases,date,group,device
7679,25981952,MEX,F,1497,3,2016-12-13,GRP A,I
39246,86959496,MEX,F,2494,6,2018-01-05,GRP A,I
38251,84978006,MEX,M,497,3,2016-07-27,GRP A,I
26569,62254922,MEX,M,299,1,2016-04-15,GRP A,A
18054,46080590,MEX,F,2096,4,2016-11-25,GRP A,I
...,...,...,...,...,...,...,...,...
38392,85241496,BRA,F,599,1,2016-10-01,GRP B,A
25471,60143852,BRA,F,1797,3,2016-07-15,GRP B,A
43643,95647935,BRA,F,1596,4,2016-04-20,GRP B,A
16113,42331972,BRA,F,3293,7,2018-01-07,GRP B,A


In [70]:
alpha = 0.05

df_results = pd.DataFrame(columns=['País','Teste aplicado','Valor de p','Diferenças?'],
                          index=[i for i in range(len(df_AB['country'].unique()))])

df_results['País'] = df_AB['country'].unique()


for i in df_summary['country']:
    
    print(f'País avaliado:{i}')
    
    # Condição para teste não paramétrico de variâncias iguais
    if (df_summary.loc[df_summary['country']==i, 'Shapiro p-value'].values[0] < alpha) & (df_summary.loc[df_summary['country']==i, 'Variance ratio'].values[0] < 4):
                
        # Aplicar teste Mann-Whitney
        print('Teste estatístico selecionado baseado nos dados: Mann-Whitney')
        teste_escolhido='Mann-Whitney'
        results = stats.mannwhitneyu(x= df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP A'),'spent'].values,
                                     y= df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP B'),'spent'].values,
                                     use_continuity=True,
                                     alternative='two-sided',
                                     axis=0,
                                     method='auto',
                                     nan_policy='propagate',
                                     keepdims=False)
        
        # Testar o valor de p para emitir o parecer
        print(f'Valor de p:{np.round(results.pvalue, 2)}')
        if results.pvalue < alpha:
            print('Existem diferenças entre os dois tratamentos\n')
            resultado = 'Significativo'
        
        else:
            print('NÃO Existem diferenças entre os dois tratamentos\n')
            resultado = 'Não significativo'
        
    
    # Condição para teste não paramétrico de variâncias diferentes
    elif (df_summary.loc[df_summary['country']==i, 'Shapiro p-value'].values[0] < alpha) & (df_summary.loc[df_summary['country']==i, 'Variance ratio'].values[0] < 4):
        
        # Aplicar teste Welch
        print('Teste estatístico selecionado baseado nos dados: Welch')
        teste_escolhido='Welch'
        def welch_ttest(x, y):
             ## Welch-Satterthwaite Degrees of Freedom ##
                dof = (x.var()/x.size + y.var()/y.size)**2 / ((x.var()/x.size)**2 / (x.size-1) + (y.var()/y.size)**2 / (y.size-1))
                t, p = stats.ttest_ind(x, y, equal_var = False)
                print("\n",
                      f"Welch's t-test= {t:.4f}", "\n",
                      f"p-value = {p:.4f}", "\n",
                      f"Welch-Satterthwaite Degrees of Freedom= {dof:.4f}")
                
                return t,p
                
        results = welch_ttest(x=df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP A'),'spent'].values,
                              y=df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP B'),'spent'].values)
        
        # Testar o valor de p para emitir o parecer
        print(f'Valor de p:{np.round(results.pvalue, 2)}')
        if results.pvalue < alpha:
            print('Existem diferenças entre os dois tratamentos\n')
            resultado = 'Significativo'
        
        else:
            print('NÃO Existem diferenças entre os dois tratamentos\n')
            resultado = 'Não significativo'
       
              
    # Condição para teste paramétrico de variâncias iguais
    elif (df_summary.loc[df_summary['country']==i, 'Shapiro p-value'].values[0] > alpha) & (df_summary.loc[df_summary['country']==i, 'Variance ratio'].values[0] < 4):    
        
        # Aplicar o teste t paramétrico de variâncias iguais
        print('Teste estatístico selecionado baseado nos dados: Parametric ttest - equal variances')
        teste_escolhido='T test - equal variances'
        results = stats.ttest_ind(x=df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP A'),'spent'].values,
                                  y=df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP B'),'spent'].values,
                                  equal_var = True)
        
        # Testar o valor de p para emitir o parecer
        print(f'Valor de p:{np.round(results.pvalue, 2)}')
        if results.pvalue < alpha:
            print('Existem diferenças entre os dois tratamentos\n')
            resultado = 'Significativo'
        
        else:
            print('NÃO Existem diferenças entre os dois tratamentos\n')
            resultado = 'Não significativo'
        
    else:
        
        # Aplicar o teste t paramétrico de variâncias diferentes
        print('Teste estatístico selecionado baseado nos dados: Parametric ttest - unequal variances')
        teste_escolhido='T test - unequal variances'
        results = stats.ttest_ind(x=df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP A'),'spent'].values,
                                  y=df_AB.loc[(df_AB['country']==i) & (df_AB['group']=='GRP B'),'spent'].values,
                                  equal_var = False)
        
        # Testar o valor de p para emitir o parecer
        print(f'Valor de p:{np.round(results.pvalue, 2)}')
        if results.pvalue < alpha:
            print('Existem diferenças entre os dois tratamentos\n')
            resultado = 'Significativo'
        
        else:
            print('NÃO Existem diferenças entre os dois tratamentos\n')
            resultado = 'Não significativo'
    
    
    # Preenchendo o df_results - > resultados dos testes estatísticos por países
    df_results.loc[df_results['País']==i, 'Teste aplicado'] = teste_escolhido
    
    df_results.loc[df_results['País']==i, 'Valor de p'] = np.round(results.pvalue, 2)
    
    df_results.loc[df_results['País']==i, 'Diferenças?'] = resultado

País avaliado:MEX
Teste estatístico selecionado baseado nos dados: Mann-Whitney
Valor de p:0.28
NÃO Existem diferenças entre os dois tratamentos

País avaliado:USA
Teste estatístico selecionado baseado nos dados: Mann-Whitney
Valor de p:0.35
NÃO Existem diferenças entre os dois tratamentos

País avaliado:BRA
Teste estatístico selecionado baseado nos dados: Mann-Whitney
Valor de p:0.53
NÃO Existem diferenças entre os dois tratamentos



In [71]:
df_results

Unnamed: 0,País,Teste aplicado,Valor de p,Diferenças?
0,MEX,Mann-Whitney,0.28,Não significativo
1,USA,Mann-Whitney,0.35,Não significativo
2,BRA,Mann-Whitney,0.53,Não significativo


## Rascunhos

In [53]:
# Spent médio é de 1880

df[['group','spent']].groupby('group').mean().iloc[0,:]

# Dessa forma o objetivo da página B é aumentar o spent médio para 2000

spent    1880.504281
Name: GRP A, dtype: float64

In [58]:
# Spent médio atual 1880, faturamento:

1880 * 45883

86260040

In [61]:
# Spent médio desejado 2000, faturamento:

2000 * 45883

91766000

In [62]:
# Lift absoluto

(2000 * 45883) - (1880 * 45883)

5505960

In [None]:
# https://www.statsmodels.org/dev/generated/statsmodels.stats.power.tt_ind_solve_power.html#statsmodels.stats.power.tt_ind_solve_power

# https://www.analyticsvidhya.com/blog/2021/05/how-to-transform-features-into-normal-gaussian-distribution/

# https://machinelearningmastery.com/a-gentle-introduction-to-normality-tests-in-python/

# https://www.statology.org/determine-equal-or-unequal-variance/