In [27]:
# The statistics and calculus with python workshop (p. 500)
import scipy.stats as st
import numpy as np

# Simulate two independent samples of 100 draws each from normal distributions
# with the same spread (sd = 10) but different means (50 vs. 57).
# The legacy global seed makes the draws reproducible; the order of the two
# np.random.normal calls matters because they share one random stream.
np.random.seed(16172)
before = np.random.normal(50, 10, 100)
after = np.random.normal(57, 10, 100)

print(f'Sample mean of before: {np.mean(before)}')
print(f'Sample mean of after: {np.mean(after)}\n')

# Two-tailed Welch t-test (equal_var=False): this only tells us whether the two
# means differ significantly, not in which direction.
two_tail_results = st.ttest_ind(before, after, equal_var=False)
print(f'Two tail results:  {two_tail_results}\n')

# One-tailed test of whether "before" is smaller than "after".
# The two-sided p-value is halved by hand. That shortcut is only valid when the
# statistic has the sign the hypothesis predicts (negative here); otherwise the
# one-sided p-value is 1 - p/2. SciPy can compute it directly via
# st.ttest_ind(..., alternative='less').
upper_tail = st.ttest_ind(before, after, equal_var=False)
upper_tail_p_one_sided = upper_tail.pvalue / 2
print(f'Upper tail results:  {upper_tail}')
print(f'p-value must be divided by 2:  {upper_tail_p_one_sided}\n')

# One-tailed test of whether "after" is larger than "before".
# Swapping the samples flips the sign of the statistic (positive here); the
# SciPy equivalent is st.ttest_ind(..., alternative='greater').
lower_tail = st.ttest_ind(after, before, equal_var=False)
# Halve THIS test's p-value (the original halved upper_tail's by copy-paste).
lower_tail_p_one_sided = lower_tail.pvalue / 2
print(f'Lower tail results:  {lower_tail}')
print(f'p-value must be divided by 2:  {lower_tail_p_one_sided}\n')


Sample mean of before: 50.54824784997514
Sample mean of after: 54.95949096047315

Two tail results:  TtestResult(statistic=-3.1382666896639466, pvalue=0.0019596346894880487, df=197.61159229013055)

Upper tail results:  TtestResult(statistic=-3.1382666896639466, pvalue=0.0019596346894880487, df=197.61159229013055)
p-value must be divided by 2:  0.0009798173447440244

Lower tail results:  TtestResult(statistic=3.1382666896639466, pvalue=0.0019596346894880487, df=197.61159229013055)
p-value must be divided by 2:  0.0009798173447440244



In [25]:
# Dive Into Data Science (p. 76)

import pandas as pd
import scipy.stats as st
import numpy as np

# Load one table per device cohort; the paths are relative to the notebook's
# working directory.
desktop = pd.read_csv('desktop.csv')
laptop = pd.read_csv('laptop.csv')

# Preview the first rows of each cohort, each followed by a blank separator.
for cohort in (desktop, laptop):
    print(cohort.head())
    print(f'\n')

# Independent two-sample t-test (desktop vs. laptop) for each metric.
# equal_var is left at SciPy's default (True), i.e. the pooled-variance
# Student's t-test rather than Welch's test.
for metric in ('spending', 'age', 'visits'):
    print(st.ttest_ind(desktop[metric], laptop[metric]))



   userid  spending  age  visits
0       1      1250   31     126
1       2       900   27       5
2       3         0   30     459
3       4      2890   22      18
4       5      1460   38      20


   userid  spending  age  visits
0      31      1499   32      12
1      32       799   23      40
2      33      1200   45      22
3      34         0   59     126
4      35      1350   17      85


TtestResult(statistic=-2.109853741030508, pvalue=0.03919630411621095, df=58.0)
TtestResult(statistic=-0.7101437106800108, pvalue=0.4804606394128761, df=58.0)
TtestResult(statistic=0.20626752311535543, pvalue=0.8373043059847984, df=58.0)


In [33]:
# Average spending per cohort: desktop first, then laptop.
# (Substitute 'visits' for 'spending' to compare mean visit counts instead.)
for cohort in (desktop, laptop):
    print(np.mean(cohort['spending']))

1028.6666666666667
1624.5666666666666


In [37]:
# Spread of spending per cohort: desktop first, then laptop.
# np.std defaults to ddof=0 (population standard deviation); pass ddof=1 if the
# sample (n - 1) estimate is wanted instead.
for cohort in (desktop, laptop):
    print(np.std(cohort['spending']))

892.1593405266174
1231.8258449237953


In [41]:
# How to calculate the statistical power of a test
from statsmodels.stats.power import TTestIndPower

# Inputs to the power calculation
alpha = 0.05      # chosen statistical significance threshold
nobs = 50         # observations in the first group, i.e. 50 "before"; with ratio=1.0 the second group has 50 as well
effectsize = 0.5  # Cohen's d: 0.2 = low, 0.5 = medium, 0.8 = high

analysis = TTestIndPower()
# power is the one argument left unspecified, so solve_power solves for it.
# ratio and alternative are spelled out at their default values.
power = analysis.solve_power(
    effect_size=effectsize,
    nobs1=nobs,
    alpha=alpha,
    ratio=1.0,
    alternative='two-sided',
)

# Rule of thumb: only authorize A/B tests that will have at least 80% power
print(power)

0.69689340057952


In [47]:
# How to calculate sample size needed to run A/B test (very similar to above)
from statsmodels.stats.power import TTestIndPower

# Requirements the planned test has to meet
# NOTE(review): alpha is 0.005 here, not 0.05 - confirm the stricter threshold is intended.
alpha = 0.005  # chosen statistical significance threshold
effect = 0.5   # Cohen's d: 0.2 = low, 0.5 = medium, 0.8 = high
power = 0.9    # rule of thumb is at least 80% power; here we choose the target ourselves

analysis = TTestIndPower()
# nobs1 is the one argument left unspecified, so solve_power solves for it.
# ratio and alternative are spelled out at their default values.
observations = analysis.solve_power(
    effect_size=effect,
    alpha=alpha,
    power=power,
    ratio=1.0,
    alternative='two-sided',
)

# The value is the size of the first group (nobs1) and is fractional; round it
# up when planning an actual experiment.
print(f'Number of observations needed to achive test requirements:  {observations}')


Number of observations needed to achive test requirements:  135.71324592029018
