In [1]:
import noshmishmosh
import numpy as np
import pandas as pd
import seaborn as sns

Nosh Mish Mosh wants to run an experiment to see if we can convince more people to purchase meal plans if we use a more artisanal-looking vegetable selection. We’ve photographed these modern meals with blush tomatoes and graffiti eggplants, but aren’t sure if this strategy will sell enough units to benefit from establishing a business relationship with a new provider.

Before running this experiment, of course, we need to know the *sample size* that will be required to detect the difference we are hoping for. There are three things we need to know before we can determine that number.

* the Baseline Conversion Rate
* Minimum Detectable Effect (desired lift)
* and the Statistical Significance Threshold


Interesting to note that this is a python module. 

You can import the module.

Then use dir() to see what is inside the module. 

You have to pass specific data structures within the module to pandas in order to use it as a dataframe. 

In [7]:
noshmishmosh

# list what's inside python module
dir(noshmishmosh)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 'customer_visits',
 'money_spent',
 'purchasing_customers']

In [15]:
# import py file as dataframe
all_visitors = pd.DataFrame(noshmishmosh.customer_visits)
paying_visitors = pd.DataFrame(noshmishmosh.purchasing_customers)

# total_visitor_count
total_visitor_count = len(all_visitors) 
paying_visitor_count = len(paying_visitors)

In [16]:
baseline_percent = 100 * paying_visitor_count / total_visitor_count
baseline_percent

18.6

In [13]:
all_visitors

Unnamed: 0,purchased,clickedthrough,id,moneyspent,name
0,False,True,83421,0.00,Michael Todd
1,False,True,46042,0.00,Brianna Harmon
2,False,False,23766,0.00,Mario Arnold
3,False,False,20859,0.00,Paul Quinn
4,False,True,57771,0.00,Jerome Moore
...,...,...,...,...,...
495,False,False,98100,0.00,Megan Pearson
496,False,False,16077,0.00,Bryan Wallace
497,True,True,29577,25.52,Alec Lewis
498,False,False,18685,0.00,John Smith


In [14]:
paying_visitors

Unnamed: 0,purchased,clickedthrough,id,moneyspent,name
0,True,True,15153,39.01,Jacob Harmon
1,True,True,74271,10.16,Wayne Potter
2,True,True,83489,36.88,Jimmy Carrillo
3,True,True,33246,23.41,Kara Davis
4,True,True,29847,33.49,Mr. Brett Roberts
...,...,...,...,...,...
88,True,True,26287,12.91,Brenda Fields
89,True,True,80204,30.73,Kara Dixon
90,True,True,89419,44.59,Adam Thompson
91,True,True,42822,38.53,Aaron Hamilton


In [17]:
payment_history = pd.DataFrame(noshmishmosh.money_spent)
payment_history

Unnamed: 0,0
0,39.01
1,10.16
2,36.88
3,23.41
4,33.49
...,...
88,12.91
89,30.73
90,44.59
91,38.53


In [18]:
# Use numpy to find the average payment
# Since this is an array, you can use numpy methods on it. 
# Using np.mean() will return the average of all the elements in the array.
average_payment = np.mean(payment_history)
average_payment

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)


0    26.543656
dtype: float64

In [32]:
print(int(np.ceil(average_payment)))

'''So we get an average of 27 dollars for each paying customer.

So 27 * x = 1240

x = 1240/27
'''

print(np.ceil(1240/27))

# So we need 46 new paying customers to reach a profit of $1240

# Actually it's 47. Should have carried the precision until the end of the calculation.

new_customers_needed = np.ceil(1240/average_payment)
print(new_customers_needed)

new_customers_needed = int(new_customers_needed)
new_customers_needed

27
46.0
0    47.0
dtype: float64


47

In [34]:
'''
Now find the percent of weekly visitors who must make a purchase in order to make the change worthwhile.

Do this by dividing the number of customers by the total visitor count for a typical week (calculated earlier), and multiplying by 100. 

(number_customres)/(visitor_count) * 100
'''
percentage_point_increase = new_customers_needed / total_visitor_count * 100
percentage_point_increase

9.4

In [35]:
'''
In order to find our minimum detectable effect/desired lift, we need to express percentage_point_increase as a percent of baseline_percent. 
You can do this by dividing percentage_point_increase by baseline_percent and multiplying by 100.0.
'''

mde = percentage_point_increase / baseline_percent * 100
mde

50.53763440860215

In [None]:
# set statistical significance level

alpha = 0.1
# using an alpha of 10%