In [1]:
# Import libraries
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency
from scipy.stats import binomtest

# Read in the `clicks.csv` file as `abdata`.
# The inspection output shows three columns: user_id, group and is_purchase.
abdata = pd.read_csv('clicks.csv')

# Inspect the data: first rows, column dtypes / non-null counts, summary stats
display(abdata.head())
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" after the report.
abdata.info()
display(abdata.describe(include='all'))

Unnamed: 0,user_id,group,is_purchase
0,8e27bf9a,A,No
1,eb89e6f0,A,No
2,7119106a,A,No
3,e53781ff,A,No
4,02d48cf1,A,Yes


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4998 entries, 0 to 4997
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   user_id      4998 non-null   object
 1   group        4998 non-null   object
 2   is_purchase  4998 non-null   object
dtypes: object(3)
memory usage: 117.3+ KB


None

Unnamed: 0,user_id,group,is_purchase
count,4998,4998,4998
unique,4998,3,2
top,8e27bf9a,A,No
freq,1,1666,4416


In [2]:
# Analyze group and is_purchase: contingency table of price group vs. purchase outcome
Xtab = pd.crosstab(abdata['group'], abdata['is_purchase'])
display(Xtab)

# Chi-square test on the contingency table; only the p-value is used below.
chi2, pval, dof, expected = chi2_contingency(Xtab)
print(pval)
# State the conclusion that the p-value actually supports instead of always
# claiming an association.
if pval < 0.05:
    print(f'The p-value with the Chi-Square Test is: {pval}, meaning that there is an association between the variables.')
else:
    print(f'The p-value with the Chi-Square Test is: {pval}, meaning that there is no significant association between the variables.')

# Calculate number of visitors (one row of `abdata` per visitor, all groups combined)
num_visitors = len(abdata)
print(f'The number of visitors in one week is {num_visitors}')


def _sales_target(price, visitors, revenue=1000):
    """Print and return the sales target for one price point.

    Parameters
    ----------
    price : float
        Price of the upgrade package in dollars.
    visitors : int
        Number of visitors the sales are spread over.
    revenue : int or float, default 1000
        Revenue goal in dollars.

    Returns
    -------
    tuple
        ``(num_sales, proportion)``: the number of sales needed to reach
        ``revenue`` at ``price``, and that number as a fraction of ``visitors``.
        The sales count is left as an unrounded float, so the proportion is the
        exact ratio rather than one based on a whole number of sales.
    """
    num_sales = revenue / price
    proportion = num_sales / visitors
    print(f'The number of sales needed at ${price} in order to generate ${revenue} is: {num_sales}')
    print(f'The proportion of visitors who would need to purchase the upgrade package at ${price} in order to generate ${revenue} is: {proportion}')
    return num_sales, proportion


# Sales count and visitor proportion needed to generate $1000 at each price point
num_sales_needed_099, p_sales_needed_099 = _sales_target(0.99, num_visitors)
num_sales_needed_199, p_sales_needed_199 = _sales_target(1.99, num_visitors)
num_sales_needed_499, p_sales_needed_499 = _sales_target(4.99, num_visitors)

is_purchase,No,Yes
group,Unnamed: 1_level_1,Unnamed: 2_level_1
A,1350,316
B,1483,183
C,1583,83


2.412621354668426e-35
The p-value with the Chi-Square Test is: 2.412621354668426e-35, meaning that there is an association between the variables.
The number of visitors in one week is 4998
The number of visitors needed to make profit with is: 1010.1010101010102
The proportion of visitors who would need to purchase the upgrade package at $0.99 in order to generate $1000 is: 0.20210104243717691
The number of visitors needed to make profit with is: 502.51256281407035
The proportion of visitors who would need to purchase the upgrade package at $1.99 in order to generate $1000 is: 0.10054272965467594
The number of visitors needed to make profit with is: 200.40080160320642
The proportion of visitors who would need to purchase the upgrade package at $4.99 in order to generate $1000 is: 0.040096198800161346


In [3]:
# Sample size and purchase count for each price group.
# Group A was offered the $0.99 price point, group B $1.99 and group C $4.99.


def _group_counts(data, group, price):
    """Print and return the visitor and purchase counts for one price group.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'group' column and an 'is_purchase' column holding 'Yes'
        for visitors who purchased.
    group : str
        Group label to select, e.g. 'A'.
    price : float
        Price point offered to that group; used only in the printed message.

    Returns
    -------
    tuple of int
        ``(samp_size, sales)``: rows in ``group``, and rows in ``group`` whose
        'is_purchase' equals 'Yes'.
    """
    in_group = data['group'] == group
    purchased = data['is_purchase'] == 'Yes'
    # Summing a boolean mask counts its True rows; int() keeps plain Python ints.
    samp_size = int(in_group.sum())
    sales = int((in_group & purchased).sum())
    # The group label is interpolated so each message names the group it reports.
    print(f'The number of visitors in group {group} who were offered the ${price} price point is: {samp_size}')
    print(f'The number of visitors in group {group} who made a purchase is: {sales}')
    return samp_size, sales


samp_size_099, sales_099 = _group_counts(abdata, 'A', 0.99)
samp_size_199, sales_199 = _group_counts(abdata, 'B', 1.99)
samp_size_499, sales_499 = _group_counts(abdata, 'C', 4.99)

The number of visitors in group A who were offered the $0.99 price point is: 1666
The number of visitors in group A who made a purchase is: 316
The number of visitors in group A who were offered the $1.99 price point is: 1666
The number of visitors in group B who made a purchase is: 183
The number of visitors in group A who were offered the $4.99 price point is: 1666
The number of visitors in group C who made a purchase is: 83


In [4]:
# One-sided binomial test per price group: is the observed purchase rate
# greater than the proportion needed to generate $1000 at that price?
# Each entry is (group label, purchases in the group, visitors in the group,
# proportion of visitors who would need to purchase at that group's price).
group_tests = [
    ('A', sales_099, samp_size_099, p_sales_needed_099),
    ('B', sales_199, samp_size_199, p_sales_needed_199),
    ('C', sales_499, samp_size_499, p_sales_needed_499),
]

for label, n_purchases, n_visitors, p_needed in group_tests:
    # `result` and `result_test` are overwritten on every pass, so after the
    # loop they hold the values for group C.
    result = binomtest(n_purchases, n_visitors, p_needed, alternative='greater')
    result_test = result.pvalue
    print(result_test)
    # Print the results, judged against the 0.05 threshold
    if result_test < 0.05:
        print(f'Group {label}: {result_test} is statistically significant')
    else:
        print(f'Group {label}: {result_test} is not statistically significant')

0.9028081076188554
Group A: 0.9028081076188554 is not statistically significant
0.11184562623740596
Group B: 0.11184562623740596 is not statistically significant
0.027944826659830616
Group C: 0.027944826659830616 is statistically significant
