In [55]:
import pandas as pd
from scipy.stats import ttest_ind
from scipy.stats import fisher_exact

In [3]:
# downloaded Google Sheet provided by SC and saved locally
path = '../data/Thank You Call Data - Sheet1.tsv'
raw_data = pd.read_csv(path, sep = '\t')

In [154]:
# method chain and test this post EDA

filter_criteria = (raw_data.BOOKINGS < 12)& (raw_data.GMV < 8000) & (raw_data.BOOKINGS.notnull())

dataset = (raw_data[filter_criteria]
           .assign(MARGIN = lambda x: x.LTR / x.GMV)
           .assign(CUSTOMER_RETURNED = lambda x: x.BOOKINGS > 1)
           .assign(GMV_PER_BOOKING = lambda x: x.GMV / x.BOOKINGS)
          )

# GMV

Hypothesis is that the thank you call leads to higher lifetimes GMV

In [64]:
#split data
thank_you_call = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']
no_call = dataset[dataset.THANK_YOU_STATUS == 'no_call']

# test & report
GMV_test_results = ttest_ind(a=thank_you_call['GMV'],b=no_call['GMV'])
thank_you_GMV_avg = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']['GMV'].mean()
no_call_GMV_avg = dataset[dataset.THANK_YOU_STATUS == 'no_call']['GMV'].mean()
delta_GMV = round(thank_you_GMV_avg - no_call_GMV_avg,2)
lift = round(100 * delta_GMV / no_call_GMV_avg,2)

print(f'Campaign lift of ${delta_GMV} ({lift}%) vs ${no_call_GMV_avg} baseline with p {GMV_test_results[1]}')

Campaign lift of $56.92 (6.02%) vs $944.9203556818181 baseline with p 8.105493833861987e-07


# LTR

In [66]:
#split data
thank_you_call = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']
no_call = dataset[dataset.THANK_YOU_STATUS == 'no_call']

# test & report
LTR_test_results = ttest_ind(a=thank_you_call['GMV'],b=no_call['LTR'])
thank_you_LTR_avg = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']['LTR'].mean()
no_call_LTR_avg = dataset[dataset.THANK_YOU_STATUS == 'no_call']['LTR'].mean()
delta_LTR = round(thank_you_LTR_avg - no_call_LTR_avg,2)
lift = round(100 * delta_LTR / no_call_LTR_avg,2)

print(f'Campaign lift of ${delta_LTR} ({lift}%) vs ${no_call_LTR_avg} baseline with p {LTR_test_results[1]}')

Campaign lift of $1.3 (2.0%) vs $65.10017209338235 baseline with p 0.0


# BOOKINGS & GMV PER BOOKING

## Don't use p value results, not relevant for rate data, but leverage lift results for deck


In [156]:
#split data
thank_you_call = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']
no_call = dataset[dataset.THANK_YOU_STATUS == 'no_call']

# test & report
LTR_test_results = ttest_ind(a=thank_you_call['BOOKINGS'],b=no_call['BOOKINGS'])
thank_you_LTR_avg = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']['BOOKINGS'].mean()
no_call_LTR_avg = dataset[dataset.THANK_YOU_STATUS == 'no_call']['BOOKINGS'].mean()
delta_LTR = round(thank_you_LTR_avg - no_call_LTR_avg,2)
lift = round(100 * delta_LTR / no_call_LTR_avg,2)

print(f'Campaign lift of ${delta_LTR} ({lift}%) vs ${no_call_LTR_avg} baseline with p {LTR_test_results[1]}')

Campaign lift of $0.07 (4.56%) vs $1.5334224598930482 baseline with p 3.554089301881193e-05


In [157]:
#split data
thank_you_call = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']
no_call = dataset[dataset.THANK_YOU_STATUS == 'no_call']

# test & report
LTR_test_results = ttest_ind(a=thank_you_call['BOOKINGS'],b=no_call['GMV_PER_BOOKING'])
thank_you_LTR_avg = dataset[dataset.THANK_YOU_STATUS == 'thank_you_call']['GMV_PER_BOOKING'].mean()
no_call_LTR_avg = dataset[dataset.THANK_YOU_STATUS == 'no_call']['GMV_PER_BOOKING'].mean()
delta_LTR = round(thank_you_LTR_avg - no_call_LTR_avg,2)
lift = round(100 * delta_LTR / no_call_LTR_avg,2)

print(f'Campaign lift of ${delta_LTR} ({lift}%) vs ${no_call_LTR_avg} baseline with p {LTR_test_results[1]}')

Campaign lift of $8.9 (1.4%) vs $636.7880225521448 baseline with p 0.0


# Return Rate

In [148]:
return_rate_dataset = (dataset
.groupby(['THANK_YOU_STATUS','CUSTOMER_RETURNED'])
.USER_ID
.count()
.reset_index()
.pivot(index = 'THANK_YOU_STATUS',
 columns = 'CUSTOMER_RETURNED',
 values = 'USER_ID')
)

return_rate_dataset

CUSTOMER_RETURNED,False,True
THANK_YOU_STATUS,Unnamed: 1_level_1,Unnamed: 2_level_1
no_call,11077,3883
thank_you_call,5716,2178


In [149]:
return_rate_results = fisher_exact(return_rate_dataset)

summary_return_rate = (return_rate_dataset
                       .reset_index()
                       .assign(RETURN_RATE = lambda x: x[True] / (x[False] + x[True]))
                      )

control_group_rate = summary_return_rate[summary_return_rate.THANK_YOU_STATUS == 'no_call'][['RETURN_RATE']].sum()[0]
test_group_rate = summary_return_rate[summary_return_rate.THANK_YOU_STATUS == 'thank_you_call'][['RETURN_RATE']].sum()[0]
delta_return_rate = test_group_rate - control_group_rate

print(f'Test saw {100*delta_return_rate:.2}% change in customer return rate from baseline of {100*control_group_rate:.2f}% with p value {return_rate_results[1]}')

Test saw 1.6% change in customer return rate from baseline of 25.96% with p value 0.008115007865450358
