## Page Visits Funnel

### Inspect the DataFrames

In [1]:
import pandas as pd

visits = pd.read_csv('visits.csv', parse_dates=[1])
cart = pd.read_csv('cart.csv', parse_dates=[1])
checkout = pd.read_csv('checkout.csv', parse_dates=[1])
purchase = pd.read_csv('purchase.csv', parse_dates=[1])

print(visits.info())
print(cart.info())
print(checkout.info())
print(purchase.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   user_id     2000 non-null   object        
 1   visit_time  2000 non-null   datetime64[ns]
dtypes: datetime64[ns](1), object(1)
memory usage: 31.4+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 348 entries, 0 to 347
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   user_id    348 non-null    object        
 1   cart_time  348 non-null    datetime64[ns]
dtypes: datetime64[ns](1), object(1)
memory usage: 5.6+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   user_id        360 non-null    object        
 1   checkout_time  360 

### Combining Tables

Visits & Carts

In [2]:
visits_cart = pd.merge(visits, cart, how='left')
visits_without_cart = visits_cart[visits_cart.cart_time.isnull()]
n_visits = len(visits_cart)
n_visits_without_cart = len(visits_without_cart)
p_visits_without_cart = float(n_visits_without_cart)/n_visits
print(n_visits)
print(n_visits_without_cart)
print(p_visits_without_cart)

2000
1652
0.826


Carts & Checkouts

In [3]:
carts_checkout = pd.merge(cart, checkout, how='left')
carts_without_checkout = carts_checkout[carts_checkout.checkout_time.isnull()]
n_carts_checkout = len(carts_checkout)
n_carts_without_checkout = len(carts_without_checkout)
p_carts_without_checkout = float(n_carts_without_checkout)/n_carts_checkout
print(n_carts_checkout)
print(n_carts_without_checkout)
print(p_carts_without_checkout)

482
122
0.25311203319502074


Checkouts & Purchases

In [4]:
checkouts_purchase = pd.merge(checkout, purchase, how='left')
checkouts_withuot_purchase = checkouts_purchase[checkouts_purchase.purchase_time.isnull()]
n_checkouts_purchase = len(checkouts_purchase)
n_checkouts_withuot_purchase = len(checkouts_withuot_purchase)
p_checkouts_withuot_purchase = float(n_checkouts_withuot_purchase)/n_checkouts_purchase
print(n_checkouts_purchase)
print(n_checkouts_withuot_purchase)
print(p_checkouts_withuot_purchase)

598
101
0.1688963210702341


In [5]:
print('Percent of users who visited Cool T-Shirts Inc but didn\'t place a t-shirt in their cart: ' \
      + str(format(100*p_visits_without_cart, '.2f')) + '%')
print('Percent of users who put items in their cart but didn\'t proceed to checkout: ' \
      + str(format(100*p_carts_without_checkout, '.2f')) + '%')
print('Percent of users who proceeded to checkout but didn\'t purchase a t-shirt: ' \
      + str(format(100*p_checkouts_withuot_purchase, '.2f')) + '%')

Percent of users who visited Cool T-Shirts Inc but didn't place a t-shirt in their cart: 82.60%
Percent of users who put items in their cart but didn't proceed to checkout: 25.31%
Percent of users who proceeded to checkout but didn't purchase a t-shirt: 16.89%


All Data

In [6]:
all_data = visits.merge(cart, how='left').merge(checkout, how='left').merge(purchase, how='left')
print(all_data.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2372 entries, 0 to 2371
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   user_id        2372 non-null   object        
 1   visit_time     2372 non-null   datetime64[ns]
 2   cart_time      720 non-null    datetime64[ns]
 3   checkout_time  598 non-null    datetime64[ns]
 4   purchase_time  497 non-null    datetime64[ns]
dtypes: datetime64[ns](4), object(1)
memory usage: 111.2+ KB
None


In [7]:
n_visits = len(all_data[~all_data.visit_time.isnull()])
n_cart = len(all_data[~all_data.cart_time.isnull()])
n_checkout = len(all_data[~all_data.checkout_time.isnull()])
n_purchase = len(all_data[~all_data.purchase_time.isnull()])

print('# of Visits: ' + str(n_visits))
print('# of Carts: ' + str(n_cart))
print('# of Checkouts: ' + str(n_checkout))
print('# of Purchases: ' + str(n_purchase))

p_visits_without_cart_v2 = float(n_visits - n_cart)/n_visits
p_carts_without_checkout_v2 = float(n_cart - n_checkout)/n_cart
p_checkouts_withuot_purchase_v2 = float(n_checkout - n_purchase)/n_checkout

print('Percent of users who visited Cool T-Shirts Inc but didn\'t place a t-shirt in their cart: ' \
      + str(format(100*p_visits_without_cart_v2, '.2f')) + '%')
print('Percent of users who put items in their cart but didn\'t proceed to checkout: ' \
      + str(format(100*p_carts_without_checkout_v2, '.2f')) + '%')
print('Percent of users who proceeded to checkout but didn\'t purchase a t-shirt: ' \
      + str(format(100*p_checkouts_withuot_purchase_v2, '.2f')) + '%')

# of Visits: 2372
# of Carts: 720
# of Checkouts: 598
# of Purchases: 497
Percent of users who visited Cool T-Shirts Inc but didn't place a t-shirt in their cart: 69.65%
Percent of users who put items in their cart but didn't proceed to checkout: 16.94%
Percent of users who proceeded to checkout but didn't purchase a t-shirt: 16.89%


### Tiempo promedio de compra

In [8]:
all_data['time_to_purchase'] = \
    all_data.purchase_time - \
    all_data.visit_time

print(all_data.time_to_purchase[~all_data.time_to_purchase.isnull()])
print(all_data.time_to_purchase.mean())

1      0 days 00:44:00
14     0 days 00:38:00
49     0 days 00:41:00
50     0 days 01:00:00
51     0 days 00:41:00
             ...      
2359   0 days 00:28:00
2360   0 days 00:11:00
2361   0 days 00:32:00
2362   0 days 00:28:00
2363   0 days 00:11:00
Name: time_to_purchase, Length: 497, dtype: timedelta64[ns]
0 days 00:43:53.360160965
