In [3]:
from google.cloud import bigquery
import pandas as pd

#### Setting up the connection

In [4]:
# Project ID that the BigQuery client acts on behalf of.
PROJECT_ID = 'product-analytics-portfolio'

# Shared client; the query cells below submit their SQL through it.
client = bigquery.Client(project=PROJECT_ID)

#### Exploratory Queries

In [None]:
# Trace a single visitor: fetch every event stored for one user_pseudo_id in
# the `events_20210131` table, ordered by event_timestamp.
example_user_journey_sql = """
SELECT user_pseudo_id,
       event_timestamp,
       event_name
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_20210131`
WHERE user_pseudo_id = "1026454.4271112504"
ORDER BY event_timestamp
"""

# Submit the query as a job, then pull its result set into a DataFrame.
example_user_journey_job = client.query(example_user_journey_sql)
example_user_journey_query = example_user_journey_job.to_dataframe()

# Bare last expression so the notebook renders the frame as the cell output.
example_user_journey_query



Unnamed: 0,user_pseudo_id,event_timestamp,event_name
0,1026454.4271112503,1612069510766593,page_view
1,1026454.4271112503,1612069510766593,session_start
2,1026454.4271112503,1612069510766593,first_visit
3,1026454.4271112503,1612069515781635,page_view
4,1026454.4271112503,1612069515781635,view_promotion
5,1026454.4271112503,1612069529243877,scroll
6,1026454.4271112503,1612069530073506,user_engagement


Observations:
- The first three events (page_view, session_start and first_visit) all share the same timestamp
    - Whether this pattern holds for other users should be checked with further exploratory queries
- The second page_view and view_promotion also share a timestamp
- Within this day's data (events_20210131), this user did not proceed to adding items to cart or checking out

In [7]:
# Enumerate the distinct event_name values present in the `events_20210131`
# table. The query has no ORDER BY, so row order is whatever BigQuery returns.
event_names_sql = """
SELECT DISTINCT event_name
FROM `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_20210131`
"""

# Submit the query as a job, then pull its result set into a DataFrame.
event_names_job = client.query(event_names_sql)
event_names_query = event_names_job.to_dataframe()

# Bare last expression so the notebook renders the frame as the cell output.
event_names_query



Unnamed: 0,event_name
0,page_view
1,scroll
2,user_engagement
3,session_start
4,first_visit
5,view_promotion
6,view_item
7,view_search_results
8,add_payment_info
9,add_shipping_info


Observations:
- Many of the events are likely irrelevant to the funnel analysis: scroll, click, user_engagement, etc.
- Major steps for the funnel analysis are: page_view, add_to_cart, begin_checkout and purchase