# Ecommerce Data Project
Based on the Tinybird [ecommerce_data_project](https://github.com/tinybirdco/ecommerce_data_project) repository.



If you opened this notebook in Google Colab, click `Copy to Drive` (see above) before running it.

In [1]:
#@title Mount your Google Drive to save and use local files
from google.colab import drive
drive.mount('/content/gdrive', force_remount=False)

% cd "/content/gdrive/My Drive/Colab Notebooks/Tinybird/tb_examples"

Mounted at /content/gdrive
/content/gdrive/My Drive/Colab Notebooks/Tinybird/tb_examples


In [2]:
#@title Install Tinybird CLI, libraries and your token
!pip install tinybird-cli -q
!sudo apt-get install jq

import os
import re

if not os.path.isfile('.tinyb'):
  !tb auth

if not os.path.isdir('./datasources'):
  !tb init

[?25l[K     |████▌                           | 10 kB 21.7 MB/s eta 0:00:01[K     |█████████                       | 20 kB 29.0 MB/s eta 0:00:01[K     |█████████████▌                  | 30 kB 35.5 MB/s eta 0:00:01[K     |██████████████████              | 40 kB 38.0 MB/s eta 0:00:01[K     |██████████████████████▋         | 51 kB 39.0 MB/s eta 0:00:01[K     |███████████████████████████     | 61 kB 13.5 MB/s eta 0:00:01[K     |███████████████████████████████▋| 71 kB 14.3 MB/s eta 0:00:01[K     |████████████████████████████████| 72 kB 963 kB/s 
[K     |████████████████████████████████| 54 kB 2.4 MB/s 
[K     |████████████████████████████████| 81 kB 8.5 MB/s 
[K     |████████████████████████████████| 61 kB 7.0 MB/s 
[K     |████████████████████████████████| 86 kB 5.1 MB/s 
[K     |████████████████████████████████| 46 kB 3.5 MB/s 
[?25h  Building wheel for tabulate (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account

In [3]:
#@title Helper function to write to files
def write_text_to_file(filename, text):
  """Write `text` to `filename`, replacing any existing content.

  Missing parent directories are created first, so writing to a path such as
  `endpoints/sales.pipe` works even if the folder does not exist yet. The file
  is always written as UTF-8 rather than the platform's default encoding.

  Args:
    filename: path of the file to create or overwrite.
    text: string content to write.
  """
  parent_dir = os.path.dirname(filename)
  if parent_dir:
    os.makedirs(parent_dir, exist_ok=True)
  with open(filename, 'w', encoding='utf-8') as f:
    f.write(text)

# Create Data Sources


## 1. Events Data Source

In [4]:
# Datafile for the `events` Data Source. The sorting key has to be a column
# declared in SCHEMA; there is no `timestamp` column, so the table is sorted by
# the `date` column instead.
filename="./datasources/events.datasource"
text='''
DESCRIPTION > # Events from users
    this contains all the events produced by kafka, there are 4 fixed columns 
    plus a `json` column which contains the rest of the data for that event

SCHEMA >
    date DateTime,
    product_id String,
    user_id String,
    event String,
    extra_data String

ENGINE MergeTree
ENGINE_SORTING_KEY date
'''

write_text_to_file(filename, text)

In [53]:
# Runs `tb datasource generate` against the events datafile written by the previous cell.
# NOTE(review): the captured output reports that datasources/events.datasource itself was
# (re)generated along with a CSV fixture, so `--force` presumably overwrote the hand-written
# datafile - confirm whether `tb push datasources/events.datasource` was intended here.
!tb datasource generate datasources/events.datasource --force

[92m** Generated datasources/events.datasource
** => Create it on the server running: $ tb push datasources/events.datasource
** => Append data using: $ tb datasource append events datasources/events.datasource`
[0m
[92m** => Generated fixture datasources/fixtures/events.csv[0m


In [54]:
# Append the first events CSV (events_50M_1.csv, fetched by URL) to the `events` Data Source.
!tb datasource append events https://storage.googleapis.com/tinybird-assets/datasets/guides/events_50M_1.csv

[0m** 🥚 starting import process[0m
[92m** 🐥 done[0m
[92m** Total rows in events: 50000000[0m
[92m** Data appended to Data Source 'events' successfully![0m
[0m** Data pushed to events[0m


In [55]:
# Append the second events CSV (events_50M_2.csv) to the same `events` Data Source.
!tb datasource append events https://storage.googleapis.com/tinybird-assets/datasets/guides/events_50M_2.csv

[0m** 🥚 starting import process[0m
[92m** 🐥 done[0m
[92m** Total rows in events: 100000000[0m
[92m** Data appended to Data Source 'events' successfully![0m
[0m** Data pushed to events[0m


In [56]:
# Sanity check: total number of rows in `events` after both appends.
!tb sql "SELECT count() FROM events"

-------------
|   [1;32mcount()[0m |
-------------
| 100000000 |
-------------


In [8]:
# Show a single row to inspect the column names and the JSON held in `extra_data`.
!tb sql "SELECT * FROM events LIMIT 1"

------------------------------------------------
[1;32mdate:[0m 2015-11-22 00:00:00
[1;32mproduct_id:[0m 6acf8c3a-1aaa-11eb-ab5e-acde48001122
[1;32muser_id:[0m 396547
[1;32mevent:[0m remove_item_from_cart
[1;32mextra_data:[0m {"city": "Jackson"}
------------------------------------------------


## 2. Products Data Source

In [37]:
# Datafile for the products lookup table. It uses the Join engine keyed on
# `sku`, so other queries can look up product attributes with joinGet().
# The usage hint inside the datafile names this Data Source
# (`products_join_sku`), matching the file name and the joinGet() call used by
# the sales endpoint.
filename="datasources/products_join_sku.datasource"
text='''
SCHEMA >
    sku String,
    color String,
    section_id String,
    title String

# this creates a join table ready to access by sku
# using joinGet('products_join_sku', 'color', sku)

ENGINE Join
ENGINE_JOIN_STRICTNESS ANY
ENGINE_JOIN_TYPE LEFT
ENGINE_KEY_COLUMNS sku
'''

write_text_to_file(filename, text)

In [38]:
# Create the `products_join_sku` Data Source on the server from its datafile.
!tb push datasources/products_join_sku.datasource

[0m** Processing datasources/products_join_sku.datasource[0m
[0m** Building dependencies[0m
[0m** Running products_join_sku [0m
[92m** 'products_join_sku' created[0m
[0m** Not pushing fixtures[0m


In [39]:
# Append the first products CSV (products_1.csv, fetched by URL) to `products_join_sku`.
!tb datasource append products_join_sku https://storage.googleapis.com/tinybird-assets/datasets/guides/products_1.csv

[0m** 🥚 starting import process[0m
[92m** 🐥 done[0m
[92m** Appended 1200000 new rows[0m
[92m** Total rows in products_join_sku: None[0m
[92m** Data appended to Data Source 'products_join_sku' successfully![0m
[0m** Data pushed to products_join_sku[0m


In [40]:
# Append the second products CSV (products_2.csv) to `products_join_sku`.
!tb datasource append products_join_sku https://storage.googleapis.com/tinybird-assets/datasets/guides/products_2.csv

[0m** 🥚 starting import process[0m
[92m** 🐥 done[0m
[92m** Appended 1241156 new rows[0m
[92m** Total rows in products_join_sku: None[0m
[92m** Data appended to Data Source 'products_join_sku' successfully![0m
[0m** Data pushed to products_join_sku[0m


In [41]:
# Sanity check: number of rows in the join table after both appends.
!tb sql "SELECT count() FROM products_join_sku"

-----------
| [1;32mcount()[0m |
-----------
| 2441156 |
-----------


In [42]:
# Show a single product row to inspect the columns.
!tb sql "SELECT * FROM products_join_sku LIMIT 1"

-----------------------------------------
[1;32msku:[0m 5c9ed212-1aaa-11eb-b04a-acde48001122
[1;32mcolor:[0m burlywood4
[1;32msection_id:[0m 8
[1;32mtitle:[0m Famille Nombreuse/Mlah
-----------------------------------------


## 3. Top Products View Data Source

In [14]:
# Datafile for the Data Source that receives the materialized view. Keyed by
# `date`, it stores intermediate aggregate states (AggregateFunction columns)
# rather than final values, so readers must finish the aggregation with the
# matching -Merge functions (topKMerge / sumMerge).
filename="datasources/top_products_view.datasource"
text='''
SCHEMA >
    date Date,
    top_10 AggregateFunction(topK(10), String),
    total_sales AggregateFunction(sum, Float64)

ENGINE AggregatingMergeTree
ENGINE_SORTING_KEY date
'''

write_text_to_file(filename, text)

In [15]:
# Create the `top_products_view` Data Source on the server from its datafile.
!tb push datasources/top_products_view.datasource

[0m** Processing datasources/top_products_view.datasource[0m
[0m** Building dependencies[0m
[0m** Running top_products_view [0m
[92m** 'top_products_view' created[0m
[0m** Not pushing fixtures[0m


# Create Pipes

## Top Product Per Day Pipe

In [16]:
# Pipe with two nodes:
#   - `only_buy_events` keeps the 'buy' events and extracts `price` from the
#     JSON stored in `extra_data`;
#   - `top_per_day` aggregates them per day into -State values (topKState,
#     sumState) and is materialized into the `top_products_view` Data Source.
filename="pipes/top_product_per_day.pipe"
text='''
NODE only_buy_events
DESCRIPTION >
    filters all the buy events

SQL >
    SELECT
        toDate(date) date,
        product_id,
        JSONExtractFloat(extra_data, 'price') as price
    FROM events
    where event = 'buy'


NODE top_per_day
SQL >
   SELECT date,
          topKState(10)(product_id) top_10,
          sumState(price) total_sales
   from only_buy_events
   group by date

TYPE materialized
DATASOURCE top_products_view
'''

write_text_to_file(filename, text)

In [57]:
# Push the pipe; `--force` is passed so the push can be re-run, and `--populate`
# starts a populate job (see the job URL in the output) for `top_products_view`.
# NOTE(review): exact flag semantics are taken from the CLI output - confirm against `tb push --help`.
!tb push 'pipes/top_product_per_day.pipe' --force --populate

[0m** Processing pipes/top_product_per_day.pipe[0m
[0m** Building dependencies[0m
[0m** Running top_product_per_day [0m
[0m** Materialized node 'top_per_day' using the Data Source 'top_products_view'[0m
[0m** Populating job url https://api.tinybird.co/v0/jobs/b3b6a04d-f15d-4242-adee-d16b3bb7101e[0m
[92m** 'top_product_per_day' created[0m
[0m** Not pushing fixtures[0m


In [58]:
# Read the materialized view: the stored aggregate states are finalized with
# topKMerge / sumMerge and grouped by day; only the first 3 days are shown.
!tb sql "SELECT date, topKMerge(top_10), sumMerge(total_sales) \
FROM top_products_view \
GROUP BY date LIMIT 3"

--------------------------------------------------------------------------------
[1;32mdate:[0m 2015-11-22
[1;32mtopKMerge(top_10):[0m ['61a92be8-1aaa-11eb-8f2a-acde48001122', '5f5a93c6-1aaa-11eb-8a25-acde48001122', '679c30cc-1aaa-11eb-861c-acde48001122', '6938df86-1aaa-11eb-9aea-acde48001122', '6832eab4-1aaa-11eb-a35f-acde48001122', '689c6714-1aaa-11eb-9a32-acde48001122', '5f1a4314-1aaa-11eb-a25a-acde48001122', '6848a368-1aaa-11eb-9fed-acde48001122', '696457ba-1aaa-11eb-abb5-acde48001122', '6bfae926-1aaa-11eb-8a20-acde48001122']
[1;32msumMerge(total_sales):[0m 1346591.340000051
--------------------------------------------------------------------------------
[1;32mdate:[0m 2015-11-23
[1;32mtopKMerge(top_10):[0m ['66358a50-1aaa-11eb-b8c3-acde48001122', '695582ee-1aaa-11eb-8f60-acde48001122', '6bcb18a4-1aaa-11eb-861f-acde48001122', '69067abe-1aaa-11eb-8cee-acde48001122', '6bf169d2-1aaa-11eb-9ca0-acde48001122', '68650c1a-1aaa-11eb-872a-acde48001122', '69cb8b1a-1aaa-11eb-9be9-acd

# Create Endpoints

In [19]:
# Endpoint pipe returning total sales per day for products of a given color.
#   - `only_buy_events` keeps 'buy' events, looks up each product's color in
#     the `products_join_sku` join table and extracts the price from the JSON.
#   - `endpoint` sums the price per day for one color. The color is a template
#     parameter (`color`) whose default, 'dark green', is the value that used to
#     be hard-coded, so calls without the parameter return the same result.
filename="endpoints/sales.pipe"
text='''
DESCRIPTION >
    return sales for a product with color filter
    
NODE only_buy_events
SQL >
    SELECT
        toDate(date) date,
        product_id,
        joinGet('products_join_sku', 'color', product_id) as color,
        JSONExtractFloat(extra_data, 'price') as price
    FROM events
    WHERE event = 'buy'

NODE endpoint
DESCRIPTION >
    return sales per day filtered by the `color` parameter (defaults to 'dark green')
SQL >
    %
    select date, sum(price) total_sales
    from only_buy_events
    where color = {{String(color, 'dark green')}}
    group by date
'''

write_text_to_file(filename, text)

In [49]:
# Push the `sales` pipe; the output prints the URL of the resulting API endpoint.
!tb push 'endpoints/sales.pipe' --force --populate

[0m** Processing endpoints/sales.pipe[0m
[0m** Building dependencies[0m
[0m** Running sales [0m
[92m** => Test endpoint at https://api.tinybird.co/v0/pipes/sales.json[0m
[92m** 'sales' created[0m
[0m** Not pushing fixtures[0m


In [59]:
# Query the `sales` pipe by name to preview the first 10 rows of daily totals.
!tb sql "SELECT * FROM sales LIMIT 10"

-----------------------------------
| [1;32mdate[0m       |        [1;32mtotal_sales[0m |
-----------------------------------
| 2015-11-22 |             719.14 |
| 2015-11-23 |  953.2400000000001 |
| 2015-11-24 | 1320.8400000000001 |
| 2015-11-25 |             800.41 |
| 2015-11-26 |            1064.47 |
| 2015-11-27 |             720.26 |
| 2015-11-28 |            1104.78 |
| 2015-11-29 |            1106.12 |
| 2015-11-30 |             904.84 |
| 2015-12-01 | 1052.1299999999999 |
-----------------------------------


In [51]:
# Endpoint pipe returning the top 10 products per day between two dates.
# It reads from `top_products_view`, the Data Source that the materialized node
# writes to, rather than from the `top_product_per_day` pipe. Merging the stored
# states with topKMerge uses the pre-aggregated data instead of re-running the
# aggregation over the raw events.
# `start` and `end` are required Date parameters of the endpoint.
filename="endpoints/top_products_params.pipe"
text='''
NODE endpoint
DESCRIPTION >
    returns top 10 products given start and end dates
SQL >
    %
    select
        date,
        topKMerge(10)(top_10) as top_10
    from top_products_view
    where date between {{Date(start)}} and {{Date(end)}}
    group by date
'''

write_text_to_file(filename, text)

In [52]:
# Push the `top_products_params` pipe; the output prints the URL of the resulting API endpoint.
!tb push 'endpoints/top_products_params.pipe' --force --populate

[0m** Processing endpoints/top_products_params.pipe[0m
[0m** Building dependencies[0m
[0m** Running top_products_params [0m
[92m** => Test endpoint at https://api.tinybird.co/v0/pipes/top_products_params.json[0m
[92m** 'top_products_params' created[0m
[0m** Not pushing fixtures[0m


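Example request to the parameterized endpoint, passing `start` and `end` as query-string parameters (you will likely also need to append your Tinybird token, e.g. `&token=<your token>`):

https://api.tinybird.co/v0/pipes/top_products_params.json?start=2019-01-01&end=2019-01-05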
