In [None]:
# Install the extra packages this notebook needs. The %pip magic installs into
# the environment of the running kernel, whereas `!pip` shells out and may
# target a different Python interpreter.
# TODO: pin package versions so a fresh run resolves the same dependencies.
%pip install chart-studio
%pip install google-cloud-bigquery-connection

In [None]:
import pandas as pd
import numpy as np
import scipy.optimize

# Import and setup for plotly in Colab
import chart_studio
import chart_studio.plotly as py
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px

# Colab-only: make interactive data tables the default display for pandas
# DataFrames in the cells that follow.
from google.colab import data_table
data_table.enable_dataframe_formatter()

In [None]:
# Notebook-wide settings. PROJECT_ID is a placeholder: replace it with a real
# Google Cloud project ID before running the gcloud and %%bigquery cells,
# which interpolate it.
REGION = 'US'
PROJECT_ID = "change me"

In [None]:
# Authenticate this Colab session as the current user.
# NOTE(review): the gcloud and %%bigquery cells below presumably rely on the
# credentials obtained here — confirm before reordering cells.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Point the gcloud CLI at the configured project, then print the active
# project so the setting can be checked by eye.
# PROJECT_ID is quoted so that a value containing spaces or shell
# metacharacters (such as the unedited placeholder) reaches gcloud as a single
# argument instead of being split by the shell.
!gcloud config set project "{PROJECT_ID}"
!gcloud config get-value project

In [None]:
%%bigquery --project {PROJECT_ID}
-- Preview of the denormalised order view: each order item joined to its
-- order, product, distribution center and purchasing user (first 100 rows).
SELECT
  -- order
  orders.order_id,
  orders.user_id        AS order_user_id,
  orders.status         AS order_status,
  orders.created_at     AS order_created_at,
  orders.returned_at    AS order_returned_at,
  orders.shipped_at     AS order_shipped_at,
  orders.delivered_at   AS order_delivered_at,
  orders.num_of_item    AS order_number_of_items,
  -- order item
  items.id              AS order_items_id,
  items.product_id      AS order_items_product_id,
  items.status          AS order_items_status,
  items.sale_price      AS order_items_sale_price,
  -- product
  products.id           AS product_id,
  products.cost         AS product_cost,
  products.category     AS product_category,
  products.name         AS product_name,
  products.brand        AS product_brand,
  products.retail_price AS product_retail_price,
  products.department   AS product_department,
  products.sku          AS product_sku,
  products.distribution_center_id,
  -- distribution center
  centers.name          AS dist_center_name,
  centers.latitude      AS dist_center_lat,
  centers.longitude     AS dist_center_long,
  -- user
  users.id              AS user_id,
  users.first_name      AS user_first_name,
  users.last_name       AS user_last_name,
  users.age             AS user_age,
  users.gender          AS user_gender,
  users.state           AS user_state,
  users.postal_code     AS user_postal_code,
  users.city            AS user_city,
  users.country         AS user_country,
  users.latitude        AS user_lat,
  users.longitude       AS user_long,
  users.traffic_source  AS user_traffic_source
FROM gcp_lakehouse_ds.gcp_tbl_orders AS orders
JOIN gcp_lakehouse_ds.gcp_tbl_order_items AS items
  ON orders.order_id = items.order_id
JOIN gcp_lakehouse_ds.gcp_tbl_products AS products
  ON items.product_id = products.id
JOIN gcp_lakehouse_ds.gcp_tbl_distribution_centers AS centers
  ON products.distribution_center_id = centers.id
JOIN gcp_lakehouse_ds.gcp_tbl_users AS users
  ON orders.user_id = users.id
LIMIT 100

In [None]:
%%bigquery --project {PROJECT_ID}

-- Daily order volume: one row per calendar day with the number of orders
-- created that day, earliest day first, capped at 500 days.
-- COUNT is used rather than SUM because order_id is an identifier, so adding
-- the values up does not yield a count. The grouping key is the calendar date
-- (not the raw created_at value) so each day collapses into a single row.
SELECT
  COUNT(o.order_id) AS count,
  DATE(o.created_at) AS date
FROM
  gcp_lakehouse_ds.gcp_tbl_orders o
GROUP BY
  DATE(o.created_at)
ORDER BY
  DATE(o.created_at)
LIMIT 500

In [None]:
%%bigquery data --project {PROJECT_ID}

-- Daily order volume, stored in the `data` DataFrame for the cells below:
-- one row per calendar day with the number of orders created that day,
-- earliest day first, capped at 500 days.
-- COUNT is used rather than SUM because order_id is an identifier, so adding
-- the values up does not yield a count. The grouping key is the calendar date
-- (not the raw created_at value) so each day collapses into a single row.
SELECT
  COUNT(o.order_id) AS count,
  DATE(o.created_at) AS date
FROM
  gcp_lakehouse_ds.gcp_tbl_orders o
GROUP BY
  DATE(o.created_at)
ORDER BY
  DATE(o.created_at)
LIMIT 500

In [None]:
# Replace the `date` column with integer seconds since the Unix epoch so it
# can be used as a numeric x axis.
# The dtype guard keeps the cell idempotent: once the column holds integers,
# running the cell again would otherwise reinterpret those seconds as
# nanoseconds and collapse every date to (almost) zero.
if not pd.api.types.is_integer_dtype(data['date']):
    order_dates = pd.to_datetime(data['date'])
    # Subtracting the epoch and floor-dividing by one second does not depend on
    # the datetime64 resolution, unlike astype(int64) // 10**9, which is only
    # correct for nanosecond-resolution values.
    data['date'] = (order_dates - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
data.head()

In [None]:
from datetime import datetime, timezone

# pyplot is imported in this cell because no earlier visible cell binds `plt`;
# without it plt.subplots raises NameError on a fresh kernel.
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter


def _format_epoch_tick(epoch_seconds, _position):
    """Format an x-axis tick value (seconds since the Unix epoch) as YYYY/MM/DD.

    The value is interpreted in UTC so the label does not shift with the
    timezone of the machine running the notebook.
    """
    return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).strftime('%Y/%m/%d')


# Scatter plot of the `count` column against the `date` column (epoch seconds).
fig, ax = plt.subplots(figsize=(20, 12))
data.plot(x='date', y='count', kind='scatter', ax=ax)
# A formatter (rather than fixed tick-label strings) keeps the labels in sync
# with the tick positions whenever matplotlib relocates the ticks.
ax.xaxis.set_major_formatter(FuncFormatter(_format_epoch_tick))