In [1]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation notices from the
# libraries used below; consider narrowing it once the noisy source is known.
warnings.filterwarnings("ignore")

# Packages

In [2]:
import pandas as pd

# Example 2: Query a table with BigQuery DataFrames

In [3]:
# With BigQuery DataFrames, you can use many familiar pandas methods, but the
# processing happens in BigQuery rather than in the runtime, allowing you to work with larger
# DataFrames that would otherwise not fit in the runtime memory.
# Learn more here: https://cloud.google.com/python/docs/reference/bigframes/latest

import bigframes.pandas as bf

bf.options.bigquery.location = "US"  # set based on the location of the dataset you chose to query
bf.options.bigquery.project = "inernship-1206"  # project the session is configured with -- NOTE(review): "inernship" looks like a typo of "internship"; confirm this is the real project id

In [5]:
# Pull only the columns this notebook uses from the public thelook_ecommerce dataset.
query_users = """
SELECT id, first_name, last_name, email, created_at
FROM `bigquery-public-data.thelook_ecommerce.users`
"""

query_products = """
SELECT id, name, category, retail_price
FROM `bigquery-public-data.thelook_ecommerce.products`
"""

# Materialise the query results locally with the documented `to_pandas()` call
# instead of passing the BigQuery DataFrame to the pandas constructor. The
# constructor route depends on implicit coercion (presumably why the column
# labels previously had to be re-assigned by hand); `to_pandas()` keeps the
# column names and dtypes returned by the query.
df_users = bf.read_gbq(query_users).to_pandas()
df_products = bf.read_gbq(query_products).to_pandas()

# Fail fast if the result schema ever differs from what later cells expect.
assert list(df_products.columns) == ["id", "name", "category", "retail_price"]
assert list(df_users.columns) == ["id", "first_name", "last_name", "email", "created_at"]

In [6]:
# Completed orders and their line items from the public thelook_ecommerce dataset.
query_orders = """
SELECT order_id, user_id, created_at, num_of_item
FROM `bigquery-public-data.thelook_ecommerce.orders`
WHERE status='Complete'
"""

query_orderDetails = """
SELECT id, order_id, product_id
FROM `bigquery-public-data.thelook_ecommerce.order_items`
WHERE status='Complete'
"""

# Materialise the query results locally with the documented `to_pandas()` call
# instead of passing the BigQuery DataFrame to the pandas constructor. The
# constructor route depends on implicit coercion (presumably why the column
# labels previously had to be re-assigned by hand); `to_pandas()` keeps the
# column names and dtypes returned by the query.
df_orders = bf.read_gbq(query_orders).to_pandas()
df_orderDetails = bf.read_gbq(query_orderDetails).to_pandas()

# Fail fast if the result schema ever differs from what later cells expect.
assert list(df_orders.columns) == ["order_id", "user_id", "created_at", "num_of_item"]
assert list(df_orderDetails.columns) == ["id", "order_id", "product_id"]

In [7]:
# Ten-product preview. The fixed seed makes the same rows come back on every
# Restart & Run All (for an unchanged products table), so later cells that
# inspect this sample stay meaningful.
df_products_10 = df_products.sample(10, random_state=42)

In [18]:
# (rows, columns) of the users frame loaded from BigQuery.
df_users.shape

(100000, 5)

In [9]:
# Index the preview by product id.
# The result is re-assigned (rather than mutated in place) and guarded on the
# column still being present, so re-running this cell is a no-op instead of a
# KeyError: after the first run "id" is the index, not a column.
if "id" in df_products_10.columns:
    df_products_10 = df_products_10.set_index("id")

In [10]:
# Display the ten sampled products (long names are truncated in this view).
df_products_10

Unnamed: 0_level_0,name,category,retail_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
23566,Men’s Denim Work Short,Shorts,36.0
22780,Carhartt Men's Denim Work Short,Shorts,47.0
12691,Fashion Essentials Bra Back Converter,Intimates,6.9
7826,Calvin Klein Jeans Women's Cinch Back Jacket,Blazers & Jackets,89.5
8254,Anne Klein Casa Blanca Skirt Suit,Suits,186.369995
15707,Pure Cashmere Men and Women Solid Scarf,Plus,47.990002
2175,Life is good. Womens Softwash Hoodie - LIG - S...,Fashion Hoodies & Sweatshirts,63.0
13094,Speedo Women's Speedo Women'S Race Endurance+ ...,Swim,70.160004
9989,Hue Sleepwear Women's Misty Leopard Pant,Sleep & Lounge,25.0
15428,Glamorise Women's MagicLift Full-Figure Suppor...,Plus,40.0


In [20]:
# Print one product name in full, because the table view truncates long names
# with "...". The row is picked by position: the preview is a random sample,
# so a hard-coded id such as 15428 raises KeyError whenever that product is
# not among the sampled rows.
print(df_products_10["name"].iloc[-1])

Glamorise Women's MagicLift Full-Figure Support Bra #1000


In [21]:
# Column labels of the order-items frame, to confirm the join keys used later.
df_orderDetails.columns

Index(['id', 'order_id', 'product_id'], dtype='object')

In [22]:
# Column labels of the orders frame.
df_orders.columns

Index(['order_id', 'user_id', 'created_at', 'num_of_item'], dtype='object')

In [55]:
# Pick five multi-item orders (orders_5_1) and five single-item orders
# (orders_5_2) to spot-check. Both draws are seeded so the same order ids are
# selected on every run against the same data; without a seed every re-run
# silently changes all downstream cells.
orders_5_1 = (
    df_orders.loc[df_orders["num_of_item"] > 1, "order_id"]
    .sample(5, random_state=42)
    .tolist()
)
orders_5_2 = (
    df_orders.loc[df_orders["num_of_item"] == 1, "order_id"]
    .sample(5, random_state=42)
    .tolist()
)

In [56]:
# Shape of the order-item rows behind each sampled group:
# multi-item orders first, single-item orders second.
print(*(
    df_orderDetails.loc[df_orderDetails["order_id"].isin(order_ids)].shape
    for order_ids in (orders_5_1, orders_5_2)
))

(12, 3) (5, 3)


In [57]:
# Number of distinct products bought in each sampled group:
# multi-item orders first, single-item orders second.
print(*(
    df_orderDetails.loc[df_orderDetails["order_id"].isin(order_ids), "product_id"].nunique()
    for order_ids in (orders_5_1, orders_5_2)
))

12 5


In [61]:
# Key the products frame by product id so later cells can filter on the index.
# The result is re-assigned (rather than mutated in place) and guarded on the
# column still being present, so re-running this cell is a no-op instead of a
# KeyError: after the first run "id" is the index, not a column.
if "id" in df_products.columns:
    df_products = df_products.set_index("id")

In [64]:
# Products that appear in any line item of the sampled orders (both groups).
in_sampled_orders = df_orderDetails["order_id"].isin(orders_5_1 + orders_5_2)
sampled_product_ids = df_orderDetails.loc[in_sampled_orders, "product_id"].unique()
df_products_1 = df_products.loc[df_products.index.isin(sampled_product_ids)]
df_products_1

Unnamed: 0_level_0,name,category,retail_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
18447,C-IN2 Men's Pop Color Street Jock,Active,17.0
9780,Carole Hochman Women's Long Zip Robe,Sleep & Lounge,79.0
26122,Harbor Bay Big & Tall 2-Pack Plaid Woven Boxers,Underwear,23.0
20204,Ralph Lauren Mens SB 2B Solid Navy Blue Wool Suit,Suits & Sport Coats,319.98999
9738,Aimee Gowns Original Bra-less Nursing Gown,Sleep & Lounge,39.950001
25329,Premium White Above Ankle Toe Socks,Socks,7.5
8267,Mod-O-Doc Women's Big Cord Peacoat,Outerwear & Coats,170.0
21571,True Religion Men's Geno Baja Slim Jean,Jeans,264.0
8728,Larry Levine Women's Maxi Length Hooded Down J...,Outerwear & Coats,118.75
27032,HUGO BOSS Men's Woven Long Elastic Pant,Sleep & Lounge,34.299999


In [74]:
# Key the orders frame by order id so later cells can filter on the index.
# The result is re-assigned (rather than mutated in place) and guarded on the
# column still being present, so re-running this cell is a no-op instead of a
# KeyError: after the first run "order_id" is the index, not a column.
if "order_id" in df_orders.columns:
    df_orders = df_orders.set_index("order_id")

In [78]:
# Distinct user ids that placed the sampled orders (orders are indexed by order id here).
placed_sampled_order = df_orders.index.isin(orders_5_1 + orders_5_2)
users = list(df_orders.loc[placed_sampled_order, "user_id"].unique())

In [80]:
# Key the users frame by user id, then show the customers behind the sampled orders.
# The set_index result is re-assigned (rather than mutated in place) and guarded
# on the column still being present, so re-running this cell does not raise
# KeyError once "id" has already become the index.
if "id" in df_users.columns:
    df_users = df_users.set_index("id")
df_users.loc[df_users.index.isin(users)]

Unnamed: 0_level_0,first_name,last_name,email,created_at
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
79323,Michelle,Gonzalez,michellegonzalez@example.net,2021-01-28 03:29:00+00:00
17080,Lauren,Butler,laurenbutler@example.org,2021-07-07 08:10:00+00:00
3237,Daniel,Hunt,danielhunt@example.com,2021-11-09 10:44:00+00:00
72897,James,Clark,jamesclark@example.org,2019-05-11 10:36:00+00:00
63400,Stephanie,Monroe,stephaniemonroe@example.net,2021-05-24 08:01:00+00:00
27927,Cynthia,Shelton,cynthiashelton@example.org,2022-06-09 11:41:00+00:00
64878,Anthony,Wong,anthonywong@example.org,2019-08-31 02:14:00+00:00
48522,Fred,Hopkins,fredhopkins@example.org,2021-10-06 09:35:00+00:00
16842,Glenn,Vasquez,glennvasquez@example.com,2021-02-12 01:53:00+00:00
89318,Scott,Weaver,scottweaver@example.net,2019-11-23 10:19:00+00:00


In [94]:
# Total retail price per sampled order: attach each line item's product price,
# then sum within the order. The merge is the default inner join, so line items
# without a matching product row do not contribute.
(
    df_orderDetails.loc[df_orderDetails["order_id"].isin(orders_5_1 + orders_5_2)]
    .merge(
        df_products.reset_index()[["id", "retail_price"]],
        left_on="product_id",
        right_on="id",
    )
    .groupby("order_id")[["retail_price"]]
    .sum()
)

Unnamed: 0_level_0,retail_price
order_id,Unnamed: 1_level_1
4204,198.850006
21127,191.48
21439,17.0
34926,170.0
60865,57.299999
79236,110.160001
81111,264.0
91075,17.0
99024,197.75
111586,319.98999


In [82]:
# Order-level rows for the ten sampled orders (frame is indexed by order id).
df_orders.loc[df_orders.index.isin(orders_5_1 + orders_5_2)]

Unnamed: 0_level_0,user_id,created_at,num_of_item
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
79236,63400,2021-07-23 08:01:00+00:00,3
21127,16842,2024-02-18 01:53:00+00:00,3
81111,64878,2020-07-30 02:14:00+00:00,1
21439,17080,2023-03-27 08:10:00+00:00,1
91075,72897,2022-11-12 10:36:00+00:00,1
111586,89318,2024-04-04 10:19:00+00:00,1
4204,3237,2022-07-19 10:44:00+00:00,2
34926,27927,2023-11-04 11:41:00+00:00,1
60865,48522,2023-12-17 09:35:00+00:00,2
99024,79323,2022-03-06 03:29:00+00:00,2


In [92]:
# Line items of the sampled orders with each product's retail price attached.
# Both sides carry an "id" column, so the result shows them as id_x (line item)
# and id_y (product).
(
    df_orderDetails.loc[df_orderDetails["order_id"].isin(orders_5_1 + orders_5_2)]
    .merge(
        df_products.reset_index()[["id", "retail_price"]],
        left_on="product_id",
        right_on="id",
    )
)

Unnamed: 0,id_x,order_id,product_id,id_y,retail_price
0,115012,79236,9738,9738,39.950001
1,31032,21439,9129,9129,17.0
2,30577,21127,28832,28832,150.0
3,143521,99024,9780,9780,79.0
4,143522,99024,8728,8728,118.75
5,115011,79236,3212,3212,60.0
6,88376,60865,27032,27032,34.299999
7,161766,111586,20204,20204,319.98999
8,88377,60865,26122,26122,23.0
9,30576,21127,25329,25329,7.5


In [93]:
# Re-display the products tied to the sampled orders, for comparison with the merge result.
df_products_1

Unnamed: 0_level_0,name,category,retail_price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
18447,C-IN2 Men's Pop Color Street Jock,Active,17.0
9780,Carole Hochman Women's Long Zip Robe,Sleep & Lounge,79.0
26122,Harbor Bay Big & Tall 2-Pack Plaid Woven Boxers,Underwear,23.0
20204,Ralph Lauren Mens SB 2B Solid Navy Blue Wool Suit,Suits & Sport Coats,319.98999
9738,Aimee Gowns Original Bra-less Nursing Gown,Sleep & Lounge,39.950001
25329,Premium White Above Ankle Toe Socks,Socks,7.5
8267,Mod-O-Doc Women's Big Cord Peacoat,Outerwear & Coats,170.0
21571,True Religion Men's Geno Baja Slim Jean,Jeans,264.0
8728,Larry Levine Women's Maxi Length Hooded Down J...,Outerwear & Coats,118.75
27032,HUGO BOSS Men's Woven Long Elastic Pant,Sleep & Lounge,34.299999


In [6]:
# Peek at three users; seeded so the preview is stable across runs on the same data.
# NOTE(review): the saved output shows `id` as an ordinary column, so it appears
# to predate the cell that moves `id` into the index -- re-run to refresh it.
df_users.sample(3, random_state=42)

Unnamed: 0,id,first_name,last_name,email,created_at
59178,86462,Karen,Turner,karenturner@example.net,2022-11-05 08:02:00+00:00
14610,70030,Lauren,Shepard,laurenshepard@example.com,2020-10-26 14:27:00+00:00
99976,5940,Jennifer,Espinoza,jenniferespinoza@example.com,2021-04-04 09:05:00+00:00
