### In this tutorial, we will use DML to update data into Bigquery

In [1]:
import os
from google.cloud import bigquery
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "C:/Users/Lenovo/OneDrive/Documents1/Google_Bigquery/premium-botany-431702-u7-498d1c535417.json"
client = bigquery.Client()

##### UPDATE with WHERE clause
The following example updates a table named Inventory by reducing the value of the quantity field by 10 for all products that contain the string washer. Assume that the default value for the supply_constrained column is set to TRUE.

In [2]:
# 
QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.Inventory
    SET quantity = quantity - 10,
    supply_constrained = DEFAULT
    WHERE product like '%washer%'
      '''
      )
query_job = client.query(QUERY)  # API request

In [3]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.Inventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish

In [4]:
for row in rows:
    print(row)

Row(('front load washer', 10, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('dishwasher', 20, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('top load washer', 0, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('microwave', 20, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('oven', 5, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('refrigerator', 10, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('dryer', 30, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})


##### UPDATE using joins 
The following example generates a table with inventory totals that include existing inventory and inventory from the NewArrivals table, and marks supply_constrained as false:

In [5]:

QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.Inventory
    SET quantity = quantity +
    (SELECT quantity FROM premium-botany-431702-u7.dataset1.NewArrivals
    WHERE Inventory.product = NewArrivals.product),
    supply_constrained = false
    WHERE product IN (SELECT product FROM premium-botany-431702-u7.dataset1.NewArrivals);
      '''
      )
query_job = client.query(QUERY)  # API request

In [6]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.Inventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('refrigerator', 10, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('front load washer', 10, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('dishwasher', 20, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('microwave', 20, None), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('oven', 305, False), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('top load washer', 100, False), {'product': 0, 'quantity': 1, 'supply_constrained': 2})
Row(('dryer', 230, False), {'product': 0, 'quantity': 1, 'supply_constrained': 2})


##### UPDATE nested fields
The following example updates nested record fields.

In [7]:

QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.DetailedInventory
    SET specifications.color = 'white',
    specifications.warranty = '1 year'
    WHERE product like '%washer%';
      '''
      )
query_job = client.query(QUERY)  # API request

In [8]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.DetailedInventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('countertop microwave', 20, None, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('refrigerator', 10, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('oven', 5, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dryer', 30, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('microwave', 20, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dishwasher', 30, False, [], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('top load washer', 10, False, [], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrai

Alternatively, you can update the entire record:

In [9]:

QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.DetailedInventory
    SET specifications
        = STRUCT<color STRING, warranty STRING,
    dimensions STRUCT<depth FLOAT64, height FLOAT64, width FLOAT64>>('white', '1 year', NULL)
    WHERE product like '%washer%';
'''
)
query_job = client.query(QUERY)  # API request

In [10]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.DetailedInventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('countertop microwave', 20, None, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dishwasher', 30, False, [], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('top load washer', 10, False, [], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('front load washer', 20, False, [], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('microwave', 20, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('oven', 5, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dryer', 30, Fal

##### UPDATE repeated records
The following example appends an entry to a repeated record in the comments column for products that contain the string washer:

In [11]:
QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.DetailedInventory
    SET comments = ARRAY(
    SELECT comment FROM UNNEST(comments) AS comment
    UNION ALL
    SELECT (CAST('2016-01-01' AS DATE), 'comment1')
)
WHERE product like '%washer%';
'''
)
query_job = client.query(QUERY)  # API request

In [12]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.DetailedInventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('countertop microwave', 20, None, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dryer', 30, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('refrigerator', 10, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('oven', 5, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('microwave', 20, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('top load washer', 10, False, [{'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('front load washer', 20, False, [{'created': datetime.date(2016, 1, 1), 'comm

##### Alternatively, you can use the ARRAY_CONCAT function:

In [13]:
QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.DetailedInventory
    SET comments = ARRAY_CONCAT(comments,
    ARRAY<STRUCT<created DATE, comment STRING>>[(CAST('2016-01-01' AS DATE), 'comment1')])
    WHERE product like '%washer%';
'''
)
query_job = client.query(QUERY)  # API request

In [14]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.DetailedInventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('countertop microwave', 20, None, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dryer', 30, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('oven', 5, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('refrigerator', 10, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('microwave', 20, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('front load washer', 20, False, [{'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}, {'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('top load wa

The following example appends a second entry to the repeated record in the comments column for all rows:

In [15]:
QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.DetailedInventory
    SET comments = ARRAY(
    SELECT comment FROM UNNEST(comments) AS comment
    UNION ALL
    SELECT (CAST('2016-01-01' AS DATE), 'comment2')
)
WHERE true

SELECT product, comments FROM premium-botany-431702-u7.dataset1.DetailedInventory;
'''
)
query_job = client.query(QUERY)  # API request

In [16]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.DetailedInventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('countertop microwave', 20, None, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dryer', 30, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('oven', 5, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('refrigerator', 10, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('microwave', 20, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('front load washer', 20, False, [{'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}, {'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('top load wa

To delete repeated value entries, you can use **WHERE ... NOT LIKE**:

In [17]:
QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.DetailedInventory
    SET comments = ARRAY(
      SELECT c FROM UNNEST(comments) AS c
      WHERE c.comment NOT LIKE '%comment2%'
)
WHERE true;
'''
)
query_job = client.query(QUERY)  # API request

In [18]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.DetailedInventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('countertop microwave', 20, None, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('oven', 5, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('refrigerator', 10, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dryer', 30, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('microwave', 20, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('top load washer', 10, False, [{'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}, {'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('front load wa

##### UPDATE statement using join between three tables
The following example sets supply_constrained to true for all products from NewArrivals where the warehouse location is in 'WA' state.

In [19]:
QUERY = (
    '''
    UPDATE premium-botany-431702-u7.dataset1.DetailedInventory
    SET supply_constrained = true
    FROM premium-botany-431702-u7.dataset1.NewArrivals, premium-botany-431702-u7.dataset1.Warehouse
    WHERE DetailedInventory.product = NewArrivals.product AND
          NewArrivals.warehouse = Warehouse.warehouse AND
          Warehouse.state = 'WA';
'''
)
query_job = client.query(QUERY)  # API request

In [20]:
# Perform a query.
QUERY = (
    'SELECT * FROM `premium-botany-431702-u7.dataset1.DetailedInventory` LIMIT 1000;')
query_job = client.query(QUERY)  # API request
rows = query_job.result()  # Waits for query to finish
for row in rows:
    print(row)

Row(('countertop microwave', 20, None, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('oven', 5, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('refrigerator', 10, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('dryer', 30, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('microwave', 20, False, [], None), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('top load washer', 10, False, [{'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}, {'created': datetime.date(2016, 1, 1), 'comment': 'comment1'}], {'color': 'white', 'warranty': '1 year', 'dimensions': None}), {'product': 0, 'quantity': 1, 'supply_constrained': 2, 'comments': 3, 'specifications': 4})
Row(('front load wa