# Notebook Create DB Views
## Creating a DB View to query ALL NOAA stations for various years
The cell below creates a DB View in your Google BigQuery project. A view is a saved query; it does not store any data itself, so it takes up no space in your DB. The query is a UNION ALL over the yearly NOAA tables, starting from the year selected as a parameter.

In [1]:
import os

from google.cloud import bigquery

# Path to your service-account JSON credentials file. Replace with your own file,
# or export GOOGLE_APPLICATION_CREDENTIALS before starting the kernel: a value
# already present in the environment is kept and this default is ignored.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/home/fjvr/Downloads/birdproject-2020-3cdcf7c1792d.json",
)

project = 'birdproject-2020'  # Change this to your own Google Cloud project ID
source_dataset_id = 'audubon_cdc'  # Dataset inside `project` that will hold the views

# Bind the client to `project` explicitly so the views are created in the same
# project that the SQL further down refers to, instead of whatever default
# project the credentials happen to carry.
client = bigquery.Client(project=project)

# Client.dataset() is deprecated; build the dataset reference directly.
shared_dataset_ref = bigquery.DatasetReference(project, source_dataset_id)

# This is the parameter used to create the years that will go into the view:
# every yearly table from `year` onwards is included.
year = 1900

# The name of the view in our DB
view_ref = shared_dataset_ref.table("noaa_from_" + str(year) + "_to_present")
view = bigquery.Table(view_ref)

# Public dataset whose tables are scanned. A table is treated as a yearly table
# when the last four characters of its id parse as an integer.
ghcn_dataset = "bigquery-public-data.ghcn_d"

# Collect the ids of all yearly tables from `year` onwards.
tables_used = []
for table in client.list_tables(ghcn_dataset):
    try:
        table_year = int(table.table_id[-4:])
    except ValueError:
        # The id does not end in a number, so it is not a yearly table: skip it.
        # Only ValueError is caught so that real failures (and Ctrl-C) still surface.
        continue
    if table_year >= year:
        tables_used.append(table.table_id)

# Fail early with a clear message instead of submitting an empty view query.
if not tables_used:
    raise ValueError(
        "No tables ending in a year >= {} were found in {}".format(year, ghcn_dataset)
    )

# Sort so the generated SQL is the same on every run, whatever order the API returns.
tables_used.sort()

# The SQL for the view: one SELECT per yearly table, glued together with UNION ALL.
sql_statement_accumulated = " UNION ALL ".join(
    "SELECT * FROM `{}`".format(ghcn_dataset + "." + table_id)
    for table_id in tables_used
)

# Assigning the SQL to the associated view
view.view_query = sql_statement_accumulated
# API request; raises if a view with this name already exists in the dataset.
view = client.create_table(view)

print("Successfully created view at {}".format(view.full_table_id))

Successfully created view at birdproject-2020:audubon_cdc.noaa_from_1900_to_present


## Creating the DB View to flatten the data
The cell below creates a DB View in your Google BigQuery project containing the flattened data: one row per station and date, built from the union of all NOAA station data starting at the specified year.

In [2]:
import pandas as pd

# Query for flattening the data: the source view has one row per
# (station id, date, element); each LEFT JOIN pulls one element's value
# into its own column. `{source}` is replaced with the source view name below.
query = """
SELECT DISTINCT
  base.id, 
  base.date,
  stations.name,
  stations.state,
  temp_min.value as temp_min_value,
  temp_max.value as temp_max_value,
  precipitation.value as precipitation_value,
  temp_avg.value as temp_avg,
  snow.value as snow,
  snwd.value as snwd

FROM {source} base
LEFT JOIN {source} temp_min ON base.id = temp_min.id AND base.date = temp_min.date AND temp_min.element = 'TMIN'
LEFT JOIN {source} temp_max ON base.id = temp_max.id AND base.date = temp_max.date AND temp_max.element = 'TMAX'
LEFT JOIN {source} precipitation ON base.id = precipitation.id AND base.date = precipitation.date AND precipitation.element = 'PRCP'
LEFT JOIN {source} temp_avg ON base.id = temp_avg.id AND base.date = temp_avg.date AND temp_avg.element = 'TAVG'
LEFT JOIN {source} snow ON base.id = snow.id AND base.date = snow.date AND snow.element = 'SNOW'
LEFT JOIN {source} snwd ON base.id = snwd.id AND base.date = snwd.date AND snwd.element = 'SNWD'

INNER JOIN `bigquery-public-data`.ghcn_d.ghcnd_stations stations ON base.id = stations.id

ORDER BY base.id, base.date
"""

# The FROM-clause view: the union view created in the previous cell.
# It is derived from `project`, `source_dataset_id` and `year` so that it always
# matches the view that was actually created; nothing needs editing here.
parameter = f"`{project}`.{source_dataset_id}.noaa_from_{year}_to_present"
# Replace every {source} placeholder with the source view name
query = query.format(source=parameter)

# Name of the view in our DB
view_ref = shared_dataset_ref.table("flatten_noaa_from_" + str(year) + "_to_present")
view = bigquery.Table(view_ref)

# Assigning the SQL string to the view query
view.view_query = query

# Creating the view associated with the query
# (API request; raises if a view with this name already exists in the dataset).
view = client.create_table(view)

print("Successfully created view at {}".format(view.full_table_id))

Successfully created view at birdproject-2020:audubon_cdc.flatten_noaa_from_1900_to_present
