In [32]:
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
import pandas as pd
import os
import logging
from datetime import datetime
import credentials

# Setting up metadata

In [33]:
dimension_name = 'agency'
surrogate_key = dimension_name+'_dim_id'
business_key = dimension_name+'_id'

gcp_project = 'cis4400-381214' # your gcp project ID
bq_dataset = '311-complaints_dataset'
table_name = dimension_name+'_dimension'

dimension_table_path = ".".join([gcp_project,bq_dataset,table_name])

file_source_path = 'data/'

# Set up Logging

In [34]:
current_date = datetime.today().strftime('%Y%m%d')
log_filename = "_".join(["etl", dimension_name, current_date])+".log"
logging.basicConfig(filename=log_filename, encoding='utf-8', format='%(asctime)s%(message)s', level=logging.DEBUG)
logging.info("=========================================================================")
logging.info("Starting ETL Run for dimension " + dimension_name + " on date " + current_date)

# Reading a CSV File into a dataframe

In [35]:
def load_data_file(logging, file_source_path, year):
    file_source = os.path.join(file_source_path, '311_traffic_signal_complaints_'+ year +'.csv')
    logging.info("Reading source data file: %s", file_source)
    # Read in the source data file for the customers data
    try:
        df = pd.read_csv(file_source)
        # Set all of the column names to lower case letters
        df = df.rename(columns=str.lower)
        logging.info("Read %d records from file: %s", df.shape[0], file_source)
        return df
    except:
        logging.error("Failed to read file: %s", file_source)
        os._exit(-1)
    return df

# Creating the BigQuery Client object

In [36]:
def create_bigquery_client(logging):
    try:
        bqclient = bigquery.Client.from_service_account_json(credentials.path_to_service_account_key_file) # your path to the GCP key json file
        logging.info("Created BigQuery Client: %s", bqclient)
        return bqclient
    except Exception as err:
        logging.error("Failed to create BigQuery Client.", err)
        os._exit(-1)
    return bqclient

# Upload a Dataframe to a BigQuery Table

In [37]:
def upload_bigquery_table(logging, bqclient, table_path, write_disposition, df):
    """
    upload_bigquery_table
    Accepts a path to a BigQuery table, the write disposition and a dataframe
    Loads the data into the BigQuery table from the dataframe.
    for credentials. The write disposition is either
    write_disposition="WRITE_TRUNCATE" Erase the target data and load all new data.
    write_disposition="WRITE_APPEND" Append to the existing table
    """
    try:
        logging.info("Creating BigQuery Job configuration with write_disposition=%s", write_disposition)
        # Set up a BigQuery job configuration with the write_disposition.
        job_config = bigquery.LoadJobConfig(write_disposition=write_disposition)
        # Submit the job
        logging.info("Submitting the BigQuery job")
        job = bqclient.load_table_from_dataframe(df, table_path, job_config=job_config)
        # Show the job results
        logging.info("Job results: %s",job.result())
    except Exception as err:
        logging.error("Failed to load BigQuery Table. %s", err)

# Test to see if a BigQuery table exists

In [38]:
def bigquery_table_exists(table_path, bqclient):
    """
    bigquery_table_exists
    Accepts a path to a BigQuery table
    Checks if the BigQuery table exists.
    Returns True or False
    """
    try:
        bqclient.get_table(table_path)
        return True
    except NotFound:
        return False

In [39]:
load_data_file(logging, file_source_path, '2019')

  df = pd.read_csv(file_source)


Unnamed: 0,unique_key,created_date,closed_date,agency,agency_name,complaint_type,descriptor,incident_zip,intersection_street_1,intersection_street_2,...,park_facility_name,park_borough,latitude,longitude,location,incident_address,street_name,cross_street_1,cross_street_2,bbl
0,45280741,2019-12-31T22:30:00.000,2020-01-01T00:05:00.000,DOT,Department of Transportation,Traffic Signal Condition,Controller,11364.0,BELL BOULEVARD,KINGSBURY AVENUE,...,Unspecified,QUEENS,40.736048,-73.756025,"{'latitude': '40.736048351530556', 'longitude'...",,,,,
1,45284465,2019-12-31T22:17:00.000,2019-12-31T23:00:00.000,DOT,Department of Transportation,Traffic Signal Condition,Veh Signal Head,,HYLAN BLVD,STEUBEN ST,...,Unspecified,Unspecified,,,,,,,,
2,45282939,2019-12-31T22:10:00.000,2020-01-01T17:55:00.000,DOT,Department of Transportation,Traffic Signal Condition,Underground,11693.0,BEACH 94 STREET,ROCKAWAY BEACH BOULEVARD,...,Unspecified,QUEENS,40.585985,-73.816586,"{'latitude': '40.585985329802995', 'longitude'...",,,,,
3,45280787,2019-12-31T22:10:00.000,2020-01-01T08:50:00.000,DOT,Department of Transportation,Traffic Signal Condition,Underground,,LEXINGTON AVE,59 ST E,...,Unspecified,MANHATTAN,,,,,,,,
4,45283419,2019-12-31T22:10:00.000,2020-01-01T18:20:00.000,DOT,Department of Transportation,Traffic Signal Condition,Underground,11694.0,BEACH CHANNEL DRIVE,BEACH 116 STREET,...,Unspecified,QUEENS,40.581766,-73.838299,"{'latitude': '40.581766233498946', 'longitude'...",,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39937,41311517,2019-01-01T00:45:00.000,2019-01-01T01:00:00.000,DOT,Department of Transportation,Traffic Signal Condition,Controller,11206.0,MARCY AVENUE,MYRTLE AVENUE,...,Unspecified,BROOKLYN,40.695377,-73.949207,"{'latitude': '40.69537669187525', 'longitude':...",,,,,
39938,41306914,2019-01-01T00:29:00.000,2019-01-01T00:30:00.000,DOT,Department of Transportation,Traffic Signal Condition,Veh Signal Lamp,11208.0,DREW STREET,LINDEN BOULEVARD,...,Unspecified,BROOKLYN,40.669725,-73.860962,"{'latitude': '40.66972534989857', 'longitude':...",,,,,
39939,41310542,2019-01-01T00:08:00.000,2019-01-01T02:10:00.000,DOT,Department of Transportation,Traffic Signal Condition,Controller,11354.0,NORTHERN BOULEVARD,157 STREET,...,Unspecified,QUEENS,40.763798,-73.808231,"{'latitude': '40.76379777489679', 'longitude':...",,,,,
39940,41307247,2019-01-01T00:06:00.000,2019-01-01T01:20:00.000,DOT,Department of Transportation,Traffic Signal Condition,Controller,10459.0,PROSPECT AVENUE,WESTCHESTER AVENUE,...,Unspecified,BRONX,40.819694,-73.901602,"{'latitude': '40.819693804076756', 'longitude'...",,,,,


In [42]:
bqclient = bigquery.Client.from_service_account_json(credentials.path_to_service_account_key_file)

In [44]:
upload_bigquery_table(logging, bqclient, 'cis4400-381214.311_complaints_dataset', 'WRITE_APPEND', df)

NameError: name 'df' is not defined