In [1]:

# --- #
# modules

import datetime
import json
import os

import pandas as pd
import requests
from google.cloud import bigquery
from google.cloud.exceptions import NotFound

import config

# --- #
# pandas settings

# pd.set_option('display.max_columns',    None)
# pd.set_option('display.max_rows',       200)
# pd.set_option('display.width',          500)
# pd.set_option('display.max_seq_items',  None)
# pd.set_option('display.max_colwidth',   50)
# pd.set_option('expand_frame_repr',      True)

# --- #


In [2]:

# --- #
# obtain an access token
# --- #

def achieve_token():
    """Request an access token from the token endpoint configured in `config`.

    Posts the grant type, company id, username, password and client id read
    from `config` as form data to `config.url_token`.

    Returns:
        The value stored under `access_token` in the JSON response.

    Raises:
        requests.RequestException: the request could not be sent, or the
            endpoint answered with an HTTP error status.
        ValueError: the response body is not valid JSON.
        KeyError: the JSON response has no `access_token` entry.
    """

    # the payload holds the password, so it stays local to this function
    payload = {
         'grant_type'   : config.grant_type
        ,'companyId'    : config.companyId
        ,'username'     : config.user_name
        ,'password'     : config.password
        ,'client_id'    : config.client_id
    }

    response = requests.post(config.url_token, data = payload)

    # surface a failed login here instead of continuing with a missing or
    # stale response further down the notebook
    response.raise_for_status()

    return json.loads(response.content)['access_token']


url_token = config.url_token

token     = achieve_token()

# --- #


In [3]:

# --- #
# get report

# the token obtained above is sent as a bearer credential
head        = {'Authorization': f'Bearer {token}'}

# NOTE(review): the query string looks like placeholder parameters copied from
# API documentation (exampleParameterGuid1=string ...) - confirm the report needs them
url_report  = f'https://us63-services.dayforcehcm.com/Api/{config.companyId}/V1/Reports/{config.report2}?exampleParameterGuid1=string&exampleParameterGuid2=string&exampleParameterGuid3=string'

response    = requests.get(url_report, headers = head)

# stop on an HTTP error status; otherwise the error body is parsed as if it
# were a report and the failure only shows up later as a missing 'Data' key
response.raise_for_status()

data        = json.loads(response.content)

# --- #


In [4]:

# --- #
# extract only the records / data portion of the response

# the records sit under the 'Data' -> 'Rows' keys of the parsed response
report_body     = data['Data']
data_filtered   = report_body['Rows']

# build the dataframe from those rows
df              = pd.DataFrame.from_dict(data_filtered)

# --- #


In [5]:

# --- #
# convert datetime columns to datetime

# columns that hold a date or a timestamp in the report
datetime_columns  = ['Employee_StartDate', 'Employee_HireDate', 'Employee_LastModifiedTimestamp', 'Employee_TerminationDate']

# columns that should carry a date only (no time of day)
date_only_columns = ['Employee_StartDate', 'Employee_HireDate', 'Employee_TerminationDate']

# parse every date / timestamp column into a pandas datetime
df[datetime_columns] = df[datetime_columns].apply(pd.to_datetime)

# drop the time of day by going through .dt.date and back to a pandas datetime;
# the intent is for bigquery to accept these columns as DATE
for date_column in date_only_columns:
    df[date_column] = pd.to_datetime(df[date_column].dt.date)

# the netsuite id arrives in a generic property field and is converted to a number
df['DenormEmployeeProperty1_Field83'] = pd.to_numeric(df['DenormEmployeeProperty1_Field83'])

# --- #


In [6]:

# --- #
# rename columns

# report column name -> column name used in the bigquery table
column_names = {
    'Employee_XRefCode': 'EmployeeId',
    'Employee_DisplayName': 'DisplayName',
    'Employee_FirstName': 'FirstName',
    'Employee_LastName': 'LastName',
    'Department_ShortName': 'Department',
    'Employee_StartDate': 'DateStarted',
    'Employee_HireDate': 'DateHired',
    'Employee_TerminationDate': 'DateTerminated',
    'Employee_LastModifiedTimestamp': 'DateTimeLastModified',
    'DenormEmployeeContact_BusinessEmail': 'Email',
    'DenormEmployeeProperty1_Field83': 'NetSuiteId',
}

# rename returns a new dataframe; columns not listed keep their name
df = df.rename(columns = column_names)

# --- #


In [7]:

# ---#
# stamp every row with the (naive) utc time of this run.
# datetime.utcnow() is deprecated, so take an aware utc "now" and drop the tzinfo
# to keep the value naive, as it was before.
# plain assignment appends the column at the end on the first run and overwrites it
# when the cell is re-run (df.insert would raise because the column already exists).

df['DateTimeInserted'] = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo = None)


# --- #


In [8]:

# --- #
# connect to bigquery

# expose the credentials path from config through the environment before
# the client is constructed
credentials_path = config.bigquery_credentials_path
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = credentials_path

# client used by every bigquery call below
client = bigquery.Client()

# --- #


In [9]:

# schema of the stage table, used only when the table has to be created.
# descriptions containing an apostrophe are written with double quotes: the previous
# 'employee''s ...' form is two adjacent python string literals and dropped the apostrophe.

schema = [

    {
         'name'          : 'EmployeeId'
        ,'type'          : 'STRING'
        ,'mode'          : 'REQUIRED'
        ,'description'   : 'dayforce employee id'
    },
    {
         'name'          : 'DisplayName'
        ,'type'          : 'STRING'
        ,'mode'          : 'NULLABLE'
        ,'description'   : "employee's preferred display name"
    },
    {
         'name'          : 'FirstName'
        ,'type'          : 'STRING'
        ,'mode'          : 'NULLABLE'
        ,'description'   : "employee's first name"
    },
    {
         'name'          : 'LastName'
        ,'type'          : 'STRING'
        ,'mode'          : 'NULLABLE'
        ,'description'   : "employee's surname"
    },
    {
         'name'          : 'Department'
        ,'type'          : 'STRING'
        ,'mode'          : 'NULLABLE'
        ,'description'   : 'current department'
    },
    {
         'name'          : 'DateStarted'
        ,'type'          : 'DATE'
        ,'mode'          : 'NULLABLE'
        ,'description'   : 'the date the employee started'
    },
    {
         'name'          : 'DateHired'
        ,'type'          : 'DATE'
        ,'mode'          : 'NULLABLE'
        ,'description'   : 'the date the employee was hired'
    },
    {
         'name'          : 'DateTerminated'
        ,'type'          : 'DATE'
        ,'mode'          : 'NULLABLE'
        ,'description'   : 'the date the employee is no longer at bowery'
    },
    {
         'name'          : 'DateTimeLastModified'
        ,'type'          : 'DATETIME'
        ,'mode'          : 'NULLABLE'
        ,'description'   : 'last datetime that the record was modified'
    },
    {
         'name'          : 'Email'
        ,'type'          : 'STRING'
        ,'mode'          : 'NULLABLE'
        ,'description'   : "employee's email address"
    },
    {
         'name'          : 'NetSuiteId'
        ,'type'          : 'FLOAT'
        ,'mode'          : 'NULLABLE'
        ,'description'   : "employee's netsuiteid"
    },
    {
         'name'          : 'DateTimeInserted'
        ,'type'          : 'DATETIME'
        ,'mode'          : 'NULLABLE'
        ,'description'   : 'the datetime that the script retrieve and inserted the records (utc)'
    }

]

# fully qualified name of the stage table and the statement that empties it
table_path = f'{config.project_id}.{config.dataset}.stage_{config.table2}'
truncate_table = (f'''TRUNCATE TABLE {table_path}''')


# make sure an empty stage table exists before loading:
#   - table missing -> create it from the schema above
#   - table present -> empty it
# only NotFound means "missing"; any other error (permissions, a failed TRUNCATE, ...)
# propagates instead of being mistaken for a missing table.
try:
    client.get_table(table = table_path)

except NotFound:
    table = bigquery.Table(table_path, schema = schema)
    client.create_table(table)

else:
    print('table already exists')
    client.query(truncate_table).result()


table already exists


In [10]:

# --- #

# start the load of the dataframe into the stage table, then wait for it to finish
load_job = client.load_table_from_dataframe(df, table_path)
insert = load_job.result()

# --- #


In [11]:

# --- #
# call stored procedure to MERGE between stage/source table and target

# stored_procedure = (f'CALL `{config.project_id}.{config.dataset}.merge_{config.table2}();`')

# the procedure name is backtick-quoted, with the call parentheses outside the backticks
procedure_name = f'{config.project_id}.{config.dataset}.merge_{config.table2}'
stored_procedure = f'CALL `{procedure_name}`()'

# run the procedure and wait for it to finish
merge_job = client.query(stored_procedure)
merge_job.result()


# --- #


<google.cloud.bigquery.table._EmptyRowIterator at 0x13b40c070>

In [12]:

# --- #
# empty (truncate) the stage table - the table itself is kept

# run the TRUNCATE statement built earlier and wait for it to finish
truncate_job = client.query(truncate_table)
truncate_job.result()


# --- #


<google.cloud.bigquery.table._EmptyRowIterator at 0x13b40fd90>

In [13]:
# --- #
# close connection

# last step of the notebook: close the bigquery client created earlier
client.close()

# --- #
