# Build and Execute Merge with Destination Fix - Snowflake

## Import Python Libraries

In [8]:
import snowflake.connector
import os
import sys
import pandas as pd
import numpy as np
import pandas as pd
import pyarrow as pa
# pd.set_option('max_columns', 40)

## Set Snowflake Variables

In [9]:
# Snowflake session settings used by the connection cell further down.
# The password is not stored in the notebook: it is read from the BISNOWPASS
# environment variable, and a KeyError is raised here if that variable is unset.
snowflake_user = 'JMILLER'
snowflake_password = os.environ['BISNOWPASS']
snowflake_account = 'eh69371.east-us-2.azure'
snowflake_role = 'SYSADMIN'
snowflake_warehouse = 'COMPUTE_WH'

# Database and schema that hold the metadata objects.
snowflake_database = 'UTIL_DB'
snowflake_schema = 'METADATA'

# Echo the settings in use; the password is masked instead of printed.
print('Using Notebook Variables:')
print(f'snowflake_user: {snowflake_user}')
print('snowflake_password: ***************')
print(f'snowflake_account: {snowflake_account}')
print(f'snowflake_role: {snowflake_role}')
print(f'snowflake_warehouse: {snowflake_warehouse}')
print(f'snowflake_database: {snowflake_database}')
print(f'snowflake_schema: {snowflake_schema}')

Using Notebook Variables:
snowflake_user: JMILLER
snowflake_password: ***************
snowflake_account: eh69371.east-us-2.azure
snowflake_role: SYSADMIN
snowflake_warehouse: COMPUTE_WH
snowflake_database: UTIL_DB
snowflake_schema: METADATA


## Set Notebook Variables

In [10]:
# Notebook variables: source/target table locations and dimension settings.
src_database = 'UTIL_DB'
tgt_database = 'UTIL_DB'
src_schema = 'INFORMATION_SCHEMA'
tgt_schema = 'METADATA'
src_table = 'DATABASES'
tgt_table = 'D_DATABASES'
added_dim_column_names_tag = 'standard_uc'
# Comma-separated column lists (no spaces), as consumed by later cells.
natural_key_columns = 'DATABASE_NAME'
type_2_columns = 'DATABASE_OWNER,RETENTION_TIME'
type_0_columns = ''


def qualified_table_name(database, schema, table):
    """Return the double-quoted, dot-separated name of a table.

    Args:
        database: Database name; when empty (or None) the database part is
            omitted and only ``"schema"."table"`` is returned.
        schema: Schema name.
        table: Table name.

    Returns:
        A string such as ``"DB"."SCHEMA"."TABLE"`` or ``"SCHEMA"."TABLE"``.
    """
    parts = [database, schema, table] if database else [schema, table]
    return '.'.join('"' + part + '"' for part in parts)


# Both names go through the same helper. Previously the fallback for an empty
# tgt_database assigned to src_table_full by mistake, which clobbered the
# source name and left tgt_table_full with an empty database part.
src_table_full = qualified_table_name(src_database, src_schema, src_table)
tgt_table_full = qualified_table_name(tgt_database, tgt_schema, tgt_table)

print('Using Notebook Variables:')
print('src_table_full: ' + src_table_full)
print('tgt_table_full: ' + tgt_table_full)
print('added_dim_column_names_tag: ' + added_dim_column_names_tag)
print('natural_key_columns: ' + natural_key_columns)
print('type_2_columns: ' + type_2_columns)
print('type_0_columns: ' + type_0_columns)


Using Notebook Variables:
src_table_full: "UTIL_DB"."INFORMATION_SCHEMA"."DATABASES"
tgt_table_full: "UTIL_DB"."METADATA"."D_DATABASES"
added_dim_column_names_tag: standard_uc
natural_key_columns: DATABASE_NAME
type_2_columns: DATABASE_OWNER,RETENTION_TIME
type_0_columns: 


## Check for Required Values

In [11]:
# Stop the run at the first required setting that is empty. Settings are
# checked in the order listed; the source/target database names are not
# checked here.
required_settings = (
    ("src_schema", src_schema),
    ("tgt_schema", tgt_schema),
    ("src_table", src_table),
    ("tgt_table", tgt_table),
    ("added_dim_column_names_tag", added_dim_column_names_tag),
    ("natural_key_columns", natural_key_columns),
)

for setting_name, setting_value in required_settings:
    if not setting_value:
        sys.exit(f"{setting_name} is required")

## Establish Snowflake Connection

In [12]:
# Open the Snowflake session with the settings chosen above. No database or
# schema is passed here; a later cell sets them with "use" statements.
connection_settings = {
    'user': snowflake_user,
    'password': snowflake_password,
    'account': snowflake_account,
    'role': snowflake_role,
    'warehouse': snowflake_warehouse,
}
ctx = snowflake.connector.connect(**connection_settings)
cur = ctx.cursor()

# Round-trip a trivial query to confirm the session works, then report the
# client string returned by CURRENT_CLIENT().
cur.execute("select CURRENT_CLIENT()")
one_row = cur.fetchone()
print('Snowflake Connection Successful')
print(one_row[0])

Snowflake Connection Successful
PythonConnector 2.6.0


## Set Snowflake Database and Schema Context

In [13]:
# Select the metadata database for this session, echoing the statement first.
sql = f"use database {snowflake_database};"
print(sql)
cur.execute(sql)

# Then select the metadata schema. The final call is left as a bare
# expression so the cell still displays the returned cursor.
sql = f"use schema {snowflake_schema};"
print(sql)
cur.execute(sql)

use database UTIL_DB;
use schema METADATA;


<snowflake.connector.cursor.SnowflakeCursor at 0x1ff08b70b80>

## Get Names for Supplemental Dimension Columns

In [14]:
# Look up the names to use for the supplemental dimension columns, selected
# from ADDED_DIM_COLUMN_NAMES by the configured tag.
sql = """
SELECT ROW_IS_CURRENT, ROW_EFFECTIVE_DATE, ROW_EXPIRATION_DATE, ROW_INSERT_DATE, ROW_UPDATE_DATE 
  FROM ADDED_DIM_COLUMN_NAMES 
 WHERE ADDED_DIM_COLUMN_NAMES_TAG  = '""" + added_dim_column_names_tag + """' 
"""
print(sql)
cur.execute(sql)
one_row = cur.fetchone()

# fetchone() returns None when no row matches the tag. Stop with a clear
# message instead of failing on one_row[0] with an opaque TypeError.
if one_row is None:
    sys.exit(
        "No row found in ADDED_DIM_COLUMN_NAMES for added_dim_column_names_tag '"
        + added_dim_column_names_tag + "'"
    )

# The row holds exactly the five selected columns, in SELECT order.
(
    row_is_current,
    row_effective_date,
    row_expiration_date,
    row_insert_date,
    row_update_date,
) = one_row

print('row_is_current:', row_is_current)
print('row_effective_date:', row_effective_date)
print('row_expiration_date:', row_expiration_date)
print('row_insert_date:', row_insert_date)
print('row_update_date:', row_update_date)


SELECT ROW_IS_CURRENT, ROW_EFFECTIVE_DATE, ROW_EXPIRATION_DATE, ROW_INSERT_DATE, ROW_UPDATE_DATE 
  FROM ADDED_DIM_COLUMN_NAMES 
 WHERE ADDED_DIM_COLUMN_NAMES_TAG  = 'standard_uc' 

row_is_current: ROW_IS_CURRENT
row_effective_date: ROW_EFFECTIVE_DATE
row_expiration_date: ROW_EXPIRATION_DATE
row_insert_date: ROW_INSERT_DATE
row_update_date: ROW_UPDATE_DATE
