### Connections

#### SQL Server

In [None]:
# SQL SERVER CONNECTION USING SPARK
# Replace the placeholder values below with the details of your own instance.
jdbcHostname = "your_sql_server_hostname"
jdbcPort = "your_port_number"
jdbcUsername = "sqlserver_username"
jdbcPassword = "sqlserver_password"
jdbcDatabase = "database_name"

# JDBC connection string assembled from the settings above:
# jdbc:sqlserver://<host>:<port>;database=<db>;user=<user>;password=<password>;
jdbcUrl = (
    f"jdbc:sqlserver://{jdbcHostname}:{jdbcPort};"
    f"database={jdbcDatabase};"
    f"user={jdbcUsername};"
    f"password={jdbcPassword};"
)

# Connection properties: credentials, JDBC driver class and fetch size.
sqlserver_connection = dict(
    user=jdbcUsername,
    password=jdbcPassword,
    driver="com.microsoft.sqlserver.jdbc.SQLServerDriver",
    fetchsize="50000",
)

#### Snowflake

In [1]:
# SNOWFLAKE CONNECTION USING SPARK
from pyspark.sql import SQLContext
from pyspark.sql.types import *

# Snowflake connection options; replace each placeholder with your own value.
snowflake_connection = dict(
    sfURL="your_snowflake_url.snowflakecomputing.com",
    sfUser="snowflake_username",
    sfPassword="snowflake_password",
    sfDatabase="database_name",
    sfSchema="schema_name",
    sfWarehouse="snowflake_warehouse",
)

### Table Names

In [2]:
# Names of the tables whose record counts are compared (placeholders).
tableList = [
    "table1",
    "table2",
    "table3",
]

### Reading record counts from the databases

#### SQL Server

In [None]:
# Count the rows of every table in tableList on SQL Server and collect the
# results (one row per table) in the "sqlserver_parquet" dataset.
schema = 'dbo'

for table in tableList:
    try:
        # The JDBC reader takes a parenthesised, aliased subquery in place of a
        # table name, so each read yields a single summary row for the table.
        sql1 = f"(SELECT COUNT(*) as SQL_RECORD_COUNT, '{table}' as TABLE_NAME, GETDATE() as SQL_SYSDATE FROM {schema}.{table}) a"
        # The subquery built above is what must be passed as `table`.
        df1 = spark.read.jdbc(url=jdbcUrl, table=sql1, properties=sqlserver_connection)
        # NOTE: rows are appended, so re-running this cell adds another set of
        # rows to the dataset instead of replacing the previous ones.
        df1.write.mode("append").option("header",True).parquet("sqlserver_parquet")
    except Exception as e:
        # Best effort: report which table failed and continue with the rest.
        print(f"SQL Server count failed for {table}: {e}")

# Read back every row written so far into a single DataFrame.
df1_merged = spark.read.parquet("sqlserver_parquet")
df1_merged.show()

# OPTIONAL STEP - write as a CSV locally
df1_merged.orderBy("TABLE_NAME").coalesce(1).write.format("csv").mode("overwrite").option("header",True).save("SQLSERVER_COUNTS")

#### Snowflake

In [None]:
# Count the rows of every table in tableList on Snowflake and collect the
# results (one row per table) in the "snowflake_parquet" dataset.
database = 'test'
schema = 'public'

for table in tableList:
    try:
        # The connector's "query" option takes a plain SELECT statement, so the
        # query is passed without a JDBC-style "( ... ) a" subquery wrapper.
        sql2 = f"SELECT COUNT(*) as SNOW_RECORD_COUNT, '{table}' as TABLE_NAME, CURRENT_TIMESTAMP() as SNOW_SYSDATE FROM {database}.{schema}.{table}"
        # Use the connection options dict defined for Snowflake
        # (snowflake_connection) together with the query built above.
        df2 = spark.read.format("snowflake").options(**snowflake_connection).option("query", sql2).load()
        # NOTE: rows are appended, so re-running this cell adds another set of
        # rows to the dataset instead of replacing the previous ones.
        df2.write.mode("append").option("header",True).parquet("snowflake_parquet")
    except Exception as e:
        # Best effort: report which table failed and continue with the rest.
        print(f"Snowflake count failed for {table}: {e}")

# Read back every row written so far into a single DataFrame.
df2_merged = spark.read.parquet("snowflake_parquet")
df2_merged.show()

# OPTIONAL STEP - write as a CSV locally
df2_merged.orderBy("TABLE_NAME").coalesce(1).write.format("csv").mode("overwrite").option("header",True).save("SNOWFLAKE_COUNTS")

### FINAL RESULT

In [None]:
# `col` builds the column expressions used for DIFF and PCT_DIFF.
from pyspark.sql.functions import col

# Compare the counts table by table. The merged DataFrames hold one row per
# table, whereas df1/df2 only hold the last table processed by the loops.
# A full join keeps tables that are present on only one side.
df3 = df1_merged.join(df2_merged, ['TABLE_NAME'], "FULL")

# Absolute difference (Snowflake minus SQL Server) and the same difference as
# a percentage of the SQL Server count.
df3 = df3.withColumn("DIFF", col('SNOW_RECORD_COUNT') - col('SQL_RECORD_COUNT'))
df3 = df3.withColumn("PCT_DIFF", (col('DIFF') / col('SQL_RECORD_COUNT')) * 100)
df3 = df3.select("TABLE_NAME", "SQL_RECORD_COUNT", "SNOW_RECORD_COUNT", "DIFF", "PCT_DIFF")
df3.show()

# OPTIONAL STEP - write as a CSV locally
df3.orderBy("TABLE_NAME").coalesce(1).write.format("csv").mode("overwrite").option("header",True).save("FINAL_RESULT")