In [0]:
%run "./config file"

In [0]:
import time

class SetupHelper():
    """Create the pipeline schema and its tables through Spark SQL.

    The target catalog/schema and the base storage paths are read from
    ``ConfigModule('dev')``.  ``create_database()`` must run before any of
    the table-creating methods; otherwise they raise ``ReferenceError``.
    ``launcher()`` runs the complete sequence in the required order.

    Table prefixes ``bz_`` / ``sl_`` / ``gd_`` presumably denote the
    bronze / silver / gold layers -- TODO confirm with the pipeline owners.

    Attributes:
        conf: The ``ConfigModule`` instance the settings are read from.
        data_dir: ``<conf.base_file_path>/data``.
        checkpoint_dir: ``<conf.base_checkpoint_path>/checkpoint``.
        catalog: Catalog name, taken from ``conf.environment``.
        schema: Schema (database) name, taken from ``conf.db_name``.
        database_intialize: ``True`` once ``create_database()`` has run.
            The misspelling is kept because it is a public attribute.
    """

    def __init__(self):
        self.conf = ConfigModule('dev')
        self.data_dir = f"{self.conf.base_file_path}/data"
        self.checkpoint_dir = f"{self.conf.base_checkpoint_path}/checkpoint"
        self.catalog = self.conf.environment
        self.schema = self.conf.db_name
        self.database_intialize = False

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _require_database(self):
        """Raise ``ReferenceError`` unless ``create_database()`` has run.

        ``ReferenceError`` is kept (rather than a more fitting exception
        type) so that existing callers catching it keep working.
        """
        if not self.database_intialize:
            raise ReferenceError('Database not initialized')

    def _create_table(self, table_name, columns_ddl, table_clauses=''):
        """Issue ``CREATE TABLE IF NOT EXISTS`` for one table and log progress.

        Args:
            table_name: Unqualified table name; it is qualified with
                ``self.catalog`` and ``self.schema``.
            columns_ddl: Column definitions placed between the parentheses.
            table_clauses: Optional text appended after the closing
                parenthesis (for example a ``PARTITIONED BY`` clause).

        Raises:
            ReferenceError: If the database has not been initialized.
        """
        self._require_database()
        qualified_name = f"{self.catalog}.{self.schema}.{table_name}"
        print(f"\tCreating table {qualified_name}...", end='')
        spark.sql(
            f"CREATE TABLE IF NOT EXISTS {qualified_name} ({columns_ddl}){table_clauses}"
        )
        print('Done.')

    # ------------------------------------------------------------------
    # Database
    # ------------------------------------------------------------------
    def create_database(self):
        """Create the schema if needed, make it current, and mark it ready.

        Calls ``spark.catalog.clearCache()`` first, then issues
        ``create database if not exists`` and ``use`` for
        ``<catalog>.<schema>``.
        """
        spark.catalog.clearCache()
        print(f"\tCreating database {self.catalog}.{self.schema} ...", end='')
        spark.sql(f"create database if not exists {self.catalog}.{self.schema}")
        spark.sql(f"use {self.catalog}.{self.schema}")
        self.database_intialize = True
        print('Done.')

    # ------------------------------------------------------------------
    # bz_* tables
    # ------------------------------------------------------------------
    def create_registered_user_bz(self):
        """Create ``bz_registered_users`` if it does not exist."""
        self._create_table('bz_registered_users', """
                        user_id bigint,
                        device_id bigint,
                        mac_address string,
                        registration_timestamp string,
                        load_time timestamp,
                        source_file string
                    """)

    def create_gym_logins_bz(self):
        """Create ``bz_gym_logins`` if it does not exist."""
        self._create_table('bz_gym_logins', """
                        mac_address string,
                        gym bigint,
                        login string,
                        logout string,
                        load_time timestamp,
                        source_file string
                    """)

    def create_kafka_multiplex_bz(self):
        """Create ``bz_kafka_multiplex``, partitioned by topic and week_part."""
        self._create_table('bz_kafka_multiplex', """
                       key string,
                       value string,
                       topic string,
                       partition int,
                       offset long,
                       timestamp long,
                       date date,
                       week_part string,
                       load_time timestamp,
                       source_file string
                    """, " PARTITIONED BY (topic, week_part)")

    # ------------------------------------------------------------------
    # sl_* tables
    # ------------------------------------------------------------------
    def create_users_sl(self):
        """Create ``sl_users`` if it does not exist."""
        self._create_table('sl_users', """
                        user_id bigint,
                        device_id bigint,
                        mac_address string,
                        registration_timestamp timestamp
                    """)

    def create_gym_logins_sl(self):
        """Create ``sl_gym_logins`` if it does not exist."""
        self._create_table('sl_gym_logins', """
                        mac_address string,
                        gym bigint,
                        login timestamp,
                        logout timestamp
                    """)

    def create_user_profile_sl(self):
        """Create ``sl_user_profile`` (includes a nested ``address`` struct)."""
        self._create_table('sl_user_profile', """
                        user_id bigint,
                        update_type string,
                        timestamp timestamp,
                        dob string,
                        sex string,
                        gender string,
                        first_name string,
                        last_name string,
                        address struct <
                                        street_address string,
                                        city string,
                                        state string,
                                        postalcode bigint
                                        >
                    """)

    def create_heart_rate_sl(self):
        """Create ``sl_heart_rate`` if it does not exist."""
        self._create_table('sl_heart_rate', """
                        device_id bigint,
                        time timestamp,
                        heart_rate double
                    """)

    def create_workout_session_sl(self):
        """Create ``sl_workout_session`` if it does not exist.

        This method used to be defined twice with identical bodies; only
        one definition is kept.
        """
        # NOTE(review): the column name `timestmap` looks like a typo for
        # `timestamp`. It is left unchanged because renaming a column alters
        # the table schema that other notebooks may rely on -- confirm.
        self._create_table('sl_workout_session', """
                        user_id bigint,
                        workout_id bigint,
                        timestmap timestamp,
                        action string,
                        session_id bigint
                    """)

    def create_user_bins_sl(self):
        """Create ``sl_user_bins`` if it does not exist."""
        self._create_table('sl_user_bins', """
                        user_id bigint,
                        age int,
                        gender string,
                        city string,
                        state string
                    """)

    def create_complete_workout_sl(self):
        """Create ``sl_complete_workout`` if it does not exist."""
        self._create_table('sl_complete_workout', """
                        user_id bigint,
                        workout_id bigint,
                        session_id bigint,
                        start_time timestamp,
                        end_time timestamp
                    """)

    def create_date_lookup(self):
        """Create ``date_lookup`` if it does not exist."""
        self._create_table('date_lookup', """
                        date date,
                        week int,
                        year int,
                        month int,
                        day_of_week int,
                        day_of_month int,
                        day_of_year int,
                        week_part string
                    """)

    # ------------------------------------------------------------------
    # gd_* tables
    # ------------------------------------------------------------------
    def create_workout_bpm_summary_gd(self):
        """Create ``gd_workout_bpm_summary`` if it does not exist."""
        # NOTE(review): the column `sec` may be intended to be `sex`
        # (sl_user_profile has a `sex` column). Left unchanged because it is
        # part of the table schema -- confirm.
        self._create_table('gd_workout_bpm_summary', """
                        user_id bigint,
                        date date,
                        workout_id bigint,
                        session_id bigint,
                        age int,
                        sec string,
                        city string,
                        state string,
                        rec int,
                        min_bpm double,
                        avg_bpm double,
                        max_bpm double
                    """)

    def create_gym_summary_gd(self):
        """Placeholder for ``gd_gym_summary``; no table is created yet.

        No DDL has been written for this table. The method still enforces
        the "database initialized" precondition, but it now reports that the
        table was skipped instead of claiming it was created.

        Raises:
            ReferenceError: If the database has not been initialized.
        """
        self._require_database()
        print(
            f"\tSkipping table {self.catalog}.{self.schema}.gd_gym_summary"
            " (no table definition yet)."
        )

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------
    def launcher(self):
        """Create the database and then every table, reporting elapsed time."""
        print('Started setup process: ')
        start_time = time.time()
        # create_database must come first: every other step requires it.
        steps = (
            self.create_database,
            self.create_registered_user_bz,
            self.create_gym_logins_bz,
            self.create_kafka_multiplex_bz,
            self.create_users_sl,
            self.create_workout_session_sl,
            self.create_user_bins_sl,
            self.create_gym_logins_sl,
            self.create_heart_rate_sl,
            self.create_date_lookup,
            self.create_user_profile_sl,
            self.create_complete_workout_sl,
            self.create_workout_bpm_summary_gd,
            self.create_gym_summary_gd,
        )
        for step in steps:
            step()
        print(f'Done in  {int(time.time() - start_time)} seconds.')

    

In [0]:
from pyspark.sql.functions import lower

class HelperTestSuit():
    """Interactive smoke test for ``SetupHelper``.

    Optionally drops the schema and resets the checkpoint directory, runs
    ``SetupHelper.launcher()``, then verifies that the schema and the
    expected tables exist.

    Attributes:
        cls_obj: The ``SetupHelper`` instance under test.
        data_dir, checkpoint_dir, catalog, schema: Copied from ``cls_obj``.
    """

    def __init__(self):
        self.cls_obj = SetupHelper()
        self.data_dir = self.cls_obj.data_dir
        self.checkpoint_dir = self.cls_obj.checkpoint_dir
        self.catalog = self.cls_obj.catalog
        self.schema = self.cls_obj.schema

    def cleanup(self):
        """Ask for confirmation, then drop the schema and reset checkpoints.

        Reads the answer from stdin; anything other than ``y``/``Y`` skips
        the cleanup.
        """
        answer = input('Do you want to cleanup the database and data/checkpoint locations? (y/n): ')
        if answer.lower() != 'y':
            print('Skipping cleaning process!!!')
            return

        print('Cleaning process:')
        print(f'\tDropping database {self.catalog}.{self.schema} ...', end='')
        spark.sql(f"DROP DATABASE IF EXISTS {self.catalog}.{self.schema} CASCADE")
        print('Done.')
        # NOTE(review): the prompt mentions the data location, but only the
        # checkpoint directory is reset. Deleting/recreating data_dir was
        # disabled in the original (it sat in a no-op string literal), so
        # data_dir is deliberately left untouched here -- confirm intent.
        print(f'\tDeleting & recreate checkpoint_dir {self.checkpoint_dir} ...', end='')
        dbutils.fs.rm(self.checkpoint_dir, True)
        dbutils.fs.mkdirs(self.checkpoint_dir)
        print('Done.')

    def assert_database_exists(self, db_name):
        """Check that ``db_name`` is listed exactly once in ``self.catalog``.

        Args:
            db_name: Database (schema) name to look for.

        Raises:
            AssertionError: If the database is not listed exactly once.
        """
        # Report the name that is actually checked (previously self.schema
        # was printed regardless of the db_name argument).
        print(f'\tChecking if database {db_name} exists or not...', end='')
        matches = (
            spark.sql(f"show databases in {self.catalog}")
            .filter(f"databaseName='{db_name}'")
            .count()
        )
        # Raised explicitly so the check is not stripped under `python -O`.
        if matches != 1:
            raise AssertionError("Not Exist")
        print('Exist')

    def assert_table_exists(self, table_name):
        """Check that a non-temporary ``table_name`` exists in the schema.

        Args:
            table_name: Unqualified table name to look for in
                ``<catalog>.<schema>``.

        Raises:
            AssertionError: If the table is not listed exactly once.
        """
        print(f'\tChecking if table {table_name} exists in the {self.schema} database...', end='')
        matches = (
            spark.sql(f"show tables in {self.catalog}.{self.schema}")
            .filter(f"tableName='{table_name}' and isTemporary=False")
            .count()
        )
        # Raised explicitly so the check is not stripped under `python -O`.
        if matches != 1:
            raise AssertionError("Not Exist")
        print('Exist')

    def run_testcases(self):
        """Run cleanup, the full setup, and then all existence checks."""
        self.cleanup()
        self.cls_obj.launcher()
        print("Validation started...")
        self.assert_database_exists(self.schema)
        # gd_gym_summary is intentionally not checked: the setup helper does
        # not issue any CREATE TABLE statement for it.
        expected_tables = (
            'bz_registered_users',
            'bz_gym_logins',
            'bz_kafka_multiplex',
            'sl_user_profile',
            'sl_workout_session',
            'sl_user_bins',
            'sl_gym_logins',
            'sl_heart_rate',
            'date_lookup',
            'sl_complete_workout',
            'sl_users',
            'gd_workout_bpm_summary',
        )
        for table_name in expected_tables:
            self.assert_table_exists(table_name)
        print('Done.')

# Build the test suite (its constructor also instantiates SetupHelper) and run
# it. run_testcases() starts with cleanup(), which prompts on stdin before
# dropping the database or touching the checkpoint directory.
obj = HelperTestSuit()
obj.run_testcases()

Do you want to cleanup the database and data/checkpoint locations? (y/n):  y

Cleaning process:
	Dropping database dev.hc_db ...Done.
	Deleting & recreate checkpoint_dir abfss://hc-unmanaged-dev@healthcarelakestorage.dfs.core.windows.net/checkpoint_zone/checkpoint/cp_gym_logins ...Done.
Started setup process: 
	Creating database dev.hc_db ...Done.
	Creating table dev.hc_db.bz_registered_users...Done.
	Creating table dev.hc_db.bz_gym_logins...Done.
	Creating table dev.hc_db.bz_kafka_multiplex...Done.
	Creating table dev.hc_db.sl_users...Done.
	Creating table dev.hc_db.sl_workout_session...Done.
	Creating table dev.hc_db.sl_user_bins...Done.
	Creating table dev.hc_db.sl_gym_logins...Done.
	Creating table dev.hc_db.sl_heart_rate...Done.
	Creating table dev.hc_db.date_lookup...Done.
	Creating table dev.hc_db.sl_user_profile...Done.
	Creating table dev.hc_db.sl_complete_workout...Done.
	Creating table dev.hc_db.gd_workout_bpm_summary...Done.
	Creating table dev.hc_db.gd_gym_summary...Done.
Done in  8 seconds.
Validation started...
	Checking if database hc_db exits or