In [None]:
from etls.qa.dbHelpers import createEngine
import pandas as pd
from pangres import upsert

# Engines for the two ends of the transfer. 'SOURCE' / 'TARGET' are labels handed to the
# project helper createEngine -- presumably it resolves them to connection settings; confirm there.
source_db_engine = createEngine('SOURCE')
target_db_engine = createEngine('TARGET')


In [11]:
class SimpleDataTransfer:
    """Copy one table from a source database into a target database via upsert.

    The workflow is ``getSourceData`` -> ``mapData`` -> ``loadData``; each later
    step triggers the earlier ones on demand, so calling ``loadData()`` alone is
    enough.

    Parameters
    ----------
    source_db_engine, target_db_engine
        Connectables passed as ``con`` to ``pd.read_sql`` / ``pangres.upsert``.
    source_schema, source_table
        Schema and table that are read in full.
    target_schema, target_table
        Schema and table that receive the upsert.
    mapping : dict, optional
        ``{source_column: target_column}``. When given, only the mapped columns
        are kept (renamed to their target names). When ``None``, every source
        column is passed through unchanged.

    Notes
    -----
    Schema and table names are interpolated directly into SQL text, so they must
    come from trusted configuration, never from user input.
    """

    def __init__(self, source_db_engine, source_schema, source_table, target_db_engine, target_schema, target_table, mapping = None):
        self.source_db_engine = source_db_engine
        self.source_schema = source_schema
        self.source_table = source_table
        self.target_db_engine = target_db_engine
        self.target_schema = target_schema
        self.target_table = target_table
        self.mapping = mapping
        # Caches filled lazily by the methods below.
        self.source_data = None
        self.target_data = None
        self.mapped_data = None
        self.target_primary_key = None

    def getSourceData(self):
        """Read the full source table, cache it on ``self.source_data`` and return it."""
        self.source_data = pd.read_sql(
            f'select * from {self.source_schema}.{self.source_table}', con=self.source_db_engine)
        return self.source_data

    def getTargetData(self):
        """Read the full target table, cache it on ``self.target_data`` and return it."""
        # pd.read_sql has no ``schema`` keyword, so the schema is put into the
        # query text, exactly as getSourceData does.
        self.target_data = pd.read_sql(
            f'select * from {self.target_schema}.{self.target_table}', con=self.target_db_engine)
        return self.target_data

    def getTargetPrimaryKey(self):
        """Look up the primary-key column(s) of the target table.

        Returns
        -------
        str or list of str
            The column name for a single-column key, or the list of column
            names (in key order) for a composite key. The value is also cached
            on ``self.target_primary_key``.

        Raises
        ------
        KeyError
            If the target table has no primary key (or does not exist).
        """
        # Constraint names are not unique across schemas/tables, so the join must
        # also match schema and table; otherwise columns of unrelated keys leak in.
        query = f"""SELECT
                        kcu.column_name as pk
                    FROM
                        information_schema.table_constraints AS tc
                    JOIN
                        information_schema.key_column_usage AS kcu
                    ON tc.constraint_name = kcu.constraint_name
                        AND tc.constraint_schema = kcu.constraint_schema
                        AND tc.table_schema = kcu.table_schema
                        AND tc.table_name = kcu.table_name
                    WHERE
                        tc.constraint_type = 'PRIMARY KEY' AND tc.table_schema = '{self.target_schema}' AND tc.table_name = '{self.target_table}'
                    ORDER BY
                        kcu.ordinal_position;"""
        pk_df = pd.read_sql(
            query, con=self.target_db_engine)
        pk_columns = pk_df['pk'].tolist()
        if not pk_columns:
            raise KeyError(
                f'no primary key found for {self.target_schema}.{self.target_table}')
        # Keep the historical scalar return for the common single-column case.
        self.target_primary_key = pk_columns[0] if len(pk_columns) == 1 else pk_columns
        return self.target_primary_key

    def mapData(self):
        """Shape the source rows for loading and return the resulting DataFrame.

        Applies ``self.mapping`` (if any) and indexes the result by the target
        primary key. Source data and the primary key are fetched first when they
        have not been loaded yet. The result is cached on ``self.mapped_data``;
        ``self.source_data`` is left untouched, so the method can be re-run.
        """
        if self.source_data is None:
            self.getSourceData()
        if self.mapping is not None:
            target_fields = list(self.mapping.values())
            mapped_data = self.source_data.rename(columns = self.mapping)[target_fields]
        else:
            mapped_data = self.source_data
        if self.target_primary_key is None:
            self.getTargetPrimaryKey()
        # Non-inplace on purpose: without a mapping ``mapped_data`` IS
        # ``self.source_data``, and an inplace set_index would strip the key
        # column from the cached source frame.
        self.mapped_data = mapped_data.set_index(self.target_primary_key)
        return self.mapped_data

    def loadData(self):
        """Upsert the mapped rows into the target table and return pangres' result.

        Rows whose key already exists are updated. Neither the schema nor new
        columns are created. ``mapData`` is run first if needed.
        """
        if self.mapped_data is None:
            self.mapData()
        load = upsert(con = self.target_db_engine,
                      df = self.mapped_data,
                      table_name = self.target_table,
                      # Write to the same schema the primary key was looked up in,
                      # not the connection's default schema.
                      schema = self.target_schema,
                      if_row_exists='update',
                      create_schema = False,
                      add_new_columns = False)
        return load


In [12]:
# Transfer silver_nppes.npidetail -> npd.npi. Source and target column names match,
# so the mapping is an identity map whose only effect is to restrict which columns are copied.
npiDataTransfer = SimpleDataTransfer(
    source_db_engine,
    source_schema='silver_nppes',
    source_table='npidetail',
    target_db_engine=target_db_engine,
    target_schema='npd',
    target_table='npi',
    mapping={
        column: column
        for column in (
            'npi',
            'entity_type_code',
            'replacement_npi',
            'enumeration_date',
            'last_update_date',
            'deactivation_reason_code',
            'deactivation_date',
            'reactivation_date',
            'certification_date',
        )
    },
)

In [13]:
# Reads the source table, keeps the mapped columns and indexes the result by the target
# table's primary key (SimpleDataTransfer.mapData); nothing is written to the target yet.
mapped_data = npiDataTransfer.mapData()

In [None]:
# Upsert the mapped rows into the target table; rows whose key already exists are updated.
npiDataTransfer.loadData()

In [None]:
# Transfer silver_address.address_us -> npd.address. No mapping is supplied, so every
# source column is passed through under its original name.
addressDataTransfer = SimpleDataTransfer(
    source_db_engine=source_db_engine,
    source_schema='silver_address',
    source_table='address_us',
    target_db_engine=target_db_engine,
    target_schema='npd',
    target_table='address',
)