In [3]:
import os
import configparser
import logging
import pandas as pd
import oracledb

def setup_logging(config):
    """Configure root logging to a file plus an ERROR-level console handler.

    Reads ``log_file`` and ``log_level`` from the ``[logging]`` section of
    *config*, makes sure the log file's parent directory exists, configures
    the root logger via ``logging.basicConfig`` and attaches an additional
    console ``StreamHandler`` that only emits ERROR and above.

    Args:
        config: A ``configparser.ConfigParser`` (or compatible object) that
            provides ``get('logging', 'log_file')`` and
            ``get('logging', 'log_level')``.

    Raises:
        AttributeError: If ``log_level`` (upper-cased) is not an attribute of
            the ``logging`` module.
    """
    log_file = config.get('logging', 'log_file')
    log_level = config.get('logging', 'log_level')

    # A bare file name such as "app.log" has an empty dirname, and
    # os.makedirs('') raises FileNotFoundError, so only create a directory
    # when the path actually has one. exist_ok avoids the check-then-create
    # race when the directory appears between the test and the call.
    log_dir = os.path.dirname(log_file)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    logging.basicConfig(
        filename=log_file,
        level=getattr(logging, log_level.upper()),
        format='%(asctime)s:%(levelname)s:%(message)s'
    )

    # Mirror errors to the console in addition to the log file.
    console = logging.StreamHandler()
    console.setLevel(logging.ERROR)
    formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

def fetch_data(user, password, dsn, query):
    """Run *query* against an Oracle database and return the result as a DataFrame.

    Args:
        user: Database user name passed to ``oracledb.connect``.
        password: Password for *user*.
        dsn: Data source name passed to ``oracledb.connect``.
        query: SQL text executed as-is, without bind parameters.

    Returns:
        A ``pandas.DataFrame`` holding every fetched row, with column names
        taken from the first element of each ``cursor.description`` entry.

    Raises:
        Exception: Any error raised while connecting, executing, fetching or
            building the DataFrame is logged and then re-raised unchanged.
    """
    try:
        # Context managers release the cursor and the connection even when
        # execute()/fetchall() raise, so a failed query no longer leaks them.
        with oracledb.connect(user=user, password=password, dsn=dsn) as connection:
            with connection.cursor() as cursor:
                cursor.execute(query)
                columns = [col[0] for col in cursor.description]
                data = cursor.fetchall()
        return pd.DataFrame(data, columns=columns)
    except Exception as e:
        logging.error(f"Error fetching data: {e}")
        # Bare raise keeps the original traceback intact.
        raise

def upsert_to_customers(df, app_user, app_password, app_dsn):
    """Insert new customers and update changed ones in ``APP.CUSTOMERS``.

    Each row of *df* is handled individually:

    * when ``CUSTOMER_ID_app_exists`` is falsy, the row is inserted using
      ``CUSTOMER_ID``, ``FULL_NAME_etl`` and ``EMAIL_ADDRESS_etl``;
    * otherwise the row is updated only if ``FULL_NAME_etl`` or
      ``EMAIL_ADDRESS_etl`` differs from its ``_app`` counterpart.

    All statements run on one connection and are committed together after
    the last row has been processed.

    Args:
        df: DataFrame with the columns ``CUSTOMER_ID``,
            ``CUSTOMER_ID_app_exists``, ``FULL_NAME_etl``, ``FULL_NAME_app``,
            ``EMAIL_ADDRESS_etl`` and ``EMAIL_ADDRESS_app``.
        app_user: Database user name passed to ``oracledb.connect``.
        app_password: Password for *app_user*.
        app_dsn: Data source name passed to ``oracledb.connect``.

    Raises:
        Exception: Any error is logged and re-raised unchanged. Nothing is
            committed in that case because the commit is the last step.
    """
    insert_sql = """
                    INSERT INTO APP.CUSTOMERS (CUSTOMER_ID, FULL_NAME, EMAIL_ADDRESS)
                    VALUES (:1, :2, :3)
                """
    update_sql = """
                        UPDATE APP.CUSTOMERS
                        SET FULL_NAME = :1, EMAIL_ADDRESS = :2
                        WHERE CUSTOMER_ID = :3
                    """
    try:
        # Context managers close the cursor and the connection on every exit
        # path; previously a failing statement left the connection open and
        # the cursor was never closed at all.
        with oracledb.connect(user=app_user, password=app_password, dsn=app_dsn) as connection:
            with connection.cursor() as cursor:
                for _, row in df.iterrows():
                    if not row['CUSTOMER_ID_app_exists']:
                        # Insert new records
                        cursor.execute(
                            insert_sql,
                            (row['CUSTOMER_ID'], row['FULL_NAME_etl'], row['EMAIL_ADDRESS_etl']),
                        )
                        continue

                    # Check for changes before updating
                    if row['FULL_NAME_etl'] != row['FULL_NAME_app'] or row['EMAIL_ADDRESS_etl'] != row['EMAIL_ADDRESS_app']:
                        cursor.execute(
                            update_sql,
                            (row['FULL_NAME_etl'], row['EMAIL_ADDRESS_etl'], row['CUSTOMER_ID']),
                        )

            # Commit once, after every row succeeded, so the sync is all-or-nothing.
            connection.commit()
    except Exception as e:
        logging.error(f"Error during upsert operation: {e}")
        # Bare raise keeps the original traceback intact.
        raise

def main(env='production'):
    """Synchronize ``ETL.S_CUSTOMERS`` into ``APP.CUSTOMERS``.

    Steps:

    1. Read ``config/config.ini`` and configure logging from it.
    2. Look up credentials in the ``{env}_etl`` and ``{env}_app`` sections.
    3. Fetch both tables, left-merge them on ``CUSTOMER_ID`` and flag the
       rows that already exist on the application side.
    4. Print the merged frame and hand it to ``upsert_to_customers``.

    Args:
        env: Prefix of the configuration sections to use; the sections
            ``f'{env}_etl'`` and ``f'{env}_app'`` must each provide
            ``username``, ``password`` and ``dsn``.

    Raises:
        FileNotFoundError: If ``config/config.ini`` cannot be read.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        config_path = 'config/config.ini'
        config = configparser.ConfigParser()
        # ConfigParser.read() silently skips missing files and returns the
        # list of files it managed to parse. Fail loudly here instead of
        # surfacing later as an unexplained KeyError on the section name.
        if not config.read(config_path):
            raise FileNotFoundError(
                f"Configuration file not found or unreadable: {config_path}"
            )

        # Configure logging before touching the credential sections so that a
        # missing section or key is also recorded in the log file.
        setup_logging(config)

        etl_section = f'{env}_etl'
        app_section = f'{env}_app'

        etl_user = config[etl_section]['username']
        etl_password = config[etl_section]['password']
        etl_dsn = config[etl_section]['dsn']
        app_user = config[app_section]['username']
        app_password = config[app_section]['password']
        app_dsn = config[app_section]['dsn']

        logging.info(f'Starting the database operations script in {env} environment.')

        etl_query = "SELECT * FROM ETL.S_CUSTOMERS"
        etl_data = fetch_data(etl_user, etl_password, etl_dsn, etl_query)

        app_query = "SELECT * FROM APP.CUSTOMERS"
        app_data = fetch_data(app_user, app_password, app_dsn, app_query)

        # indicator=True adds the _merge column, which tells us whether each
        # ETL row found a matching CUSTOMER_ID on the application side.
        df_merged = etl_data.merge(app_data, on="CUSTOMER_ID", how="left", suffixes=('_etl', '_app'), indicator=True)

        # With a left merge, 'both' means the customer already exists in APP.
        df_merged['CUSTOMER_ID_app_exists'] = df_merged['_merge'] == 'both'

        # Diagnostic dump of the merged frame to stdout.
        print(df_merged)

        # Perform the upsert operation
        upsert_to_customers(df_merged, app_user, app_password, app_dsn)

        logging.info('Data synchronization between ETL.S_CUSTOMERS and APP.CUSTOMERS completed.')
    except Exception as e:
        logging.error(f"Error in main function: {e}")
        # Bare raise keeps the original traceback intact.
        raise

# Script entry point: run the synchronization with main()'s default
# environment ('production') when this file is executed directly.
if __name__ == "__main__":
    main()


   CUSTOMER_ID         EMAIL_ADDRESS_etl FULL_NAME_etl  REC_STS  \
0          392  adam.miller@internalmail   Adam Miller        1   
1         1666                      test          test        1   

            REC_TMSTP         EMAIL_ADDRESS_app FULL_NAME_app _merge  \
0 2024-09-01 16:03:32  adam.miller@internalmail   Adam Miller   both   
1 2024-08-30 17:18:39                      test          test   both   

   CUSTOMER_ID_app_exists  
0                    True  
1                    True  
