In [91]:
from datetime import datetime, timedelta
import os
import shutil
import pandas as pd
import json
from jinja2 import Environment, FileSystemLoader
from google.cloud import bigquery
import sqlalchemy
import logging

In [92]:
# Write a sequence of text lines to a file on the local filesystem.
def write_file_local(path, file_data):
    """Write ``file_data`` to ``path`` as newline-separated text.

    Args:
        path: Destination file path. The file is opened in text write mode,
            so any existing content is replaced.
        file_data: Iterable of strings; items are joined with ``"\\n"`` and
            no trailing newline is appended.

    Raises:
        TypeError: If ``file_data`` contains a non-string item. The join is
            performed before the file is opened, so in that case an existing
            file at ``path`` is left untouched.
        OSError: If the file cannot be opened or written.
    """
    # Build the full payload first: opening the file with 'w' truncates it
    # immediately, so a failure while joining must happen before that point.
    file_string = '\n'.join(file_data)

    with open(path, 'w') as file:
        file.write(file_string)

In [93]:
# Load the line-of-business (LOB) settings from the JSON config file; the
# path is relative to the notebook's working directory.
with open('config/lob_config.json') as json_lob_config:
    config = json.load(json_lob_config)

# Expose each setting as a module-level name for the cells below.
lob = config['lob']
lob_lower = lob.lower().strip()
lob_upper = lob.upper().strip()
domain_abbr = config['domain_abbr']
project_id = config['project_id']
core_dataset = config['lob_core_dataset']
jinja_template_path = config['jinja_template_path']
jinja_extension = config['jinja_extension']
output_file_extension = config['output_file_extension']
output_parent_folder = config['output_parent_folder']
input_table_list_folder = config['input_table_list_folder']
table_list_file_name = config['table_list_file_name']

domain_abbr_lower = domain_abbr.lower().strip()

# Build the path with os.path.join rather than an f-string containing "\{":
# that backslash is an invalid escape sequence (a SyntaxWarning on recent
# Python versions) and hard-codes the Windows path separator.
input_path = os.path.join(input_table_list_folder, table_list_file_name)

# Table list CSV; each row describes one table.
df_table_info = pd.read_csv(input_path, index_col=None)

In [94]:
# Teradata connection settings are kept in their own JSON file, one directory
# above the working directory. Note that this rebinds the module-level name
# `config`.
with open('../config/td_config.json') as json_td_config:
    config = json.loads(json_td_config.read())

# Credentials and host used when building the Teradata engine URL.
USER_NAME = config['user_name']
PASSWORD = config['pword']
HOST_NAME = config['host_name']

In [95]:
# Reconcile BigQuery and Teradata audit-control values for every listed table.
def _format_audit_value(value):
    """Render one audit control value for the CSV output.

    Returns:
        "NP" when ``value`` is missing (``None``, NaN or ``pd.NA``);
        otherwise the value converted with ``int()`` and rendered as a string.
    """
    if pd.isna(value):
        return "NP"
    return str(int(value))


def _fetch_td_control_value(td_engine, template, base_params, metric_type):
    """Run the Teradata audit query for one metric and return its control value.

    Args:
        td_engine: SQLAlchemy engine connected to Teradata.
        template: Jinja template that renders the Teradata audit query.
        base_params: Template parameters shared by all queries for the table.
        metric_type: Value passed to the template as ``metric_param``; it is
            also the suffix of the ``Control_Value_<metric_type>`` result
            column that is read.

    Returns:
        The first row's control value, or ``None`` when the query returns no
        rows.
    """
    sql_query = template.render({**base_params, "metric_param": metric_type})
    result_df = pd.read_sql(sql_query, td_engine)
    if len(result_df) == 0:
        return None
    return result_df[f"Control_Value_{metric_type}"].iloc[0]


def cons_audit_recs_output():
    """Compare expected/actual audit values between BigQuery and Teradata.

    For every row of the table-list CSV at ``input_path`` this renders and runs
    the BigQuery audit template once, and the Teradata audit template twice
    (``metric_param`` = "Expected", then "Actual"). One CSV line is collected
    per table; a value that is absent on either side is written as "NP".

    The result is written to
    ``<output_parent_folder>/<lob>/<lob>_<YYYYmmdd_HHMM><output_file_extension>``
    (timestamp taken at the start of the run) and that path is printed.
    The elapsed wall-clock time is always printed at the end.

    Any exception raised while querying or writing is logged with its
    traceback and swallowed (best-effort); in that case the output file is not
    produced by a completed run.

    Relies on the module-level settings loaded from the config files
    (``lob``, ``lob_lower``, ``lob_upper``, ``project_id``, ``input_path``,
    ``jinja_template_path``, ``jinja_extension``, ``output_parent_folder``,
    ``output_file_extension``, ``HOST_NAME``, ``USER_NAME``, ``PASSWORD``).
    """
    dt1 = datetime.now()

    lob_name = lob.strip()
    # os.path.join avoids the invalid "\{" escape the f-string form relied on;
    # exist_ok removes the check-then-create race.
    lob_folder_path = os.path.join(output_parent_folder, lob_name)
    os.makedirs(lob_folder_path, exist_ok=True)

    tables_df = pd.read_csv(input_path, index_col=None)

    env = Environment(loader=FileSystemLoader(jinja_template_path), trim_blocks=True, lstrip_blocks=True)

    client = bigquery.Client(project=project_id)

    # NOTE(review): credentials are concatenated into the URL unescaped; a
    # password containing URL-special characters presumably needs encoding
    # before being placed here - confirm against the stored value.
    td_engine = sqlalchemy.create_engine(
        'teradatasql://' + HOST_NAME + '/?user=' + USER_NAME + '&password=' + PASSWORD + '&logmech=LDAP'
    )

    audit_results_list = ["Source,Dataset,Table,BQ_Expected,BQ_Actual,TD_Expected,TD_Actual"]

    # Remembered so a failure can be attributed to the table being processed.
    table_name = None

    try:
        # Template lookups do not depend on the row, so do them once.
        bq_template = env.get_template(f"big_query_audit_check{jinja_extension}")
        # The same Teradata template serves both metrics; only `metric_param`
        # differs between the two renders.
        td_template = env.get_template(f"teradata_expected_audit_check{jinja_extension}")

        for _, row in tables_df.iterrows():
            table_name = str(row['Table_Name']).strip().lower()
            source_system = str(row['Source_System']).strip().lower()
            job_name = str(row['Job_Name']).strip().upper()

            replace_params = {
                "source_system_param": source_system,
                "table_name_param": f"edw{lob_lower}.{table_name}",
                "bq_audit_control_dataset_param": f"edw{lob_lower}_ac",
                "td_audit_control_dataset_param": f"EDW{lob_upper}_DMX_AC",
                "job_name_dataset_param": job_name,
            }

            # BigQuery side: the query is expected to yield exactly two
            # columns (expected, actual); only the first row is used.
            bq_sql = bq_template.render(replace_params)
            bq_result_df = client.query(bq_sql, project=project_id, location='US').result().to_dataframe()
            if len(bq_result_df) > 0:
                bq_expected_value, bq_actual_value = bq_result_df.apply(pd.to_numeric).iloc[0]
            else:
                bq_expected_value, bq_actual_value = None, None

            # Teradata side: one query per metric.
            td_expected_value = _fetch_td_control_value(td_engine, td_template, replace_params, "Expected")
            td_actual_value = _fetch_td_control_value(td_engine, td_template, replace_params, "Actual")

            # Missing values become "NP" instead of being passed to int(),
            # which previously aborted the whole run for a single empty table.
            audit_results_list.append(",".join([
                source_system,
                f"edw{lob_lower}",
                table_name,
                _format_audit_value(bq_expected_value),
                _format_audit_value(bq_actual_value),
                _format_audit_value(td_expected_value),
                _format_audit_value(td_actual_value),
            ]))

        run_time = dt1.strftime('%Y%m%d_%H%M')

        output_file_path = os.path.join(lob_folder_path, f"{lob_name}_{run_time}{output_file_extension}")
        print(output_file_path)
        write_file_local(output_file_path, audit_results_list)

    except Exception:
        # Still best-effort, but keep the traceback and the failing table
        # instead of printing only the bare exception message.
        logging.exception("Audit reconciliation failed (last table started: %s)", table_name)
    finally:
        # Release pooled Teradata connections held by the engine.
        td_engine.dispose()

    dt2 = datetime.now()
    print(dt2 - dt1)

In [96]:
# Driver cell: run the audit reconciliation, bracketed by start/end markers so
# the cell output shows where the run's own messages begin and end.
print("Begin of Processing")

cons_audit_recs_output()

print("End of Processing")

Begin of Processing




LOB\CR\CR_20240702_2323.csv
0:04:59.273413
End of Processing
