In [None]:
# Log in through the gcloud CLI to set up Application Default Credentials.
# Meant to be executed from the Windows Command Prompt or the macOS Terminal.
!gcloud auth application-default login

In [None]:
# Set the gcloud CLI's active project.
!gcloud config set project hca-hin-dev-cur-parallon

In [None]:
from google.cloud import bigquery
import google.auth
import pandas as pd
from datetime import datetime
import io
import json
from jinja2 import Environment, FileSystemLoader

In [None]:
import os

# Send outbound HTTP and HTTPS traffic through the same proxy endpoint.
os.environ.update(
    HTTP_PROXY="proxy.nas.medcity.net:80",
    HTTPS_PROXY="proxy.nas.medcity.net:80",
)

In [None]:
# Resolve the default Google credentials with the cloud-platform scope, echo the
# project they resolve to, and build the BigQuery client from both.
credentials, project_id = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
print(project_id)
bq_client = bigquery.Client(project=project_id, credentials=credentials)

In [None]:
# Run configuration.
# Paths are assembled with os.path.join so they resolve on both Windows and POSIX
# systems. The previous literals hard-coded the Windows separator, and
# "InputFiles\Template" relied on the invalid escape sequence \T.
input_path = os.path.join("InputFiles", "load_tables_bq.csv")        # table list to process
output_path = os.path.join("OutputFiles", "loaded_tables_comp.csv")  # row-count report
date_marker = "250706"          # suffix of the dated tables and data files
date_expiration = "2025-08-01"  # value supplied to the create-table template context
# The trailing "" keeps a final separator: file names are appended to data_path directly.
data_path = os.path.join("OutputFiles", "TableData", date_marker, "")
template_folder = os.path.join("InputFiles", "Template")
template_name = "create_table_template"
jinja_extension = ".j2"
# NOTE(review): nothing in the visible cells reads this module-level buffer; kept in
# case another cell depends on it.
f = io.StringIO("")

In [None]:
def write_file_local(path, file_data):
    """Write the strings in ``file_data`` to the local file ``path``.

    The entries are joined with a newline, so nothing is appended after the
    last entry. The file is opened in write mode, replacing existing content.
    """
    with open(path, mode='w') as out_handle:
        out_handle.write('\n'.join(file_data))

In [None]:
 
def copy_file_data():
    """Load each exported CSV file into its dated BigQuery table and report row counts.

    The table list is read from ``input_path`` (columns ``Database``, ``Table``,
    ``TD_Count`` and ``DW_Last_Update_Time``). For every listed table, the existing
    BigQuery table ``<Table>_<date_marker>`` in dataset ``<Database>`` is fetched and
    the file ``<data_path><Table>_<date_marker>.csv`` is loaded into it with
    WRITE_TRUNCATE, using the table's own schema. One report line per table is
    collected and, once every table has loaded, written to ``output_path`` through
    ``write_file_local``; the elapsed time is then printed.

    Errors raised while reading ``input_path`` propagate to the caller. Any later
    exception is caught and printed, which ends the run without writing the report.
    """
    started_at = datetime.now()
    source_df = pd.read_csv(
        input_path,
        index_col=None,
        converters={'Database': str, 'Table': str, 'TD_Count': int, 'DW_Last_Update_Time': str},
    )
    current_table = None  # table being processed, reported if the run fails
    try:
        # Strip first so padded names cannot skew the sort order. Column-wise
        # Series.map is used because DataFrame.applymap is deprecated.
        source_df = source_df.apply(
            lambda column: column.map(lambda x: x.strip() if isinstance(x, str) else x)
        )
        source_df = source_df.sort_values(by=['Database', 'Table']).reset_index(drop=True)

        job_config = bigquery.job.LoadJobConfig()
        job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
        job_config.source_format = bigquery.SourceFormat.CSV
        job_config.field_delimiter = ','
        job_config.null_marker = ''
        job_config.allow_quoted_newlines = True
        # No rows are skipped, so the first line of every file is loaded as data.
        job_config.skip_leading_rows = 0

        # The column is added only so that it appears in the report header below.
        # NOTE(review): the header lists every input column, while each data line has
        # a fixed five fields; they line up only if the input CSV holds exactly the
        # four expected columns in this order -- confirm.
        source_df['Load_Rec_Count'] = 0
        table_rec_counts = [(',').join(source_df.columns.values)]

        # Jinja2 Environment
        # The template is only rendered by the disabled clone step at the end of the loop.
        env = Environment(loader=FileSystemLoader(template_folder), trim_blocks=True, lstrip_blocks=True)
        template_file_name = f"{template_name}{jinja_extension}"
        template = env.get_template(template_file_name)

        for _, row in source_df.iterrows():
            database_name = row['Database']
            table_name = row['Table']
            current_table = f"{database_name}.{table_name}_{date_marker}"
            # Template context, consumed only by the disabled clone step below.
            script_dict = {
                "stage_dataset": f"{database_name}",
                "table_name": f"{table_name}",
                "date_marker": f"{date_marker}",
                "date_expiration": f"{date_expiration}"
            }
            # DatasetReference replaces the deprecated Client.dataset(); the client's
            # project is passed explicitly, which is what Client.dataset() defaulted to.
            dataset_ref = bigquery.DatasetReference(bq_client.project, database_name)
            table_ref = dataset_ref.table(f"{table_name}_{date_marker}")
            table_obj = bq_client.get_table(table_ref)
            file_path = f"{data_path}{table_name}_{date_marker}.csv"

            # Reuse the destination table's schema directly; no JSON round trip needed.
            job_config.schema = table_obj.schema
            with open(file_path, "rb") as source_file:
                load_job = bq_client.load_table_from_file(source_file, table_obj, job_config=job_config)
            load_job.result()
            # Kept in a local: writing to the iterrows() row would not reach source_df.
            loaded_rows = load_job.output_rows
            print( "Loaded {} rows and {} columns to {}".format( loaded_rows, len(table_obj.schema), table_ref ) )
            table_rec_counts.append((',').join([
                database_name,
                table_name,
                str(row['TD_Count']),
                row['DW_Last_Update_Time'],
                str(loaded_rows),
            ]))
            # create_clone_table_stmt = template.render(script_dict)
            # query_job = bq_client.query(create_clone_table_stmt, project=project_id, location='US')
            # query_job.result()

        write_file_local(output_path, table_rec_counts)
        print(datetime.now() - started_at)

    except Exception as e1:
        # Still best-effort (nothing is re-raised), but the message now names the
        # exception type and the table being processed when the failure happened.
        stage = current_table if current_table is not None else "setup"
        print(f"copy_file_data failed during {stage}: {type(e1).__name__}: {e1}")

In [None]:
# Run the load and bracket its output with start/end markers.
print("Begin of Processing")

copy_file_data()

print("End of Processing")