In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
import sys
# Extend the module search path before importing from `config` below
# (presumably the project root that contains the `config` package).
sys.path.append("/Workspace/Users/mayur10594@gmail.com/ETL_project")
from config.utils import get_logger
# Logger instance used by the following cells for progress messages.
logger = get_logger("dataloadinitiate")

In [0]:
import json, yaml, os
from config.utils import load_config,create_tables,load_all_schemas

In [0]:
# Notebook parameters. Every widget is declared with an empty default, so the
# real values are presumably supplied by the caller (job or parent notebook).
dbutils.widgets.text("fileList", "")
dbutils.widgets.text("taxYear", "")
dbutils.widgets.text("clientId", "")
dbutils.widgets.text("env","")
dbutils.widgets.text("bronze_path", "")
dbutils.widgets.text("maps","")

fileList = dbutils.widgets.get("fileList")
taxYear = dbutils.widgets.get("taxYear")
clientId = dbutils.widgets.get("clientId")
env = dbutils.widgets.get("env")
bronze_path = dbutils.widgets.get("bronze_path")

# "maps" carries JSON; the cells below iterate over it to obtain map names.
maps_raw = dbutils.widgets.get("maps")
if not maps_raw.strip():
    # json.loads("") fails with an opaque "Expecting value" JSONDecodeError;
    # fail early with a message that names the missing parameter instead.
    raise ValueError(
        "Widget 'maps' is empty; expected a JSON array of map names, "
        "e.g. [\"cards\", \"transactions\"]"
    )
maps = json.loads(maps_raw)

logger.info(f"fileList: {fileList}, taxYear: {taxYear}, clientId: {clientId}, env: {env}, bronze_path: {bronze_path}, maps to be loaded: {maps}")

In [0]:
# Map name -> DataFrame. Each source table is read from the `cp_database`
# schema and is named "<clientId>_<map_name>".
dfs = {}

for map_name in maps:
    table = f"cp_database.{clientId}_{map_name}"
    dfs[map_name] = df = spark.read.table(table)
    logger.info(f"df_{map_name} is created and stored in dfs['{map_name}']")

In [0]:
# Clean the transactions DataFrame and store it back under the same key:
#   * date   - parsed with the pattern 'dd-MM-yyyy HH:mm' and converted to a date
#   * amount - every '$' character removed, then cast to decimal(10,2)
dfs['transactions'] = (
    dfs['transactions']
    .withColumn('date', to_date('date', 'dd-MM-yyyy HH:mm'))
    .withColumn(
        'amount',
        regexp_replace('amount', '[$]', '').cast('decimal(10,2)'),
    )
)

In [0]:
# Parse the YAML configuration file into `sqls_cfg`. Later cells index it as
# sqls_cfg[<env>][<map name>]['sqls'] to obtain the SQL files to run.
config_path='/Workspace/Users/mayur10594@gmail.com/ETL_project/config/gold_config.yaml'
# Decode explicitly as UTF-8 so parsing does not depend on the locale's
# default text encoding.
with open(config_path, "r", encoding="utf-8") as f:
    sqls_cfg = yaml.safe_load(f)

In [0]:
# SQL files configured for the "cards" map. Use the `env` widget value rather
# than a hard-coded 'dev' so this lookup agrees with the per-map lookup
# (sqls_cfg[env][...]['sqls']) performed in the extract loop further down.
sqls=sqls_cfg[env]['cards']['sqls']

In [0]:
def load_sqls(path,file):
    """Return the full text of a SQL file.

    Args:
        path: Directory that contains the file; joined to ``file`` with "/".
        file: Name of the file inside ``path``.

    Returns:
        The complete file contents as one string.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    file_path = f"{path}/{file}"
    # Decode explicitly as UTF-8 so the returned text does not depend on the
    # locale's default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()

In [0]:
# Directory the SQL files named in the config are read from.
path = '/Workspace/Users/mayur10594@gmail.com/ETL_project/gold_transformations'
# Keys into `dfs` of every extract DataFrame created by this cell.
extracts = []

# The loop variable is `map_name`, not `map`: it stays bound at notebook scope
# after the loop, and calling it `map` would shadow the builtin in later cells.
# NOTE(review): views are registered one map at a time, so a SQL file can only
# reference views of maps already processed - confirm the order of `maps` if
# any SQL joins across maps.
for map_name in maps:
    # Register input DF as temp view (presumably the name the SQL files query)
    dfs[map_name].createOrReplaceTempView(map_name)
    print(f"✅ Temp view created for {map_name}")
    print(f"▶️ Started extract for {map_name}")

    sqls = sqls_cfg[env][map_name]['sqls']

    for sql_file in sqls:
        # Key format: "<map name>_<sql file name up to its first '.'>"
        df_name = f"{map_name}_{sql_file.split('.')[0]}"

        print(f"▶️ Creating {df_name} dataframe")
        print(f"▶️ Running SQL for {map_name}: {sql_file}")

        # Load the SQL text
        sql_query = load_sqls(path, sql_file)
        print(sql_query)

        # Run SQL and store DF in dictionary
        dfs[df_name] = spark.sql(sql_query)
        # Record the name only after spark.sql succeeded, so `extracts`
        # never lists a DataFrame that was not actually created.
        extracts.append(df_name)

print("✅ Extracts created:", extracts)


In [0]:
# Render one extract for inspection. The key follows the
# "<map name>_<sql file name up to its first '.'>" format built in the extract
# loop - presumably the "cards" map with a "debit_card_extract" SQL file, so
# this cell fails with KeyError when that extract was not produced.
dfs['cards_debit_card_extract'].display()