### Helpers and modularity

### Checking the next file date

In [0]:
# Look up the next file date to ingest: one day after the latest
# Process_file_date logged as 'Completed' for this process, falling back
# to '2023-01-01' when no completed row exists.
next_file_date_query = """ 
    SELECT NVL(DATE_ADD(MAX(Process_file_date), 1), '2023-01-01') 
    FROM `uc_prod`.`watermark_tbl`.process_logs 
    WHERE Process_name = '01_bronze_ingest_src_data_HTTP' AND process_status = 'Completed'
"""
next_file_date = spark.sql(next_file_date_query)

In [0]:
from datetime import date, datetime

# The query returns a single row with a single column: the next file date.
raw_file_date = next_file_date.collect()[0][0]

# NVL(<date>, '<string>') mixes DATE and STRING, so depending on the session's
# SQL type-coercion rules the value may arrive as a `datetime.date` (or
# `datetime.datetime`) instead of a 'YYYY-MM-DD' string. `strptime` only
# accepts str, so normalise to an ISO date string first.
if isinstance(raw_file_date, datetime):
    raw_file_date = raw_file_date.date()
if isinstance(raw_file_date, date):
    get_file_date = raw_file_date.isoformat()
else:
    get_file_date = str(raw_file_date)

# `get_file_date` stays a 'YYYY-MM-DD' string for later cells;
# `file_date_string` is the same date rendered as DDMMYYYY.
dt = datetime.strptime(get_file_date, '%Y-%m-%d')
file_date_string = dt.strftime('%d%m%Y')

### Turn the file arrival date into a file-readable format

In [0]:
# Source: the daily CSV, whose name embeds the file date string.
source_url = 'https://retailpricing.blob.core.windows.net/'
source_folder_path = 'daily-pricing/'
source_file_name = 'PW_MW_DR_{}.csv'.format(file_date_string)
source_file_path = f'{source_url}{source_folder_path}{source_file_name}'

# Sink: abfss location assembled from container, storage account and folder.
sink_storage_name = 'madhupavanadls'
sink_container_name = 'bronze'
sink_folder_path = 'daily_processing_file/csv'
sink_location = (
    f'abfss://{sink_container_name}@{sink_storage_name}'
    f'.dfs.core.windows.net/{sink_folder_path}'
)

### Prepare and transform the file

#### imports

In [0]:
import pandas as pd
from pyspark.sql.functions import lit,date_format
from datetime import datetime

In [0]:
# Load the source CSV with pandas.
pd_df = pd.read_csv(source_file_path)

# Parse the file date once so it can be attached to every row.
fl_dt = datetime.strptime(get_file_date, '%Y-%m-%d')

# Convert to a Spark DataFrame and add the file date as a literal column.
sprk_df = (
    spark.createDataFrame(pd_df)
    .withColumn('file_arrival_date', lit(fl_dt))
)

# Preview the first five rows.
sprk_df.limit(5).display()

### Writing the file into bronze layer in delta format

In [0]:
# Write the DataFrame to the sink location in Delta format,
# using 'overwrite' save mode.
(
    sprk_df.write
    .format('delta')
    .mode('overwrite')
    .save(sink_location)
)

#### Update the watermark table about the notebook run

In [0]:
# Render the file date as 'YYYY-MM-DD HH:MM:SS' text for the watermark insert.
fl_dt_str = datetime.strftime(fl_dt, '%Y-%m-%d %H:%M:%S')

#### update the watermark table

In [0]:
# Record this run in the watermark table: insert one row into
# `uc_prod`.`watermark_tbl`.process_logs carrying this notebook's process
# name, the processed file date (`fl_dt_str`) and the status 'Completed'.
sql_query = f"""
insert into `uc_prod`.`watermark_tbl`.process_logs (
  process_name,
  process_file_date,
  process_status
) values(
  '01_bronze_ingest_src_data_HTTP',
  '{fl_dt_str}',
  'Completed'
)
"""
# Execute the insert.
spark.sql(sql_query)