## Revision History



In [None]:
# Change_date         revision_number     change_description                           author
# 08/15/2023          1                   initial check-in                             Kranthi
# 01/06/2024          2                   OrderFlags tag fix                           Kranthi 
# 02/15/2024          3                   replace structured_adls with lakedb_gold     kranthi  
#03/12/2024           4                   add upc to unique combination                kranthi 
#03/25/2024           5                   add shippigstateshort, subtotal columns      kranthi
#04/23/2024           6                   display subsidy only for one line            kranthi   
#05/08/2024           7                   if a single order, then extracting voucher code should be handled kranthi
#05/15/2024           8                   add retries for 20 mins                       kranthi
#06/17/2024           9                   show all order history                        Kranthi 
#06/21/2024           10                  do not move the files to archive folder- do a full load daily       Kranthi 
#07/22/2024           11                  change from delta to parquet                 josh hintz
#08/01/2024           12                  change criteria against ZSOM_D12 selection   josh hintz
#08/12/2024           13                  addition of ZSOMVOURF (voucher return amt)   josh hintz
#08/14/2024           14                  add credit card refund and calculation       josh hintz
#08/20/2024           15                  apply refunds only on return records         josh hintz
#08/26/2024           16                  merge all to line 1, zero out aggr sums on returns josh hintz
#08/28/2024           17                  update line items on exchange-return lines   josh hintz
#09/12/2024           18                  change from YTAXAMT1 to ZCNTYTAX             josh hintz
#09/23/2024           19                  order date to est, return and exchange initialized dates   josh hintz
# 11/18/2024          20                  Added API status filter for Blocked in dev/test   Kettnech
#11/19/2024           21                  Adjust env variable in API call for dev/test  josh hintz
#11/19/2024           22                  Revert spark pool to spkmed03, change to spkmed34 later   josh hintz
#2/3/2025             23                  Remove API call and replace with Voucher delta table  josh hintz
#2/3/2025             24                  Fix spark34 failure                           josh hintz 

In [None]:
from pyspark.sql.functions import from_json, col, concat, lit,array,explode,first,when,expr
from pyspark.sql.functions import date_format
import pyspark.sql.functions as f
from pyspark import HiveContext
from pyspark.sql.types import *
from pyspark.sql import Row, functions as F
from pyspark.sql.window import Window
import concurrent.futures
import datetime
import re
from dateutil import tz
from time import sleep
from pyspark.sql.functions import input_file_name,regexp_extract
from pyspark.sql.functions import from_utc_timestamp
import xml.etree.ElementTree as ET
# Session-level settings applied before any read/write in this notebook.
# Turn on Parquet schema merging for reads.
spark.conf.set("spark.sql.parquet.mergeSchema", True)   
# Hadoop-level Parquet option: enable summary-metadata files.
spark.conf.set("spark.hadoop.parquet.enable.summary-metadata", True)
# Use dynamic partition-overwrite mode for overwriting writes.
spark.conf.set("spark.sql.sources.partitionOverwriteMode", "dynamic")
# Enable automatic schema merge for Delta operations.
spark.sql("SET spark.databricks.delta.schema.autoMerge.enabled = true")

In [None]:
# Log the workspace name reported by mssparkutils for this run.
print(mssparkutils.env.getWorkspaceName())

In [None]:
%run /utils/common_functions

In [None]:
%run /utils/merge_data_notebook

In [None]:
%run /b2b2c/config/b2b2c_config_variables

## Move the files

In [None]:


def move_files_from_sftp(file_extension,raw_b2b2cfolder):
  """Move this environment's files with the given extension from SFTP to raw.

  Lists ``sftp_path``, keeps the paths that end in ``file_extension`` and
  moves every file whose base name matches the current environment to
  ``{raw_path}{raw_b2b2cfolder}/``, appending ``_{dt}{current_cst_time}`` to
  the base name. The base name is the part of the file name before its first
  dot.

  Relies on the notebook globals ``sftp_path``, ``raw_path``, ``env_var``,
  ``dt`` and ``current_cst_time``.

  Args:
    file_extension: Extension including the leading dot, e.g. ``'.xml'``.
    raw_b2b2cfolder: Sub-folder under ``raw_path`` that receives the files.

  Returns:
    The number of files that were moved.
  """
  print("attempting move_files_from_sftp")
  v_file_cnt =0
  print("file_extension::",file_extension,"raw_folder::",raw_b2b2cfolder)
  # re.escape keeps the extension literal (the dot must not match any
  # character) without relying on an invalid "\{" escape inside an f-string.
  extension_pattern = re.compile(re.escape(file_extension) + "$")
  file_lst=[c.path for c in mssparkutils.fs.ls(sftp_path) if extension_pattern.search(c.path) is not None]
  print("file_lst in SFTP::",file_lst)

  is_prod_env = env_var == 'azwwwprodprdadapsyn01'
  is_nonprod_env = env_var in ('azwwwnonproddevadapsyn01', 'azwwwnonprodtestadapsyn01')

  for j in file_lst :
    file_name = j.split("/")[-1].split('.')[0]
    # Production only picks up 'prod' files; dev/test only picks up
    # 'staging' or 'development' files. Anything else stays on SFTP.
    if is_prod_env and 'prod' in file_name :
      env_label = "PROD"
    elif is_nonprod_env and ('staging' in file_name or 'development' in file_name) :
      env_label = "TEST"
    else:
      continue

    print(f"{env_label} SFTP file_name::",file_name, ' moving ', file_name, ' to raw_path' )
    v_file_cnt = v_file_cnt+1
    mssparkutils.fs.mv(j,f"{raw_path}{raw_b2b2cfolder}/{file_name}_{dt}{current_cst_time}{file_extension}",True)

  return v_file_cnt
 

## First Step - move the files from SFTP to raw folder 

In [None]:
from notebookutils import mssparkutils
spark.sql("SET spark.databricks.delta.schema.autoMerge.enabled = true")
retry_attempt = 0
v_xml_file_cnt = 0
#print("env_var::", env_var)

# Load date (YYYY-MM-DD) and load timestamp, both taken from the driver clock.
dt = (datetime.datetime.now()).strftime('%Y-%m-%d')
dt_time = (datetime.datetime.now()).strftime('%Y-%m-%d %H:%M:%S')

print("date of load::", dt)
dt_date_type = datetime.datetime.strptime(dt, '%Y-%m-%d')

dt_datetime_type = datetime.datetime.strptime(dt_time, '%Y-%m-%d %H:%M:%S')

print("time of load UTC::", dt_datetime_type)

# Build the HHMMSS suffix (America/Chicago time) that move_files_from_sftp
# appends to every file name it moves.
# NOTE(review): datetime.now() is naive local time that is then labelled UTC;
# this is only right when the driver clock runs in UTC - confirm.
from_zone = tz.gettz('UTC')
to_zone = tz.gettz('America/Chicago')
dt_time = (datetime.datetime.now()).strftime('%Y-%m-%dT%H:%M:%SZ')

utc = datetime.datetime.strptime(dt_time, "%Y-%m-%dT%H:%M:%SZ")
print("utc::",utc)
utc = utc.replace(tzinfo=from_zone)
cst = utc.astimezone(to_zone)
print("utc2::",utc)
print("cst1::",cst)
print("cst2::",cst.strftime('%H:%M:%S').replace(':',''))
current_cst_time = cst.strftime('%H:%M:%S').replace(':','')
print("current_cst_time",current_cst_time)

# Poll SFTP until at least one XML file has been moved, up to
# max_retry_attempts tries spaced retry_wait_seconds apart.
max_retry_attempts = 4
retry_wait_seconds = 300

#v_csv_file_cnt = move_files_from_sftp('.csv','dpcce')
while (retry_attempt < max_retry_attempts) :
  v_xml_file_cnt = move_files_from_sftp('.xml','sfcc')
  print('v_xml_file_cnt::',v_xml_file_cnt,'retry_attempt::', retry_attempt)
  retry_attempt = retry_attempt +1
  if v_xml_file_cnt != 0:
    break
  # Only wait when another attempt will follow; sleeping after the last try
  # cannot make files appear. The notebook imports `sleep` directly
  # (`from time import sleep`); the `time` module itself is never imported
  # here, so `time.sleep(...)` would raise NameError.
  if retry_attempt < max_retry_attempts:
    sleep(retry_wait_seconds)

#print("v_csv_file_cnt::",v_csv_file_cnt)

print("v_xml_file_cnt::",v_xml_file_cnt)


## Exit the notebook if no XML files are found in the path

In [None]:
import sys

# Keep only plain files from the raw SFCC folder; sub-folders do not count.
raw_sfcc_entries = mssparkutils.fs.ls(f"{raw_path}sfcc/")
file_lst_cnt = [entry for entry in raw_sfcc_entries if entry.isDir == False]
xml_file_total = len(file_lst_cnt)
print("xml file count::", xml_file_total)

# Nothing to process: end the notebook run with "None" as its exit value.
if xml_file_total < 1:
  mssparkutils.notebook.exit("None")

## read the XML file

In [None]:
# df = spark.read.format('xml').option("rootTag", "orders").option("rowtag","order").load("abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/b2b2c/sfcc/Order-20231220_2023-12-21063658.xml")
# dfcustom_attributes = df.select(col("_order-no").alias("order_no"),col("order-date").alias("order_date"),"custom-attributes.*")
# display(dfcustom_attributes)
# dfcustom_attributes = dfcustom_attributes.select("order_no","order_date",explode('custom-attribute').alias("col1"))
# dfcustom_attributes = dfcustom_attributes.select("order_no","order_date","col1.*")
# display(dfcustom_attributes)

In [None]:
#df_xml = spark.read.format('xml').option("rootTag", "orders").option("rowtag","order").load("abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/b2b2c/sfcc/Order-20231220_2023-12-21120844.xml")
#malformed
# df_xml = spark.read.format('xml').option("rootTag", "orders")\
# .option("rowtag","order")\
# .option("nullValue","")\
# .option("inferSchema", 'false')\
# .option("mode","PERMISSIVE")\
# .load("abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/b2b2c/sfcc/Order-20231220_2023-12-21063658.xml")
#df_xml = spark.read.format('xml').option("rootTag", "orders").option("rowtag","order").load("abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/b2b2c/sfcc/short_xml_test.xml")
# Load every file under the raw SFCC folder with the XML reader,
# using <orders> as the root tag and <order> as the row tag.
df_xml = (
    spark.read.format('xml')
    .option("rootTag", "orders")
    .option("rowtag", "order")
    .load(f"{raw_path}sfcc/")
)
display(df_xml)

In [None]:
# display(df_xml.where("`_order-no` IN ('T1080039867','T1080039868')"))

# Get Customer details

In [None]:

# Expand the customer struct, then keep the identifying fields together with
# the expanded billing-address fields.
dfCustomer = (
    df_xml.select("customer.*")
    .select("customer-email", "customer-name", "customer-no", "billing-address.*")
)
display(dfCustomer)

## get payment details only for subsidy data

In [None]:
# Schema of the placeholder row used when no voucher payment can be extracted.
# NOTE(review): unlike the frames built from the XML, this schema has no
# is_subsidy column - confirm downstream cells do not rely on it.
subsidy_payment_schema = StructType([
    StructField("order_no", StringType(), True),
    #StructField("order_date", TimestampType(), True),
    StructField("is_redeemed", StringType(), True),
    StructField("voucher_amount", IntegerType(), True),
    StructField('voucher_code',StringType(), True)
                      ])
dummy_data = [('a','a',0,'a')]
stringified_array_schema = ArrayType(StructType([StructField("_VALUE",StringType(),True)
,StructField("_attribute_id",StringType(),True)]))

# Recognise the "explode() cannot be applied" analysis error in both wordings
# used in this notebook: the one this cell originally tested for
# (cannot resolve 'explode() and the one the product-line cells test for
# (Cannot resolve "explode(). Matching only the first wording sent the
# single-payment (STRUCT) case to the dummy row and dropped its voucher data.
explode_unresolved = re.compile(r"""cannot resolve ['"]explode\(""", re.IGNORECASE)

try:
    # Array case: one custom-method per payment, one custom-attribute per row,
    # then pivot attribute ids into columns.
    dfPayment = df_xml.select(col("_order-no").alias("order_no"),col("order-date").alias("order_date"),"payments.*")
    dfPayment = dfPayment.select('order_no','order_date',explode("payment.custom-method").alias('payment_custom_attr'))
    dfPayment = dfPayment.select('order_no','order_date','payment_custom_attr.custom-attributes.custom-attribute')
    dfPayment = dfPayment.select('order_no','order_date',explode('custom-attribute').alias('payment_custom_attr'))
    dfPayment = dfPayment.groupBy("order_no","order_date").pivot("payment_custom_attr._attribute-id").agg(first("payment_custom_attr._VALUE"))
    dfPayment = dfPayment.selectExpr('order_no'
    ,'order_date'
    ,'isRedeemed as is_redeemed'
    , 'isSubsidy as is_subsidy'
    ,'voucherAmount as voucher_amount'
    ,'voucherCode as voucher_code')
except Exception as e:
    if explode_unresolved.search(str(e)):
      # dfPayment still holds the last frame assigned inside the try block;
      # walk the payment struct directly instead of exploding it.
      print('payment is of type STRUCT, cannot apply explode()')
      dfPayment = dfPayment.select('order_no','order_date','payment.*')
      dfPayment = dfPayment.select('order_no','order_date',"custom-method.custom-attributes.*")
      dfPayment = dfPayment.select('order_no','order_date',"custom-attribute")
      dfPayment = dfPayment.select('order_no','order_date',explode('custom-attribute').alias('custom-attribute'))
      dfPayment = dfPayment.groupBy("order_no","order_date").pivot("custom-attribute._attribute-id").agg(first("custom-attribute._VALUE"))
      dfPayment = dfPayment.selectExpr('order_no'
                                    , 'order_date'
                                    , 'isRedeemed as is_redeemed'
                                    , 'isSubsidy as is_subsidy'
                                    , 'voucherAmount as voucher_amount'
                                    , 'voucherCode as voucher_code')

    else:
      print("no voucher/custom-method payment present::")
      # Log why the extraction was abandoned so that a genuine failure is not
      # mistaken for "no voucher data in this load".
      print("voucher payment extraction skipped because::", type(e).__name__, str(e)[0:300])
      dfPayment =   spark.createDataFrame(dummy_data, subsidy_payment_schema)
      dfPayment = dfPayment.withColumn("order_date",F.current_timestamp())
display(dfPayment)

In [None]:
#display(dfPayment.select('*').where("""order_no IN ('T1080027936','T1080027937','T1080027939','T1080027940','T1080027941','T1080027942','T1080027943','T1080027951','T1080027952','T1080027953','T1080027945','T1080027946',
#'T1080027947','T1080027954','T1080027955','T1080027957')"""))

## get the payment data for creditcard when partially paid by voucher

In [None]:
# Schema of the placeholder row used when no credit-card payment can be
# extracted from the XML.
cc_payment_schema = StructType([
    StructField("order_no", StringType(), True),
    StructField("pay_amt", IntegerType(), True),
    StructField('card-holder',StringType(), True),
    StructField('card-number',StringType(), True),
    StructField('card-token',StringType(), True),
    StructField('card-type',StringType(), True),
    StructField('expiration-month',StringType(), True),
    StructField('expiration-year',StringType(), True)
                      ])
dummy_data = [('a',0,'a','a','a','a','a','a')]
try:
    dfPaymentcc0 = df_xml.select(col("_order-no").alias("order_no"),col("order-date").alias("order_date"),"payments.*")
    # One row per credit-card entry, carrying the payment amount alongside.
    dfPaymentcc = dfPaymentcc0.select('order_no','order_date','payment.amount',explode("payment.credit-card").alias('payment_credit_card'))
    dfPaymentcc = dfPaymentcc.selectExpr('order_no','order_date','amount as pay_amt','payment_credit_card.*')
    display(dfPaymentcc)
except Exception as e:
    print("no credit card payment present::")
    # Any failure lands here, not only "no credit card in this load"; log the
    # actual error so a schema problem is visible in the run output.
    # NOTE(review): an explode() failure on a non-array payment would also end
    # up in this branch - confirm whether that case needs its own handling.
    print("credit card payment extraction skipped because::", type(e).__name__, str(e)[0:300])
    dfPaymentcc =   spark.createDataFrame(dummy_data, cc_payment_schema)
    dfPaymentcc = dfPaymentcc.withColumn("order_date",F.current_timestamp())
    display(dfPaymentcc)

# dfPaymentccattr = dfPaymentcc0.select('order_no','order_date', explode(array('payment.custom-attributes')).alias('payment_cc_custom-attr'))
# dfPaymentccattr = dfPaymentccattr.select('order_no','order_date',explode(array('payment_cc_custom-attr.custom-attribute')).alias('payment_cc_custom-attr'))  
# dfPaymentccattr = dfPaymentccattr.groupBy("order_no","order_date").pivot("payment_cc_custom-attr._attribute-id").agg(first("payment_cc_custom-attr._VALUE"))
# dfPaymentcc = dfPaymentcc.join(dfPaymentccattr,['order_no','order_date']).selectExpr('order_no','order_date','cardType as cc_card_type','authAmount as cc_amount_paid').distinct()
# display(dfPaymentcc)

In [None]:
#display(dfPaymentcc.select('*').where("""order_no IN ('T1080027936','T1080027937','T1080027939','T1080027940','T1080027941','T1080027942','T1080027943','T1080027951','T1080027952','T1080027953','T1080027945','T1080027946',
#'T1080027947','T1080027954','T1080027955','T1080027957')"""))

# get product lines

In [None]:

# Columns projected from each line item; shared by both branches below.
# NOTE(review): the following cell rebuilds df_product_lines with renamed
# columns, so the frame produced here is overwritten.
line_item_fields = [
    "order_no",
    "order_date_time",
    "line_item.`gross-price`",
    "line_item.`lineitem-text`",
    "line_item.`product-id`",
    "line_item.quantity",
    "line_item.`product-name`",
]

try:
    # explode() yields one row per product-lineitem element.
    df_product_lines = df_xml.select(
        col("_order-no").alias("order_no"),
        col("order-date").alias("order_date_time"),
        explode("product-lineitems.product-lineitem").alias('line_item'),
    )
    df_product_lines = df_product_lines.select(*line_item_fields).select('*').distinct()

    display(df_product_lines)
except Exception as e:
    if """Cannot resolve "explode(""" not in str(e):
        print('Different error', str(e))
        raise

    # explode() could not be resolved: read product-lineitem as-is
    # (presumably a single struct rather than an array).
    df_product_lines = df_xml.select(
        col("_order-no").alias("order_no"),
        col("order-date").alias("order_date_time"),
        col("product-lineitems.product-lineitem").alias('line_item'),
    )
    df_product_lines = df_product_lines.selectExpr(*line_item_fields).select('*').distinct()
    display(df_product_lines)

In [None]:
from pyspark.sql.functions import col, explode

def _flatten_line_items(lines_df):
    """Project the line_item struct into flat snake_case columns and de-duplicate."""
    return lines_df.select(
        "order_no",
        "order_date_time",
        col("line_item.gross-price").alias("gross_price"),
        col("line_item.lineitem-text").alias("lineitem_text"),
        col("line_item.product-id").alias("product_id"),
        # quantity is a struct here; only its _VALUE field is kept
        col("line_item.quantity._VALUE").alias("quantity"),
        col("line_item.product-name").alias("product_name"),
    ).distinct()


order_no_column = col("_order-no").alias("order_no")
order_date_column = col("order-date").alias("order_date_time")

try:
    # explode() yields one row per product-lineitem element.
    exploded_lines = df_xml.select(
        order_no_column,
        order_date_column,
        explode("product-lineitems.product-lineitem").alias("line_item"),
    )
    df_product_lines = _flatten_line_items(exploded_lines)

    display(df_product_lines)

except Exception as e:
    if """Cannot resolve "explode(""" not in str(e):
        print("Error:", str(e))
        raise

    # explode() could not be resolved: take product-lineitem as-is.
    single_line = df_xml.select(
        order_no_column,
        order_date_column,
        col("product-lineitems.product-lineitem").alias("line_item"),
    )
    df_product_lines = _flatten_line_items(single_line)

    display(df_product_lines)

In [None]:
#dfcustom_attributes = df_xml.select(col("_order-no").alias("order_no"),col("order-date").alias("order_date"),"custom-attributes.*")
#dfcustom_attributes = dfcustom_attributes.select("order_no","order_date",explode('custom-attribute').alias("col1"))
#display(dfcustom_attributes.select('*'))

## get custom attributes

In [None]:
file_list = []
custom_attributes_lod = []

# Column names of the custom-attribute frame, in output order. Every column is
# a nullable string. (order_date_time is intentionally not part of the schema.)
custom_attrib_field_names = (
    "order_no",
    "POLocation",
    "accountName",
    "blanketPO",
    "brandBusinessUnit",
    "costcenter",
    "employeeID",
    "employeeLocation",
    "firstName",
    "lastName",
    "districtOrLocationCodeOrBranch",
    "storeOrDept",
    "salesCenter",
    "POValue",
)
custom_attrib_schema = StructType(
    [StructField(field_name, StringType(), True) for field_name in custom_attrib_field_names]
)

def parse_xml(file_name):
    """Collect the order-level custom attributes of every order in one raw XML file.

    Reads ``{raw_path}sfcc/{file_name}`` as a single string through Spark,
    parses it with ElementTree and appends one dict per ``order`` element to
    the module-level ``custom_attributes_lod`` list. Each dict holds
    ``order_no`` (the ``order-no`` attribute) plus one entry per
    ``custom-attribute`` found directly under the order's
    ``custom-attributes`` element, keyed by its ``attribute-id``.

    Works whether or not the root element declares an XML namespace.

    Args:
        file_name: Name of the file inside the raw ``sfcc`` folder.

    Returns:
        None; results are accumulated in ``custom_attributes_lod``.
    """
    ## read the text file, convert to string and parse the string
    file_text = spark.read.text(f"{raw_path}sfcc/{file_name}", wholetext=True)
    xml_data = file_text.collect()[0][0]
    root = ET.fromstring(xml_data)

    # ElementTree spells namespaced tags as '{uri}name'. Reuse the root's
    # '{uri}' prefix for every lookup; when the root has no namespace the
    # prefix is empty and plain tag names are searched instead.
    if root.tag.startswith('{'):
        ns_prefix = root.tag[:root.tag.index('}') + 1]
    else:
        ns_prefix = ''
    order_path = f'.//{ns_prefix}order'
    attribute_path = f'./{ns_prefix}custom-attributes/{ns_prefix}custom-attribute'

    file_orders = []
    for order in root.findall(order_path):
        custom_attributes = {'order_no': order.get('order-no')}
        # Only attributes directly under the order are read, not the ones
        # nested inside payments or line items.
        for attr in order.findall(attribute_path):
            custom_attributes[attr.get('attribute-id')] = attr.text
        file_orders.append(custom_attributes)

    # A single extend per file keeps each file's orders contiguous even when
    # several files are parsed on different threads.
    custom_attributes_lod.extend(file_orders)
 
  

    
# extract file names 
# Names of the non-empty entries in the raw SFCC folder.
sfcc_listing = mssparkutils.fs.ls(f"{raw_path}sfcc/")
file_list = [item.name for item in sfcc_listing if item.size>0]

# Parse the files on a thread pool. Consuming the map iterator re-raises any
# exception raised by a worker before the DataFrame is built.
with concurrent.futures.ThreadPoolExecutor() as executor:
    results = [outcome for outcome in executor.map(parse_xml, file_list)]

# One row per parsed order, shaped by custom_attrib_schema.
dfcustom_attributes = spark.createDataFrame(data=custom_attributes_lod,schema=custom_attrib_schema)
#display(dfcustom_attributes)   

# dfcustom_attributes = df_xml.select(col("_order-no").alias("order_no"),col("order-date").alias("order_date"),"custom-attributes.*")
# display(dfcustom_attributes)
# #display(dfcustom_attributes.select("*").where("order_no='T1080008309'"))
# dfcustom_attributes = dfcustom_attributes.select("order_no","order_date",explode('custom-attribute').alias("col1"))
# #dfcustom_attributes = dfcustom_attributes.selectExpr("_order-no","explode('custom-attribute') as col1")
# display(dfcustom_attributes.select("order_no","col1.*"))

# dfcustom_attributes = dfcustom_attributes.select("order_no","order_date","col1.*")
# #display(dfcustom_attributes.select("*").where("order_no='T1080008309'"))
#dfcustom_attributes  = dfcustom_attributes.groupBy("order_no","order_date").pivot("_attribute-id").agg(first("_VALUE"))
# Inner join on order_no: orders without product lines drop out; then remove
# exact duplicate rows.
dfcustom_attributes = (
    dfcustom_attributes
    .join(df_product_lines, ["order_no"], "inner")
    .select("*")
    .distinct()
)
display(dfcustom_attributes)


In [None]:
#display(dfcustom_attributes.select('*').where("""order_no IN ('T1080027945','T1080027946',
#'T1080027947','T1080027954','T1080027955','T1080027957')"""))

## Retrieve API password and today's bearer token

In [None]:
#import requests
#import json

#voucher_api_pwd = mssparkutils.credentials.getSecret(kv_name,'b2b2cVoucherApiPassword','ls_kv_adap' )
#voucher_api_authorization = mssparkutils.credentials.getSecret(kv_name,'b2b2cVoucherApiAuthorization','ls_kv_adap' )
#voucher_api_pwd = mssparkutils.credentials.getSecret(kv_name,'b2b2cVoucherApiPassword')
# import sys  
# from pyspark.sql import SparkSession  
  
# sc = SparkSession.builder.getOrCreate()  
# token_library = sc._jvm.com.microsoft.azure.synapse.tokenlibrary.TokenLibrary  
  
# voucher_api_pwd = token_library.getSecret(kv_name, 'b2b2cVoucherApiPassword')  

#headers = { 'Content-Type': 'application/x-www-form-urlencoded' }

#body = { 'grant_type': 'password',
#          'client_id': b2b_client_id,
#          'client_secret': b2b_client_secret,
#          'username': b2b2c_voucher_api_user_name,
#          'password': voucher_api_pwd
#          }

#response = requests.post(b2b2c_voucher_token_url,
#                         headers=headers,
#                         data=body)

#voucher_api_authorization = json.loads(response.text)['access_token']

#print(voucher_api_pwd)
#print(voucher_api_authorization)

## Get the token to be used for calling coupon API
## This is commented out as broken; the code block above replaces the one below

In [None]:
#import requests
#import json



#headers = {
#    'Content-Type': 'application/x-www-form-urlencoded',
#    'Authorization': voucher_api_authorization
#}

#data = {
#    'client_id': b2b_client_id,
#    'client_secret': b2b_client_secret,
#    'password': voucher_api_pwd,
#    'grant_type': 'password',
#    'username': b2b2c_voucher_api_user_name ,
#}

#response = requests.post(b2b2c_voucher_token_url,    
#                        headers=headers,
#                        data=data,
#)
#json_resp = json.loads(response.text)
#print("token::",json_resp['access_token'])
#print(json.load(response))

### Write voucher raw to bronze 

In [None]:
# Load every raw Voucher__c JSON file into bronze, then archive the file.
file_path = f'{raw_adls_path}Salesforce/dpcce/Voucher__c/'
file_list = mssparkutils.fs.ls(file_path)

# The loop variable must not be named `f`: this notebook imports
# pyspark.sql.functions under that alias, and rebinding it would break every
# later `f.<function>` call.
for voucher_file in file_list:

    # Compare the entry's name; comparing the listing object itself to a
    # string is never equal, so an "archive" entry was not actually skipped.
    if voucher_file.name.rstrip('/') == "archive":
        continue

    source_file = file_path + voucher_file.name
    print("--DPCCE Voucher--: Reading file " + source_file)
    dpcce = spark.read.json(source_file)

    display(dpcce)

    display(dpcceSql)

    #Create temp view
    dpcce.createOrReplaceTempView("Voucher__c")

    count = spark.sql("SELECT COUNT(*) AS COUNT FROM Voucher__c")
    # Run the count job once and reuse the number.
    record_count = count.collect()[0][0]

    #Create view as new dataframe osView
    if record_count > 0:
        print("--DPCCE Voucher--: Processing " + str(record_count) + " records...")
        dpcceView = spark.sql(dpcceSql)
        createMergeData('Salesforce/DPCCE/Voucher__c', dpcceView, "Voucher__c", "Salesforce/DPCCE")
    else:
        print("--DPCCE Voucher--: No records found - empty file?")

    # Move the processed file to the archive folder so it is not read again.
    # NOTE(review): the move runs in every environment; the earlier comment
    # said production only - confirm which is intended.
    mssparkutils.fs.mv(source_file, raw_adls_path + 'Salesforce/dpcce/Voucher__c_archive/' + voucher_file.name,True, overwrite=True)

## read voucher data returned by  API

In [None]:
#import requests
#from pyspark.sql import SparkSession
#from pyspark.sql.types import StructType, StructField, StringType, ArrayType, DateType


#print(voucher_api_authorization)


#headers = {
#    'Content-Type': 'application/json',
    #'Authorization': f"Bearer {json_resp['access_token']}",
#    'Authorization': f"Bearer {voucher_api_authorization}",
    # 'Cookie': 'BrowserId=n5uowCD8Ee68SElowr2Prw; CookieConsentPolicy=0:1; LSKey-c$CookieConsentPolicy=0:1',
#}

# 20241118 KETTNECH - appears that thousands of Expired vouchers were added to dev/test recently and are overflowing the text string below.
# Limiting in dev/test
# Valid values appear to be: Active, Printed, Blocked, Disabled, Expired, Removed, Inactive
# Not sure how to specify multiple select here (tried ['1','2','3',etc])

#if env_var == 'azwwwnonproddevadapsyn01' or env_var == 'azwwwnonprodtestadapsyn01':
#    json_data = {
#        'status': 'Redeemed',
#        'employeeid': '',
#        'companyid': '',
#        'emailid': '',
#    }
#else:
#    json_data = {
#        'status': '',
#        'employeeid': '',
#        'companyid': '',
#        'emailid': '',
#    }

#response1 = requests.post(b2b2c_voucher_api_url,   
#                        headers=headers,
#                        json=json_data,
#)

#print(response1.text)
#json_resp1 = json.loads(response1.text)
#print("response status::",json_resp1['success'])
#if json_resp1['success'] == True:
#    print('success response')
#    #print(json_resp1['response'])
#    #prepare list of tuples
#    voucher_d = [(k, v) for k, v in json_resp1['response'].items()]
#    voucher_data = spark.createDataFrame(voucher_d, voucher_schema)

#    voucher_data = voucher_data.selectExpr('voucher_status','explode(voucher_data) as voucher_data')
#    df_csv_voucher = voucher_data.selectExpr('voucher_data.vouchercode as voucher_code'
#    ,'voucher_status as status'
#    ,'voucher_data.startDate as voucher_start_dt','voucher_data.expirydate as expiry_date'
#    ,'voucher_data.employeeid as employee_id', 'voucher_data.amount', 'voucher_data.companyid as company_id'
#    ,'voucher_data.redemptiondate as redemption_date','row_number() over(partition by voucher_data.vouchercode order by voucher_data.expirydate desc) as rn')
#    df_csv_write = write_to_file(df_csv_voucher,f"{raw_path}dpcce/")
#    df_csv_voucher_dups = df_csv_voucher.select("*").where("rn>1")
#    df_csv_voucher = df_csv_voucher.select("*").where("rn=1").drop("rn")
#    display(df_csv_voucher)
#else:
#    print("No API response")    

#New method reading delta table 1/27/2025 - hintzjo

# Read the bronze Voucher__c Delta table with the DataFrame reader, the same
# way the ZSOM_D12 table is read in this notebook. This yields the same
# DataFrame as DeltaTable.forPath(...).toDF() without needing the DeltaTable
# name, which this notebook never imports itself.
# rstrip('/') avoids a doubled separator when bronze_adls_path ends with '/'.
voucher_table_path = f"{bronze_adls_path.rstrip('/')}/Salesforce/DPCCE/Voucher__c"
df_vou = spark.read.format('delta').load(voucher_table_path)

# Rename the Salesforce fields to the snake_case names used downstream.
df_csv_voucher = df_vou.selectExpr('Voucher_Code__c as voucher_code'
    ,'Status__c as status'
    ,'Start_Date__c as voucher_start_dt'
    ,'Expiry_Date__c as expiry_date'
    ,'Employee_Id__c as employee_id'
    ,'Amount__c as amount'
    ,'Company_Id__c as company_id'
    ,'Redemption_Date__c as redemption_date'
    ,'Partial_Redemption_Allowed__c as partial_redemption_allowed'
    ,'Balance__c as remaining_balance'
    ,'Redeemed_Amount__c as redeemed_amount'
    ,'Original_Amount__c as original_amount')

display(df_csv_voucher)

In [None]:
#display(df_csv_voucher.select('employee_id','company_id','status','voucher_start_dt','voucher_code').where("voucher_code like '20240228%'"))

## print duplicate voucher codes from Voucher API

In [None]:
#display(df_csv_voucher_dups)

In [None]:
# Print the number of voucher rows loaded.
print(df_csv_voucher.count())

## get data from Fact.orders 

In [None]:
# from notebookutils import mssparkutils

# import sys  
# from pyspark.sql import SparkSession  
  
# sc = SparkSession.builder.getOrCreate()  
# token_library = sc._jvm.com.microsoft.azure.synapse.tokenlibrary.TokenLibrary  
# #token_library.getSecret("kv-name", "secret-name", "linked-service")  
# jdbcPassword = token_library.getSecret(kv_name, "SqlAdmin", "ls_kv_adap")  
# print(jdbcPassword)

# #jdbcHostname = 'az-www-datahub-nonprod-dev-adap-sql.database.windows.net'
# #"azwwwnonproddevadapsyn01.sql.azuresynapse.net"
# #jdbc:sqlserver://azwwwnonproddevadapsyn01.sql.azuresynapse.net:1433;database=syndw01;user=sqlAdmin@azwwwnonproddevadapsyn01;password={your_password_here};encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.sql.azuresynapse.net;loginTimeout=30;
# jdbcPort = 1433

# #az-www-datahub-prod-prd-adap-sql.database.windows.net

# jdbcDatabase = "dw"

# jdbcUsername = "sqlAdmin"

# #jdbcPassword = mssparkutils.credentials.getSecret('az-www-dev-adap-kv', 'SqlAdmin')

# jdbcDriver = "com.microsoft.sqlserver.jdbc.SQLServerDriver"

# #url = s"jdbc:sqlserver://${database_host}:${database_port}/${database_name}"

# table = "fact.orders"

# jdbcUrl = f"jdbc:sqlserver://{jdbcHostname}:{jdbcPort};databaseName={jdbcDatabase}"

# df_fact_orders = spark.read.format("jdbc")\
# .option("driver", jdbcDriver)\
# .option("url", jdbcUrl)\
# .option("dbtable", table)\
# .option("user", jdbcUsername)\
# .option("password", jdbcPassword).load()
# display(df_fact_orders.select("*"))
df_fact_orders = spark.read.format('delta').load(f'{bronze_adls_path}SAP/BW/ZSOM_D12')
#display(df_fact_orders.where("`/BIC/ZSOMOSMNM` IN ('T1080039863')"))
df_fact_orders = df_fact_orders.selectExpr('`/BIC/ZSOMOSMNM` as order_number'
                                ,'`/BIC/ZUPCU` as upc' # ProductCode
                                ,'`/BIC/ZMATNUM` as stock_number' # StockNumber
                                ,'`/BIC/YQTY` as quantity' 
                                ,'`/BIC/ZSHOEWD`  as width'
                                ,'`/BIC/ZSHOESZ` as size'
                                ,'`/BIC/ZSOMOSTTL` as pre_tax_total' # PreTaxTotal/pre_tax_total
                                ,'`/BIC/ZCNTYTAX` as  item_tax' # TaxAmount1/item_tax
                                #,'round(`/BIC/ZSOMOTTL`,3) as  order_total' #GrandTotal/order_total
                                ,'round(`/BIC/ZSOMOSTTL`,3) + round(`/BIC/ZCNTYTAX`,3) as order_total'
                                ,'`/BIC/ZREC_TYPE` as record_type' # RecordType/record_type
                                ,'`ERDAT` as date_key' #OrderCreatedDate
                                ,'`/BIC/ZSOMDLVST` as ShipToState' #ShipToRegion or ship_to_state
                                ,'`/BIC/YAMOUNT` as sale_price' #UnitPrice
                                ,'`/BIC/ZSOMVEMP` as voucheremployeeid' #voucheremployeeid
                                ,'`/BIC/ZSOMVSTE` as vouchersiteid' # vouchersiteid
                                ,'`/BIC/ZSOMVBBU` as voucherbrandbusinessunit' # VoucherBrandBusinessUnit
                                ,'`/BIC/ZSOMLINE` as LineNumber'  
                                ,'round(`/BIC/ZSOMVOUPY`,3) as LineItemSubsidyCharge' #LineItemSubsidyCharge
                                ,'round(`RPA_TAT`,3) as LineItemCreditCardCharge' # TenderValue/LineItemCreditCardCharge                                
                                ,'`/BIC/ZSOMOISST` as ship_status'
                                ,'`/BIC/ZSOMSHPDT` as ship_date'
                                ,'`/BIC/YSHIPCHRG` as ship_charge'
                                ,'`/BIC/ZSOMVOURF` as voucher_refund_amt'
                                ,'`/BIC/ZSOMRFAMT` as cc_refund_amt' 
                                ,'`/BIC/ZRPLCSKU` as replacement_upc'
                                ,'`/BIC/ZSOMRTNDT` as return_exchange_date'
                                ,'row_number() over(partition by `/BIC/ZSOMOSMNM`,`/BIC/ZREC_TYPE`,`/BIC/ZSOMLINE` order by  `AEDAT` DESC,`/BIC/ZAEDAT` DESC,`/BIC/ZCHGTIME` DESC) as rn') 
#display(df_fact_orders.select('*').where("order_number = 'T1080030340'"))
#display(df_fact_orders.where("order_number IN ('T1080030340')"))
# Keep only the top-ranked row (rn = 1) of the ranking computed in the select above
df_fact_orders = df_fact_orders.filter('rn=1')
display(df_fact_orders)

# Item lines eligible to supply an order's UPC.
# Since 8/1/2024 (josh hintz) exchange ('E') records are accepted alongside original orders ('O');
# the previous filter was: "record_type = 'O' and upc is not null and upc != 'SHIPMENTCOSTS'"
order_item_lines = (
    df_fact_orders
    .selectExpr('order_number','upc','LineNumber')
    .filter("record_type in ('O','E') and upc is not null and upc != 'SHIPMENTCOSTS'")
)

# Reduce to one UPC per order: the first one when ordered by upc, then LineNumber
df_fact_order_one_upc = (
    order_item_lines
    .selectExpr('order_number','first(`upc`) over(partition by `order_number` order by `upc`,`LineNumber`) as upc')
    .distinct()
)

# Rows with record type 'T' supply the subsidy, credit card and refund amounts;
# the inner join attaches the order's single UPC and drops 'T' rows whose order has no eligible item line
tender_rows = (
    df_fact_orders
    .selectExpr('order_number', 'record_type as rec_type','LineItemSubsidyCharge','LineItemCreditCardCharge','voucher_refund_amt','cc_refund_amt')
    .filter("rec_type='T' ")
)
df_fact_ordersT = tender_rows.join(df_fact_order_one_upc, on=['order_number'], how="inner")
#display(df_fact_ordersT.where("order_number IN ('T1080045391')")) #record_type='T' and LineItemSubsidyCharge>0 
#display(df_fact_orders.where("order_number IN ('T1080045391')"))
#df_fact_orders.printSchema()
 

## Join SFCC and DPCCE on employeeID, accountName/company_id and voucher_code

In [None]:
# Join conditions between SFCC custom attributes, SFCC payments and the voucher master CSV
payment_match = dfcustom_attributes.order_no == dfPayment.order_no
voucher_match = (
    (df_csv_voucher.employee_id == dfcustom_attributes.employeeID)
    & (F.lower(df_csv_voucher.company_id) == F.lower(dfcustom_attributes.accountName))  # company/account compared case-insensitively
    & (dfPayment.voucher_code == df_csv_voucher.voucher_code)
)

# Both joins are left joins, so every custom-attribute row is kept even without a payment or voucher match
sfcc_with_vouchers = (
    dfcustom_attributes.alias('da')
    .join(dfPayment.alias('dfp'), payment_match, "left")
    .join(df_csv_voucher.alias('dc'), voucher_match, "left")
)

# Output columns: order/employee attributes from SFCC ('da') plus voucher details ('dc')
sfcc_voucher_columns = [
    "da.order_no as order_no",
    'da.`product_id` as upc',
    "da.order_date_time as order_date_time",
    "cast(da.order_date_time as date) as order_date",
    "da.brandBusinessUnit as brand_business_unit",
    "da.accountName as account_name",
    "da.employeeID as employee_id",
    "da.lastName as last_name",
    "da.firstName as first_name",
    "dc.voucher_start_dt",
    "dc.voucher_code",
    "dc.status as voucher_status",
    "dc.partial_redemption_allowed",
    "dc.remaining_balance",
    "dc.redeemed_amount",
    "dc.original_amount",
    'da.employeeLocation as b2b_customer_location',
    'da.costCenter as cost_center',
    "da.districtOrLocationCodeOrBranch as b2b_location_code",  # b2b_location_code
    "da.storeOrDept as b2b_store_no",  # B2B - STORE #
    "da.salesCenter as b2b_sales_center",  # B2B - SALES CENTER #
    "da.POLocation as b2b_po_location",  # B2B PO-Location
    "da.blanketPO as b2b_blanket_po",
    "da.POValue as POValue",  # this is po_number from SFCC--pending, this field is not there yet, only applies for a PO enabled site
    ##,"round(dfp.voucher_amount,3) as line_item_subsidy_charge"
]
dpcce_join_sfcc = sfcc_with_vouchers.selectExpr(*sfcc_voucher_columns)

#dpcce_join_sfcc = dpcce_join_sfcc.alias('sfcc1').join(dfPayment.alias('dp'),['voucher_code'],"inner")
#dpcce_join_sfcc = dpcce_join_sfcc.selectExpr("sfcc1.*",'dp.is_redeemed','dp.is_subsidy','dp.voucher_amount as line_item_subsidy_charge','dp.cc_amount_paid as line_item_cc_charge')
display(dpcce_join_sfcc.distinct())

In [None]:
# Distinct SFCC account names, displayed for a visual check
account_names = dfcustom_attributes.select('accountName').distinct()
display(account_names)

In [None]:
# State-code lookup CSV (with header row); us_state and code are read from it when joining to fact orders
state_codes_csv = f"{raw_adls_path}b2b2c/state_codes/US_state_codes.csv"
df_state_codes = spark.read.format('csv').options(header="true").load(state_codes_csv)
display(df_state_codes)

## join with fact_orders

In [None]:
# Join conditions, expressed on the source DataFrames' own column references
order_line_match = (dpcce_join_sfcc.order_no == df_fact_orders.order_number) & (dpcce_join_sfcc.upc == df_fact_orders.upc)
state_match = df_fact_orders.ShipToState == df_state_codes.us_state
# 'T' record amounts are attached only to line 00000000001 of each order.
# (An earlier variant also matched on upc: df_fact_ordersT.upc == df_fact_orders.upc.)
tender_match = (df_fact_ordersT.order_number == df_fact_orders.order_number) & (df_fact_orders.LineNumber == '00000000001')

sfcc_orders_joined = (
    dpcce_join_sfcc.alias('sf')
    .join(df_fact_orders.alias('fo'), order_line_match, "inner")
    .join(df_state_codes.alias('scodes'), state_match, "left")
    .join(df_fact_ordersT.alias('fot'), tender_match, "left")
)

order_voucher_columns = [
    'sf.*',
    'fo.stock_number',
    'fo.quantity as product_qty',
    'fo.width',
    # leading zeros are stripped; values above 20 are divided by 10
    "case when cast(ltrim('0',fo.size) as int) >20 then cast(ltrim('0',fo.size) as int)/10 else cast(ltrim('0',fo.size) as int) end as size",
    'fo.pre_tax_total as line_item_price',
    'fo.item_tax line_item_tax',
    'fo.order_total line_item_total',
    'fo.record_type as order_type',
    'fo.date_key',
    'fo.shipToState as shipping_state',
    'fo.sale_price',
    'fo.voucheremployeeid as voucher_emp_id',
    'fo.vouchersiteid as voucher_site_id',
    'fo.voucherbrandbusinessunit as voucher_bbusiness_unit',
    # credit card and subsidy charges are reported without subtracting refunds
    'round(nvl(fot.LineItemCreditCardCharge,0),3) as line_item_credit_card_charge',
    'round(nvl(fot.LineItemSubsidyCharge,0),3) as line_item_subsidy_charge',
    # the voucher refund amount is subtracted to adjust balance_national_acc_owes
    'round(nvl(fot.LineItemSubsidyCharge,0),3) - round(nvl(fot.voucher_refund_amt,0),3) as balance_national_acc_owes',
    'fo.ship_status as line_item_status',
    'round(fo.quantity*fo.pre_tax_total,3) as subtotal',
    'scodes.code as shipping_state_short',
    'fo.size as size_original',
    "to_date(fo.ship_date,'yyyyMMdd') as ship_date",
    "ship_charge",
    # refund amounts are sign-flipped
    "(fot.voucher_refund_amt * -1) as voucher_refund_amt",
    "(fot.cc_refund_amt * - 1) as cc_refund_amt",
    "fo.replacement_upc as replacement_upc",
    "to_date(fo.return_exchange_date,'yyyyMMdd') as return_exchange_date",
    "from_utc_timestamp(current_timestamp(),'EST5EDT') as last_update_ts",
]
dpcce_sfcc_fact_orders = sfcc_orders_joined.selectExpr(*order_voucher_columns)

# Expose the integrated data set to the SQL cells below
dpcce_sfcc_fact_orders.createOrReplaceTempView('order_voucher_data')
#display(dpcce_sfcc_fact_orders.select('*'))
#display(dpcce_sfcc_fact_orders.where("sf.order_no IN ('T1080045391','T1080042899')"))

In [None]:
%%sql
-- Latest row (by order_date_time) per order_no / upc / employee_id / voucher_code / order_type
select t.*
from (
    select *,
           row_number() over (
               partition by order_no, upc, employee_id, voucher_code, order_type
               order by order_date_time desc
           ) as rn
    from order_voucher_data
) t
where t.rn = 1;

## Print duplicates from the integrated (sfcc, fact.orders, voucher) data set

In [None]:
# Rows ranked 2 or lower within the same order_no/upc/employee_id/voucher_code/order_type key are duplicates
duplicates_query = """select * from (select * , row_number() over(partition by order_no, upc,employee_id, voucher_code,order_type order by order_date_time desc) rn 
from order_voucher_data) t where t.rn > 1"""
df_order_voucher_dups = spark.sql(duplicates_query)
display(df_order_voucher_dups)

## select one row per orderno+voucherno and empid 

In [None]:
#dpcce_sfcc_fact_orders = spark.sql(f"""select * from (select * , row_number() over(partition by order_no,upc, employee_id, voucher_code, order_type order by order_date_time desc) rn 
#from order_voucher_data) t where t.rn = 1""")
#dpcce_sfcc_fact_orders = dpcce_sfcc_fact_orders.drop('rn')
#display(dpcce_sfcc_fact_orders)


## Write to DataFrame out (df_out) and perform final transformations

In [None]:
# Lines carrying a replacement UPC are reported as EXCHANGED; all others keep their status
df_out = dpcce_sfcc_fact_orders.withColumn(
  "line_item_status",
  when(col("replacement_upc").isNull(), col("line_item_status")).otherwise("EXCHANGED"))

# Aggregate amounts are kept on original-order ('O') rows and zeroed on every other record type
is_original_order = col("order_type") == "O"
order_level_amounts = (
  "line_item_subsidy_charge",
  "balance_national_acc_owes",
  "voucher_refund_amt",
  "cc_refund_amt",
  "line_item_credit_card_charge",
)
for amount_col in order_level_amounts:
  df_out = df_out.withColumn(
    amount_col,
    when(is_original_order, col(amount_col)).otherwise("0.00").cast('decimal(12,2)'))

# subtotal is sign-flipped (not zeroed) on non-'O' rows
df_out = df_out.withColumn(
  "subtotal",
  when(is_original_order, col("subtotal")).otherwise(col("subtotal") * -1).cast('decimal(12,2)'))

# Return-exchange lines (status EXCHANGED + order type R): quantity and line amounts are set to 0
is_exchange_return = concat(col("line_item_status"), col("order_type")) == "EXCHANGEDR"
for line_col in ("product_qty", "line_item_price", "line_item_total", "sale_price", "subtotal"):
  df_out = df_out.withColumn(line_col, when(is_exchange_return, 0).otherwise(col(line_col)))

# Shift order_date_time from UTC to America/New_York and derive order_date from the shifted value
df_out = (
  df_out
  .withColumn('order_date_time', F.from_utc_timestamp('order_date_time', 'America/New_York'))
  .withColumn('order_date', F.date_format(col("order_date_time"),"yyyy-MM-dd").cast(DateType()))
)

#display(df_out.where("order_no IN ('T1080044631')"))

## Write dataframes to parquet files

In [None]:
# Full reload: remove yesterday's folder (when present) and write the current data in its place
full_load_targets = (
  (df_out, gold_adls_path + 'b2b2c/order_history_data_full'),
  (df_csv_voucher, gold_adls_path + 'b2b2c/voucher_master_data_full'),
)
for frame, target_path in full_load_targets:
  if mssparkutils.fs.exists(target_path):
    mssparkutils.fs.rm(target_path, True)  # True = recursive delete
  frame.write.parquet(target_path)

#Legacy delta lake load - will maybe revisit in the future to work out de-dupes.
#dict_tables = {
#      "b2b2c/order_voucher_data": { "source_df": dpcce_sfcc_fact_orders
#                 ,"where_condition":"""target.order_no = source.order_no 
#                  and target.upc = source.upc 
#                  and target.employee_id = source.employee_id 
#                  and target.voucher_code= source.voucher_code 
#                  and target.order_type = source.order_type"""                        
#                 ,"target_table":"lakedb_gold.b2b2c_order_history_data2" 
#                 ,"partition":"order_date"
#                 ,"path": gold_adls_path + 'b2b2c/order_history_data2'
#                       
#                   }
#      ,"b2b2c/voucher_master_data": 
#                { "source_df": df_csv_voucher
#                 ,"where_condition":"target.voucher_code = source.voucher_code"                        
#                 ,"target_table":"lakedb_gold.b2b2c_voucher_master_data2" 
#                 ,"partition":""
#                 ,"path": gold_adls_path + 'b2b2c/voucher_master_data2'
#                }
#                            
#               }
#perform_merge(dict_tables)   
               

## exit the notebook - send an email about the processing

In [None]:
# Exit the notebook with "sfcc file" when at least one XML file was counted, otherwise "None".
# The move of processed files to the archive folder is disabled; the former loop is kept below for reference.
# for j in mssparkutils.fs.ls(f"{raw_path}sfcc/"):
#   if j.size>0:  
#     print(f'moving ', j.name, ' to archive' )
#     mssparkutils.fs.mv(f"{raw_path}sfcc/{j.name}", f"{raw_path}sfcc/archive/{j.name}",overwrite=True)
exit_value = "sfcc file" if v_xml_file_cnt >= 1 else "None"
mssparkutils.notebook.exit(exit_value)

 

In [None]:
# df_fact_order = spark.read.format('delta').load('abfss://bronze@azwwwnonproddevadapadls.dfs.core.windows.net/SAP/BW/ZSOM_D12')
# (len(df_fact_order.columns))
# cols1= ['YACTDATE', 'YAMOUNT', 'YARTICLE', 'YORGPLANT', 'YPLANT', 'YQTY', 'YREASON', 'YREGISTER', 'YRTLCOST', 'YSHIPCHRG', 'YSLSPRSN', 'YTAXAMT1', 'YTAXAMT2', 'ZAEDAT', 'ZCARRIER', 'ZCHGTIME', 'ZCOUPON', 'ZCOUPON2', 'ZCOUPON3', 'ZDEMANDLC', 'ZDISCMSG', 'ZDISCMSGP', 'ZFINALSAL', 'ZINVOICED', 'ZLOCALE', 'ZMATNUM', 'ZREC_TYPE', 'ZRPA_TCD', 'ZRTMSRP', 'ZSALETYPE', 'ZSDTRACK', 'ZSHOESZ', 'ZSHOEWD', 'ZSKU', 'ZSOMACTID', 'ZSOMBILCC', 'ZSOMBILCT', 'ZSOMBILPC', 'ZSOMBILST', 'ZSOMCSRNM', 'ZSOMDLVCC', 'ZSOMDLVCT', 'ZSOMDLVPC', 'ZSOMDLVST', 'ZSOMFSTAT', 'ZSOMFTIME', 'ZSOMGCORD', 'ZSOMGCTND', 'ZSOMHSTAT', 'ZSOMIACDT', 'ZSOMIACTM', 'ZSOMLINE', 'ZSOMODISC', 'ZSOMODLMD', 'ZSOMOISST', 'ZSOMOODT', 'ZSOMOORD', 'ZSOMOOTM', 'ZSOMOPTIN', 'ZSOMOSMNM', 'ZSOMOSTTL', 'ZSOMOTTL', 'ZSOMPDISC', 'ZSOMPYST', 'ZSOMRFST', 'ZSOMRTNDT', 'ZSOMSAPTD', 'ZSOMSAPTT', 'ZSOMSDISC', 'ZSOMSHPDT', 'ZSOMSLMD', 'ZSOMSTIME', 'ZSOMTYPE', 'ZSTOREID', 'ZUPCU', 'ZVATVAL', 'AEDAT', 'BASE_UOM', 'CALDAY', 'CALMONTH', 'CALMONTH2', 'CALWEEK', 'CALYEAR', 'CREA_TIME', 'CURRENCY', 'DOC_CURRCY', 'DOC_NUMBER', 'EMPLOYEE', 'ERDAT', 'FISCPER', 'FISCPER3', 'FISCVARNT', 'FISCYEAR', 'LOAD_DATE', 'LOC_CURRCY', 'RPA_DTC', 'RPA_REA', 'RPA_TAT', 'RPA_TCD', 'RPA_TTC', 'SR_COUNTER', 'TIME', 'ZEMPPURCH', 'ZPRODEAL', 'ZISMOBILE', 'ZISCUSTOM', 'ZCSRORD', 'ZPCIPAL', 'ZPREORDER', 'ZOUTLETF', 'ZINSTORE', 'ZSTORETYP', 'ZSOMSLSCH', 'ZISLEGACY', 'ZSOMCRDT', 'ZSOMVEMP', 'ZSOMVSTE', 'ZSOMVBBU', 'ZSOMVOUPY']
# cols_renamed = ['ActualTransactionDate', 'UnitPrice', 'RetailProductCode', 'OriginatingStore', 'Site', 'Quantity', 'ReturnReason', 'Register', 'RetailCost', 'ShippingCharges', 'StoreSalesAssociate', 'TaxAmount1', 'TaxAmount2', 'ItemChangedDate', 'ShipmentCarrier', 'ItemChangedTime', 'ShipCouponCode', 'OrderCouponCode', 'ProductCouponCode', 'DemandLocalCurrency', 'OrderPromoMessage', 'ProductPromoMessage', 'FinalSale', 'Invoiced', 'Locale', 'StockNumber', 'RecordType', 'GiftCardPayIndicator', 'ListPrice', 'SaleType', 'TrackingNumber', 'ItemSize', 'ItemWidth', 'StockKeepingUnit', 'CustomerAccountId', 'BillToCountryCode', 'BillToCity', 'BillToPostalCode', 'BillToRegion', 'CSRWWWUserId', 'ShipToCountryCode', 'ShipToCity', 'ShipToPostalCode', 'ShipToRegion', 'FulfillmentStatus', 'FulfillOrderTime', 'GiftCardOrderIndicator', 'GiftCardTenderValue', 'OrderStatus', 'ItemAdjCreatedDate', 'ItemAdjCreatedTime', 'LineNumber', 'OrderDiscount', 'ShippingMethod', 'ItemStatus', 'OriginalOrderDate', 'OriginalOrderNumber', 'OriginalOrderTime', 'OptInFlag', 'OrderNumber', 'PreTaxTotal', 'GrandTotal', 'ProductDiscount', 'PaymentStatus', 'RefundStatus', 'ReturnDate', 'SAPTransactDate', 'SAPTransactTime', 'ShippingDiscount', 'ShipmentCreateDate', 'ShipLastModifyDate', 'ShipmentCreateTime', 'OrderType', 'CidStoreId', 'ProductCode', 'VATTax', 'OrderChangedDate', 'UnitOfMeasure', 'OrderedDate', 'CalendarYearMonth', 'CalendarMonth', 'OrderedWeek', 'CalendarYear', 'OrderCreatedTime', 'CurrencyKey', 'DocumentCurrency', 'SAPSalesOrder', 'EmployeeNumber', 'OrderCreatedDate', 'FiscalYearPeriod', 'FiscalPeriod', 'FiscalCalendar', 'FiscalYear', 'BWLoadDate', 'LocalCurrency', 'DiscountTypesFlag', 'TotalDiscount', 'TenderValue', 'MeansOfPayment', 'TransType', 'NumberOfTransactions', 'OrderedTime', 'EmployeePurchaseFlag', 'ProDealFlag', 'MobileOrderFlag', 'CustomOrderFlag', 'CsrOrderFlag', 'PciPalFlag', 'PreOrderFlag', 'OutletFlag', 'InStoreFlag', 'StoreType', 'SalesChannel', 'LegacyFlag', 
# 'CancelReturnDate', 'VoucherEmployeeID', 'VoucherSiteID', 'VoucherBrandBusinessUnit', 'LineItemSubsidyCharge']
# #cols_renamed_filter = [x for x in cols_renamed if x in []
# cols_zip = list(zip(cols1,cols_renamed))
# for j in cols_zip:
#   print(j[0]+' as '+j[1])
# # display(spark.sql("""select * from parquet.`abfss://raw@azwwwnonproddevadapadls.dfs.core.windows.net/SAP/BW/ZSOM_D12/2024/03/25/2024-03-25T17:59:31.9176378Z.parquet`
# # where ZSOMOSMNM like 'T1080030340%'"""))  
