# Sales combined union


## Imports

In [None]:
##import requests
##import json
from pyspark.sql.functions import lit
##from msal import ConfidentialClientApplication
from azure.storage.blob import BlobServiceClient
##from io import BytesIO
from notebookutils import mssparkutils
import urllib.parse
import re
#import pyspark.pandas as ps
import pandas as pd
from pyspark.sql.functions import col, to_date, coalesce, regexp_replace, expr, unbase64,col, hex
from pyspark.sql import functions as F, Window
from delta.tables import DeltaTable
import os

## Includes

In [None]:
%run /utils/common_functions

### Set Configuration and Get Secrets

In [None]:
# Storage configuration for this notebook.
# NOTE(review): tenant_id is not referenced anywhere in the visible cells and the
# previous "SharePoint API Details" label looked like a leftover - confirm before removing.
tenant_id = "deace5d6-717b-4f79-ab12-6357206c0c36"

# raw_adls_path is presumably defined by /utils/common_functions (pulled in via %run).
# The storage account is the host label between '@' and '.dfs.core.windows.net'.
match = re.search(r'@([^.]+)\.dfs\.core\.windows\.net', raw_adls_path)
if match is None:
    # Fail fast: continuing with None would build "...@None.dfs.core.windows.net" paths
    # and only surface later as an obscure storage error on the first read.
    raise ValueError(
        f"Cannot derive storage account from raw_adls_path: {raw_adls_path!r}"
    )
storage_account = match.group(1)
print(f"storage_account: {storage_account}")

source_container = "bronze"
target_container = "gold"

## Get Source data

In [None]:
# --- Locations in the gold layer (Delta) ---
ns_path = f"abfss://{target_container}@{storage_account}.dfs.core.windows.net/NewStore/"
target_path = f"abfss://{target_container}@{storage_account}.dfs.core.windows.net/SalesUnion/"
price_path = f"abfss://{target_container}@{storage_account}.dfs.core.windows.net/SAP/AFS/ProductPricingHistorySimplified/"
exchange_path = f"abfss://gold@{storage_account}.dfs.core.windows.net/SAP/BW/FxRatesExtendedCalendarDay/"
site_path = f"abfss://gold@{storage_account}.dfs.core.windows.net/SAP/BW/Site/"
ecom_path = f"abfss://gold@{storage_account}.dfs.core.windows.net/SOM/rptEcommerceDemand/"
custmaster_path = f"abfss://gold@{storage_account}.dfs.core.windows.net/mParticle/CustomerMasterHistory/"

# --- Location in the silver layer (Delta) ---
custkey_path = f"abfss://silver@{storage_account}.dfs.core.windows.net/mParticle/CustomerKeyLookup/"

# --- Snowflake extracts in the raw layer (parquet files) ---
foj_path = f"abfss://raw@{storage_account}.dfs.core.windows.net/Snowflake/ANALYTICS_PROD/ANALYTICS_DATA/FACT_ORDER_JOURNEY_PRE_POST_BC/*.parquet"
cust_path = f"abfss://raw@{storage_account}.dfs.core.windows.net/Snowflake/ANALYTICS_PROD/ANALYTICS_DATA/DIM_CUSTOMER_MASTER/*.parquet"
hub_path = f"abfss://raw@{storage_account}.dfs.core.windows.net/Snowflake/ANALYTICS_PROD/DV_RDV/HUB_CUSTOMER/*.parquet"
loc_path = f"abfss://raw@{storage_account}.dfs.core.windows.net/Snowflake/ANALYTICS_PROD/ANALYTICS_DATA/DIM_LOCATION/*.parquet"


def _load_source(file_format, location):
    """Read one source location with the given Spark data source format."""
    return spark.read.format(file_format).load(location)


# Delta sources
df1 = _load_source("delta", ns_path)
df_price = _load_source("delta", price_path)
ex_df = _load_source("delta", exchange_path)
custkey_df = _load_source("delta", custkey_path)
site_df = _load_source("delta", site_path)
ecom_df = _load_source("delta", ecom_path)
custmaster_df = _load_source("delta", custmaster_path)

# Parquet sources
foj_df = _load_source("parquet", foj_path)
hub_df = _load_source("parquet", hub_path)
cust_df = _load_source("parquet", cust_path)
loc_df = _load_source("parquet", loc_path)

# Debug aid: print any of the *_path variables above to check the resolved locations.


In [None]:
### add filters 
###ex_df = ex_df.filter(ex_df.ExchangeRateType == 'Z1')
# Customer hub lookup: hex-encode CUSTOMER_HKEY so it can be compared as a string in the
# Snowflake query, and strip the ".EDW" marker from CUSTOMER_KEY.
# regexp_replace takes a regular expression, so the dot is escaped to match a literal "."
# (an unescaped "." would also strip any other character that happens to precede "EDW").
hub_df2 = hub_df.select(
        hex("CUSTOMER_HKEY").alias("CUSTOMER_HKEY_STRING"),
        regexp_replace(col("CUSTOMER_KEY"), r"\.EDW", "").alias("CUSTOMER_KEY")
        )
def _latest_row_per_key(frame, key_col, order_col):
    """Number rows per key_col (newest order_col first) in column RN and keep only RN = 1."""
    newest_first = Window.partitionBy(key_col).orderBy(F.col(order_col).desc())
    ranked = frame.withColumn("RN", F.row_number().over(newest_first))
    return ranked.filter("RN = 1")


# Calendar date of the order, used for the exchange-rate join in the Snowflake query.
foj_df = foj_df.withColumn("OrderDate", to_date(col("Derived_Order_Datetime")))

# Most recent customer master row per global customer key.
cust_df = _latest_row_per_key(cust_df, "GLOBAL_CUSTOMER_HKEY", "EFFECTIVE_FROM")

# NewStore rows for the sweatybetty tenant only; "Date" is exposed as "Date1".
ns_df = (
    df1
    .filter(F.col("Tenant").like("%sweatybetty%"))
    .withColumnRenamed("Date", "Date1")
)

# Most recent customer-key lookup row per NewStore id and per email address.
custkey_mp_df = _latest_row_per_key(custkey_df, "NewStoreId", "EffectiveFrom")
custkey_email_df = _latest_row_per_key(custkey_df, "Email", "EffectiveFrom")

# Restrict the ECOM/SOM demand rows before they are exposed to SQL.
ecom_df = ecom_df.where("LineNumber <> '1000' AND RecordType = 'O' AND SalesChannel = 'SWEATYBETTY.COM'")

# Only the customer-master columns needed for the insider / fitness-instructor flags.
custmaster_df = custmaster_df.select(
    "mparticleuserkey",
    "isOptinInsider",
    "IsFitnessInstructor",
    "ValidFrom",
    "ValidTo",
    "OptintoInsiderDate",
)

# Expose the prepared frames to Spark SQL under the names the queries below use.
_sql_views = {
    "foj_table": foj_df,
    "hub_table": hub_df2,
    "exchangerate": ex_df,
    "cust_table": cust_df,
    "ns_table": ns_df,
    "cust_mp": custkey_mp_df,
    "cust_email": custkey_email_df,
    "site": site_df,
    "ecom_table": ecom_df,
    "loc_table": loc_df,
}
for _view_name, _view_frame in _sql_views.items():
    _view_frame.createOrReplaceTempView(_view_name)

### NewStore table

In [None]:
# NewStore sales lines (ns_table) shaped into the common sales-union column set.
# - MparticleUserID: prefers the match on NewStoreId (cust_mp) and falls back to the match on
#   the customer email (cust_email).
# - Columns that NewStore does not provide are emitted as typed NULLs so the result lines up
#   by name with the other sources.
# - Original RRP is converted to USD/GBP with the 'Z1' exchange rate for the order date; the
#   exchange-rate joins are LEFT JOINs, so a missing rate yields NULL for non-USD/non-GBP rows.
# NOTE(review): the two CASE expressions near the end reference
#   sales_original_rrp_value_incl_tax, which is an alias defined earlier in the same SELECT
#   (ArticleMSRP) unless ns_table itself has a column of that name - this relies on lateral
#   column alias resolution; confirm the Spark runtime supports it.
# NOTE(review): site and loc_table are not de-duplicated in this notebook; if either has more
#   than one row per store the LEFT JOINs will multiply sales lines - confirm key uniqueness.
ns_result = spark.sql("""SELECT
		Tenant,
		coalesce(mp.MparticleUserID, e.MparticleUserID) as MparticleUserID,
		CustomerId,
		ExternalId as OrderId,
		cast(null as varchar(200)) as OrderNumber,
		cast(null as varchar(200)) as OrderLineNumber,
		DiscountId,
		case when coalesce(Discount,0) <> 0 then 'Y' else 'N' end as DiscountFlag,
		Product,
		UPC as Barcode,
		Case when IsReturn = 'true' then 'Returns' else 'Sales' end as TransactionType,
		cast(null as varchar(200)) as TransactionStatus,
		l.reporting_channel_top_level as TransactionChannel,
		cast(null as varchar(200)) as DocumentNumber,
		cast(null as varchar(200)) as DocumentLineNumber,
		Date1 as OrderDate,
		to_timestamp(DateTime) as OrderDateTime,
		DateTimeGreenwichmean as OrderDateTimeUTC,
		case when IsReturn = 'true' then 1 else 0 end as IsReturn,
		Currency,
		PriceIncludingTax as sales_value_incl_tax,
		PriceIncludingTaxUSD as sales_value_incl_tax_usd,
		PriceIncludingTaxGBP as sales_value_incl_tax_gbp,
		PriceIncludingTax as order_value_incl_tax,
		PriceIncludingTaxUSD as order_value_incl_tax_usd,
		PriceIncludingTaxGBP as order_value_incl_tax_gbp,
		ArticleMSRP as sales_original_rrp_value_incl_tax,
		case when PriceMethod = 'tax_excluded' then Price + Discount 
			 when PriceMethod = 'tax_included' then PriceIncludingTax + Discount 
		end as sales_pos_rrp_value_incl_tax,

		case when PriceMethod = 'tax_excluded' then PriceUSD + DiscountUSD 
			 when PriceMethod = 'tax_included' then PriceIncludingTaxUSD + DiscountUSD 
		end as sales_pos_rrp_value_incl_tax_usd,

		case when PriceMethod = 'tax_excluded' then PriceGBP + DiscountGBP 
			 when PriceMethod = 'tax_included' then PriceIncludingTaxGBP + DiscountGBP 
		end as sales_pos_rrp_value_incl_tax_gbp,

		Price as sales_value_excl_tax,
		PriceUSD as sales_value_excl_tax_usd,
		PriceGBP as sales_value_excl_tax_gbp,
		quantity as Salesquantity,
		quantity as Orderquantity,
		NS.Store,
		s.CountryKey as shippingcountrycode,
		PriceTypeDerived as PriceType,
		CASE WHEN CURRENCY = 'USD' THEN  sales_original_rrp_value_incl_tax 
			ELSE  sales_original_rrp_value_incl_tax * usd.FxRate end as sales_original_rrp_value_incl_tax_usd,
		CASE WHEN CURRENCY = 'GBP' THEN  sales_original_rrp_value_incl_tax 
			ELSE  sales_original_rrp_value_incl_tax * gbp.FxRate end as sales_original_rrp_value_incl_tax_gbp,
		cast (null as varchar (2000)) as PromoId,
		cast(null as boolean) as IsLegacyOrder,
		'NewStore' as Source
	FROM ns_table NS
	LEFT JOIN exchangerate usd ON NS.Date1 = usd.CalendarDay and NS.Currency = usd.fromCurrency and usd.ToCurrency = 'USD' and usd.ExchangeRateType = 'Z1'
	LEFT JOIN exchangerate gbp ON NS.Date1 = gbp.CalendarDay and NS.Currency = gbp.fromCurrency and gbp.ToCurrency = 'GBP' and gbp.ExchangeRateType = 'Z1'
	LEFT JOIN cust_mp mp on mp.NewStoreId = NS.CustomerId
	LEFT JOIN cust_email e on e.Email = NS.CustomerEmail
	LEFT JOIN site s on s.site = NS.store
	LEFT JOIN loc_table l on l.store_key = ns.store and l.store_key is not null
""")

##ns_result.show(2)


### ECOM/SOM Data
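- Deduping: SOM records are kept in full; Snowflake records (foj_table) are excluded in the Snowflake cells below when the same order id, barcode and transaction type already exist in the NewStore or ECOM/SOM results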

In [None]:

# ECOM/SOM demand lines (ecom_table) shaped into the common sales-union column set.
# - Tenant is fixed to 'sweatybetty' and TransactionChannel to 'Digital'.
# - A line is a return when OrderItemStatus = 'RETURNED'.
# - Currency conversion: one LEFT JOIN to exchangerate per (from, to) currency pair, matched on
#   d_OrderedDate with ExchangeRateType 'Z3'. The CASE expressions handle GBP, EUR, CAD, SEK and
#   USD; any other currency, or a missing rate, yields NULL for the converted amount.
# - PriceType is 'MD' when ProductPriceHistoryListPrice differs from OriginalTotalLineAmount,
#   otherwise 'FP' (this includes the case where the comparison evaluates to NULL).
# - PromoId concatenates the order discount description (followed by '|') and the product
#   discount description, each defaulting to an empty string.
# NOTE(review): the NewStore and Snowflake queries use ExchangeRateType 'Z1' while this one
#   uses 'Z3' - confirm the difference is intentional.
# NOTE(review): OrderDateTimeUTC is derived from the same OrderedDate value as OrderDateTime
#   without any time-zone conversion - confirm OrderedDate is already UTC.
ecom_result = spark.sql("""SELECT
		'sweatybetty'  as Tenant,
		ecom.MparticleId as MparticleUserID,
		ecom.AccountId as CustomerId,
		ecom.OrderNumber as OrderId,
		ecom.OrderNumber as OrderNumber,
		ecom.LineNumber as OrderLineNumber,
		cast(null as varchar(200)) as DiscountId,
		case when coalesce(ecom.DiscountAmount,0) <> 0 then 'Y' else 'N' end as DiscountFlag,
		ecom.Material as Product,
		ecom.UPC as Barcode,
		case when ecom.OrderItemStatus = 'RETURNED' then 'Returns' else 'Sales' end as TransactionType,
        ecom.OrderItemStatus as TransactionStatus,
		'Digital' as TransactionChannel,
		ecom.OrderNumber as DocumentNumber,
		ecom.LineNumber as DocumentLineNumber,
		cAST(ecom.d_OrderedDate as DATE) as OrderDate,
		to_timestamp(ecom.OrderedDate) as OrderDateTime,
        to_timestamp(ecom.OrderedDate) as OrderDateTimeUTC,
		case when ecom.OrderItemStatus = 'RETURNED' then 1 else 0 end as IsReturn,
		ecom.CurrencyIsoCode as Currency,
		CAST(ecom.OriginalTotalLineAmountWithTax as decimal(38,3)) as sales_value_incl_tax,
		cast(CASE
            WHEN CurrencyIsoCode = 'GBP' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxGBPToUSD.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxEURToUSD.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxCADToUSD.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxSEKToUSD.FxRate
            WHEN CurrencyIsoCode = 'USD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax
        END as decimal(17,2)) as sales_value_incl_tax_usd,
		cast(CASE 
            WHEN CurrencyIsoCode = 'USD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxUSDToGBP.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxCADToGBP.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxEURToGBP.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxSEKToGBP.FxRate
            WHEN CurrencyIsoCode = 'GBP' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax 
        END as decimal(17,2)) as sales_value_incl_tax_gbp,
		CAST(OriginalTotalLineAmountWithTax as decimal(38,3)) as order_value_incl_tax,
		cast(CASE
            WHEN CurrencyIsoCode = 'GBP' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxGBPToUSD.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxEURToUSD.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxCADToUSD.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxSEKToUSD.FxRate
            WHEN CurrencyIsoCode = 'USD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax
        END as decimal(17,2)) as order_value_incl_tax_usd,
		cast(CASE 
            WHEN CurrencyIsoCode = 'USD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxUSDToGBP.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxCADToGBP.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxEURToGBP.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax * FxSEKToGBP.FxRate
            WHEN CurrencyIsoCode = 'GBP' AND OriginalTotalLineAmountWithTax IS NOT NULL THEN OriginalTotalLineAmountWithTax 
        END as decimal(17,2)) as order_value_incl_tax_gbp,
		ProductPriceHistoryListPrice as sales_original_rrp_value_incl_tax,
		CAST(OriginalUnitPrice as decimal(38,3)) as sales_pos_rrp_value_incl_tax,
		cast(CASE
            WHEN CurrencyIsoCode = 'GBP' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxGBPToUSD.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxEURToUSD.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxCADToUSD.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxSEKToUSD.FxRate
            WHEN CurrencyIsoCode = 'USD' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice
        END as decimal(17,2)) sales_pos_rrp_value_incl_tax_usd,
		cast(CASE 
            WHEN CurrencyIsoCode = 'USD' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxUSDToGBP.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxCADToGBP.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxEURToGBP.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice * FxSEKToGBP.FxRate
            WHEN CurrencyIsoCode = 'GBP' AND OriginalUnitPrice IS NOT NULL THEN OriginalUnitPrice 
        END as decimal(17,2)) as sales_pos_rrp_value_incl_tax_gbp,
		OriginalPreTaxSubTotal as sales_value_excl_tax,
		cast(CASE
            WHEN CurrencyIsoCode = 'GBP' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxGBPToUSD.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxEURToUSD.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxCADToUSD.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxSEKToUSD.FxRate
            WHEN CurrencyIsoCode = 'USD' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal
        END as decimal(17,2)) as sales_value_excl_tax_usd,
		cast(CASE 
            WHEN CurrencyIsoCode = 'USD' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxUSDToGBP.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxCADToGBP.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxEURToGBP.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal * FxSEKToGBP.FxRate
            WHEN CurrencyIsoCode = 'GBP' AND OriginalPreTaxSubTotal IS NOT NULL THEN OriginalPreTaxSubTotal 
        END as decimal(17,2)) as sales_value_excl_tax_gbp,
		CAST(OriginalQuantity as decimal(38,0)) as Salesquantity,
		CAST(OriginalQuantity as decimal(38,0)) as Orderquantity,
		cast(null as varchar(5)) as Store,
		DeliverToCountryCode as shippingcountrycode,
		cast(CASE
            WHEN ProductPriceHistoryListPrice <> OriginalTotalLineAmount THEN 'MD'
            ELSE 'FP'
        END as string) as PriceType,
		cast(CASE
            WHEN CurrencyIsoCode = 'GBP' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxGBPToUSD.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxEURToUSD.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxCADToUSD.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxSEKToUSD.FxRate
            WHEN CurrencyIsoCode = 'USD' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice
        END as decimal(17,2)) as sales_original_rrp_value_incl_tax_usd,
		cast(CASE 
            WHEN CurrencyIsoCode = 'USD' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxUSDToGBP.FxRate
            WHEN CurrencyIsoCode = 'CAD' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxCADToGBP.FxRate
            WHEN CurrencyIsoCode = 'EUR' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxEURToGBP.FxRate
            WHEN CurrencyIsoCode = 'SEK' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice * FxSEKToGBP.FxRate
            WHEN CurrencyIsoCode = 'GBP' AND ProductPriceHistoryListPrice IS NOT NULL THEN ProductPriceHistoryListPrice 
        END as decimal(17,2)) as sales_original_rrp_value_incl_tax_gbp,
        nvl(OrderDiscountDescription || '|','') || nvl(ProductDiscountDescription,'') as PromoId,
        IsLegacyOrder,
		'ECOM' as Source
	FROM ecom_table ecom
   LEFT JOIN exchangerate FxUSDToGBP ON  ecom.d_OrderedDate = FxUSDToGBP.CalendarDay
        AND FxUSDToGBP.FromCurrency = 'USD' 
        AND FxUSDToGBP.ToCurrency = 'GBP' 
        AND FxUSDToGBP.ExchangeRateType = 'Z3'
	LEFT JOIN exchangerate FxCADToGBP on ecom.d_OrderedDate = FxCADToGBP.CalendarDay
        AND FxCADToGBP.FromCurrency = 'CAD' 
        AND FxCADToGBP.ToCurrency = 'GBP' 
        AND FxCADToGBP.ExchangeRateType = 'Z3'
	LEFT JOIN exchangerate FxEURToGBP on ecom.d_OrderedDate = FxEURToGBP.CalendarDay
        AND FxEURToGBP.FromCurrency = 'EUR' 
        AND FxEURToGBP.ToCurrency = 'GBP' 
        AND FxEURToGBP.ExchangeRateType = 'Z3'
    LEFT JOIN exchangerate FxSEKToGBP on ecom.d_OrderedDate = FxSEKToGBP.CalendarDay
        AND FxSEKToGBP.FromCurrency = 'SEK' 
        AND FxSEKToGBP.ToCurrency = 'GBP' 
        AND FxSEKToGBP.ExchangeRateType = 'Z3'
	LEFT JOIN exchangerate FxGBPToUSD on ecom.d_OrderedDate = FxGBPToUSD.CalendarDay
        AND FxGBPToUSD.FromCurrency = 'GBP' 
        AND FxGBPToUSD.ToCurrency = 'USD' 
        AND FxGBPToUSD.ExchangeRateType = 'Z3'
    LEFT JOIN exchangerate FxEURToUSD on ecom.d_OrderedDate = FxEURToUSD.CalendarDay
        AND FxEURToUSD.FromCurrency = 'EUR' 
        AND FxEURToUSD.ToCurrency = 'USD' 
        AND FxEURToUSD.ExchangeRateType = 'Z3'
    LEFT JOIN exchangerate FxCADToUSD on ecom.d_OrderedDate = FxCADToUSD.CalendarDay
        AND FxCADToUSD.FromCurrency = 'CAD' 
        AND FxCADToUSD.ToCurrency = 'USD' 
        AND FxCADToUSD.ExchangeRateType = 'Z3'
    LEFT JOIN exchangerate FxSEKToUSD on ecom.d_OrderedDate = FxSEKToUSD.CalendarDay
        AND FxSEKToUSD.FromCurrency = 'SEK' 
        AND FxSEKToUSD.ToCurrency = 'USD' 
        AND FxSEKToUSD.ExchangeRateType = 'Z3'
""")

### Snowflake Data

In [None]:
# Make the NewStore and ECOM results queryable by name.
for _result_view, _result_frame in (("ns_result", ns_result), ("ecom_result", ecom_result)):
    _result_frame.createOrReplaceTempView(_result_view)

# Distinct (OrderId, Barcode, TransactionType) combinations already covered by NewStore or
# ECOM; the Snowflake query uses this list to drop lines it would otherwise duplicate.
_exclusion_sql = """
        SELECT OrderId, Barcode, TransactionType FROM ns_result
        UNION 
        SELECT OrderId, Barcode, TransactionType FROM ecom_result
"""
exclusion_orders = spark.sql(_exclusion_sql)
exclusion_orders.createOrReplaceTempView("exclusion_orders")

In [None]:
### create foj select 

# Snowflake order-journey lines (foj_table) shaped into the common sales-union column set.
# - All amounts come from the *_gbp source columns, so Currency is fixed to 'GBP'; the USD
#   amounts are the GBP amounts multiplied by the GBP->USD 'Z1' rate for the order date
#   (LEFT JOIN, so a missing rate yields NULL).
# - For the 'Retail' channel, order value/quantity fall back to the sales value/quantity
#   when the order figure is NULL.
# - MparticleUserId is resolved through cust_table (latest row per global customer key) and
#   hub_table (hex-encoded customer hash key).
# - De-duplication: anti-join against exclusion_orders (LEFT JOIN + "EO.OrderId is null"),
#   so lines already present in the NewStore or ECOM results are dropped. The order id is
#   compared on the part before the first '-'.
# Every exchange-rate column is qualified with the "e" alias so the query keeps resolving
# unambiguously if another joined table ever gains a column of the same name.
foj_res = spark.sql("""SELECT
		'sweatybetty' as Tenant,
		HUB.CUSTOMER_KEY AS MparticleUSerid,
		R.GLOBAL_CUSTOMER_HKEY as CustomerID,
		oRDER_iD AS OrderId,
		cast(null as varchar(200)) as OrderNumber,
		Order_Line_no as OrderLineNumber,
		cast(null as varchar(200)) AS DiscountId,
		discount_flag as DiscountFlag,
		Item_Number as Product,
		r.Barcode,
		Transaction_Type as TransactionType,
		Transaction_Channel_Top_level as TransactionChannel,
		cast(null as varchar(200)) as TransactionStatus,
		Document_nO as DocumentNumber,
		Document_Line_no as DocumentLineNumber,
		OrderDate,
		to_timestamp(Derived_Order_Datetime)  as OrderDateTime,
		to_timestamp(Derived_Order_Datetime)  as OrderDateTimeUTC,
		is_return as IsReturn,
		'GBP' as Currency,
		sales_value_incl_tax_gbp as sales_value_incl_tax,
		sales_value_incl_tax_gbp * e.fxrate as sales_value_incl_tax_usd,
		sales_value_incl_tax_gbp,
		CASE when Transaction_Channel_Top_level = 'Retail' then coalesce(order_value_incl_tax_gbp,sales_value_incl_tax_gbp)
			else order_value_incl_tax_gbp end as order_value_incl_tax,
		CASE when Transaction_Channel_Top_level = 'Retail' then coalesce(order_value_incl_tax_gbp,sales_value_incl_tax_gbp)
			else order_value_incl_tax_gbp end * e.fxrate as order_value_incl_tax_usd,
		CASE when Transaction_Channel_Top_level = 'Retail' then coalesce(order_value_incl_tax_gbp,sales_value_incl_tax_gbp)
			else order_value_incl_tax_gbp end as order_value_incl_tax_gbp,
		sales_original_rrp_value_incl_tax_gbp as sales_original_rrp_value_incl_tax,
		sales_original_rrp_value_incl_tax_gbp * e.fxrate as sales_original_rrp_value_incl_tax_usd,
		sales_original_rrp_value_incl_tax_gbp,
		sales_pos_rrp_value_incl_tax_GBP as sales_pos_rrp_value_incl_tax,
		sales_pos_rrp_value_incl_tax_GBP * e.fxrate as sales_pos_rrp_value_incl_tax_usd,
		sales_pos_rrp_value_incl_tax_GBP,
		sales_value_excl_tax_gbp as sales_value_excl_tax,
		sales_value_excl_tax_gbp * e.fxrate as sales_value_excl_tax_usd,
		sales_value_excl_tax_gbp,
		Sales_Quantity as SalesQuantity,
		CASE when Transaction_Channel_Top_level = 'Retail' then coalesce(Order_Quantity, Sales_Quantity)
			else Order_Quantity end as OrderQuantity,
		Store_Code as Store,
		Shipping_Country_Code as ShippingCountryCode,
		PRICE_TYPE AS PriceType,
		PROMOTION_LIST as PromoId,
		cast(null as boolean) as IsLegacyOrder,
		'Snowflake' as Source
FROM foj_table R
LEFT JOIN cust_table C ON C.GLOBAL_CUSTOMER_HKEY = R.GLOBAL_CUSTOMER_HKEY
LEFT JOIN hub_table HUB ON HUB.CUSTOMER_HKEY_STRING = C.MPARTICLE_USER_HKEY
LEFT JOIN exchangerate e on e.CalendarDay = R.OrderDate and e.Fromcurrency = 'GBP' and e.ToCurrency = 'USD' and e.ExchangeRateType = 'Z1'
LEFT JOIN exclusion_orders EO ON nvl(split_part(R.Order_Id,'-',1),'x') = EO.OrderId 
							and EO.TransactionType = R.Transaction_Type
							and nvl(eo.Barcode,'x') = nvl(r.Barcode,'x')
where EO.OrderId is null
""")
##foj_res.show(2)

## Union Sales

In [None]:
# Stack the three sources by column name (not position): NewStore, then Snowflake, then ECOM.
df_union = ns_result.unionByName(foj_res)
df_union = df_union.unionByName(ecom_result)

# Expose the combined sales lines to the SQL cells that follow.
df_union.createOrReplaceTempView("df_union_table")

### Customer Type

In [None]:
#### Derive the customer type from each customer's first order timestamp.
# Per MparticleUserId: the earliest OrderDateTime across all unioned sources and the number
# of distinct OrderIds. Nothing is filtered out here, so return lines are included in both.
firstorder_df = spark.sql(""" 
                SELECT
                    MparticleUserId,
                    min(OrderDatetime) AS FirstSalesDateTime,
                    count(distinct OrderId) as OrderCount
                from df_union_table
                group by MparticleUserId
            """)

firstorder_df.createOrReplaceTempView("firstorder_df_table")

# CustomerType per sales line:
#   'New'       - the line is at the customer's first order timestamp, or it is later but the
#                 customer has exactly one distinct order;
#   'Returning' - the line is later than the first order timestamp and the customer has more
#                 than one distinct order;
#   NULL        - no row matched in firstorder_df_table (e.g. MparticleUserId is NULL, since
#                 the equality join never matches NULL keys).
# EffectiveFrom is stamped with the date the notebook runs.
df_cust_type = spark.sql(""" SELECT 
                d.*,
                case 
                    when d.OrderDateTime = f.FirstSalesDateTime  then 'New'
                    when d.OrderDateTime > f.FirstSalesDateTime and OrderCount = 1 then 'New'
                    when d.OrderDateTime > f.FirstSalesDateTime and OrderCount > 1 then 'Returning'
                end as CustomerType,
                current_date() as EffectiveFrom
                FROM df_union_table d 
                LEFT JOIN firstorder_df_table f on d.MparticleUserId = f.MparticleUserId 
            """)

df_cust_type.createOrReplaceTempView("df_cust_type")

### Insider and Fitness instructor flag

In [None]:
# Adds the insider opt-in and fitness-instructor flags from the customer master history.
custmaster_df.createOrReplaceTempView("custmaster_df")

# CTEs:
#   cust_min - history rows numbered per customer by (validfrom, validto); R = 1 is the
#              earliest row and is used for orders placed before any history exists.
#   cust     - the history rows as-is; joined on the validity window that contains the
#              order timestamp (validfrom inclusive, validto exclusive).
#   insider  - earliest OptintoInsiderDate per customer.
# Timestamps are compared as 'yyyy-MM-dd HH:mm:ss' strings, i.e. at whole-second precision.
# Flag precedence:
#   isOptinInsider      - 1 when the order is at/after the earliest opt-in date, otherwise the
#                         value from the matching history row, then the earliest row, then 0.
#   IsFitnessInstructor - matching history row, then the earliest row, then False.
# NOTE(review): if validity windows in the history overlap for a customer, the join to
#   "cust" returns more than one row and duplicates the sales line - confirm windows are disjoint.
df_insider = spark.sql("""
with cust_min as 
(
    select *, row_number() over(partition by mparticleuserkey order by validfrom, validto) as R FROM custmaster_df
),
cust as 
(
    select * from custmaster_df
),
insider as 
(
    select mparticleuserkey, min(OptintoInsiderDate) as OptintoInsiderDate FROM custmaster_df group by mparticleuserkey
)
select 
        s.*,
        i.OptintoInsiderDate,
        coalesce(case when s.OrderDateTime >= i.OptintoInsiderDate then 1 end,c.isOptinInsider,m.isOptinInsider,0) as isOptinInsider,
        coalesce(c.IsFitnessInstructor,m.IsFitnessInstructor,False) as IsFitnessInstructor
from df_cust_type s 
left join cust c on s.mparticleuserid = c.mparticleuserkey and date_format(s.orderdatetime, 'yyyy-MM-dd HH:mm:ss') >= date_format(c.validfrom, 'yyyy-MM-dd HH:mm:ss') and date_format(s.orderdatetime, 'yyyy-MM-dd HH:mm:ss') < date_format(c.validto, 'yyyy-MM-dd HH:mm:ss') 
left join cust_min m on s.mparticleuserid = m.mparticleuserkey and date_format(s.orderdatetime, 'yyyy-MM-dd HH:mm:ss') < date_format(m.validfrom, 'yyyy-MM-dd HH:mm:ss') and m.r = 1 
left join insider i on s.mparticleuserid = i.mparticleuserkey
""")

## Write result into Gold Delta Table

In [None]:
# Replace the contents of target_path with the enriched sales union.
# NOTE(review): the section heading calls this a Delta table and "overwriteSchema" is a
# Delta Lake writer option, yet the format written here is parquet - confirm which format
# downstream readers expect before changing either.
_result_writer = df_insider.write.format("parquet")
_result_writer = _result_writer.mode("overwrite")
_result_writer = _result_writer.option("overwriteSchema", "true")
_result_writer.save(target_path)

## Exit

In [None]:
# Exit the notebook with the value "0" (presumably read as a success code by the caller -
# confirm). The cells after this one are ad hoc checks, not part of the main flow.
mssparkutils.notebook.exit("0")

## Miscellaneous ad hoc code cells

In [None]:
# Ad hoc check: show every customer-typed sales line for one hard-coded mParticle user id.
df_cust_type.createOrReplaceTempView("df_cust_type")

_single_user_sql = """
                SELECT
                    *
                from df_cust_type d
                WHERE  d.mparticleuserid = '6878537199352175983'

"""
df = spark.sql(_single_user_sql)
df.show(n=15, truncate=False)

In [None]:
# Ad hoc check: an alternative customer-type derivation (with a 'Registered' bucket) for one
# hard-coded mParticle user id, shown next to that user's first-order aggregates.
_alt_customer_type_sql = """
                SELECT 
                d.OrderId, d.OrderDateTime, d.IsReturn, f.*,
                case 
                    when d.OrderDateTime < f.FirstSalesDateTime or f.FirstSalesDateTime is null then 'Registered'
                    when d.OrderDateTime = f.FirstSalesDateTime  then 'New'
                    when d.OrderDateTime > f.FirstSalesDateTime then 'Returning'
                end as CustomerType,
                current_date() as EffectiveFrom
                FROM df_union_table d 
                LEFT JOIN firstorder_df_table f on d.MparticleUserId = f.MparticleUserId 
                where d.mparticleuserid = '6878537199352175983'
"""
df = spark.sql(_alt_customer_type_sql)
df.show(n=15, truncate=False)

## 