Steps:
- Create a temp view (`temp_fmovarti`) from a Spark DataFrame
- Query the Lakehouse over the same time frame

In [None]:
from pyspark.sql.functions import to_date, date_format, current_timestamp, col, concat_ws

# Parquet extract that is read into the DataFrame below.
file_path = "abfss://b2c899fb-e571-4496-aebf-c7a23083635a@onelake.dfs.fabric.microsoft.com/a00cf91a-f92e-498a-9f14-ba10221fb05a/Files/User Created Data/fmovarti_hist_api"

# Rename the raw source columns, split the two timestamps into separate
# date and time columns, then add a composite key and a load timestamp.
df = (
    spark.read.parquet(file_path)
    .select(
        col("ATMSEQ").alias("seq_per_store"),
        col("TCLAVE").alias("store_id_from"),
        col("ICLAVE").alias("product_id"),
        # TODO (original author): supplier_id needs to be calculated based on
        # the prov_something table and the date.
        col("PCLAVE").alias("supplier_id"),
        col("ATMREF").alias("store_id_to"),
        col("ATMDESC").alias("movement_id"),
        col("ATMCANT").alias("qty"),
        col("ATMCOSTO").alias("cost_amt"),
        # Sign of the source amount is flipped here.
        # TODO (original author): case statement for switching the sales
        # amount to positive.
        (-1 * col("ATMVENTA")).alias("sale_amt"),
        to_date(col("ATMFECHA")).alias("movement_date"),
        date_format(col("ATMFECHA"), "HH:mm:ss").alias("movement_time"),
        to_date(col("UDDATE")).alias("src_update_date"),
        date_format(col("UDDATE"), "HH:mm:ss").alias("src_update_time"),
    )
    # src_pk = "<seq_per_store>_<store_id_from>_<product_id>"; the SQL cells
    # match on this key.
    .withColumn(
        "src_pk",
        concat_ws("_", col("seq_per_store"), col("store_id_from"), col("product_id")),
    )
    .withColumn("fabric_load_date", current_timestamp())
    # Keeps rows whose derived movement_date is on or before the cutoff. This
    # filters the aliased date column, not the raw ATMFECHA timestamp.
    # NOTE(review): the SQL cells below query movement_date = 2025-02-24, which
    # this cutoff excludes - confirm which date is intended.
    .filter(col("movement_date") <= "2025-01-27")
)

# createOrReplaceTempView keeps this cell re-runnable: createTempView raises
# when a view with the same name already exists in the session.
df.createOrReplaceTempView('temp_fmovarti')

In [None]:
%%sql
-- Total sale_amt from the temp view (mt) next to the total from the matching
-- fmovarti_test rows (tm), matched on src_pk, for 'VT' movements on 2025-02-24.
-- NOTE(review): the cell that builds temp_fmovarti keeps only
-- movement_date <= '2025-01-27', so this date matches no rows unless that
-- cutoff is changed - confirm the intended date.
-- NOTE(review): if src_pk is not unique in fmovarti_test, the join repeats
-- view rows and inflates sale_amt_1 - confirm uniqueness.
SELECT
    SUM(mt.sale_amt) AS sale_amt_1,
    SUM(tm.sale_amt) AS sale_amt_2
FROM temp_fmovarti AS mt
LEFT JOIN fmovarti_test AS tm
    ON tm.src_pk = mt.src_pk
WHERE mt.movement_id = 'VT'
    AND mt.movement_date = DATE '2025-02-24'
    

In [None]:
%%sql
-- Fetch the fmovarti_test rows for a fixed list of src_pk values.
SELECT *
FROM fmovarti_test
WHERE src_pk IN (
    '1051936564_1401_13505',
    '1051937080_1401_12068',
    '1051065674_1757_13505',
    '1051066325_1757_12068',
    '1051936966_1401_21745',
    '1051936678_1401_21743'
)

In [None]:
%%sql
-- Per src_update_date, count the temp-view rows whose src_pk has no match in
-- fmovarti_test; dates with the fewest unmatched rows come first.
-- The anti join keeps only view rows that have no matching fmovarti_test row.
SELECT
    mt.src_update_date,
    COUNT(mt.src_pk)
FROM temp_fmovarti AS mt
LEFT ANTI JOIN fmovarti_test AS tm
    ON tm.src_pk = mt.src_pk
GROUP BY mt.src_update_date
ORDER BY COUNT(mt.src_pk);


In [None]:
%%sql
-- Fetch the temp-view row(s) for one src_pk.
SELECT *
FROM temp_fmovarti
WHERE src_pk = '1047066188_2126_14930'

In [None]:
%%sql
-- Side-by-side sale_amt totals: temp view (mt) versus the fmovarti_test rows
-- (tm) that share its src_pk, limited to 'VT' movements dated 2025-02-24.
-- NOTE(review): temp_fmovarti is built with movement_date <= '2025-01-27', so
-- this date returns no rows unless that cutoff is changed - confirm.
SELECT
    SUM(mt.sale_amt) AS sale_amt_1,
    SUM(tm.sale_amt) AS sale_amt_2
FROM temp_fmovarti AS mt
LEFT JOIN fmovarti_test AS tm
    ON tm.src_pk = mt.src_pk
WHERE mt.movement_date = DATE '2025-02-24'
    AND mt.movement_id = 'VT'
    


In [None]:
%%sql
-- Builds fact_sales_monthly_by_group: qty, cost and sale totals per origin
-- store, product group, destination store, movement type and month, for
-- movement types 'VT', 'ME' and 'DC'.
--
-- CREATE OR REPLACE keeps the cell re-runnable; a plain CREATE TABLE fails
-- once the table exists. Re-running overwrites the existing table.
--
-- The store ids are grouped by the same CAST expressions that are selected,
-- so raw values that cast to the same int cannot produce duplicate keys.
--
-- NOTE(review): movement_day holds the first day of the month (DATE_TRUNC
-- 'MONTH'); the column name is kept so the table schema does not change.
-- NOTE(review): the inner join drops movements whose product_id has no match
-- in dimproducts - confirm that is intended.
CREATE OR REPLACE TABLE fact_sales_monthly_by_group
USING DELTA TBLPROPERTIES("delta.parquet.vorder.enabled" = "true")
AS
SELECT
    CAST(store_id_from AS int) AS store_id_from,
    p.group_name,
    CAST(store_id_to AS int) AS store_id_to,
    movement_id,
    CAST(DATE_TRUNC('MONTH', movement_date) AS date) AS movement_day,
    SUM(qty) AS total_qty,
    SUM(cost_amt) AS total_cost_amt,
    SUM(sale_amt) AS total_sale_amt
FROM DEV_SLV_LH.fmovarti_test fm
JOIN DEV_SLV_LH.dimproducts p
    ON fm.product_id = p.src_product_id
WHERE movement_id IN ('VT', 'ME', 'DC')
GROUP BY
    CAST(store_id_from AS int),
    p.group_name,
    CAST(store_id_to AS int),
    movement_id,
    DATE_TRUNC('MONTH', movement_date)


StatementMeta(, 5be538ef-73a7-45aa-bebc-d37422afb098, 19, Finished, Available, Finished)

<Spark SQL result set with 0 rows and 0 fields>

In [2]:
%%sql
-- List movement_key with its Movement_Description__English_ value.
SELECT
    movement_key,
    Movement_Description__English_
FROM dimmovements

StatementMeta(, 83bc4430-ca31-4600-9cbb-91234b092b9f, 3, Finished, Available, Finished)

<Spark SQL result set with 12 rows and 2 fields>