In [0]:
import pyspark.sql.functions as F
from pyspark.sql.types import ArrayType, StringType

In [0]:
datalake_container = "abfss://datalake@dls0tfm.dfs.core.windows.net"
bronze_path = f"{datalake_container}/bronze/"
silver_path = f"{datalake_container}/silver/"
datasource = 'puerta'
dataset = 'pedidos'
dataset_bronze_path = f"{bronze_path}{datasource}/{dataset}/"

In [0]:
df_pedidos = spark.read.format("delta").load(dataset_bronze_path).orderBy(F.col("_timestamp").desc())

In [0]:
df_pedidos.printSchema

<bound method DataFrame.printSchema of DataFrame[_key: binary, _value: binary, _topic: string, _partition: int, _offset: bigint, _timestamp: timestamp, _timestampType: int, _headers: array<struct<key:string,value:binary>>, _ingested_at: timestamp, id: bigint, mesa: string, comida: string, bebida: string]>

In [0]:
df_pedidos = df_pedidos.select('comida','bebida','_timestamp').withColumn('date',F.col('_timestamp').cast('date')).drop('_timestamp')

In [0]:
comidas = df_pedidos.select(F.col('comida').alias('comanda'), 'date')
bebidas = df_pedidos.select(F.col('bebida').alias('comanda'),'date')


In [0]:
df_pedidos = comidas.unionByName(bebidas)

In [0]:
df_pedidos.printSchema

<bound method DataFrame.printSchema of DataFrame[comanda: string, dia: date]>

In [0]:
df_pedidos = df_pedidos.withColumn('name', F.explode(F.from_json("comanda", ArrayType(StringType())))).drop('comanda')

In [0]:
df_pedidos = df_pedidos.groupBy('date', 'name').agg(F.count('*').cast('integer').alias('quantity')).orderBy(F.desc('date'))

In [0]:
df_art = spark.read.format("delta").load(f'{silver_path}articulos')

In [0]:
df_pedidos = df_pedidos.join(df_art, on = 'name', how = 'left').select('id', 'name', 'quantity', 'preis1', 'date')\
    .withColumnRenamed('id','new_id').withColumnRenamed('preis1', 'unity_price')\
    .withColumns({ 'total_price': F.round(F.col('quantity')*F.col('unity_price'),1), 'processing_date': F.current_date() })\
    .select('new_id', 'name', 'total_price', 'quantity', 'unity_price', 'date', 'processing_date')

In [0]:
df_pedidos.printSchema

<bound method DataFrame.printSchema of DataFrame[new_id: string, name: string, total_price: double, quantity: int, unity_price: double, date: date, processing_date: date]>

In [0]:
#df_ventas = spark.read.format("delta").load(f'{silver_path}ventas')
#df_ventas = df_ventas.union(df_pedidos)
# df_ventas.write.format("delta").mode("overwrite").save(f'{silver_path}ventas')