In [0]:
import pyspark.sql.functions as F
from pyspark.sql.window import Window

In [0]:
path_silver = 'abfss://datalake@dls0tfm.dfs.core.windows.net/silver/'
path_gold = 'abfss://datalake@dls0tfm.dfs.core.windows.net/gold/'

In [0]:
df_dias = spark.read.format('delta').load(f'{path_gold}dias')
df_empanadas = spark.read.format('delta').load(f'{path_gold}empanadas')
df_empanadas_pivot = spark.read.format('delta').load(f'{path_gold}empanadas_pivot')
df_octavos = spark.read.format('delta').load(f'{path_gold}octavos')
df_vinos = spark.read.format('delta').load(f'{path_silver}vinos')
df_ventas_con_grupo = spark.read.format('delta').load(f'{path_gold}ventas_con_grupo')

### Ganancias

In [0]:
df_ganancias_dia = df_dias.select('fecha','ganancia').orderBy(F.col('fecha').desc()).cache()

In [0]:
df_ganancias_dia.display()

fecha,ganancia
2025-07-31,2108.1
2025-07-30,1961.0
2025-07-29,2036.5
2025-07-28,1510.1
2025-07-26,2085.6
2025-07-25,1579.2
2025-07-24,1617.6
2025-07-23,1983.6
2025-07-22,2654.8
2025-07-21,1670.2


In [0]:
df_ganancias_mes = df_ganancias_dia.groupBy(F.date_format('fecha','yyyy-MM').alias('mes')).agg(F.round(F.sum('ganancia'),1).alias('ganancia_mes')).orderBy(F.desc('mes')).cache()

In [0]:
df_ganancias_mes.display()

mes,ganancia_mes
2025-07,54450.0
2025-06,52991.4
2025-05,54458.5
2025-04,51802.9
2025-03,52473.4
2025-02,49902.9
2025-01,45216.3
2024-12,42482.1
2024-11,54482.0
2024-10,55723.0


In [0]:
df_ganancias_mes.groupBy(F.date_format('mes','yyyy-Q').alias('trimestre')).agg(F.round(F.sum('ganancia_mes'),1).alias('ganancia_trimestre')).orderBy(F.desc('trimestre')).display()

trimestre,ganancia_trimestre
2025-3,54450.0
2025-2,159252.8
2025-1,147592.6
2024-4,152687.1
2024-3,152254.0
2024-2,157771.8
2024-1,138467.5
2023-4,131782.5
2023-3,134144.6
2023-2,136092.1


In [0]:
# dinero medio por día de la semana

df_dias.groupBy('dia_semana').agg(F.round(F.avg('ganancia'),1).alias('ganancia_media')).orderBy(F.desc('ganancia_media')).display()


dia_semana,ganancia_media
6,1847.2
7,1808.5
5,1590.2
4,1569.4
3,1509.5
2,1396.2
1,-4.7


In [0]:
df_dias.filter(F.col('fecha') >= F.to_date(F.lit('2023-01-01'))).groupBy('dia_semana').agg(F.round(F.avg('ganancia'),1).alias('ganancia_media_desde_2023')).orderBy(F.desc('ganancia_media_desde_2023')).display()

dia_semana,ganancia_media_desde_2023
6,2132.9
7,2125.3
5,1873.8
4,1845.1
3,1818.7
2,1682.5
1,-9.4


### Vino

In [0]:
df_vinos.groupBy('Tipo').count().display()

Tipo,count
Blanco,17
Tinto,58
Rosado,7


In [0]:
df_octavos.groupBy('date','Region').agg(F.round(F.sum('octavos'),1).alias('vasos')).orderBy(F.col('date').desc(),F.col('vasos').desc()).display()

date,Region,vasos
2025-07-31,Rioja,42.0
2025-07-31,Ribera del Duero,9.0
2025-07-31,Penedes,7.6
2025-07-31,Rias Baixas,1.0
2025-07-31,Bierzo,1.0
2025-07-31,Priorat,1.0
2025-07-30,Rioja,10.0
2025-07-30,Ribera del Duero,5.6
2025-07-30,Argentina,2.0
2025-07-30,Cataluña,1.0


In [0]:
df_octavos.groupBy(F.date_format('date', 'yyyy-MM').alias('mes'),'Region').agg(F.round(F.sum('octavos'),1).alias('vasos_mes')).orderBy(F.col('mes').desc(),F.col('vasos_mes').desc()).display()

mes,Region,vasos_mes
2025-07,Rioja,768.0
2025-07,Portugal,115.2
2025-07,Rueda,107.0
2025-07,Ribera del Duero,106.4
2025-07,Toro,54.8
2025-07,Penedes,48.4
2025-07,Navarra,42.4
2025-07,Rias Baixas,40.4
2025-07,Priorat,33.2
2025-07,Cataluña,22.8


In [0]:
df_octavos.groupBy(F.date_format('date', 'yyyy-MM').alias('mes'),'Region').agg(F.round(F.avg('octavos'),1).alias('media_mensual_vasos')).orderBy(F.col('mes').desc(),F.col('media_mensual_vasos').desc()).display()

mes,Region,media_mensual_vasos
2025-07,Portugal,6.8
2025-07,Cataluña,5.7
2025-07,Rueda,5.6
2025-07,Navarra,5.3
2025-07,Rioja,5.2
2025-07,Rias Baixas,3.7
2025-07,Ribera del Duero,3.2
2025-07,Priorat,3.0
2025-07,Penedes,2.8
2025-07,Argentina,2.6


In [0]:
df_octavos.groupBy(F.date_format('date', 'yyyy-MM').alias('mes'),'Tipo').agg(F.round(F.avg('octavos'),1).alias('media_mensual_vasos')).orderBy(F.col('mes').desc(),F.col('media_mensual_vasos').desc()).display()

mes,Tipo,media_mensual_vasos
2025-07,Blanco,5.0
2025-07,Rosado,3.9
2025-07,Tinto,3.9
2025-06,Blanco,5.0
2025-06,Tinto,4.3
2025-06,Rosado,4.0
2025-05,Blanco,5.3
2025-05,Rosado,4.1
2025-05,Tinto,4.0
2025-04,Blanco,4.9


In [0]:
df_octavos.groupBy(F.date_format('date', 'yyyy-MM').alias('mes'),F.col('name').alias('nombre')).agg(F.round(F.sum('octavos'),1).alias('vasos')).orderBy(F.col('mes').desc(),F.col('vasos').desc()).display()

mes,nombre,vasos
2025-07,d moreno crianza,202.6
2025-07,d moreno blanco,146.6
2025-07,vinho verde,114.2
2025-07,altanza blanco,82.8
2025-07,celeste,66.6
2025-07,zinio,58.8
2025-07,d moreno reserva,48.2
2025-07,vina esmeralda,44.4
2025-07,ramon bilbao cr,43.8
2025-07,laureatus,40.4


In [0]:
df_octavos.groupBy(F.date_format('date', 'yyyy').alias('ano'),F.col('name').alias('nombre')).agg(F.round(F.sum('octavos'),1).alias('octavos')).orderBy(F.col('ano').desc(),F.col('octavos').desc()).display()

ano,nombre,octavos
2025,d moreno crianza,1978.6
2025,d moreno blanco,1291.2
2025,d moreno reserva,495.8
2025,altanza blanco,445.4
2025,zinio,414.0
2025,vina esmeralda,370.8
2025,vinho verde,323.4
2025,ramon bilbao cr,319.4
2025,d moreno rosado,311.4
2025,tres matas,278.2


In [0]:
df_octavos.groupBy(F.date_format('date', 'yyyy').alias('ano'),'Region').agg(F.round(F.sum('octavos'),1).alias('vasos_ano')).orderBy(F.col('ano').desc(),F.col('vasos_ano').desc()).display()

ano,Region,vasos_ano
2025,Rioja,6614.4
2025,Ribera del Duero,1027.4
2025,Rueda,663.8
2025,Toro,449.6
2025,Portugal,434.0
2025,Penedes,411.6
2025,Priorat,347.0
2025,Navarra,276.0
2025,Rias Baixas,245.6
2025,Castilla,221.2


### Empanadas

In [0]:
df_empanadas_pivot.columns

['date',
 '4_quesos',
 'arabe',
 'arvejas',
 'atun',
 'caprese',
 'carne',
 'ceb',
 'cir_pan',
 'espinaca',
 'hongos',
 'humita',
 'jyq',
 'lentejas',
 'picante',
 'pollo',
 'dia_semana',
 'mes',
 'ano',
 'tipo',
 'processing_date']

In [0]:
cols_to_sum = [c for c in df_empanadas_pivot.columns if c not in ('date','dia_semana','mes','tipo','processing_date')]

In [0]:
# empanadas por mes
df_empanadas_pivot.groupBy(F.date_format('date','yyyy-MM').alias('mes')).agg(*[F.sum(c).alias(c) for c in cols_to_sum]).orderBy(F.col('mes').desc()).display()


mes,4_quesos,arabe,arvejas,atun,caprese,carne,ceb,cir_pan,espinaca,hongos,humita,jyq,lentejas,picante,pollo,ano
2025-07,116,107,41,90,144,181,85,82,146,126,95,117,79,137,142,54675
2025-06,97,94,30,85,118,166,79,75,124,118,80,114,77,117,152,50625
2025-05,148,117,39,105,151,190,113,114,158,127,138,147,93,135,157,52650
2025-04,105,107,41,74,147,125,107,96,142,100,88,109,86,131,135,52650
2025-03,130,112,45,91,120,138,99,105,129,123,83,117,84,120,131,52650
2025-02,130,116,36,74,116,138,96,132,126,84,87,101,73,112,143,48600
2025-01,98,106,29,66,92,132,56,68,114,77,70,82,59,98,100,46575
2024-12,68,63,40,57,79,68,57,35,72,81,52,65,57,67,74,36432
2024-11,141,114,48,87,112,152,93,102,130,132,85,91,87,120,124,52624
2024-10,132,93,31,116,147,158,90,97,115,109,86,124,74,106,137,54648


In [0]:
# empanadas por trimestre
df_empanadas_pivot.groupBy(F.date_format('date','yyyy-Q').alias('trimestre')).agg(*[F.sum(c).alias(c) for c in cols_to_sum]).orderBy(F.col('trimestre').desc()).display()

trimestre,4_quesos,arabe,arvejas,atun,caprese,carne,ceb,cir_pan,espinaca,hongos,humita,jyq,lentejas,picante,pollo,ano
2025-3,116,107,41,90,144,181,85,82,146,126,95,117,79,137,142,54675
2025-2,350,318,110,264,416,481,299,285,424,345,306,370,256,383,444,155925
2025-1,358,334,110,231,328,408,251,305,369,284,240,300,216,330,374,147825
2024-4,341,270,119,260,338,378,240,234,317,322,223,280,218,293,335,143704
2024-3,415,316,133,264,463,484,291,254,405,377,334,367,256,375,445,157872
2024-2,376,327,126,252,372,452,273,282,413,362,282,307,264,308,360,155848
2024-1,281,228,118,217,346,322,200,177,337,286,209,200,240,254,260,151800
2023-4,281,235,116,192,299,307,197,199,268,270,194,203,198,229,228,145656
2023-3,351,325,137,242,381,427,259,193,383,373,284,268,250,314,348,155771
2023-2,347,338,178,207,390,506,267,208,393,369,253,297,310,283,383,155771


### Ventas productos

In [0]:
df_ventas_con_grupo.groupBy(F.date_format('date','yyyy-MM').alias('mes')).agg(F.round(F.sum('total_price'),1).alias('ganancia'),F.round(F.sum(F.when(F.col('new_id')==713, F.col('total_price')).otherwise(0)),1).alias('ganancia_berenjenas'),F.round(F.sum(F.when(F.col('new_id')==718, F.col('total_price')).otherwise(0)),1).alias('ganancia_pepinos')).orderBy(F.desc('mes')).withColumn('porcentaje_berenjenas', F.round(F.col('ganancia_berenjenas')/F.col('ganancia')*100,2)).withColumn('porcentaje_pepinos', F.round(F.col('ganancia_pepinos')/F.col('ganancia')*100,2)).display()

mes,ganancia,ganancia_berenjenas,ganancia_pepinos,porcentaje_berenjenas,porcentaje_pepinos
2025-07,54449.9,279.5,245.9,0.51,0.45
2025-06,52991.4,361.4,223.3,0.68,0.42
2025-05,54458.2,254.8,231.0,0.47,0.42
2025-04,51802.6,288.6,228.8,0.56,0.44
2025-03,52473.3,237.9,204.6,0.45,0.39
2025-02,49902.8,312.0,209.0,0.63,0.42
2025-01,45216.2,239.2,151.8,0.53,0.34
2024-12,42482.0,182.0,121.0,0.43,0.28
2024-11,54481.8,254.8,158.4,0.47,0.29
2024-10,55722.7,247.0,180.4,0.44,0.32


In [0]:
df_ventas_con_grupo.groupBy('date','nombre_wgr').agg(F.round(F.sum('total_price'),1).alias('ganancia_por_grupo')).orderBy(F.desc('date'),F.desc('ganancia_por_grupo')).display()

date,nombre_wgr,ganancia_por_grupo
2025-07-31,tapas calientes,777.2
2025-07-31,tapas frias,469.7
2025-07-31,"wein, rot",289.9
2025-07-31,empanadas,217.1
2025-07-31,bier,133.8
2025-07-31,"wein, weiss",102.1
2025-07-31,alkoholfrei,76.9
2025-07-31,spirituosen,36.6
2025-07-31,kaffee,4.8
2025-07-31,nachrichten,0.0
