In [0]:
from pyspark.sql.functions import from_json, col, coalesce, lit
from pyspark.sql.types import *

In [0]:
# Load the bronze-layer round scores, the raw input of this notebook.
df_bronze = spark.read.table("project_data_football_bronze.pontuacao_rodada")

# Load the silver-layer round dimension.
df_dim_rodada = spark.read.table("project_data_football_silver.dim_rodada")

In [0]:
%sql
-- Preview the contents of the silver round dimension.
SELECT
  *
FROM
  project_data_football_silver.dim_rodada

In [0]:
%sql
-- Preview the raw bronze round scores.
SELECT
  *
FROM
  project_data_football_bronze.pontuacao_rodada

In [0]:
%sql
-- Show the column names and data types of the bronze source table.
DESCRIBE TABLE project_data_football_bronze.pontuacao_rodada

In [0]:
# Schema for the scout JSON: one nullable long field per scout code.
schema_scout = StructType(
    [
        StructField(scout_code, LongType(), True)
        for scout_code in ("G", "A", "SG", "CA", "CV", "FS", "FC")
    ]
)

In [0]:
# Parse the JSON text held in `scout` into a struct column, so each counter
# can be addressed as `scout_struct.<code>`.
scout_parsed = from_json(col("scout"), schema_scout)
df_bronze = df_bronze.withColumn("scout_struct", scout_parsed)

In [0]:
# Output column name for each scout code read from `scout_struct`.
scout_aliases = {
    "G": "gols",
    "A": "assistencias",
    "SG": "saldo_gols",
    "CA": "cartoes_amarelos",
    "CV": "cartoes_vermelhos",
    "FS": "faltas_sofridas",
    "FC": "faltas_cometidas",
}

# Null counters are replaced by 0 so every scout column is always populated.
scout_columns = [
    coalesce(col(f"scout_struct.{code}"), lit(0)).alias(alias)
    for code, alias in scout_aliases.items()
]

# Project the identifying columns, the flattened scout counters and the
# remaining bronze columns, in that order.
df_silver = df_bronze.select(
    "atleta_id",
    "clube_id",
    "rodada",
    "pontuacao",
    *scout_columns,
    "entrou_em_campo",
    "dt_ingestao",
)

In [0]:
# Enrich the scores with the round dimension keys (rodada_id, campeonato_id).
#
# This is a LEFT join, so scores whose round has no row in dim_rodada are kept
# with null dimension keys. The round number is therefore taken from the score
# side: reading it from dim_rodada.numero_rodada would null it out on exactly
# those unmatched rows, leaving them with no round identifier at all. It is
# cast to the dimension column's type so that `rodada` keeps the data type it
# had when it was sourced from numero_rodada.
#
# NOTE(review): the join key is the round number alone. If dim_rodada holds the
# same numero_rodada under more than one campeonato_id, score rows will be
# duplicated by this join — confirm the dimension's grain.
# NOTE: this cell rebinds df_silver; re-run the previous cell before re-running
# this one, otherwise the dimension is joined a second time.
rodada_type = df_dim_rodada.schema["numero_rodada"].dataType

df_silver = df_silver.join(
    df_dim_rodada,
    df_silver["rodada"] == df_dim_rodada["numero_rodada"],
    "left"
).select(
    "rodada_id",
    "campeonato_id",
    "atleta_id",
    "clube_id",
    df_silver["rodada"].cast(rodada_type).alias("rodada"),
    "pontuacao",
    "gols",
    "assistencias",
    "saldo_gols",
    "cartoes_amarelos",
    "cartoes_vermelhos",
    "faltas_sofridas",
    "faltas_cometidas",
    "entrou_em_campo",
    "dt_ingestao"
)

In [0]:
# Preview the joined result.
display(df_silver)

In [0]:
# Save the result as a Delta table. The write uses overwrite mode with the
# mergeSchema option turned on.
fato_writer = (
    df_silver.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
)
fato_writer.saveAsTable("project_data_football_silver.fato_pontuacao")

In [0]:
%sql
-- Read back the silver fact table to inspect its contents.
SELECT
  *
FROM
  project_data_football_silver.fato_pontuacao