In [None]:
# Databricks notebook source

# MAGIC %md
# MAGIC # Camada Silver
# MAGIC
# MAGIC Lendo os dados da camada Bronze, aplicando transformações, limpeza e enriquecimento.

# COMMAND ----------

# DBTITLE 1,Definir o caminho da Camada Silver
# Root location of the Silver layer; each table below is saved to
# f"{silver_path}/<table name>" as Delta files.
silver_path = "/mnt/silver/movies_db"

# COMMAND ----------

# DBTITLE 1,Carregar as tabelas da camada Bronze
def _read_bronze(table_name):
    """Return the registered table `table_name` as a DataFrame, read as Delta."""
    return spark.read.format("delta").table(table_name)


# One DataFrame per Bronze table consumed by the Silver layer.
df_movies_bronze = _read_bronze("bronze.movies")
df_genres_bronze = _read_bronze("bronze.genres")
df_directors_bronze = _read_bronze("bronze.directors")
df_actors_bronze = _read_bronze("bronze.actors")
df_countries_bronze = _read_bronze("bronze.countries")

# COMMAND ----------

# DBTITLE 1,Tabela de Filmes (Movies) - Limpeza e Transformação
from pyspark.sql.functions import col, current_timestamp
from pyspark.sql.types import IntegerType, FloatType

# Movies table: cast the numeric columns to explicit Spark types, stamp every
# row with the processing time, persist the result as Delta files and register
# it as silver.movies.
movies_silver_path = f"{silver_path}/movies"

df_movies_silver = (
    df_movies_bronze
    .withColumn("year", col("year").cast(IntegerType()))
    .withColumn("runtime_mins", col("runtime_mins").cast(IntegerType()))
    .withColumn("imdb_rating", col("imdb_rating").cast(FloatType()))
    .withColumn("rotten_tomatoes_pct", col("rotten_tomatoes_pct").cast(IntegerType()))
    .withColumn("metacritic_score", col("metacritic_score").cast(IntegerType()))
    .withColumn("oscars_won", col("oscars_won").cast(IntegerType()))
    .withColumn("box_office_million", col("box_office_million").cast(FloatType()))
    .withColumn("processed_at", current_timestamp())
)

df_movies_silver.write.format("delta").mode("overwrite").save(movies_silver_path)

# CREATE TABLE raises when the target schema is missing, so create it first.
# Both statements are idempotent and safe to re-run.
spark.sql("CREATE SCHEMA IF NOT EXISTS silver")
spark.sql(
    f"CREATE TABLE IF NOT EXISTS silver.movies USING DELTA LOCATION '{movies_silver_path}'"
)

# Show what was actually persisted. Displaying df_movies_silver directly would
# execute the lazy plan a second time and evaluate current_timestamp() again,
# so the processed_at values on screen would not match the stored ones.
display(spark.read.format("delta").load(movies_silver_path))


# COMMAND ----------

# DBTITLE 1,Demais tabelas da camada Silver (sem grandes transformações neste exemplo)
# In this example the remaining tables need no significant transformation and
# are copied from Bronze to Silver as they are.
# In a real scenario this is where string cleanup, normalization, etc. would go.
passthrough_tables = (
    ("genres", df_genres_bronze),
    ("directors", df_directors_bronze),
    ("actors", df_actors_bronze),
    ("countries", df_countries_bronze),
)

for table_name, bronze_df in passthrough_tables:
    target_path = f"{silver_path}/{table_name}"
    # Overwrite the Delta files first, then register the table over that location.
    bronze_df.write.format("delta").mode("overwrite").save(target_path)
    spark.sql(
        f"CREATE TABLE IF NOT EXISTS silver.{table_name} USING DELTA LOCATION '{target_path}'"
    )