In [0]:
# Read the Silver-layer Delta table and expose it to Spark SQL as a temp view.
silver_path = "dbfs:/Volumes/nycproject/default/silver/nyc_taxi"
delta_reader = spark.read.format("delta")
df = delta_reader.load(silver_path)
df.createOrReplaceTempView("nyc_taxi_analytics")

# Revenue by payment type: per payment_type, the number of trips and the sum
# of total_amount rounded to 2 decimals, highest revenue first.
query = """
SELECT 
  payment_type,
  COUNT(*) AS total_viagens,
  ROUND(SUM(total_amount), 2) AS faturado
FROM nyc_taxi_analytics
GROUP BY payment_type
ORDER BY faturado DESC
"""
revenue_by_payment = spark.sql(query)
display(revenue_by_payment)

In [0]:
# Register the Silver-layer Delta table as the temp view queried below.
silver_path = "dbfs:/Volumes/nycproject/default/silver/nyc_taxi"
silver_reader = spark.read.format("delta")
df_silver = silver_reader.load(silver_path)
df_silver.createOrReplaceTempView("nyc_taxi_analytics")

# gold_viz_df: one row per payment_type with its trip count (total_viagens).
# No ORDER BY here; consumers sort as needed.
trips_by_payment_sql = """
SELECT 
  payment_type,
  COUNT(*) AS total_viagens
FROM nyc_taxi_analytics
GROUP BY payment_type
"""
gold_viz_df = spark.sql(trips_by_payment_sql)


In [0]:
import seaborn as sns
import matplotlib.pyplot as plt
from pyspark.sql.functions import col

# Convert the aggregated result to pandas, largest trip count first.
pandas_df = gold_viz_df.orderBy(col("total_viagens").desc()).toPandas()

# A null payment_type cannot be used as an axis category, so keep such rows out
# of the chart. Re-index so that row position == bar position.
plot_df = pandas_df.dropna(subset=["payment_type"]).reset_index(drop=True)
bar_order = plot_df["payment_type"].tolist()

# Chart
plt.figure(figsize=(10, 6))
# Pin the bar order explicitly. Without `order`, seaborn sorts numeric category
# values, which would not match the descending-count row order used for the
# value labels below and would put counts above the wrong bars.
barplot = sns.barplot(
    data=plot_df,
    x="payment_type",
    y="total_viagens",
    order=bar_order,
)

# Trip count above each bar. The gap is expressed in points rather than data
# units so it looks the same whatever the magnitude of the counts.
for position, trips in enumerate(plot_df["total_viagens"]):
    barplot.annotate(
        str(int(trips)),
        xy=(position, trips),
        xytext=(0, 3),
        textcoords="offset points",
        color='black', ha="center", va="bottom", fontsize=10,
    )

# Legend used as a key: payment code -> description (labels shown in Portuguese).
legenda_pt = {
    "1": "Cartão de crédito",
    "2": "Dinheiro",
    "3": "Sem custo",
    "4": "Disputa",
    "5": "Desconhecido",
    "6": "Viagem anulada"
}
# Proxy handles: the entries explain the codes on the x axis, they are not tied
# to the colours of the bars.
handles = [plt.Line2D([0], [0], color='gray', lw=4, label=f"{k} = {v}")
           for k, v in legenda_pt.items()]
# Anchor the legend outside the axes (to the right) so it does not cover bars.
plt.legend(handles=handles, title="Tipo de Pagamento", bbox_to_anchor=(1.05, 1), loc='upper left')

# Titles, axis labels and layout
plt.title("Total de Viagens por Tipo de Pagamento")
plt.xlabel("Código do Pagamento")
plt.ylabel("Total de Viagens")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
