In [0]:
# Receita média mensal por bairro e tipo de propriedade

def gold_monthly_income_neighbourhood_property(db_name, source_db="projeto_ae"):
    """Build the gold table of monthly average price per neighbourhood and property type.

    Joins the silver listings to their calendar rows and, for every
    (neighbourhood, property_type, 'yyyy-MM' month) group, computes:

    * ``avg_price`` - the average of ``price`` over all calendar rows of the
      group (available or not);
    * ``total_reservations`` - the number of calendar rows whose
      ``available`` flag is False.

    The result overwrites ``<db_name>.gold_monthly_income_neighbourhood_property``.

    Args:
        db_name: Database/schema that receives the gold table.
        source_db: Database/schema holding ``silver_listings`` and
            ``silver_calendar``. Defaults to ``"projeto_ae"``, the value that
            was previously hard-coded in the query.

    Returns:
        None. The function is called for its side effect of writing the table.
    """
    # The statement intentionally has no trailing ';': it is not needed by
    # spark.sql() and older Spark SQL parsers reject it.
    query = f"""
        SELECT
            L.neighbourhood,
            L.property_type,
            DATE_FORMAT(C.date, 'yyyy-MM') AS month,
            AVG(C.price) AS avg_price,
            COUNT(CASE WHEN C.available = False THEN 1 END) AS total_reservations

        FROM {source_db}.silver_listings AS L
            JOIN {source_db}.silver_calendar AS C ON L.id = C.listing_id

        GROUP BY L.neighbourhood, L.property_type, month

        ORDER BY L.neighbourhood, L.property_type, month
    """

    monthly_income = spark.sql(query)
    monthly_income.write.mode("overwrite").saveAsTable(
        f"{db_name}.gold_monthly_income_neighbourhood_property"
    )


In [0]:
# Relação entre a quantidade de reviews e a taxa de ocupação de um imóvel

def gold_reviews_occupancy(db_name, source_db="projeto_ae"):
    """Build the gold table relating review volume to occupancy, per listing.

    For every listing that has both calendar rows and reviews, the output has:

    * ``listing_id``;
    * ``total_reviews`` - number of distinct review ids of the listing;
    * ``occupancy_rate`` - percentage (rounded to 2 decimals) of the
      listing's calendar rows whose ``available`` flag is False.

    The result overwrites ``<db_name>.gold_reviews_occupancy``.

    Args:
        db_name: Database/schema that receives the gold table.
        source_db: Database/schema holding ``silver_calendar`` and
            ``silver_reviews``. Defaults to ``"projeto_ae"``, the value that
            was previously hard-coded in the query.

    Returns:
        None. The function is called for its side effect of writing the table.
    """
    # Calendar and reviews are both "many rows per listing". Joining them
    # directly on listing_id pairs every calendar row with every review row,
    # which multiplies a plain COUNT of reviews by the number of calendar
    # rows. Each side is therefore reduced to one row per listing first, and
    # only then joined (1:1), which also avoids the huge intermediate result.
    query = f"""
        WITH calendar_stats AS (
            SELECT
                listing_id,
                COUNT(CASE WHEN available = False THEN 1 END) AS unavailable_days,
                COUNT(*) AS calendar_days
            FROM {source_db}.silver_calendar
            GROUP BY listing_id
        ),
        review_stats AS (
            SELECT
                listing_id,
                COUNT(DISTINCT id) AS total_reviews
            FROM {source_db}.silver_reviews
            GROUP BY listing_id
        )

        SELECT
            C.listing_id,
            R.total_reviews,
            ROUND((C.unavailable_days / C.calendar_days) * 100, 2) AS occupancy_rate

        FROM calendar_stats AS C
            JOIN review_stats AS R ON C.listing_id = R.listing_id

        ORDER BY C.listing_id
    """

    reviews_occupancy = spark.sql(query)
    reviews_occupancy.write.mode("overwrite").saveAsTable(
        f"{db_name}.gold_reviews_occupancy"
    )

In [0]:
# Bairros com maior número de superhosts e como a taxa de ocupação deles se compara com a de outros hosts

def gold_neighbourhood_superhost(db_name, source_db="projeto_ae"):
    """Build the gold table comparing occupancy by neighbourhood and superhost flag.

    Joins the silver listings to their calendar rows and, for every
    (neighbourhood, host_is_superhost) group, computes:

    * ``superhost_count`` - number of distinct listing ids in the group. Note
      that this is a listing count and is produced for every value of
      ``host_is_superhost``, not only for superhosts; the column name is kept
      unchanged so existing consumers of the table keep working.
    * ``avg_occupancy_rate`` - percentage (rounded to 2 decimals) of the
      group's calendar rows whose ``available`` flag is False.

    The result overwrites ``<db_name>.gold_neighbourhood_superhost``.

    Args:
        db_name: Database/schema that receives the gold table.
        source_db: Database/schema holding ``silver_listings`` and
            ``silver_calendar``. Defaults to ``"projeto_ae"``, the value that
            was previously hard-coded in the query.

    Returns:
        None. The function is called for its side effect of writing the table.
    """
    query = f"""
        SELECT
            L.neighbourhood,
            L.host_is_superhost,
            COUNT(DISTINCT L.id) AS superhost_count,
            ROUND((COUNT(CASE WHEN C.available = False THEN 1 END) / COUNT(*)) * 100, 2) AS avg_occupancy_rate

        FROM {source_db}.silver_listings AS L
            JOIN {source_db}.silver_calendar AS C ON L.id = C.listing_id

        GROUP BY L.neighbourhood, L.host_is_superhost

        ORDER BY L.neighbourhood, L.host_is_superhost
    """

    superhost_stats = spark.sql(query)
    superhost_stats.write.mode("overwrite").saveAsTable(
        f"{db_name}.gold_neighbourhood_superhost"
    )

In [0]:
# Nota média dos imóveis por bairro e tipo de acomodação

def gold_neighbourhood_roomtype_review_scores(db_name, source_db="projeto_ae"):
    """Build the gold table of average review score per neighbourhood and room type.

    For every (neighbourhood, room_type) group, ``avg_review_score`` is the
    average of ``review_scores_rating`` over the listings of the group that
    have at least one row in the reviews table.

    The result overwrites
    ``<db_name>.gold_neighbourhood_roomtype_review_scores``.

    Args:
        db_name: Database/schema that receives the gold table.
        source_db: Database/schema holding ``silver_listings`` and
            ``silver_reviews``. Defaults to ``"projeto_ae"``, the value that
            was previously hard-coded in the query.

    Returns:
        None. The function is called for its side effect of writing the table.
    """
    # The reviews table is only used as a filter ("listing has reviews"); no
    # review column is selected. A LEFT SEMI JOIN keeps each matching listing
    # exactly once, whereas an inner join would repeat a listing once per
    # review and silently weight the average by review count.
    query = f"""
        SELECT
            L.neighbourhood,
            L.room_type,
            AVG(L.review_scores_rating) AS avg_review_score

        FROM {source_db}.silver_listings AS L
            LEFT SEMI JOIN {source_db}.silver_reviews AS R ON L.id = R.listing_id

        GROUP BY L.neighbourhood, L.room_type

        ORDER BY L.neighbourhood, L.room_type
    """

    review_scores = spark.sql(query)
    review_scores.write.mode("overwrite").saveAsTable(
        f"{db_name}.gold_neighbourhood_roomtype_review_scores"
    )

In [0]:
# Impacto da quantidade de comodidades na quantidade de reviews e ocupação dos imóveis

def gold_amenities_occupancy_reviews(db_name, source_db="projeto_ae"):
    """Build the gold table relating amenity count to review volume and occupancy.

    Listings that have both calendar rows and reviews are bucketed by
    ``amenities_count`` (the number of comma-separated items in the
    ``amenities`` column). For every bucket the output has:

    * ``total_reviews`` - sum of the per-listing distinct review counts;
    * ``avg_occupancy_rate`` - percentage (rounded to 2 decimals) of the
      bucket's calendar rows whose ``available`` flag is False.

    The result overwrites ``<db_name>.gold_amenities_occupancy_reviews``.

    Args:
        db_name: Database/schema that receives the gold table.
        source_db: Database/schema holding ``silver_listings``,
            ``silver_calendar`` and ``silver_reviews``. Defaults to
            ``"projeto_ae"``, the value that was previously hard-coded in
            the query.

    Returns:
        None. The function is called for its side effect of writing the table.
    """
    # Calendar and reviews are both "many rows per listing". Joining the two
    # raw tables to listings repeats every calendar row once per review, so
    # listings with many reviews dominate the occupancy ratio of their bucket
    # and the intermediate result grows to days x reviews per listing. Both
    # tables are reduced to one row per listing before the join instead.
    # NOTE(review): total_reviews equals the former COUNT(DISTINCT R.id) as
    # long as a review id belongs to a single listing - confirm for the
    # silver_reviews table.
    query = f"""
        WITH calendar_stats AS (
            SELECT
                listing_id,
                COUNT(CASE WHEN available = False THEN 1 END) AS unavailable_days,
                COUNT(*) AS calendar_days
            FROM {source_db}.silver_calendar
            GROUP BY listing_id
        ),
        review_stats AS (
            SELECT
                listing_id,
                COUNT(DISTINCT id) AS review_count
            FROM {source_db}.silver_reviews
            GROUP BY listing_id
        )

        SELECT
            size(split(L.amenities, ',')) AS amenities_count,
            SUM(R.review_count) AS total_reviews,
            ROUND((SUM(C.unavailable_days) / SUM(C.calendar_days)) * 100, 2) AS avg_occupancy_rate

        FROM {source_db}.silver_listings AS L
            JOIN calendar_stats AS C ON L.id = C.listing_id
            JOIN review_stats AS R ON L.id = R.listing_id

        GROUP BY amenities_count

        ORDER BY amenities_count
    """

    amenities_stats = spark.sql(query)
    amenities_stats.write.mode("overwrite").saveAsTable(
        f"{db_name}.gold_amenities_occupancy_reviews"
    )
