In [0]:
from pyspark.sql.functions import count, year, desc, col, when, lit, concat_ws

#### Create games dataframe from delta table

In [0]:
# Load the processed Steam games table into a DataFrame.
games_table_name = "steam_processed.games"
games_df = spark.read.table(games_table_name)

In [0]:
# Preview the raw games DataFrame to check the columns used below.
display(games_df)

#### Select wanted columns

In [0]:
# Keep the game id, the three platform flags, and the year extracted from
# the release date (exposed as "release_year").
selected_games_df = games_df.select(
    col("appid"),
    year(col("release_date")).alias("release_year"),
    col("windows"),
    col("mac"),
    col("linux"),
)

#### Count games by PC platform combination and release year

In [0]:
# Count games per (windows, mac, linux, release_year) combination.
#
# Each platform flag is first turned into a label: the platform's name when the
# flag is true, NULL otherwise (`when` without `otherwise` yields NULL).
# Labelling *before* grouping matters: grouping on the raw flags keeps false
# and NULL values in separate groups that both end up with a NULL label, which
# would emit several rows for the same platform combination and year.
pc_platforms = ["windows", "mac", "linux"]

labelled_games_df = selected_games_df.select(
    "appid",
    "release_year",
    *[when(col(platform), lit(platform)).alias(platform) for platform in pc_platforms],
)

games_by_pc_platforms = (
    labelled_games_df
    .groupBy(*pc_platforms, "release_year")
    .agg(count("appid").alias("game_count"))  # counts rows with a non-null appid
)

In [0]:
# Build a comma-separated "platforms" label from the per-platform name columns
# (concat_ws skips NULL inputs), then sort the rows by release year.
platform_label = concat_ws(",", col("windows"), col("mac"), col("linux"))
games_by_pc_platforms = (
    games_by_pc_platforms
    .withColumn("platforms", platform_label)
    .orderBy("release_year")
)

In [0]:
# Keep only the columns that are written to the presentation table.
presentation_columns = ["platforms", "release_year", "game_count"]
games_by_pc_platforms = games_by_pc_platforms.select(*presentation_columns)

In [0]:
# Preview the aggregated result before it is written out.
display(games_by_pc_platforms)

#### Write to delta table

In [0]:
# Save the aggregated counts as a Delta table, overwriting any existing data.
(
    games_by_pc_platforms.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("steam_presentation.games_by_pc_platforms")
)

#### Test table

In [0]:
%sql
-- Check the written table: counts per platform combination for release years
-- from 2010 through the current year (both inclusive), smallest counts first.
SELECT
  platforms,
  release_year,
  game_count
FROM steam_presentation.games_by_pc_platforms
WHERE release_year >= 2010
  AND release_year <= date_part('year', CURRENT_DATE)
ORDER BY game_count ASC;