In [0]:
# Load the bronze players table into a Spark DataFrame.
df = spark.read.table("sleeper.bronze_players")

In [0]:
# Show the contents of the DataFrame currently bound to `df`.
display(df)

In [0]:
%sql

-- Inspect every column of the raw bronze rosters table.
SELECT *
FROM sleeper.bronze_rosters

In [0]:
from pyspark.sql.functions import current_timestamp, expr, explode

In [0]:
# Build one row per (roster, player) from the bronze rosters table.
df = (
    spark.read.table("sleeper.bronze_rosters")
    # One output row per element of the `players` array.
    .withColumn("player_id", explode("players"))
    # True when the exploded player also appears in the `starters` array.
    .withColumn("is_starter", expr("array_contains(starters, player_id)"))
    # Nickname is looked up in `metadata` under the key 'p_nick_<player_id>'.
    .withColumn("player_nickname", expr("metadata['p_nick_' || player_id]"))
    .select(
        "owner_id",
        "roster_id",
        "player_id",
        "is_starter",
        "player_nickname",
        "_league_id",
        "_matchup_week",
        "_year",
        "_ingested_ts",
    )
    # Stamp every row with the time this result was computed.
    .withColumn("_snapshot_ts", current_timestamp())
)

display(df)

In [0]:
%sql

-- Inspect every column of the raw bronze rosters table.
SELECT *
FROM sleeper.bronze_rosters

In [0]:
# Flatten per-roster standings out of the `metadata` and `settings` maps of
# the bronze rosters table.
df = (
    spark.read.table("sleeper.bronze_rosters")
    .withColumn("streak", expr("metadata['streak']"))
    .withColumn("record", expr("metadata['record']"))
    .withColumn("wins", expr("settings['wins']"))
    .withColumn("losses", expr("settings['losses']"))
    .withColumn("ties", expr("settings['ties']"))
    # Totals are combined as whole part + decimal part / 100. NULL propagates
    # through `+` and `/`, so a NULL (or absent) decimal entry used to null
    # out an otherwise valid whole-point total. Coalescing only the decimal
    # part to 0 keeps the total; it is still NULL when the whole part is NULL.
    .withColumn(
        "fpts",
        expr("settings['fpts'] + coalesce(settings['fpts_decimal'], 0) / 100"),
    )
    .withColumn(
        "fpts_against",
        expr(
            "settings['fpts_against'] + coalesce(settings['fpts_against_decimal'], 0) / 100"
        ),
    )
    .withColumn("total_moves", expr("settings['total_moves']"))
    .withColumn("waiver_budget_used", expr("settings['waiver_budget_used']"))
    .withColumn("waiver_position", expr("settings['waiver_position']"))
    # Keep only the flattened columns plus the lineage columns.
    .select(
        "owner_id",
        "roster_id",
        "streak",
        "record",
        "wins",
        "losses",
        "ties",
        "fpts",
        "fpts_against",
        "total_moves",
        "waiver_budget_used",
        "waiver_position",
        "_league_id",
        "_matchup_week",
        "_year",
        "_ingested_ts",
    )
)

display(df)

In [0]:
%sql

-- Inspect every column of the raw bronze matchups table.
SELECT *
FROM sleeper.bronze_matchups

In [0]:
# Roster-level matchup rows: keep the matchup/roster keys, the points scored
# and the lineage columns, then stamp each row with the current timestamp.
df = (
    spark.read.table("sleeper.bronze_matchups")
    .select(
        "matchup_id",
        "roster_id",
        "points",
        "_league_id",
        "_matchup_week",
        "_year",
        "_ingested_ts",
    )
    .withColumn("_snapshot_ts", current_timestamp())
)

In [0]:
%sql

-- Inspect every column of the raw bronze matchups table.
SELECT *
FROM sleeper.bronze_matchups

In [0]:
from pyspark.sql.functions import array_contains, col, explode

# Build one row per (matchup roster, player) from the bronze matchups table.
df = (
    spark.read.table("sleeper.bronze_matchups")
    # One output row per element of the `players` array.
    .withColumn("player_id", explode(col("players")))
    # True when the exploded player also appears in the `starters` array.
    .withColumn("is_starter", array_contains(col("starters"), col("player_id")))
    # Look up this player's entry in `players_points`, keyed by player_id.
    .withColumn("player_points", col("players_points")[col("player_id")])
    .select(
        "roster_id",
        "matchup_id",
        "player_id",
        "player_points",
        "is_starter",
        "_league_id",
        "_matchup_week",
        "_year",
        "_ingested_ts",
    )
)

display(df)

In [0]:
%sql

-- Inspect every column of the raw bronze users table.
SELECT *
FROM sleeper.bronze_users

In [0]:
# Project the bronze users table onto owner-oriented column names in a single
# select: user_id -> owner_id, display_name -> owner_name,
# is_owner -> is_commissioner, and metadata.team_name -> team_name.
df = spark.read.table("sleeper.bronze_users").select(
    col("user_id").alias("owner_id"),
    col("display_name").alias("owner_name"),
    "is_bot",
    col("is_owner").alias("is_commissioner"),
    col("metadata.team_name").alias("team_name"),
    "_league_id",
    "_matchup_week",
    "_year",
    "_ingested_ts",
)

display(df)

In [0]:
i