In [0]:
# ---- Notebook configuration ----
# Unity Catalog catalog and schema used by the rest of this notebook.
CATALOG = "music_demo"
BRONZE_SCHEMA = "bronze"

# Directory in the Unity Catalog Volume (landing schema) where the source CSVs were uploaded.
RAW_BASE_PATH = "/Volumes/music_demo/landing/raw_files/music_discovery_demo"

# Echo the active settings, one per line, so they show up in the cell output.
print(
    f"CATALOG: {CATALOG}",
    f"SCHEMA : {BRONZE_SCHEMA}",
    f"RAW_BASE_PATH: {RAW_BASE_PATH}",
    sep="\n",
)


In [0]:
# Order matters: the catalog has to exist and be selected before the schema
# is created inside it and made the current schema.
bootstrap_statements = (
    f"CREATE CATALOG IF NOT EXISTS {CATALOG}",
    f"USE CATALOG {CATALOG}",
    f"CREATE SCHEMA IF NOT EXISTS {BRONZE_SCHEMA}",
    f"USE {BRONZE_SCHEMA}",
)

# Run the statements in sequence, keeping each result.
bootstrap_results = [spark.sql(statement) for statement in bootstrap_statements]

# Leave the final statement's result as the cell value, as a trailing spark.sql(...) call would.
bootstrap_results[-1]

In [0]:
# Bronze tables mirror the raw CSV layout; every column is kept as STRING.
# Maps table name -> ordered column names.
bronze_table_columns = {
    "artists": [
        "artist_id",
        "artist_name",
        "genre",
        "country_of_origin",
        "debut_year",
    ],
    "tracks": [
        "track_id",
        "artist_id",
        "track_title",
        "release_date",
        "primary_genre",
    ],
    "daily_metrics": [
        "date",
        "platform",
        "region",
        "artist_id",
        "track_id",
        "streams",
        "views",
        "likes",
        "comments",
        "shares",
        "followers_gained",
        "rank_estimate",
    ],
}

# Reset every bronze table before it is (re)loaded. The load cell runs COPY INTO
# with 'force'='true', which re-ingests the CSVs on every run, so any table that
# is not reset here would accumulate duplicate rows each time the notebook is
# re-run. Previously only tracks and daily_metrics were dropped; artists is now
# handled the same way.
for table_name, column_names in bronze_table_columns.items():
    column_ddl = ",\n".join(f"  {column_name} STRING" for column_name in column_names)
    spark.sql(f"DROP TABLE IF EXISTS {table_name}")
    spark.sql(f"CREATE TABLE IF NOT EXISTS {table_name} (\n{column_ddl}\n) USING DELTA")


In [0]:
def _copy_into_statement(table_name):
    """Return the COPY INTO statement that loads `<table_name>.csv` into `table_name`.

    The CSV is read from RAW_BASE_PATH with a header row. 'force'='true' is
    passed so the file is loaded even if COPY INTO has ingested it before.
    """
    return f"""
COPY INTO {table_name}
FROM '{RAW_BASE_PATH}/{table_name}.csv'
FILEFORMAT = CSV
FORMAT_OPTIONS('header' = 'true')
COPY_OPTIONS('force'='true')
"""


# One load per bronze table; each table is fed by the CSV of the same name.
spark.sql(_copy_into_statement("artists"))
spark.sql(_copy_into_statement("tracks"))
spark.sql(_copy_into_statement("daily_metrics"))


In [0]:
# Row counts for each bronze table, each shown under its own column alias.
row_count_aliases = (
    ("artists", "artists_rows"),
    ("tracks", "tracks_rows"),
    ("daily_metrics", "metrics_rows"),
)
for table_name, count_alias in row_count_aliases:
    display(spark.sql(f"SELECT COUNT(*) AS {count_alias} FROM {table_name}"))

# Small samples of the two dimension-style tables.
for table_name in ("artists", "tracks"):
    display(spark.sql(f"SELECT * FROM {table_name} LIMIT 5"))

# Ten metrics rows with the greatest `date` values.
# NOTE(review): `date` is declared as STRING in the bronze DDL, so this is a
# string sort; it matches chronological order only if the CSV dates are in an
# ISO-like (YYYY-MM-DD) format — confirm against the source data.
latest_metrics_sql = """
SELECT date, platform, region, streams, views
FROM daily_metrics
ORDER BY date DESC
LIMIT 10
"""
display(spark.sql(latest_metrics_sql))