Download External Packages

In [0]:
%pip install yt_dlp

Python interpreter will be restarted.
Collecting yt_dlp
  Downloading yt_dlp-2025.2.19-py3-none-any.whl (3.2 MB)
Installing collected packages: yt-dlp
Successfully installed yt-dlp-2025.2.19
Python interpreter will be restarted.


Import Libraries

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import Row
import pandas as pd
from pyspark.sql.types import StructType, StructField, StringType, TimestampType
from datetime import datetime
pd.set_option('display.max_colwidth', None)

Create Spark Session

In [0]:
spark = SparkSession.builder \
    .appName("AudioDownloader") \
    .getOrCreate()

Establish Song List and Show in DF

In [0]:
# Song list
songs = [
    "Luther Kendrick Lamar & SZA",
    "Die With A Smile Lady Gaga & Bruno Mars",
    "Not Like Us Kendrick Lamar",
    "A Bar (Tipsy) Shaboozey",
    "TV Off Kendrick Lamar Featuring Lefty Gunplay",
    "APT. ROSE & Bruno Mars",
    "Pink Pony Club Chappell Roan",
    "Nokia Drake",
    "Birds Of A Feather Billie Eilish",
    "Lose Control Teddy Swims"
]

# Create Spark DataFrame
songs_df = spark.createDataFrame([Row(song_name=s) for s in songs])
songs_df.show(truncate=False)

+---------------------------------------------+
|song_name                                    |
+---------------------------------------------+
|Luther Kendrick Lamar & SZA                  |
|Die With A Smile Lady Gaga & Bruno Mars      |
|Not Like Us Kendrick Lamar                   |
|A Bar (Tipsy) Shaboozey                      |
|TV Off Kendrick Lamar Featuring Lefty Gunplay|
|APT. ROSE & Bruno Mars                       |
|Pink Pony Club Chappell Roan                 |
|Nokia Drake                                  |
|Birds Of A Feather Billie Eilish             |
|Lose Control Teddy Swims                     |
+---------------------------------------------+



Create Directories for Data

In [0]:
dbutils.fs.mkdirs("dbfs:/FileStore/cookies_info/")
dbutils.fs.mkdirs("dbfs:/FileStore/bronze/mp3/")

Out[58]: True

Cookies File for Import

In [0]:
cookies_df = spark.read.text("dbfs:/FileStore/cookies_info/youtube_com_cookies.txt")
cookies_df.show()

+--------------------+
|               value|
+--------------------+
|# Netscape HTTP C...|
|# http://curl.hax...|
|# This is a gener...|
|                    |
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|consent.youtube.c...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
|.youtube.com\tTRU...|
+--------------------+
only showing top 20 rows



Function to download songs (Would be UDF but library requires internet access)

In [0]:
def download_song(song_name):
    try:
        ydl_opts = {
            'format': 'bestaudio/best',
            'default_search': 'ytsearch1',
            'noplaylist': True,
            'outtmpl': f'{download_path}/%(title)s.%(ext)s',
            'cookiefile': '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }],
            'quiet': False
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(song_name, download=True)
            title = info.get("title", "unknown")
            filepath = f"dbfs:/FileStore/bronze/mp3/{title}.mp3"
            return "success", filepath

    except Exception as e:
        return f"failed: {str(e)}", None

Download Songs & Create DataFrame for the Logs

In [0]:
schema = StructType([
    StructField("song_name", StringType(), True),
    StructField("download_status", StringType(), True),
    StructField("download_path", StringType(), True),
    StructField("download_time", StringType(), True)
])

# Collect logs
download_logs = []

for row in songs_df.toLocalIterator():
    song = row['song_name']
    status, path = download_song(song)
    download_logs.append((song, status, path, str(datetime.now())))

# Create DataFrame with schema
logs_df = spark.createDataFrame(download_logs, schema=schema)

# Preview
logs_df.show(truncate=False)

[generic] Extracting URL: Luther Kendrick Lamar & SZA
[youtube:search] Extracting URL: ytsearch1:Luther Kendrick Lamar & SZA
[download] Downloading playlist: Luther Kendrick Lamar & SZA
[youtube:search] query "Luther Kendrick Lamar & SZA": Downloading web client config
[youtube:search] query "Luther Kendrick Lamar & SZA" page 1: Downloading API JSON
[youtube:search] Playlist Luther Kendrick Lamar & SZA: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=HfWLgELllZs
[youtube] HfWLgELllZs: Downloading webpage
[youtube] HfWLgELllZs: Downloading tv client config
[youtube] HfWLgELllZs: Downloading player 7d1d50a6
[youtube] HfWLgELllZs: Downloading tv player API JSON
[youtube] HfWLgELllZs: Downloading ios player API JSON


ERROR: [youtube] HfWLgELllZs: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: Die With A Smile Lady Gaga & Bruno Mars
[youtube:search] Extracting URL: ytsearch1:Die With A Smile Lady Gaga & Bruno Mars
[download] Downloading playlist: Die With A Smile Lady Gaga & Bruno Mars
[youtube:search] query "Die With A Smile Lady Gaga & Bruno Mars": Downloading web client config
[youtube:search] query "Die With A Smile Lady Gaga & Bruno Mars" page 1: Downloading API JSON
[youtube:search] Playlist Die With A Smile Lady Gaga & Bruno Mars: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=kPa7bsKwL-c
[youtube] kPa7bsKwL-c: Downloading webpage
[youtube] kPa7bsKwL-c: Downloading tv client config
[youtube] kPa7bsKwL-c: Downloading player 7d1d50a6
[youtube] kPa7bsKwL-c: Downloading tv player API JSON
[youtube] kPa7bsKwL-c: Downloading ios player API JSON


ERROR: [youtube] kPa7bsKwL-c: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: Not Like Us Kendrick Lamar
[youtube:search] Extracting URL: ytsearch1:Not Like Us Kendrick Lamar
[download] Downloading playlist: Not Like Us Kendrick Lamar
[youtube:search] query "Not Like Us Kendrick Lamar": Downloading web client config
[youtube:search] query "Not Like Us Kendrick Lamar" page 1: Downloading API JSON
[youtube:search] Playlist Not Like Us Kendrick Lamar: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=H58vbez_m4E
[youtube] H58vbez_m4E: Downloading webpage
[youtube] H58vbez_m4E: Downloading tv client config
[youtube] H58vbez_m4E: Downloading player 7d1d50a6
[youtube] H58vbez_m4E: Downloading tv player API JSON
[youtube] H58vbez_m4E: Downloading ios player API JSON


ERROR: [youtube] H58vbez_m4E: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: A Bar (Tipsy) Shaboozey
[youtube:search] Extracting URL: ytsearch1:A Bar (Tipsy) Shaboozey
[download] Downloading playlist: A Bar (Tipsy) Shaboozey
[youtube:search] query "A Bar (Tipsy) Shaboozey": Downloading web client config
[youtube:search] query "A Bar (Tipsy) Shaboozey" page 1: Downloading API JSON
[youtube:search] Playlist A Bar (Tipsy) Shaboozey: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=t7bQwwqW-Hc
[youtube] t7bQwwqW-Hc: Downloading webpage
[youtube] t7bQwwqW-Hc: Downloading tv client config
[youtube] t7bQwwqW-Hc: Downloading player 7d1d50a6
[youtube] t7bQwwqW-Hc: Downloading tv player API JSON
[youtube] t7bQwwqW-Hc: Downloading ios player API JSON


ERROR: [youtube] t7bQwwqW-Hc: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: TV Off Kendrick Lamar Featuring Lefty Gunplay
[youtube:search] Extracting URL: ytsearch1:TV Off Kendrick Lamar Featuring Lefty Gunplay
[download] Downloading playlist: TV Off Kendrick Lamar Featuring Lefty Gunplay
[youtube:search] query "TV Off Kendrick Lamar Featuring Lefty Gunplay": Downloading web client config
[youtube:search] query "TV Off Kendrick Lamar Featuring Lefty Gunplay" page 1: Downloading API JSON
[youtube:search] Playlist TV Off Kendrick Lamar Featuring Lefty Gunplay: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=XIwrwOEx5i8
[youtube] XIwrwOEx5i8: Downloading webpage
[youtube] XIwrwOEx5i8: Downloading tv client config
[youtube] XIwrwOEx5i8: Downloading player 7d1d50a6
[youtube] XIwrwOEx5i8: Downloading tv player API JSON
[youtube] XIwrwOEx5i8: Downloading ios player API JSON


ERROR: [youtube] XIwrwOEx5i8: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: APT. ROSE & Bruno Mars
[youtube:search] Extracting URL: ytsearch1:APT. ROSE & Bruno Mars
[download] Downloading playlist: APT. ROSE & Bruno Mars
[youtube:search] query "APT. ROSE & Bruno Mars": Downloading web client config
[youtube:search] query "APT. ROSE & Bruno Mars" page 1: Downloading API JSON
[youtube:search] Playlist APT. ROSE & Bruno Mars: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=ekr2nIex040
[youtube] ekr2nIex040: Downloading webpage
[youtube] ekr2nIex040: Downloading tv client config
[youtube] ekr2nIex040: Downloading player 7d1d50a6
[youtube] ekr2nIex040: Downloading tv player API JSON
[youtube] ekr2nIex040: Downloading ios player API JSON


ERROR: [youtube] ekr2nIex040: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: Pink Pony Club Chappell Roan
[youtube:search] Extracting URL: ytsearch1:Pink Pony Club Chappell Roan
[download] Downloading playlist: Pink Pony Club Chappell Roan
[youtube:search] query "Pink Pony Club Chappell Roan": Downloading web client config
[youtube:search] query "Pink Pony Club Chappell Roan" page 1: Downloading API JSON
[youtube:search] Playlist Pink Pony Club Chappell Roan: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=GR3Liudev18
[youtube] GR3Liudev18: Downloading webpage
[youtube] GR3Liudev18: Downloading tv client config
[youtube] GR3Liudev18: Downloading player 7d1d50a6
[youtube] GR3Liudev18: Downloading tv player API JSON
[youtube] GR3Liudev18: Downloading ios player API JSON


ERROR: [youtube] GR3Liudev18: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: Nokia Drake
[youtube:search] Extracting URL: ytsearch1:Nokia Drake
[download] Downloading playlist: Nokia Drake
[youtube:search] query "Nokia Drake": Downloading web client config
[youtube:search] query "Nokia Drake" page 1: Downloading API JSON
[youtube:search] Playlist Nokia Drake: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=YAaIgrWtRYk
[youtube] YAaIgrWtRYk: Downloading webpage
[youtube] YAaIgrWtRYk: Downloading tv client config
[youtube] YAaIgrWtRYk: Downloading player 7d1d50a6
[youtube] YAaIgrWtRYk: Downloading tv player API JSON
[youtube] YAaIgrWtRYk: Downloading ios player API JSON


ERROR: [youtube] YAaIgrWtRYk: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: Birds Of A Feather Billie Eilish
[youtube:search] Extracting URL: ytsearch1:Birds Of A Feather Billie Eilish
[download] Downloading playlist: Birds Of A Feather Billie Eilish
[youtube:search] query "Birds Of A Feather Billie Eilish": Downloading web client config
[youtube:search] query "Birds Of A Feather Billie Eilish" page 1: Downloading API JSON
[youtube:search] Playlist Birds Of A Feather Billie Eilish: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=V9PVRfjEBTI
[youtube] V9PVRfjEBTI: Downloading webpage
[youtube] V9PVRfjEBTI: Downloading tv client config
[youtube] V9PVRfjEBTI: Downloading player e011b4d7
[youtube] V9PVRfjEBTI: Downloading tv player API JSON
[youtube] V9PVRfjEBTI: Downloading ios player API JSON


ERROR: [youtube] V9PVRfjEBTI: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


[generic] Extracting URL: Lose Control Teddy Swims
[youtube:search] Extracting URL: ytsearch1:Lose Control Teddy Swims
[download] Downloading playlist: Lose Control Teddy Swims
[youtube:search] query "Lose Control Teddy Swims": Downloading web client config
[youtube:search] query "Lose Control Teddy Swims" page 1: Downloading API JSON
[youtube:search] Playlist Lose Control Teddy Swims: Downloading 1 items of 1
[download] Downloading item 1 of 1
[youtube] Extracting URL: https://www.youtube.com/watch?v=9gWIIIr2Asw
[youtube] 9gWIIIr2Asw: Downloading webpage
[youtube] 9gWIIIr2Asw: Downloading tv client config
[youtube] 9gWIIIr2Asw: Downloading player 7d1d50a6
[youtube] 9gWIIIr2Asw: Downloading tv player API JSON
[youtube] 9gWIIIr2Asw: Downloading ios player API JSON


ERROR: [youtube] 9gWIIIr2Asw: Sign in to confirm you’re not a bot. Use --cookies-from-browser or --cookies for the authentication. See  https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp  for how to manually pass cookies. Also see  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for tips on effectively exporting YouTube cookies


+---------------------------------------------+---------------------------------------------------------------------------------------------------+-------------+--------------------------+
|song_name                                    |download_status                                                                                    |download_path|download_time             |
+---------------------------------------------+---------------------------------------------------------------------------------------------------+-------------+--------------------------+
|Luther Kendrick Lamar & SZA                  |failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt'|null         |2025-03-18 10:57:34.901873|
|Die With A Smile Lady Gaga & Bruno Mars      |failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt'|null         |2025-03-18 10:57:37.363779|
|Not Like Us Kendrick Lamar                   |failed: 

In [0]:
logs_df.toPandas()

Unnamed: 0,song_name,download_status,download_path,download_time
0,Luther Kendrick Lamar & SZA,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:34.901873
1,Die With A Smile Lady Gaga & Bruno Mars,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:37.363779
2,Not Like Us Kendrick Lamar,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:39.603613
3,A Bar (Tipsy) Shaboozey,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:41.632012
4,TV Off Kendrick Lamar Featuring Lefty Gunplay,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:43.525207
5,APT. ROSE & Bruno Mars,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:45.735223
6,Pink Pony Club Chappell Roan,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:47.621698
7,Nokia Drake,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:49.620030
8,Birds Of A Feather Billie Eilish,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:51.727375
9,Lose Control Teddy Swims,failed: [Errno 2] No such file or directory: '/dbfs/FileStore/cookies_info/youtube_com_cookies.txt',,2025-03-18 10:57:53.805194


Push Logs to DBFS

In [0]:
logs_df.write.mode("overwrite").option("header", True).csv("dbfs:/FileStore/bronze/download_logs/")