 # SunoAI Reddit Downloader



 This notebook downloads songs from the SunoAI subreddit based on the JSONL data.

In [1]:
# Install required packages if needed
!pip install yt-dlp requests tqdm pandas matplotlib seaborn ipywidgets jupyter

Collecting yt-dlp
  Using cached yt_dlp-2025.2.19-py3-none-any.whl.metadata (171 kB)
Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
Collecting matplotlib
  Using cached matplotlib-3.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting ipywidgets
  Using cached ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting jupyter
  Using cached jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (35 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-no

In [2]:
import os
import re
import time
import json
from typing import Optional, Dict, List, Any, Union
from pathlib import Path
import requests
import pandas as pd
from urllib.parse import urlparse, parse_qs
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
# Replace youtube-dl with yt-dlp
import yt_dlp as youtube_dl
from tqdm.notebook import tqdm
from IPython.display import Audio, display, Video


 ## Define the Downloader Class

In [3]:
class SunoDownloader:
    def __init__(
        self, output_dir: Union[str, Path] = "downloads", skip_existing: bool = True
    ):
        """
        Initialize the downloader with an output directory.

        Args:
            output_dir: Directory to save downloads
            skip_existing: If True, skip downloads that already exist
        """
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)
        self.skip_existing = skip_existing

        # Create subdirectories for different sources
        self.dirs: Dict[str, Path] = {
            "reddit": self.output_dir / "reddit",
            "youtube": self.output_dir / "youtube",
            "suno": self.output_dir / "suno",
            "spotify": self.output_dir / "spotify",
            "soundcloud": self.output_dir / "soundcloud",
            "others": self.output_dir / "others",
        }

        for directory in self.dirs.values():
            directory.mkdir(exist_ok=True)

        # Set up a requests session with retries
        self.session = requests.Session()
        retries = Retry(
            total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504]
        )
        self.session.mount("http://", HTTPAdapter(max_retries=retries))
        self.session.mount("https://", HTTPAdapter(max_retries=retries))
        self.session.headers.update(
            {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
            }
        )

        # youtube-dl options
        self.ydl_opts = {
            "format": "bestaudio/best",
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": "mp3",
                    "preferredquality": "192",
                }
            ],
            "quiet": True,
            "no_warnings": True,
            "nocheckcertificate": True,
            "ignoreerrors": True,
            "no_color": True,
            "geo_bypass": True,
            "retries": 10,
            "fragment_retries": 10,
        }

    def sanitize_filename(self, filename: str) -> str:
        """Sanitize a filename to remove invalid characters."""
        # Replace invalid characters with underscores
        return re.sub(r'[\\/*?:"<>|]', "_", filename)

    def check_existing_file(self, filepath: Path) -> Optional[Path]:
        """
        Check if a file already exists and return it if skip_existing is True.

        Args:
            filepath: Path to check

        Returns:
            Path if file exists and should be skipped, None otherwise
        """
        if filepath.exists() and self.skip_existing:
            print(f"  Found existing file: {filepath}, skipping download")
            return filepath
        return None

    def download_reddit_video(self, post_data: Dict[str, Any]) -> Optional[Path]:
        """Download a Reddit video using direct download."""
        post_id = post_data["id"]
        title = self.sanitize_filename(post_data.get("title", post_id))
        filename = f"{post_id}_{title[:50]}.mp4"
        filepath = self.dirs["reddit"] / filename

        # Check if file already exists
        existing = self.check_existing_file(filepath)
        if existing:
            return existing

        try:
            # Direct download if we have video information
            if (
                post_data.get("is_video", False)
                and post_data.get("secure_media")
                and post_data["secure_media"].get("reddit_video")
            ):
                video_url = post_data["secure_media"]["reddit_video"].get(
                    "fallback_url"
                )
                if not video_url:
                    print(f"  No fallback URL found in post data")
                    return None

                print(f"  Downloading directly: {video_url}")
                # For reddit videos, download directly
                response = self.session.get(video_url, stream=True)
                if response.status_code == 200:
                    print(f"  Direct download successful, saving to: {filepath}")
                    with open(filepath, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                    return filepath
                else:
                    print(
                        f"  Direct download failed with status code: {response.status_code}"
                    )
            else:
                print(f"  No video information found in post data")

        except Exception as e:
            print(f"  Error downloading Reddit video: {e}")

        return None

    def download_youtube_video(self, url: str, post_id: str) -> Optional[Path]:
        """Download a YouTube video as MP3 using yt-dlp."""
        filename = f"{self.sanitize_filename(post_id)}"
        filepath = self.dirs["youtube"] / filename

        # Check if file already exists
        existing = self.check_existing_file(filepath)
        if existing:
            return existing

        try:
            # Configure yt-dlp with more robust options
            ydl_opts = {
                "format": "bestaudio/best",
                "postprocessors": [
                    {
                        "key": "FFmpegExtractAudio",
                        "preferredcodec": "mp3",
                        "preferredquality": "192",
                    }
                ],
                "outtmpl": str(filepath),
                "quiet": True,
                "no_warnings": True,
                # Add options to work around common YouTube restrictions
                "nocheckcertificate": True,
                "ignoreerrors": True,
                "no_color": True,
                # Handle geo-restrictions
                "geo_bypass": True,
                # More connection retries
                "retries": 10,
                "fragment_retries": 10,
            }

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
                # Check if the file was actually created
                if filepath.exists():
                    return filepath
        except Exception as e:
            print(f"Error downloading YouTube video {url}: {e}")

        return None

    def extract_script_content(self, html):
        """Extract JavaScript content from Suno webpage."""
        pattern = r'<script>\s*self\.__next_f\.push\(\[.*?,"(.*?)"\]\s*\)</script>'
        matches = re.findall(pattern, html, re.DOTALL)
        unified_content = (
            "".join(matches)
            .replace(r"\"", '"')
            .replace(r"\\n", "\n")
            .replace(r"\\t", "\t")
        )
        # Decode all UTF-8 encoded sequences
        unified_content = unified_content.encode().decode("unicode_escape")
        return unified_content

    def extract_json(self, unified_content):
        """Extract JSON data from the script content."""
        # Extract the main JSON block
        main_pattern = r'{"clip":{.*?}}'
        main_match = re.search(main_pattern, unified_content, re.DOTALL)
        if main_match:
            json_str = main_match.group(0)

            try:
                json_data = json.loads(json_str)
                return json_data
            except json.JSONDecodeError:
                # Attempt to clean the JSON string and retry
                json_str_cleaned = re.sub(r"[\x00-\x1f\x7f]", "", json_str)
                try:
                    json_data = json.loads(json_str_cleaned)
                    return json_data
                except json.JSONDecodeError as e:
                    print(f"Failed to parse JSON after cleaning: {e}")
                    return None
        return None

    def download_suno_audio(self, url: str, post_id: str) -> Optional[Path]:
        """
        Download audio from Suno.ai using the simplified method.

        Args:
            url: Original Suno URL
            post_id: Post ID for the filename

        Returns:
            Path to the downloaded file or None if failed
        """
        # Create a filename based on the post ID
        filename = f"{post_id}.mp3"
        filepath = self.dirs["suno"] / filename

        # Check if file already exists
        existing = self.check_existing_file(filepath)
        if existing:
            return existing

        try:
            # Extract the song ID from the URL
            # URL pattern: https://suno.com/song/{song_id}
            parsed_url = urlparse(url)
            path_parts = parsed_url.path.strip("/").split("/")

            # If the URL format is as expected
            if len(path_parts) >= 2 and path_parts[0] == "song":
                song_id = path_parts[1]
                # Construct the direct CDN URL
                cdn_url = f"https://cdn1.suno.ai/{song_id}.mp3"

                print(f"  Using direct CDN URL: {cdn_url}")

                # Download the audio file
                response = self.session.get(cdn_url, stream=True)
                if response.status_code == 200:
                    print(f"  Downloading audio to: {filepath}")
                    with open(filepath, "wb") as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)
                    return filepath
                else:
                    print(f"  Failed to download audio: HTTP {response.status_code}")
                    print(f"  URL attempted: {cdn_url}")
            else:
                # If we can't extract the song ID from the URL, try to get it from the URL itself
                match = re.search(
                    r"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})",
                    url,
                )
                if match:
                    song_id = match.group(1)
                    cdn_url = f"https://cdn1.suno.ai/{song_id}.mp3"

                    print(f"  Using direct CDN URL (from regex): {cdn_url}")

                    # Download the audio file
                    response = self.session.get(cdn_url, stream=True)
                    if response.status_code == 200:
                        print(f"  Downloading audio to: {filepath}")
                        with open(filepath, "wb") as f:
                            for chunk in response.iter_content(chunk_size=8192):
                                f.write(chunk)
                        return filepath
                    else:
                        print(
                            f"  Failed to download audio: HTTP {response.status_code}"
                        )
                        print(f"  URL attempted: {cdn_url}")
                else:
                    print(f"  Could not extract Suno song ID from URL: {url}")
        except Exception as e:
            print(f"  Error downloading Suno audio {url}: {e}")

        return None

    def download_soundcloud(self, url: str, post_id: str) -> Optional[Path]:
        """Download audio from SoundCloud."""
        filename = f"{post_id}.mp3"
        filepath = self.dirs["soundcloud"] / filename

        # Check if file already exists
        existing = self.check_existing_file(filepath)
        if existing:
            return existing

        try:
            # Configure youtube-dl for SoundCloud
            ydl_opts = self.ydl_opts.copy()
            ydl_opts["outtmpl"] = str(filepath)

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([url])
                return filepath
        except Exception as e:
            print(f"Error downloading SoundCloud audio {url}: {e}")

        return None

    def download_generic_url(
        self, url: str, post_id: str, domain: str
    ) -> Optional[Path]:
        """Download from a generic URL."""
        parsed_url = urlparse(url)
        filename = Path(parsed_url.path).name
        if not filename or "." not in filename:
            filename = f"{post_id}.mp3"

        domain_dir = self.dirs.get(domain, self.dirs["others"])
        filepath = domain_dir / filename

        # Check if file already exists
        existing = self.check_existing_file(filepath)
        if existing:
            return existing

        try:
            response = self.session.get(url, stream=True)
            if response.status_code == 200:
                with open(filepath, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                return filepath
        except Exception as e:
            print(f"Error downloading from URL {url}: {e}")

        return None


def download_songs_from_dataframe(
    df: pd.DataFrame,
    output_dir: Union[str, Path] = "downloads",
    max_items: Optional[int] = None,
    skip_existing: bool = True,
) -> pd.DataFrame:
    """
    Process a dataframe of Suno AI posts and download all songs.

    Args:
        df: Pandas DataFrame with Suno AI posts
        output_dir: Directory to save downloads
        max_items: Maximum number of items to download (for testing)
        skip_existing: If True, skip downloads that already exist

    Returns:
        Updated DataFrame with download paths
    """
    downloader = SunoDownloader(output_dir=output_dir, skip_existing=skip_existing)

    # Create a new column for download paths
    df["download_path"] = None
    # Create a column for download status
    df["download_status"] = None

    # Filter to keep only rows that might have audio
    audio_domains = [
        "v.redd.it",
        "youtube.com",
        "suno.com",
        "cdn1.suno.ai",
        "soundcloud.com",
        "spotify.com",
        "open.spotify.com",
    ]
    potential_audio = df[df["domain_unified"].isin(audio_domains) | df["is_video"]]

    # Limit number of items if specified
    if max_items and max_items > 0:
        potential_audio = potential_audio.head(max_items)

    # Download each post
    for idx, row in tqdm(potential_audio.iterrows(), total=len(potential_audio)):
        post_id = row["id"]
        title = row.get("title", "No title")
        url = row.get("url", "No URL")
        domain = row.get("domain_unified", "Unknown domain")
        permalink = row.get("permalink", None)

        # Construct Reddit URL if permalink exists
        reddit_url = f"https://reddit.com{permalink}" if permalink else "No Reddit URL"

        print(f"Processing [{post_id}] - Domain: {domain}")
        print(f"  Title: {title}")
        print(f"  URL: {url}")
        print(f"  Reddit URL: {reddit_url}")

        # Check if the URL is valid
        if not url or url == "No URL":
            status = "Skipped: No valid URL found"
            print(f"  Status: {status}")
            df.at[idx, "download_status"] = status
            continue

        # Determine appropriate downloader for this domain
        if domain == "v.redd.it":
            print(f"  Using: Reddit video downloader")
            download_path = downloader.download_reddit_video(row)
        elif domain == "youtube.com":
            print(f"  Using: YouTube downloader")
            download_path = downloader.download_youtube_video(url, post_id)
        elif domain in ["suno.com", "cdn1.suno.ai"]:
            print(f"  Using: Suno audio downloader")
            download_path = downloader.download_suno_audio(url, post_id)
        elif domain == "soundcloud.com":
            print(f"  Using: SoundCloud downloader")
            download_path = downloader.download_soundcloud(url, post_id)
        else:
            print(f"  Using: Generic URL downloader for {domain}")
            download_path = downloader.download_generic_url(url, post_id, domain)

        # Record the download path and status
        if download_path:
            if skip_existing and "skipping download" in str(download_path):
                status = "Skipped: File already exists"
            else:
                status = f"Downloaded to: {download_path}"
            df.at[idx, "download_path"] = str(download_path)
        else:
            status = "Failed: Download was not successful"

        df.at[idx, "download_status"] = status
        print(f"  Status: {status}")
        print("-" * 80)

        # Sleep to avoid rate limiting
        time.sleep(0.5)

    # Print summary of downloads
    success = df["download_path"].notna().sum()
    failed = len(potential_audio) - success

    print("\nDownload Summary:")
    print(f"  Total processed: {len(potential_audio)}")
    print(f"  Successfully downloaded: {success} ({success/len(potential_audio):.1%})")
    print(f"  Failed: {failed} ({failed/len(potential_audio):.1%})")

    # Group by status for more detailed summary
    if "download_status" in df.columns:
        status_counts = df["download_status"].value_counts()
        print("\nStatus breakdown:")
        for status, count in status_counts.items():
            print(f"  {status}: {count}")

    return df

 ## Load Data and Unify Domains

In [4]:
# Load the JSONL file
input_path = Path('r_sunoai_posts.jsonl')
df = pd.read_json(input_path, lines=True)

# Filter by flairs
interested_flairs = ["Song - Audio Upload", "Song - Human Written Lyrics", "Song", "Meme Song"]
ai_songs = df[df["link_flair_text"].isin(interested_flairs)]
print(f"Found {len(ai_songs)} posts with song flairs")

# Unify domains
def unify_domain(domain: str) -> str:
    if not domain:
        return "N/A"
    d = domain.lower().strip()
    # unify youtube
    if d in ["youtube.com", "youtu.be", "m.youtube.com", "music.youtube.com"]:
        return "youtube.com"
    # unify soundcloud
    if d in ["soundcloud.com", "m.soundcloud.com", "on.soundcloud.com"]:
        return "soundcloud.com"
    # unify X/Twitter
    if d == "x.com":
        return "twitter.com"
    # handle empty domains
    if not d:
        return "N/A"
    # for everything else, just return as is
    return d

ai_songs["domain_unified"] = ai_songs["domain"].apply(unify_domain)

# Display domain counts
ai_songs["domain_unified"].value_counts().head(10)


Found 19025 posts with song flairs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ai_songs["domain_unified"] = ai_songs["domain"].apply(unify_domain)


domain_unified
youtube.com         6509
self.sunoai         4591
suno.com            3142
v.redd.it            929
N/A                  229
open.spotify.com     167
i.redd.it            148
soundcloud.com       110
cdn1.suno.ai          22
reddit.com            14
Name: count, dtype: int64

 ## Download Songs (Small Sample)

In [5]:
# Download a small test set first to make sure everything works
sample_size = 5  # Start with a small sample
output_dir = Path("downloads")

# Download sample
sample_df = download_songs_from_dataframe(ai_songs.copy(), output_dir=output_dir, max_items=sample_size)

# Print statistics
success_count = sample_df['download_path'].notna().sum()
print(f"Downloaded {success_count} out of {sample_size} songs ({success_count/sample_size:.2%})")

  0%|          | 0/5 [00:00<?, ?it/s]

Processing [1c6q7op] - Domain: suno.com
  Title: Made a song about being a plant parent. I hope you'll enjoy :)
  URL: https://suno.com/song/b99e8ada-0a7f-496b-ab32-d94b45f4032e
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c6q7op/made_a_song_about_being_a_plant_parent_i_hope/
  Using: Suno audio downloader
  Using direct CDN URL: https://cdn1.suno.ai/b99e8ada-0a7f-496b-ab32-d94b45f4032e.mp3
  Downloading audio to: downloads/suno/1c6q7op.mp3
  Status: Downloaded to: downloads/suno/1c6q7op.mp3
--------------------------------------------------------------------------------
Processing [1c6qa6o] - Domain: /r/sunoai/comments/1c6qa6o/i_made_a_song_about_being_a_plant_parent_i_hope/
  Title: I made a song about being a plant parent. I hope you'll enjoy! :)
  URL: https://v.redd.it/ml7yym0cx4vc1
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c6qa6o/i_made_a_song_about_being_a_plant_parent_i_hope/
  Using: Generic URL downloader for /r/sunoai/comments/1c6qa6o/i_made_a_song_about_bei

 ## Play a Downloaded Audio File

In [6]:
# Play one of the downloaded files if available
successful_downloads = sample_df[sample_df['download_path'].notna()]
if len(successful_downloads) > 0:
    sample_path = Path(successful_downloads.iloc[0]['download_path'])
    print(f"Playing: {sample_path}")
    if "mp3" not in sample_path.suffix:
        display(Video(sample_path))
    else:
        display(Audio(sample_path))
else:
    print("No successful downloads to play")


Playing: downloads/suno/1c6q7op.mp3


 ## Download All Songs



 This may take a long time depending on how many songs are in the dataset.

In [None]:
# Uncomment and run this cell to download all songs
# Note: This might take a very long time!

# # Set to None to download all, or a specific number to limit
# max_items: Optional[int] = 100

# # Download songs
# result_df = download_songs_from_dataframe(ai_songs.copy(), output_dir=output_dir, max_items=max_items)

# # Save the updated dataframe
# output_df_path = Path('ai_songs_with_downloads.jsonl')
# result_df.to_json(output_df_path, orient='records', lines=True)

# # Print statistics
# success_count = result_df['download_path'].notna().sum()
# total_count = len(result_df)
# print(f"Downloaded {success_count} out of {total_count} songs ({success_count/total_count:.2%})")


  0%|          | 0/100 [00:00<?, ?it/s]

Processing [1c6q7op] - Domain: suno.com
  Title: Made a song about being a plant parent. I hope you'll enjoy :)
  URL: https://suno.com/song/b99e8ada-0a7f-496b-ab32-d94b45f4032e
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c6q7op/made_a_song_about_being_a_plant_parent_i_hope/
  Using: Suno audio downloader
  Found existing file: downloads/suno/1c6q7op.mp3, skipping download
  Status: Downloaded to: downloads/suno/1c6q7op.mp3
--------------------------------------------------------------------------------
Processing [1c6qa6o] - Domain: /r/sunoai/comments/1c6qa6o/i_made_a_song_about_being_a_plant_parent_i_hope/
  Title: I made a song about being a plant parent. I hope you'll enjoy! :)
  URL: https://v.redd.it/ml7yym0cx4vc1
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c6qa6o/i_made_a_song_about_being_a_plant_parent_i_hope/
  Using: Generic URL downloader for /r/sunoai/comments/1c6qa6o/i_made_a_song_about_being_a_plant_parent_i_hope/
  Status: Failed: Download was not success

ERROR: [soundcloud] Unable to download JSON metadata: HTTP Error 404: Not Found (caused by <HTTPError 404: Not Found>)


  Status: Downloaded to: downloads/soundcloud/1c705zk.mp3
--------------------------------------------------------------------------------
Processing [1c70fhc] - Domain: suno.com
  Title: Santa’s Sweet Treat (New Christmas Banger)
  URL: https://suno.com/song/01f87a79-7508-4095-b78f-046be4c063d5
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c70fhc/santas_sweet_treat_new_christmas_banger/
  Using: Suno audio downloader
  Using direct CDN URL: https://cdn1.suno.ai/01f87a79-7508-4095-b78f-046be4c063d5.mp3
  Downloading audio to: downloads/suno/1c70fhc.mp3
  Status: Downloaded to: downloads/suno/1c70fhc.mp3
--------------------------------------------------------------------------------
Processing [1c70gm4] - Domain: /r/sunoai/comments/1c70gm4/made_this_for_my_pregnant_wife/
  Title: Made this for my pregnant wife
  URL: https://v.redd.it/84i2c45jy7vc1
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c70gm4/made_this_for_my_pregnant_wife/
  Using: Generic URL downloader for /r/sun

ERROR: [youtube] l3iMKBYovnE: Video unavailable


  Status: Failed: Download was not successful
--------------------------------------------------------------------------------
Processing [1c76hu6] - Domain: suno.com
  Title: Reinforcements Incoming - Silly Helldivers 2 song
  URL: https://suno.com/song/1237273c-93c1-4024-9463-76d17769a8ae
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c76hu6/reinforcements_incoming_silly_helldivers_2_song/
  Using: Suno audio downloader
  Using direct CDN URL: https://cdn1.suno.ai/1237273c-93c1-4024-9463-76d17769a8ae.mp3
  Downloading audio to: downloads/suno/1c76hu6.mp3
  Status: Downloaded to: downloads/suno/1c76hu6.mp3
--------------------------------------------------------------------------------
Processing [1c776iv] - Domain: suno.com
  Title: Suno Challenge: Start with a song from an old vinyl record, then turn it into a sick remix
  URL: https://suno.com/song/8ac974fd-9f4b-48c1-84db-5a04b45d325f
  Reddit URL: https://reddit.com/r/SunoAI/comments/1c776iv/suno_challenge_start_with_a_song_f

 ## Resume Downloads



 If your download was interrupted, you can resume from where you left off.

In [None]:
# Uncomment and run this cell to resume downloads

# # Load the previously saved dataframe
# saved_df_path = Path('ai_songs_with_downloads.jsonl')
# saved_df = pd.read_json(saved_df_path, lines=True)
# 
# # Find which items have already been downloaded
# processed_ids = set(saved_df[saved_df['download_path'].notna()]['id'])
# print(f"Found {len(processed_ids)} already processed posts")
# 
# # Filter out already processed posts
# remaining_df = ai_songs[~ai_songs['id'].isin(processed_ids)]
# print(f"Remaining {len(remaining_df)} posts to process")
# 
# # Set max items to limit or None for all
# max_items: Optional[int] = None
# 
# # Download remaining songs
# result_df = download_songs_from_dataframe(remaining_df, output_dir=output_dir, max_items=max_items)
# 
# # Merge with previous results
# full_df = pd.concat([saved_df, result_df])
# 
# # Save the updated dataframe
# full_df.to_json(saved_df_path, orient='records', lines=True)
# 
# # Print statistics
# success_count = full_df['download_path'].notna().sum()
# total_count = len(full_df)
# print(f"Downloaded {success_count} out of {total_count} songs ({success_count/total_count:.2%})")
