trying to make playlists from youtube music, sync in our library. 


the idea is that, we have a bunch of m3u files that we can always generate on the spot. we just now gotta populate them with real links. 

In [1]:
# importing things
from pathlib import Path
from mutagen import File
from mutagen.easyid3 import EasyID3
import json
import yaml
import pprint
import os
from rapidfuzz import process
pp = pprint.PrettyPrinter(indent=4)

In [2]:
# constants
SONG_EXTENSIONS = ['.mp3', '.wav', '.flac', '.ogg', '.m4a', '.aac']

In [3]:
# so first we import the folders
playlists_folder = Path("playlists")
albums_folder = Path(r"\\KRISH-HOME-NAS\EntireSystem\music\albums")
resolved_playlists_folder = Path("resolved_playlists")
# final playlists will be stored in the the albums folder, next to the albums. paste these there. 

In [4]:
import re

def clean_title(title: str) -> str:
    # 1. Remove leading track numbers like "01. ", "12 - ", "3) "
    title = re.sub(r'^\s*\d+\s*[\.\-)\]]+\s*', '', title)

    # 2. Remove content inside (), [], {}
    title = re.sub(r'\s*[\(\[\{].*?[\)\]\}]\s*', ' ', title)

    # 3. Remove special characters (keep letters, numbers, spaces, hyphens, and apostrophes)
    title = re.sub(r"[^\w\s\-\']", '', title)

    # 4. Collapse multiple spaces into one
    title = re.sub(r'\s+', ' ', title)

    # 5. Strip leading/trailing whitespace
    return title.strip()


# utility functions
def getTitle(file_path):
    """Get the title of the song from the file path."""
    song = File(file_path)
    if song is not None and song.tags is not None:
        title = song.get('title', [None])[0]
        if title is None:
            title = file_path.stem
    else:
        title = file_path.stem

    return clean_title(title)

def getArtist(file_path):
    """Get the artist of the song from the file path depending on file type"""
    # if file is mp3 using id3, or else flac
    if file_path.suffix == '.mp3':
        song = EasyID3(file_path)
        return song.get('artist', [None])[0]
    elif file_path.suffix in ['.flac', '.ogg', '.m4a', '.aac']:
        song = File(file_path, easy=True)
        return song.get('artist', [None])[0]

In [5]:
# then we cache, so we take in all info from the albums folder into a list of dictionaries, because songs names can be same but artist names different

In [6]:
# dictionary of song:artist
songs = {}
song_paths = {}
# walk through the albums folder and read all song metadata, put it in the songs list
for file in albums_folder.rglob("*"):
    if file.is_file() and file.suffix.lower() in SONG_EXTENSIONS:
        try:
            songs[getTitle(file)] = getArtist(file)
            song_paths[getTitle(file)] = file
        except Exception as e:
            print(f"Error reading {file}: {e}")
            continue

In [7]:
len(songs)

2817

In [9]:
import os

class M3UPlaylist:
    """
    A class to handle M3U playlists. (will split based on --)
    """
    def __init__(self, filepath):
        self.filepath = filepath
        self.entries = []  # List of dicts: {"title": ..., "path": ...}
        self._parse()

    def _parse(self):
        current_title = None
        with open(self.filepath, 'r', encoding='utf-8') as file:
            for line in file:
                line = line.strip()
                if not line or line.startswith("#EXTM3U"):
                    continue
                elif line.startswith("#EXTINF"):
                    # Example: #EXTINF:123,Queen -- Bohemian Rhapsody
                    if ',' in line:
                        current_title = line.split(",", 1)[1].strip()
                    else:
                        current_title = None
                else:
                    # This is the path line
                    path = line
                    title = current_title if current_title else os.path.basename(path)
                    self.entries.append({"title": title.split('--')[1].strip(), "artist": title.split('--')[0].strip(), "path": path})
                    current_title = None  # Reset after use

    def get_songs(self):
        """Returns a list of song titles."""
        return [entry["title"] for entry in self.entries]

    def set_path(self, song_title, new_path):
        """
        Updates the path of a song by title.
        Returns True if updated, False if not found.
        """
        for entry in self.entries:
            if entry["title"].lower() == song_title.lower():
                entry["path"] = new_path
                return True
        return False

    def save(self, output_path=None):
        """
        Optionally save the modified playlist back to disk.
        If output_path is not given, overwrite the original.
        """
        output_path = output_path or self.filepath
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write("#EXTM3U\n")
            for entry in self.entries:
                f.write(f"#EXTINF:-1,{entry['title']}\n")
                f.write(f"{entry['path']}\n")


In [13]:
# then we start iterating through m3u files one by one
skipped = 0
done = 0
for file in playlists_folder.rglob("*.m3u"):
    # read the m3u file and get the lines
    with open(file, 'r', encoding='utf-8') as f:
        playlist = M3UPlaylist(file)
        print(f"Processing playlist: {file}")
        for song in playlist.entries:
            # get a list of 5 best matches
            match = process.extractOne(song["title"], songs.keys())
            # if score is less than 90, skip
            if match[1] < 86:
                print(f"Skipping {song['title']} (score: {match[1]})")
                skipped += 1
                continue
            # get the path of the song
            song_path = song_paths[match[0]]
            relative_song_path = os.path.relpath(song_path, start=Path(r"\\KRISH-HOME-NAS\EntireSystem"))
            # update the path in the playlist
            playlist.set_path(song["title"], '/' + str(relative_song_path).replace('\\', '/'))
            # save it 
            playlist.save(output_path=resolved_playlists_folder / file.name)
            print(f"Updated {song['title']} to {relative_song_path}")
            done += 1
    print(f"Done processing {file}. Skipped: {skipped}, Updated: {done}")

Processing playlist: playlists\2 Song Repeat.m3u
Updated Thank You to music\albums\No Angel\14. Thank You (Deep Dish Vocal).flac
Updated Dandelions to music\albums\Safe Haven\2. Dandelions.mp3
Done processing playlists\2 Song Repeat.m3u. Skipped: 0, Updated: 2
Processing playlist: playlists\90's and Pre 90's.m3u
Updated Dooba Dooba to music\albums\Silk Route_ Boondein\1. Dooba Dooba.mp3
Updated Afreen Afreen to music\albums\Coke Studio Season 9_ Sound Of The Nation\2. Afreen Afreen.mp3
Updated Tera Chehra to music\albums\Tera Chehra\2. Tera Chehra.mp3
Updated Tere Bin Nahin Lagda to music\albums\Sorrows, Vol. 69\3. Tere Bin Nahin Lagda (Original).mp3
Updated Kabhi To Nazar Milao (feat. Asha Bhosle) to music\albums\Kabhi To Nazar Milao\2. Kabhi To Nazar Milao.mp3
Updated Ahista to music\albums\Do Hazaar Millennium - Indian Hits Songs (Vol 1)\15. Ahista.mp3
Updated O Sanam to music\albums\Sunoh\1. O Sanam.mp3
Updated Meri Aashiqui to music\albums\Singles\Jubin Nautiyal, Rochak Kohli, Ras

In [None]:
# in each iteration we fuzzy search the best match from cache, where artist matches, and song names match. maybe theres a better library out there for searching, that provides the closest matching search results. 

In [None]:
# we find that song, get its url, find a relative path from the m3u file path, and put it there. i dont want this to be interactive so ill just auto populate the best match. well keep strict thresholds instead. if nothing is found, talk about it, show details and say, do you wanna download this file? if yes, download it using ytdlp and then make sure there is enough metadata there, which you will get from ytmusicapi. then put that file in album/track and then finally put it in the m3u file.

In [None]:
# this means, to update your library, you only have to make empty m3u files using ytmusic api, which we have done already, and we can probaly add that to this file too, and then just run these cells. for new music you download, 

In [93]:
list(songs.keys())

['Bad',
 'Smooth Criminal',
 'Leave Me Alone',
 'The Way You Make Me Feel',
 'Speed Demon',
 'Liberian Girl',
 'Just Good Friends',
 'Another Part Of Me',
 'Man In The Mirror',
 'I Just Can’t Stop Loving You',
 'Dirty Diana',
 '10. Main Jat Yamla Pagla',
 '1. Ek Ladki Ko Dekha',
 'Young Girls',
 'If I Knew',
 'Locked Out Of Heaven',
 '2. Locked Out Of Heaven',
 'Gorilla',
 'Treasure',
 'Moonshine',
 'When I Was Your Man',
 'Natalie',
 'Show Me',
 'Money Make Her Smile',
 '7. September',
 '1. Hey Girl',
 '2. See The Light',
 '3. Mountain Peaks',
 '4. Please Don’t Go Home Yet',
 '5. Easy On My Eyes',
 '6. Because Of You',
 '7. Until I Found You',
 'One',
 'The Man',
 'Thinking Out Loud',
 'Afire Love',
 'Take It Back',
 'Shirtsleeves',
 'Even My Dad Does Sometimes',
 'I’m A Mess',
 'Sing',
 'Don’t',
 'Nina',
 'Photograph',
 'Bloodstream',
 'Tenerife Sea',
 'Runaway',
 'Put It All On Me',
 'Nothing On You',
 'I Don’t Want Your Money',
 '1000 Nights',
 'Way To Break My Heart',
 'BLOW',
 'S

In [95]:
for i in songs.keys():
    if 'count on me' in i.lower():
        print(i)
        print(songs[i])
        print(song_paths[i])
        break

9. Count On Me
Bruno Mars
\\KRISH-HOME-NAS\EntireSystem\music\albums\Doo-Wops & Hooligans\9. Count On Me.mp3


In [128]:
process.extract('what is love', songs.keys(), limit=5)


[('Clover', 72.0, 2680),
 ('Lover', 67.5, 612),
 ('1. What Is Love', 66.66666666666667, 989),
 ('This Love', 66.66666666666667, 1051),
 ('Wave', 60.00000000000001, 1124)]

In [120]:
import difflib

# get close matches
difflib.get_close_matches('higher', songs.keys(), n=1, cutoff=0.6)

['Fighter']