# Playlist Processing

## Settings

In [1]:
import re
from tqdm import tqdm

# Settings
# Absolute paths of the Rhythmbox playlist file and song database read by this notebook.
playlist_dir = "/home/koke_cacao/.local/share/rhythmbox/playlists.xml"
db_dir = "/home/koke_cacao/.local/share/rhythmbox/rhythmdb.xml"

# Global Storage
# NOTE(review): location_list() assigns a *local* variable of the same name,
# so this module-level value is never updated by the code visible here.
playlist_texts = None

def property_regexp(name):
    """Build a regex that matches the value of the XML attribute ``name``.

    Example: with ``name="name"`` the pattern finds ``dev`` in
    ``<playlist name="dev" show-browser="false" search-type="search-match" type="static">``.

    The value may contain any character except a double quote, so values
    with spaces or hyphens (``"road trip"``, ``"search-match"``) are found
    as well as plain words.

    Args:
        name: attribute name; it is regex-escaped, so names such as
            ``search-type`` are matched literally.

    Returns:
        A pattern string for ``re.search``; the match itself (``.group()``)
        is the attribute value without the surrounding quotes.
    """
    # The lookbehind requires a leading space so that e.g. ``type`` does not
    # match inside ``search-type``.
    return r"(?<= {}=\")[^\"]*(?=\")".format(re.escape(name))

def surounding_regexp(name):
    """Build a regex that matches the text enclosed by ``<name>...</name>``.

    Example: with ``name="location"`` the pattern finds ``file:xxx.mp4`` in
    ``<location>file:xxx.mp4</location>``.

    The content is matched lazily and ``.`` does not cross newlines, so the
    match stops at the first closing tag on the same line.

    Args:
        name: tag name; it is regex-escaped, so names such as ``file-size``
            are matched literally.

    Returns:
        A pattern string for ``re.search``; the match itself (``.group()``)
        is the enclosed text, which may be empty.
    """
    tag = re.escape(name)
    return r"(?<=<{}>).*?(?=<\/{}>)".format(tag, tag)

def block_regexp(name):
    """Build a regex that matches one whole ``<name ...> ... </name>`` block.

    The opening tag may carry attributes, and the body may span several
    lines. The body is matched lazily, so each block ends at its first
    closing tag.

    Args:
        name: tag name; it is regex-escaped and must be followed by
            whitespace or ``>``, so ``entry`` does not match ``<entryfoo>``.

    Returns:
        A pattern string for ``re.findall``; each match is the full block
        text including the opening and closing tags.
    """
    tag = re.escape(name)
    # ``[^>]*`` keeps the opening tag from swallowing later tags when several
    # blocks share one line.
    return r"<{}(?:\s[^>]*)?>[\s\S]*?<\/{}>".format(tag, tag)

## Functions

In [2]:
# read playlist

def location_list(file):
    """Read every non-empty static playlist from a Rhythmbox playlist file.

    A playlist is picked up when its opening tag has ``type="static"`` and is
    directly followed by a ``<location>`` line indented by four spaces.

    Args:
        file: path of the playlist XML file.

    Returns:
        dict mapping playlist name to the list of its location strings, in
        file order. The strings are the raw text between the ``<location>``
        tags (no XML unescaping is applied).

    Raises:
        ValueError: if the ``name`` attribute cannot be read from a playlist's
            opening tag.
    """
    # Compile once instead of rebuilding the patterns for every line.
    name_pattern = re.compile(property_regexp("name"))
    location_pattern = re.compile(surounding_regexp("location"))

    # Read as UTF-8 explicitly so non-ASCII names do not depend on the locale.
    with open(file, "r", encoding="utf-8") as f:
        text = f.read()
    # find all static playlist
    playlist_texts = re.findall(r"<playlist name=\".*\".*?type=\"static\">\n    <location>[\s\S]*?<\/playlist>\n", text)

    d = dict()
    for playlist_text in tqdm(playlist_texts):
        lines = playlist_text.split("\n")
        name_match = name_pattern.search(lines[0])
        if name_match is None:
            raise ValueError("cannot read playlist name from: {}".format(lines[0]))
        name = name_match.group()
        # lines[0] is the opening tag, lines[-2] the closing tag, and lines[-1]
        # the empty string after the trailing newline; none of them is a song.
        print("\nthrow away the following lines in {}:".format(name))
        print("- {}".format(lines[0]))
        print("- {}".format(lines[-1]))
        print("- {}".format(lines[-2]))
        matches = (location_pattern.search(line) for line in lines[1:-2])
        d[name] = [match.group() for match in matches if match]
    print("\ndetected playlist: {}".format(d.keys()))
    return d


def location_list_to_song_list(db, location_list):
    """Replace every playlist location with the matching song from ``db``.

    Args:
        db: iterable of objects exposing a ``location`` attribute.
        location_list: dict mapping playlist name to a list of locations.

    Returns:
        dict with the same keys, each mapped to the list of songs whose
        ``location`` equals the corresponding entry, in playlist order.

    Raises:
        Exception: if a playlist location has no song in ``db``.
    """
    # Index the database once instead of scanning it for every playlist entry.
    # setdefault keeps the first song when several share a location, which is
    # the song a front-to-back scan would return.
    by_location = dict()
    for song in db:
        by_location.setdefault(song.location, song)

    def find_song(entry):
        if entry not in by_location:
            raise Exception("no such song in db: {}".format(entry))
        return by_location[entry]

    song_dict = dict()
    for key, value in location_list.items():
        song_dict[key] = [find_song(entry) for entry in value]
    return song_dict

In [3]:
# read playlist template

def template(file):
    """
    Split a playlist file around its non-empty static playlists.

    ENSURES: template(file) => (a, b) where
             a+[playlists]+b can be a good playlist file

    ``a`` is everything before the first static playlist. ``b`` is everything
    after it with the remaining static playlists removed, so content that
    sits between two static playlists is kept rather than dropped.

    RAISES: ValueError if the file contains no static playlist, because there
            is then no position at which new playlists could be inserted.
    """
    regex = re.compile(r"<playlist name=\".*\".*?type=\"static\">\n    <location>[\s\S]*?<\/playlist>\n")
    # Read as UTF-8 explicitly so non-ASCII names do not depend on the locale.
    with open(file, "r", encoding="utf-8") as f:
        # Splitting on the pattern avoids a placeholder string that could
        # collide with text already present in the file.
        pieces = regex.split(f.read())
    if len(pieces) == 1:
        raise ValueError("no static playlist found in {}".format(file))
    # Pieces between consecutive static playlists are normally just the
    # indentation of the next <playlist> tag; keep only pieces with content.
    kept = "".join(piece for piece in pieces[1:-1] if piece.strip())
    return (pieces[0], kept + pieces[-1])

In [4]:
# Read song database

class Song:
    """Record of one song's metadata fields.

    Only ``title`` is required; every other field defaults to ``None``.
    Values are stored exactly as passed in.
    """

    def __init__(self, title, genre=None,
                 artist=None, album=None,
                 duration=None, file_size=None,
                 location=None, mtime=None,
                 first_seen=None, last_seen=None,
                 rating=None, play_count=None,
                 last_played=None, date=None,
                 media_type=None, composer=None):
        # Assigned pairwise, in the same order as the signature.
        self.title, self.genre = title, genre
        self.artist, self.album = artist, album
        self.duration, self.file_size = duration, file_size
        self.location, self.mtime = location, mtime
        self.first_seen, self.last_seen = first_seen, last_seen
        self.rating, self.play_count = rating, play_count
        self.last_played, self.date = last_played, date
        self.media_type, self.composer = media_type, composer

    def __repr__(self):
        # One line per song: "<title>: <location>-<genre>" plus a newline.
        return "{0.title}: {0.location}-{0.genre}\n".format(self)
        
def song_db(file):
    """Read every ``<entry>`` block of a song database file into ``Song`` objects.

    For each entry, the first occurrence of every known property tag is
    passed to ``Song`` as a keyword argument (hyphens in tag names become
    underscores). Properties missing from an entry are not passed, so they
    keep ``Song``'s defaults. Values are the raw text between the tags; no
    XML unescaping is applied.

    Args:
        file: path of the database XML file.

    Returns:
        list of ``Song`` objects in file order.
    """
    properties = ["title", "genre",
                  "artist", "album",
                  "duration", "file-size",
                  "location", "mtime",
                  "first-seen", "last-seen",
                  "rating", "play-count",
                  "last-played", "date",
                  "media-type", "composer"]
    # Build and compile every pattern once instead of once per entry.
    entry_pattern = re.compile(block_regexp("entry"))
    property_patterns = [(p.replace("-", "_"), re.compile(surounding_regexp(p)))
                         for p in properties]

    # Read as UTF-8 explicitly so non-ASCII titles do not depend on the locale.
    with open(file, "r", encoding="utf-8") as f:
        db_texts = entry_pattern.findall(f.read())
    print(len(db_texts))

    l = []
    for db_text in tqdm(db_texts):
        d = dict()
        for key, pattern in property_patterns:
            match = pattern.search(db_text)
            if match:
                d[key] = match.group()
        l.append(Song(**d))
    print("\nwe got {} songs".format(len(l)))
    return l

## Read Song Lists

In [5]:
# Parse the static playlists into {playlist name: [location, ...]}.
# The previews below index the "dev" playlist, so a playlist with that name must exist.
l_list = location_list(playlist_dir)
print(l_list["dev"][:5])

# Parse the song database into a list of Song objects.
db = song_db(db_dir)
print(db[:10])

# Replace every playlist location with the matching Song from db.
s_list = location_list_to_song_list(db, l_list)
print(s_list["dev"][:5])

100%|██████████| 4/4 [00:00<00:00, 312.40it/s]
 19%|█▉        | 354/1825 [00:00<00:00, 3536.54it/s]


throw away the following lines in dev:
- <playlist name="dev" show-browser="false" browser-position="0" search-type="search-match" type="static">
- 
-   </playlist>

throw away the following lines in en:
- <playlist name="en" show-browser="false" browser-position="0" search-type="search-match" type="static">
- 
-   </playlist>

throw away the following lines in jp:
- <playlist name="jp" show-browser="false" browser-position="0" search-type="search-match" type="static">
- 
-   </playlist>

throw away the following lines in zh:
- <playlist name="zh" show-browser="false" browser-position="0" search-type="search-match" type="static">
- 
-   </playlist>

detected playlist: dict_keys(['dev', 'en', 'jp', 'zh'])
['file:///home/koke_cacao/Music/LuoXue/%E3%81%82%E3%81%84%E3%81%8F%E3%82%8B%E3%81%97%E3%81%84(For%20SS3A%20rearrange%20Mix%E3%82%AA%E3%83%AA%E3%82%B8%E3%83%8A%E3%83%AB%E3%83%BB%E3%82%AB%E3%83%A9%E3%82%AA%E3%82%B1)%20-%20%E3%83%AB%E3%82%A5%E3%83%86%E3%82%A3%E3%83%B3.mp3', 'file:///home

100%|██████████| 1825/1825 [00:00<00:00, 4411.03it/s]


we got 1825 songs
[&#x6708;&#x306B;&#x53E2;&#x96F2;&#x83EF;&#x306B;&#x98A8;: file:///home/koke_cacao/Music/LuoXue/%E6%9C%88%E3%81%AB%E5%8F%A2%E9%9B%B2%E8%8F%AF%E3%81%AB%E9%A2%A8%20-%20%E6%A3%AE%E6%B0%B8%E7%9C%9F%E7%94%B1%E7%BE%8E.mp3-jp; touhou;
, Minecraft-23-Ballad_of_the_Cats.m4a: file:///home/koke_cacao/Music/Other/Minecraft-23-Ballad_of_the_Cats.m4a-mc; am; set;
, Minecraft-15-Wet_Hands.m4a: file:///home/koke_cacao/Music/Other/Minecraft-15-Wet_Hands.m4a-mc; am; set;
, Minecraft-41-Mall.m4a: file:///home/koke_cacao/Music/Other/Minecraft-41-Mall.m4a-mc; am; set;
, Minecraft-44-Strad.m4a: file:///home/koke_cacao/Music/Other/Minecraft-44-Strad.m4a-mc; am; set;
, Minecraft-51-Moog_City.m4a: file:///home/koke_cacao/Music/Other/Minecraft-51-Moog_City.m4a-mc; am; set;
, Minecraft-14-Dry_Hands.m4a: file:///home/koke_cacao/Music/Other/Minecraft-14-Dry_Hands.m4a-mc; am; set;
, Minecraft-16-Mice_on_Venus.m4a: file:///home/koke_cacao/Music/Other/Minecraft-16-Mice_on_Venus.m4a-mc; am; set;
, M




## Client Code

In [6]:
### CLIENT CODE ###
def extract_tags(f, args, name, browser_position=0):
    """Render the songs selected by ``f(*args)`` as one static ``<playlist>`` element.

    Args:
        f: callable returning an iterable of objects with a ``location`` attribute.
        args: tuple of positional arguments passed to ``f``.
        name: value written into the playlist's ``name`` attribute.
        browser_position: value written into ``browser-position``.

    Returns:
        The playlist XML text: opening tag, one ``<location>`` line per
        selected song, and the closing tag (no trailing newline).
    """
    opening = "  <playlist name=\"{}\" show-browser=\"false\" browser-position=\"{}\" search-type=\"search-match\" type=\"static\">\n".format(name, browser_position)
    closing = "\n  </playlist>"
    locations = [song.location for song in f(*args)]
    body_lines = []
    for location in locations:
        body_lines.append("    <location>"+location+"</location>")
    return opening + "\n".join(body_lines) + closing

def filter_match_songs_genre(genre, song_objects):
    """Return the songs whose ``genre`` attribute equals ``genre`` exactly, in order."""
    return [song for song in song_objects if song.genre == genre]

def filter_contain_songs_genre(genre, song_objects):
    """Return the songs whose ``genre`` attribute contains ``genre``, in order.

    The check is a plain substring test (``genre in song.genre``), so
    ``"am;"`` also matches a genre such as ``"jam;"``.

    Songs whose ``genre`` is ``None`` are treated as not containing the tag
    instead of raising ``TypeError``.
    """
    return [song for song in song_objects
            if song.genre is not None and genre in song.genre]

def idx(song_objects):
    """Return ``song_objects`` unchanged; pass it to ``extract_tags`` to keep every song."""
    return song_objects


### Backup

In [7]:
# backup
from shutil import copy2
from datetime import date
from os import path

# Compute the date once so both backup file names always carry the same stamp.
today = date.today().strftime("%Y-%m-%d")
playlists_dest = "/home/koke_cacao/Music/rhythmbox/backup/playlists-{}.xml".format(today)
rhythmdb_dest = "/home/koke_cacao/Music/rhythmbox/backup/rhythmdb-{}.xml".format(today)

# Copy each source file (taken from the settings cell) to its dated backup.
# An existing backup is never overwritten, and the message names the file
# that is actually in the way.
for backup_src, backup_dest in ((playlist_dir, playlists_dest), (db_dir, rhythmdb_dest)):
    if not path.exists(backup_dest):
        copy2(backup_src, backup_dest)
    else:
        print("Backup {} already exists! Delete before proceed.".format(backup_dest))

### Create List

In [14]:
# Each extract_tags call takes:
#   1. a selector: `filter_contain_songs_genre`, `filter_match_songs_genre`,
#      or `idx` (keeps every song unchanged)
#   2. the selector's arguments as a tuple, e.g. ([TagName], s_list[ListName])
#   3. the name of the playlist to generate
jp_str = extract_tags(filter_contain_songs_genre, ("jp;", s_list["dev"]), "jp")
zh_str = extract_tags(filter_contain_songs_genre, ("zh;", s_list["dev"]), "zh")
en_str = extract_tags(filter_contain_songs_genre, ("en;", s_list["dev"]), "en")
am_str = extract_tags(filter_contain_songs_genre, ("am;", s_list["dev"]), "am")
all_str = extract_tags(idx, (s_list["dev"],), "dev")

a, b = template(playlist_dir)
# The pattern used by template() consumes the newline after each removed
# playlist, and extract_tags() output has no trailing newline, so put one
# back before the remainder of the file.
result = a+("\n".join([jp_str, zh_str, en_str, am_str, all_str]))+"\n"+b

# Mode "x" fails if the file already exists, so the existence check and the
# creation are a single step. The output is written as UTF-8.
try:
    with open("./playlists.xml", "x", encoding="utf-8") as f:
        f.write(result)
except FileExistsError:
    print("./playlists.xml already exists! Delete if you want to generate a new one!")
else:
    print("./playlists.xml generate successful. Please copy the file to /home/koke_cacao/.local/share/rhythmbox/ manually")

./playlists.xml generate successful. Please copy the file to /home/koke_cacao/.local/share/rhythmbox/ manually
