In [None]:
# ---- Settings to edit before running ---------------------------------------
# Directory the notebook changes into before touching any file.
WORKING_DIR = "Your Working Dir"
# Base name shared by the audio file, the CUE sheets and the extra-data INI.
FILENAME_PREFIX = "Filename prefix"
# Encoding of the original (non-UTF-8) CUE sheet.
ANSI_ENCODING = "gbk"
# Cover image handed to flac via --picture; None skips the picture.
PICTURE = "cover.jpg"
# Prefix put in front of the album name to form the output folder.
OUTPUT_PREFIX = "output/"

# ---- File names derived from FILENAME_PREFIX --------------------------------
INPUT_FILE = "%s.ape" % (FILENAME_PREFIX,)
ANSI_CUE = "%s.cue" % (FILENAME_PREFIX,)
UTF8_CUE = "%s.utf8.cue" % (FILENAME_PREFIX,)
EXTRA_DATA_FILE = "%s.extra.ini" % (FILENAME_PREFIX,)

In [None]:
import codecs

In [None]:
from collections import defaultdict

In [None]:
import re

In [None]:
import subprocess

In [None]:
import ConfigParser

In [None]:
import os

In [None]:
# Codes stored as the first element of each global_report entry.
NOT_PARSED, NO_TRACK = 1, 2

# Diagnostics collected while parsing the CUE sheet; the matchers append
# (code, line, message) tuples here.
global_report = list()

In [None]:
def utf8(data):
    """Return ``data`` as a text (unicode) string.

    Byte strings are decoded as UTF-8, text strings are returned unchanged,
    and any other object is converted with ``str()`` first.

    Works on Python 2 (``str``/``unicode``) and Python 3 (``bytes``/``str``),
    and accepts subclasses of the string types.

    Raises:
        UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: str is already text
    if isinstance(data, text_type):
        return data
    if isinstance(data, bytes):  # on Python 2, bytes is an alias of str
        return codecs.decode(data, "utf8")
    # Neither bytes nor text (e.g. an int): stringify, then normalise the
    # result, which is bytes on Python 2 and text on Python 3.
    return utf8(str(data))


### Convert CUE to UTF-8

In [None]:
# Change the working directory to WORKING_DIR (IPython `cd` with variable expansion).
cd $WORKING_DIR

In [None]:
# Create the UTF-8 copy of the CUE sheet once; an existing copy is left untouched.
if not os.path.exists(UTF8_CUE):
    # Binary mode: the bytes are transcoded explicitly below, so no implicit
    # newline translation or text decoding should happen on the way in or out.
    with open(ANSI_CUE, "rb") as ansi_fp:
        ansi_content = ansi_fp.read()
    # Transcode BEFORE creating the output file: if ANSI_ENCODING is wrong the
    # decode raises here and no empty UTF8_CUE is left behind (which the
    # existence check above would silently accept on the next run).
    utf8_content = codecs.encode(
        codecs.decode(ansi_content, ANSI_ENCODING),
        "utf-8")
    with open(UTF8_CUE, "wb") as utf8_fp:
        utf8_fp.write(utf8_content)

### Parse CUE

In [None]:
def init_cue():
    """Create an empty CUE container.

    Missing top-level keys read as ``""``. The ``"tracks"`` entry maps a track
    number to a dict that is created on first access.
    """
    return defaultdict(str, tracks=defaultdict(dict))

In [None]:
def trim_quote(text):
    """Strip one pair of surrounding double quotes from ``text``.

    The text is returned unchanged unless it both starts and ends with ``"``.
    An empty quoted value (``""``) yields the empty string; a lone ``"`` is
    left alone because it is not a pair.
    """
    # >= 2 (not > 2) so that the two-character string '""' is unwrapped too.
    if len(text) >= 2 and text[0] == '"' and text[-1] == '"':
        return text[1:-1]
    return text

In [None]:
def cue_match_performer(line, cue_dict, track=None):
    """Parse a ``PERFORMER`` command of a CUE sheet.

    Args:
        line: one line of the CUE sheet.
        cue_dict: CUE container, updated in place.
        track: current track number, or ``None`` while no TRACK has been seen.

    Returns:
        True if ``line`` is a PERFORMER command (its value is then stored as
        ``cue_dict["albumartist"]`` when ``track`` is None, otherwise as the
        track's ``"artist"``), False otherwise.
    """
    # The keyword must open the line: only non-word characters (indentation,
    # a byte-order mark) may precede it. An unanchored search would also fire
    # on e.g.  TITLE "THE PERFORMER X".
    r = re.match(r'\W*PERFORMER\s+(?P<performer>.+)$', line)
    if not r:
        return False
    performer = utf8(trim_quote(r.group("performer").strip()))
    if track is None:
        cue_dict["albumartist"] = performer
    else:
        cue_dict["tracks"][track]["artist"] = performer
    return True

In [None]:
def cue_match_title(line, cue_dict, track=None):
    """Parse a ``TITLE`` command of a CUE sheet.

    Args:
        line: one line of the CUE sheet.
        cue_dict: CUE container, updated in place.
        track: current track number, or ``None`` while no TRACK has been seen.

    Returns:
        True if ``line`` is a TITLE command (its value is then stored as
        ``cue_dict["album"]`` when ``track`` is None, otherwise as the track's
        ``"title"``), False otherwise.
    """
    # The keyword must open the line: only non-word characters (indentation,
    # a byte-order mark) may precede it. An unanchored search would also fire
    # on e.g.  FILE "MY TITLE song.ape" WAVE.
    r = re.match(r'\W*TITLE\s+(?P<title>.+)', line)
    if not r:
        return False
    title = utf8(trim_quote(r.group("title").strip()))
    if track is None:
        cue_dict["album"] = title
    else:
        cue_dict["tracks"][track]["title"] = title
    return True

In [None]:
def cue_match_file(line, cue_dict, track=None):
    """Parse a ``FILE`` command of a CUE sheet.

    Args:
        line: one line of the CUE sheet.
        cue_dict: CUE container, updated in place.
        track: current track number, or ``None`` while no TRACK has been seen.

    Returns:
        True if ``line`` is a FILE command (the file name, without the
        trailing type word, is then stored under ``"original_file"`` at album
        or track level depending on ``track``), False otherwise.
    """
    # The keyword must open the line: only non-word characters (indentation,
    # a byte-order mark) may precede it. An unanchored search would also fire
    # on e.g.  REM COMMENT "FILE a b".
    r = re.match(r'\W*FILE\s+(?P<file>.+)\s+\w+', line)
    if not r:
        return False
    original_file = utf8(trim_quote(r.group("file").strip()))
    if track is None:
        cue_dict["original_file"] = original_file
    else:
        cue_dict["tracks"][track]["original_file"] = original_file
    return True

In [None]:
def cue_match_track(line, cue_dict, track=None):
    """Tell whether ``line`` is a ``TRACK <number> AUDIO`` command.

    ``cue_dict`` and ``track`` are accepted for signature parity with the
    other matchers but are not used; nothing is stored here.

    Returns:
        True if the line is an audio TRACK command, False otherwise.
    """
    # The keyword must open the line: only non-word characters (indentation,
    # a byte-order mark) may precede it. An unanchored search would count a
    # comment such as  REM COMMENT "TRACK 01 AUDIO rip"  as a track.
    return re.match(r'\W*TRACK\s+(?P<track_num>\d+)\s+AUDIO', line) is not None

In [None]:
def cue_match_index(line, cue_dict, track=None):
    """Parse an ``INDEX <number> <timing>`` command of a CUE sheet.

    Args:
        line: one line of the CUE sheet.
        cue_dict: CUE container, updated in place.
        track: current track number, or ``None`` while no TRACK has been seen.

    Returns:
        True if ``line`` is an INDEX command, False otherwise. With a track,
        the timing is stored under ``"index_<number>"`` in that track's dict;
        without one, a ``NO_TRACK`` entry is appended to ``global_report``
        and nothing is stored.
    """
    # The keyword must open the line: only non-word characters (indentation,
    # a byte-order mark) may precede it. An unanchored search would also fire
    # on e.g.  TITLE "INDEX 01 remix".
    r = re.match(r'\W*INDEX\s+(?P<index_num>\d+)\s+(?P<timing>.+)', line)
    if not r:
        return False
    index_num = r.group("index_num")  # digits only, nothing to trim
    timing = trim_quote(r.group("timing").strip())
    if track is None:
        global_report.append((NO_TRACK, line, "No track for INDEX"))
    else:
        cue_dict["tracks"][track]["index_%s" % index_num] = utf8(timing)
    return True

In [None]:
def cue_read_global(fp):
    """Parse a whole CUE sheet.

    Args:
        fp: iterable of lines (e.g. an open file) of the CUE sheet.

    Returns:
        ``(cue, track_count)``. ``cue`` is the container from ``init_cue()``
        filled with the album-level fields and one dict per track under
        ``cue["tracks"]``. ``track_count`` is the number of TRACK commands
        seen, or ``None`` if there were none; when there is at least one it is
        also stored as ``cue["tracktotal"]``.

    Non-blank lines that no matcher recognises are appended to
    ``global_report`` with code ``NOT_PARSED``.
    """
    cue = init_cue()
    current_track = None
    # Tried in this order before the TRACK test.
    field_matchers = (cue_match_performer, cue_match_title, cue_match_file)
    for line in fp:
        if not line.strip():
            continue  # blank lines carry no command; keep them out of the report
        if any(matcher(line, cue, current_track) for matcher in field_matchers):
            continue
        if cue_match_track(line, cue, current_track):
            # Tracks are numbered by order of appearance, starting at 1; the
            # number written in the TRACK command itself is not used.
            current_track = 1 if current_track is None else current_track + 1
            cue["tracks"][current_track]["tracknumber"] = str(current_track)
            continue
        if cue_match_index(line, cue, current_track):
            continue
        global_report.append((NOT_PARSED, line, "Unknown line"))
    # Explicit None test: ordering None against an int raises on Python 3.
    if current_track is not None:
        cue["tracktotal"] = current_track
    return cue, current_track

In [None]:
def load_extra_info(cue, config_file):
    """Merge extra tags from an optional INI file into ``cue`` (in place).

    Two sections are honoured:

    * ``[default]``   - values used only for keys ``cue`` does not have yet.
    * ``[overwrite]`` - values that replace whatever ``cue`` already holds.

    Args:
        cue: CUE container (any mutable mapping) to update.
        config_file: path of the INI file.

    Returns:
        None. A missing, unreadable or malformed file is ignored and leaves
        ``cue`` untouched.
    """
    config = ConfigParser.ConfigParser()
    # read_file() is the newer name of readfp(); use whichever exists.
    read_config = getattr(config, "read_file", None) or config.readfp
    try:
        # The with-block closes the handle, which the bare open() never did.
        with open(config_file) as config_fp:
            read_config(config_fp)
    except (IOError, OSError, ConfigParser.Error):
        # The extra-data file is optional: skip it quietly, but no longer
        # swallow unrelated errors (KeyboardInterrupt, programming mistakes).
        return

    if config.has_section("default"):
        for key, value in config.items("default"):
            if key not in cue:
                cue[key] = value

    if config.has_section("overwrite"):
        for key, value in config.items("overwrite"):
            cue[key] = value

### Convert Files

In [None]:
# Parse the UTF-8 CUE sheet; the with-block closes the file even if parsing raises.
with open(UTF8_CUE, "r") as fp:
    cue, num_tracks = cue_read_global(fp)
# Add or override tags from the optional extra-data INI file.
load_extra_info(cue, EXTRA_DATA_FILE)

In [None]:
# Change the working directory to WORKING_DIR again (same command as the earlier cell).
cd $WORKING_DIR

In [None]:
# Convert the source audio file (INPUT_FILE) into the temporary file __temp.wav with ffmpeg.
!ffmpeg -i "$INPUT_FILE" "__temp.wav"

In [None]:
# Split __temp.wav with bchunk according to the UTF-8 CUE sheet, using "__tracks" as the
# output base name (tmp_track() later expects files named __tracksNN.wav).
!bchunk -w "__temp.wav" "$UTF8_CUE" "__tracks"

In [None]:
# Create the output directory; -p keeps this cell from failing when the
# directory already exists (e.g. on a second run).
!mkdir -p "$OUTPUT_PREFIX"

In [None]:
def tmp_track(id):
    """Return the name of the temporary WAV file for track ``id``.

    The track number is zero-padded to at least two digits, e.g.
    ``tmp_track(3)`` gives ``"__tracks03.wav"``.
    """
    padded_number = str(id).zfill(2)
    return "__tracks%s.wav" % padded_number

In [None]:
def filename_safe(filename):
    """Make ``filename`` usable as a single file-name component.

    Each of the characters ``< > " ? * \\ / : |`` is replaced by ``_`` and
    surrounding whitespace is removed.
    """
    # "|" completes the set: it is reserved in Windows file names just like
    # the other characters listed here.
    for ch in '<>"?*\\/:|':
        filename = filename.replace(ch, "_")
    return filename.strip()

In [None]:
def filename_format(cue, track):
    """Build the base file name (no extension) for one track.

    The result is the track title, prefixed with the two-digit track number
    when the track has a ``"tracknumber"``, and additionally prefixed with
    ``"<discnumber>-"`` when the album has a ``"discnumber"``.

    Raises:
        ValueError: if the track has no ``"title"``.
    """
    track_info = cue["tracks"][track]
    if "title" not in track_info:
        raise ValueError
    name = track_info["title"]
    if "tracknumber" in track_info:
        name = "%s %s" % (track_info["tracknumber"].zfill(2), name)
    if "discnumber" in cue:
        name = "%s-%s" % (cue["discnumber"], name)
    return name

In [None]:
# Remove the "output" directory left over from a previous run.
# NOTE(review): the path is hard-coded here instead of being derived from OUTPUT_PREFIX,
# and this cell runs after the earlier mkdir cell - confirm the intended order.
rm -r output

In [None]:
# Destination directory: OUTPUT_PREFIX followed by the sanitised album title.
folder = "%s%s" % (OUTPUT_PREFIX, filename_safe("%s" % cue["album"]))
# Created from Python rather than with an unquoted `mkdir -p $folder`, which
# the shell splits into several directories when the album title has spaces.
if not os.path.isdir(folder):
    os.makedirs(folder)

# num_tracks is None when the CUE sheet contained no TRACK: encode nothing then.
for i in range(1, (num_tracks or 0) + 1):
    track_tags = cue["tracks"][i]
    # The command is kept as an argument list and run without a shell, so
    # quotes, "$" or backticks inside tags and file names reach flac verbatim
    # instead of being interpreted (or breaking the command line).
    cmd = [u"flac", u"--best"]
    if PICTURE is not None:
        cmd.append(u"--picture=%s" % utf8(PICTURE))
    # Track-level tags first ...
    for key, value in track_tags.items():
        cmd.append(u"--tag=%s=%s" % (key, utf8(value)))
    # ... then every album-level field the track does not define itself.
    for key in cue:
        if key == "tracks" or key in track_tags:
            continue
        cmd.append(u"--tag=%s=%s" % (key, utf8(cue[key])))
    filename = filename_safe("%s.flac" % filename_format(cue, i))
    cmd.append(u"-o")
    cmd.append(u"%s/%s" % (utf8(folder), utf8(filename)))
    cmd.append(utf8(tmp_track(i)))
    print("Running the track #%d" % i)
    # Shown for information only; the list above is what actually runs.
    print(u" ".join(cmd))
    print(subprocess.check_output(cmd, stderr=subprocess.STDOUT))

In [None]:
# Delete the temporary WAV files (every *.wav whose name starts with "__").
!rm __*.wav

In [None]:
# NOTE(review): leftover scratch expression. `key` is whatever the loops in the
# conversion cell left behind, so this raises NameError if that cell has not run.
codecs.decode(cue[key], "utf8")

### *playground*

In [None]:
# Manual check of filename_safe() on a name with leading whitespace and replaced characters.
print(filename_safe(" abcd<1234>**.flac"))

In [None]:
# fp was already closed after parsing, so re-open the CUE sheet to read its lines.
with open(UTF8_CUE, "r") as fp:
    lines = fp.readlines()

In [None]:
# Show the parsed tags of track 6.
# NOTE(review): cue["tracks"] is a defaultdict, so this lookup creates an empty
# entry when track 6 does not exist.
cue["tracks"][6]

In [None]:
# Try the PERFORMER matcher on the first line, using a fresh CUE container
# (`c` is not defined yet at this point, and is later bound to a ConfigParser).
print(cue_match_performer(lines[0], init_cue()))

In [None]:
# Try the TITLE matcher on the second line, using a fresh CUE container
# (`c` is not defined yet at this point, and is later bound to a ConfigParser).
print(cue_match_title(lines[1], init_cue()))

In [None]:
# Load the extra-data INI file for manual inspection; the with-block closes
# the file handle that a bare open() would leave dangling.
c = ConfigParser.ConfigParser()
with open(EXTRA_DATA_FILE) as extra_fp:
    c.readfp(extra_fp)
