## I. Libraries

In [159]:
from lxml import etree
import xml.etree.ElementTree as ET
import pandas as pd
import os
import argparse
from colorama import Fore, Back, Style
import shutil
from datetime import datetime
import re

## II. Useful Functions

### Timestamp functions

In [109]:
def ts_str():
    """Return the current local time as a compact ``YYYYMMDD_HHMMSS`` string.

    The format contains no separators that are invalid in file names, so the
    result can be embedded directly in a file name.
    """
    return f"{datetime.now():%Y%m%d_%H%M%S}"
def ts_log():
    """Return the current local time as ``YYYY-MM-DD HH:MM:SS`` for log lines."""
    return f"{datetime.now():%Y-%m-%d %H:%M:%S}"

### Decode percent-encoded characters in file paths

In [162]:
def replace_hex_codes(string):
    """Decode percent-encoded (``%XX``) sequences in ``string``.

    Consecutive escapes are decoded together as one byte run, so multi-byte
    UTF-8 characters (e.g. ``%C3%A9`` -> ``é``) are restored correctly instead
    of producing one character per byte. If a run is not valid UTF-8 it falls
    back to Latin-1, i.e. one character per byte.

    Parameters
    ----------
    string : str
        Text that may contain ``%`` followed by two hexadecimal digits.

    Returns
    -------
    str
        The text with every well-formed escape replaced. A ``%`` that is not
        followed by two hexadecimal digits is left untouched.
    """
    def _decode_run(match):
        # "%C3%A9" -> "C3A9" -> b"\xc3\xa9"
        raw = bytes.fromhex(match.group(0).replace("%", ""))
        try:
            return raw.decode("utf-8")
        except UnicodeDecodeError:
            # Not UTF-8: map each byte to the code point of the same value.
            return raw.decode("latin-1")

    # Match whole runs of adjacent escapes so multi-byte characters stay together.
    return re.sub(r'(?:%[0-9a-fA-F]{2})+', _decode_run, string)

#### Example

In [163]:
# Example usage: %20 and %21 are the percent-encoded forms of " " and "!".
string = "Hello%20World%21"
replaced_string = replace_hex_codes(string)
print(replaced_string)

Hello World!


### Output Directory

In [131]:
def init_output_dir(output_dir="output"):
    """Make sure the directory ``output_dir`` exists.

    Missing intermediate directories are created as well. Calling the function
    when the directory already exists is a no-op.

    Parameters
    ----------
    output_dir : str, optional
        Directory to create. Defaults to ``"output"`` (relative to the current
        working directory).

    Raises
    ------
    FileExistsError
        If ``output_dir`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_dir, exist_ok=True)

## III. Rekordbox XML data extraction and treatment

### 1. Command-line argument parser

In [158]:
def get_args():
    """Parse the three positional command-line arguments.

    Returns
    -------
    argparse.Namespace
        Namespace with the string attributes ``xml_source``, ``target`` and
        ``mode``, read from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(description='Create Rekordbox Playlist tree structure in a target directory')
    # (argument name, help text) in the order they are expected on the command line.
    positionals = (
        ('xml_source', 'source rekordbox xml file'),
        ('target', 'target directory'),
        ('mode', 'excel or exec. excel will create an excel file with the playlist structure. exec will create the playlist structure in the target directory'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, metavar=arg_name, type=str, help=help_text)
    return cli.parse_args()
    
# Get the source and target directories from the command-line arguments
# XML = args.xml_source
# target_dir = args.target
# mode = args.mode


### 2. Parse XML

In [27]:
# NOTE: machine-specific absolute path; used as the default input of parse_xml.
XML = r"C:\Users\nbibr\OneDrive\Documents\rekordbox\collection.xml"

def parse_xml(xml=XML):
    """Parse the XML document at ``xml`` and return its root element."""
    return etree.parse(xml).getroot()

### 3. Get Playlist List

In [28]:
def get_playlist_list(root):
    """Return every descendant ``NODE`` element of ``root`` whose ``Type`` is ``'1'``.

    These are the nodes this notebook treats as playlists. The result is a list
    in document order; it is empty when nothing matches.
    """
    playlist_query = ".//NODE[@Type='1']"
    return root.findall(playlist_query)

#### Example

In [29]:
# Parse the collection and list the Name/Type of every playlist node found.
# NOTE: `playlist_list_xml` is reused by later cells and is the default value
# of map_track__to_playlist's first parameter, so this cell must run first.
root = parse_xml(XML)
playlist_list_xml = get_playlist_list(root)
for elt in playlist_list_xml:
    print(f'Name: {elt.attrib["Name"]} - Type: {elt.attrib["Type"]}')

Name: enzo - Type: 1
Name: quentin - Type: 1
Name: #Horse - Type: 1
Name: #Hard grove - Type: 1
Name: #Edits - Type: 1
Name: #Acid - Type: 1
Name: #Show off - Type: 1
Name: #8.6 - Type: 1
Name: #Round Kick - Type: 1
Name: Last - Type: 1
Name: #Dark kick - Type: 1
Name: ALL - Type: 1
Name: #Bonus - Type: 1
Name: 230603 - DL10 - Type: 1
Name: 230607 - DL9 - Type: 1
Name: #3 - Type: 1
Name: #1 - Type: 1
Name: #2 - Type: 1
Name: Show off - Type: 1
Name: DL7 - Type: 1
Name: DL8 - Type: 1
Name: DL9 - Type: 1
Name: show off - Type: 1
Name: Gang - Type: 1
Name: Psy - Type: 1
Name: Show off - Type: 1
Name: Hard groovy - Type: 1
Name: 8.6. - Type: 1
Name: Rumble - Type: 1
Name: Loud bass - Type: 1
Name: Dark indus - Type: 1
Name: Trippy - Type: 1
Name: Groovy - Type: 1
Name: Acid - Type: 1
Name: Hardest Kicks - Type: 1
Name: Jump up - Type: 1
Name: Fresh Edit - Type: 1
Name: w - DL7 - Type: 1
Name: w - DL6 - Type: 1
Name: w - DL5 - Type: 1
Name: w - decembre - Type: 1
Name: w - even fresher - Ty

### 4. Function to build parent path

In [30]:
def build_path(playlist,target_dir="D:\\Music\\rekordbox"):
    """Build the backslash-separated target path of a playlist node.

    The path is ``target_dir``, followed by the ``Name`` of every ancestor
    folder (outermost first), followed by the playlist's own ``Name``.

    Parameters
    ----------
    playlist : element
        Node exposing ``attrib`` and ``getparent()`` (as lxml elements do).
    target_dir : str, optional
        Root directory the playlist tree is placed under.

    Returns
    -------
    str
        The joined path, using ``\\`` as separator.

    Raises
    ------
    KeyError
        If ``playlist`` itself has no ``Name`` attribute.
    """
    ancestor_names = []
    parent = playlist.getparent()
    # Walk upwards until the ROOT node. Also stop at the top of the tree or at
    # an ancestor without a Name, instead of crashing when no ROOT node exists.
    while parent is not None:
        parent_name = parent.attrib.get("Name")
        if parent_name is None or parent_name == "ROOT":
            break
        ancestor_names.append(parent_name)
        parent = parent.getparent()
    # Ancestors were collected innermost-first; the path needs outermost-first.
    parts = [target_dir, *reversed(ancestor_names), playlist.attrib["Name"]]
    return "\\".join(parts)

#### Example

In [31]:
# Print the path computed for every playlist, using build_path's default target_dir.
for elt in playlist_list_xml:
    print(build_path(elt))

D:\Music\rekordbox\Friends\enzo
D:\Music\rekordbox\Friends\quentin
D:\Music\rekordbox\Prepa 2\#Horse
D:\Music\rekordbox\Prepa 2\#Hard grove
D:\Music\rekordbox\Prepa 2\#Edits
D:\Music\rekordbox\Prepa 2\#Acid
D:\Music\rekordbox\Prepa 2\#Show off
D:\Music\rekordbox\Prepa 2\#8.6
D:\Music\rekordbox\Prepa 2\#Round Kick
D:\Music\rekordbox\Prepa 2\Last
D:\Music\rekordbox\Prepa 2\#Dark kick
D:\Music\rekordbox\Prepa 2\ALL
D:\Music\rekordbox\Prepa\#Bonus
D:\Music\rekordbox\Prepa\NO_AC\230603 - DL10
D:\Music\rekordbox\Prepa\NO_AC\230607 - DL9
D:\Music\rekordbox\Prepa\#3
D:\Music\rekordbox\Prepa\#1
D:\Music\rekordbox\Prepa\#2
D:\Music\rekordbox\Prepa\Show off
D:\Music\rekordbox\SNCD_DL\DL7
D:\Music\rekordbox\SNCD_DL\DL8
D:\Music\rekordbox\SNCD_DL\DL9
D:\Music\rekordbox\XNIHILO 2.0\show off
D:\Music\rekordbox\XNIHILO 2.0\Gang
D:\Music\rekordbox\XNIHILO 2.0\Psy
D:\Music\rekordbox\XNIHILO 2.0\Show off
D:\Music\rekordbox\XNIHILO 2.0\Hard groovy
D:\Music\rekordbox\XNIHILO 2.0\8.6.
D:\Music\rekordbox\XNI

### 5. Function to retrieve track list in playlist

In [32]:
def get_track_list(playlist):
    """Return the ``Key`` attribute of every descendant ``TRACK`` of ``playlist``.

    The keys are returned as a list in document order; the list is empty when
    the node contains no ``TRACK`` element.
    """
    return [entry.attrib['Key'] for entry in playlist.findall(".//TRACK")]

#### Example

In [33]:
# Show the track keys of the first playlist returned by get_playlist_list.
playlist_test = playlist_list_xml[0]
print(f'{playlist_test.attrib["Name"]} : {get_track_list(playlist_test)}')

enzo : ['207766732', '73599259', '107465617', '66182544', '70411483', '55260287', '148191793', '156828077', '24086362', '194814603', '75015666', '169948308', '94781532', '29398023', '108997333', '232667420', '28469334', '146291230', '151516801', '193178300', '118297766', '140687338', '190089288', '69501904', '236757548', '140358346', '37013430', '177733895', '122140062', '57259146', '138200796', '97075925', '40669832', '188298450', '93987710', '24791543', '40338347', '134919275', '59521151', '41673453', '57136971', '54497722', '179642980', '168527551', '140358690', '268416782', '218665157', '22933226', '123333544', '106437197', '238467685', '145555263', '124081145', '255129761', '4287047', '56352298', '45751072', '62742308', '19587952', '28279532', '104472907', '44842882']


### Build df with full playlists and track ids

In [34]:
def map_track__to_playlist(playlist_list = playlist_list_xml, target_dir="D:\\Music\\rekordbox"):
    """Build a table with one row per (playlist, track) pair.

    Parameters
    ----------
    playlist_list : iterable of elements, optional
        Playlist nodes to process. NOTE: the default is the value the global
        ``playlist_list_xml`` had when this function was defined, not its
        current value, so pass the list explicitly where possible.
    target_dir : str, optional
        Root directory forwarded to ``build_path``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Playlist`` (playlist name), ``Path`` (result of
        ``build_path``) and ``TrackID`` (track key from ``get_track_list``).
        The three columns are present even when there are no rows, so the
        frame can always be merged on ``TrackID``.
    """
    records = []
    for playlist in playlist_list:
        playlist_name = playlist.attrib["Name"]
        playlist_path = build_path(playlist, target_dir)
        for track_id in get_track_list(playlist):
            records.append({"Playlist": playlist_name, "Path": playlist_path, "TrackID": track_id})

    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=["Playlist", "Path", "TrackID"])

#### Example

In [35]:
# Build the playlist/track table with "E:\\rekordbox" as target root and print it.
print(map_track__to_playlist(playlist_list_xml,target_dir="E:\\rekordbox"))

     Playlist                       Path    TrackID
0        enzo  E:\rekordbox\Friends\enzo  207766732
1        enzo  E:\rekordbox\Friends\enzo   73599259
2        enzo  E:\rekordbox\Friends\enzo  107465617
3        enzo  E:\rekordbox\Friends\enzo   66182544
4        enzo  E:\rekordbox\Friends\enzo   70411483
...       ...                        ...        ...
1945   bitchy        E:\rekordbox\bitchy  134264416
1946   bitchy        E:\rekordbox\bitchy  227228989
1947   bitchy        E:\rekordbox\bitchy  172996369
1948   bitchy        E:\rekordbox\bitchy    6124521
1949   bitchy        E:\rekordbox\bitchy  230426679

[1950 rows x 3 columns]


### 6. Function to retrieve full track list

In [167]:
def get_tracks(xml_file, source_filter="D:/Music"):
    """Load the tracks of a collection XML whose location contains ``source_filter``.

    Parameters
    ----------
    xml_file : str
        Path of the XML file to parse.
    source_filter : str, optional
        Substring (with forward slashes) that a track location must contain to
        be kept. Defaults to ``"D:/Music"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``TrackID``, ``Name`` and ``Location``. ``Location`` has the
        ``file://localhost/`` prefix removed, ``/`` replaced by ``\\`` and
        percent-escapes decoded by ``replace_hex_codes``. The columns are
        present even when no track matches.
    """
    url_prefix = 'file://localhost/'
    root = etree.parse(xml_file).getroot()

    records = []
    for track in root.xpath("//COLLECTION/TRACK"):
        location = track.attrib['Location']
        # Strip the prefix only when it is really there, so an unprefixed
        # location does not lose its first characters.
        if location.startswith(url_prefix):
            location = location[len(url_prefix):]
        # The filter is applied to the still-encoded, forward-slash form.
        if source_filter in location:
            records.append({
                'TrackID': track.attrib['TrackID'],
                'Name': track.attrib['Name'],
                'Location': replace_hex_codes(location.replace("/", "\\")),
            })

    # Explicit columns avoid a KeyError on 'Location' when nothing matched.
    return pd.DataFrame(records, columns=['TrackID', 'Name', 'Location'])

#### Example

In [37]:
# Print the filtered track table built from the collection file referenced by XML.
print(get_tracks(XML))

        TrackID                                               Name  \
0     120200886                                     03-Gang-Bounce   
1     256154136                   Born This Way Session by ADZ 3.0   
2      34816252                                          The Purge   
3     188701841            AC-DC - Thunderstruck -Pawlowski Remix-   
4     244533221                                            Pandora   
...         ...                                                ...   
1439   42294505              Vizionn - Future Energy [D0p-ziv6MXE]   
1440   77722571                       War On My Mind [vx80_FNKWr8]   
1441  156339765  WHØMAN - Anxiety [BLCKCHN026] (Premiere) [1Fnk...   
1442  249104348                             Yacumama [9JAq4dkkl-E]   
1443  196325268             You May Want To Sit Down [E6O_DkPvZvs]   

                                               Location  
0     D:\Music\Rekordbox\Contents\UnknownArtist\Unkn...  
1     D:\Music\Rekordbox\Contents\UnknownAr

### 7. Map Playlist composition and full track list

In [170]:
def merge_tracks_and_playlists(tracks, playlists,mode="excel"):
    """Join tracks with their playlists and compute each copy destination.

    ``tracks`` and ``playlists`` are inner-joined on ``TrackID``. A
    ``Target_Path`` column is added: the playlist ``Path``, a backslash, and
    the last backslash-separated component of ``Location``.

    When ``mode`` is ``"excel"`` the result is also written to
    ``output/playlist_structure_<timestamp>.xlsx`` and a log line is printed.
    Any other ``mode`` skips the export.

    Returns the merged DataFrame.
    """
    merged = tracks.merge(playlists, on='TrackID', how='inner')
    file_names = merged["Location"].str.split("\\").str[-1]
    merged["Target_Path"] = merged["Path"] + "\\" + file_names

    if mode != "excel":
        return merged

    init_output_dir()
    workbook_path = os.path.join("output", f"playlist_structure_{ts_str()}.xlsx")
    merged.to_excel(workbook_path)
    print(f"{ts_log()}: Playlist structure saved in {workbook_path}")
    return merged

## IV. Target Tree Structure Creation and Track Copy to Target Location

### 1. Create Target tree structure

In [149]:
def create_structure(target_path_column):
    """Create the parent directory of every target path.

    Parameters
    ----------
    target_path_column : pandas.DataFrame or iterable of str
        Either a DataFrame with a ``Target_Path`` column, or the paths
        themselves (e.g. that column as a Series).

    Notes
    -----
    Each distinct parent directory is handled once. A message is printed for
    every directory that did not exist before the call. Paths without a
    directory component are skipped.
    """
    if isinstance(target_path_column, pd.DataFrame):
        paths = target_path_column["Target_Path"]
    else:
        paths = target_path_column

    handled = set()
    for path in paths:
        directory = os.path.dirname(path)
        # Skip bare file names and directories already processed in this call.
        if not directory or directory in handled:
            continue
        handled.add(directory)
        if not os.path.exists(directory):
            os.makedirs(directory, exist_ok=True)
            print(f'Path: {directory} created')

#### Example

In [171]:
# Build the full (track, playlist, target path) table for the E:\rekordbox target.
# NOTE: merge_tracks_and_playlists runs with its default mode="excel", so this
# cell also writes an Excel file under output/.
result_playlist_df = map_track__to_playlist(playlist_list_xml,target_dir="E:\\rekordbox")
track_df = get_tracks(XML)
merged_df = merge_tracks_and_playlists(track_df,result_playlist_df).drop_duplicates()

print(merged_df.head())

# Dry run: list the target directories that do not exist yet, without creating them.
for index, row in merged_df.iterrows():
    if not os.path.exists(os.path.dirname(row["Target_Path"])):
        print(os.path.dirname(row["Target_Path"])) # real run: os.makedirs(os.path.dirname(row["Target_Path"])) (os.path has no rootname)

2023-12-24 17:15:48: Playlist structure saved in output\playlist_structure_20231224_171547.xlsx
     TrackID                              Name  \
0  120200886                    03-Gang-Bounce   
1  120200886                    03-Gang-Bounce   
2  120200886                    03-Gang-Bounce   
3  256154136  Born This Way Session by ADZ 3.0   
4  256154136  Born This Way Session by ADZ 3.0   

                                            Location   Playlist  \
0  D:\Music\Rekordbox\Contents\UnknownArtist\Unkn...   show off   
1  D:\Music\Rekordbox\Contents\UnknownArtist\Unkn...   Show off   
2  D:\Music\Rekordbox\Contents\UnknownArtist\Unkn...  w - fresh   
3  D:\Music\Rekordbox\Contents\UnknownArtist\Unkn...   show off   
4  D:\Music\Rekordbox\Contents\UnknownArtist\Unkn...   Show off   

                                Path  \
0  E:\rekordbox\XNIHILO 2.0\show off   
1  E:\rekordbox\XNIHILO 2.0\Show off   
2      E:\rekordbox\Techno\w - fresh   
3  E:\rekordbox\XNIHILO 2.0\show off   


### 2. Copy tracks to target locations

In [118]:
def copie_tracks(merged_df,mode = "test"):
    """Copy every track to its target path, or only report what would be copied.

    Parameters
    ----------
    merged_df : pandas.DataFrame
        Frame with the columns ``Name``, ``Location`` (source file) and
        ``Target_Path`` (destination file). Any index is accepted.
    mode : str, optional
        ``"test"`` or ``"excel"`` only prints the planned copy (dry run).
        Any other value performs the copy with ``shutil.copy``.

    Notes
    -----
    A header is printed whenever the track name differs from the previous
    row's. Rows whose target file already exists are reported and skipped. In
    copy mode the target directory is created first if it is missing.
    """
    previous_name = None
    is_first_row = True
    for _, row in merged_df.iterrows():
        # Compare with the previously iterated row rather than with
        # iloc[index - 1]: index labels are not positions once rows have been
        # dropped (e.g. after drop_duplicates()).
        if is_first_row or row["Name"] != previous_name:
            print("\n"+"_"*50+"\n")
            print(f"Track {row['Name']} is being copied: \n")
        is_first_row = False
        previous_name = row["Name"]

        if os.path.exists(row["Target_Path"]):
            print(f'Track {row["Name"]} already exists in {row["Target_Path"]}')
        elif mode == "test" or mode == "excel":
            print(f'{ts_log()}: Copied from '+Fore.RED+f'{row["Location"]}'+Style.RESET_ALL +'\nTo '+Fore.RED+f'{row["Target_Path"]}'+Style.RESET_ALL)
        else:
            # shutil.copy does not create missing parent directories.
            target_dir = os.path.dirname(row["Target_Path"])
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            shutil.copy(row["Location"], row["Target_Path"])
        

#### Example

In [101]:
# Dry run: in "test" mode the planned copies are only printed; no file is copied.
copie_tracks(merged_df,"test")


__________________________________________________

Track 03-Gang-Bounce is being copied: 

Copied from [31mD:\Music\Rekordbox\Contents\UnknownArtist\UnknownAlbum\03-Gang-Bounce.wav[0m
To [31mE:\rekordbox\XNIHILO 2.0\show off\03-Gang-Bounce\03-Gang-Bounce.wav[0m
Copied from [31mD:\Music\Rekordbox\Contents\UnknownArtist\UnknownAlbum\03-Gang-Bounce.wav[0m
To [31mE:\rekordbox\XNIHILO 2.0\Show off\03-Gang-Bounce\03-Gang-Bounce.wav[0m
Copied from [31mD:\Music\Rekordbox\Contents\UnknownArtist\UnknownAlbum\03-Gang-Bounce.wav[0m
To [31mE:\rekordbox\Techno\w - fresh\03-Gang-Bounce\03-Gang-Bounce.wav[0m

__________________________________________________

Track Born This Way Session by ADZ 3.0 is being copied: 

Copied from [31mD:\Music\Rekordbox\Contents\UnknownArtist\UnknownAlbum\Born%20This%20Way%20Session%20by%20ADZ%203.0.wav[0m
To [31mE:\rekordbox\XNIHILO 2.0\show off\Born This Way Session by ADZ 3.0\Born%20This%20Way%20Session%20by%20ADZ%203.0.wav[0m
Copied from [31mD:\Mu