In [1]:
import os
import shutil
from helper_functions import get_file_and_dirnames

# Source directory holding the raw sets and target directory for the
# midi-only copies of them.
PATH_RAW = "../data/0_raw_sets"
PATH_MIDI = "../data/1_midi_sets"

# Make sure both data directories exist. exist_ok=True turns the call into a
# no-op for a directory that is already there, so no separate
# check-then-create step is needed; a non-directory at that path raises
# instead of being silently accepted.
paths = [PATH_RAW, PATH_MIDI]
for path in paths:
    os.makedirs(path, exist_ok=True)

In [2]:
def get_all_nested_midis(p):
    """
    Get a list of all midi file paths in a given directory, including all of
    its nested sub-directories.

    The tree is walked with an explicit stack of pending sub-directories, so
    no list is modified while it is being iterated over.

    :param p: path of directory
    :return: list of midi file paths relative to ``p``; nested levels are
        joined with "/". Only names ending in ".mid" or ".midi"
        (case-sensitive) are returned. The order of the entries follows the
        traversal and should not be relied upon.
    """
    # get_file_and_dirnames is used as returning (file names, directory names)
    # for a single directory level -- inferred from how its result is joined
    # onto the parent path below.
    top_files, top_dirs = get_file_and_dirnames(p)
    # Work on copies so the helper's return values are never mutated.
    files = list(top_files)
    pending = list(top_dirs)
    while pending:
        d = pending.pop()
        nest_files, nest_dirs = get_file_and_dirnames(f"{p}/{d}")
        files.extend(f"{d}/{n}" for n in nest_files)
        # Queue nested directories with their path relative to p.
        pending.extend(f"{d}/{n}" for n in nest_dirs)
    return [f for f in files if f.endswith((".mid", ".midi"))]

In [3]:
# Collect the directory names found in the raw sets path. They are ordered as
# plain strings, which is why e.g. "10_..." is listed before "1_...".
_, dirs = get_file_and_dirnames(PATH_RAW)
dirs = sorted(dirs)
dirs

['10_classic_midi_cont_melodies',
 '11_classic_piano_vienna',
 '12_emotion_classification',
 '13_NES_music',
 '14_national_anthems',
 '15_baroque',
 '16_classic_piano',
 '17_POP909-Dataset-master',
 '1_lakh_full',
 '2_lakh_subset',
 '3_lofi_hiphop',
 '4_Jazz_incl_csv',
 '6_anime_music',
 '7_Pop',
 '8_maestro-v3.0.0',
 '9_classic_midi_data']

In [4]:
# copy midi files from raw sets to midi only sets
def _unique_destination(directory, filename):
    """
    Return a path inside ``directory`` for ``filename`` that does not exist yet.

    A free name is used unchanged. Otherwise "_2", "_3", ... is inserted
    between the stem and the extension until an unused name is found. The
    candidate is always rebuilt from the original stem, so the counter may
    grow to any number of digits and the original extension (".mid" or
    ".midi") is preserved.

    :param directory: target directory
    :param filename: desired file name (without directory part)
    :return: path of a not yet existing file in ``directory``
    """
    stem, ext = os.path.splitext(filename)
    candidate = f"{directory}/{filename}"
    i = 2
    while os.path.exists(candidate):
        candidate = f"{directory}/{stem}_{i}{ext}"
        i += 1
    return candidate


for set_name in dirs:
    raw_dir = f"{PATH_RAW}/{set_name}"
    midi_dir = f"{PATH_MIDI}/{set_name}"
    print(midi_dir)
    # Create the target directory, or delete the files a previous run left in
    # it so that re-running the cell does not pile up numbered duplicates.
    # Only the files reported by get_file_and_dirnames are removed;
    # sub-directories are left untouched.
    if not os.path.exists(midi_dir):
        os.makedirs(midi_dir)
    else:
        stale_files, _subdirs = get_file_and_dirnames(midi_dir)
        for stale_file in stale_files:
            os.remove(f"{midi_dir}/{stale_file}")
    # The nested raw set is flattened into one directory, so equal file names
    # from different sub-directories get a numeric suffix.
    files = get_all_nested_midis(raw_dir)
    for rel_path in files:
        filename = rel_path.split("/")[-1]
        shutil.copy2(f"{raw_dir}/{rel_path}", _unique_destination(midi_dir, filename))
    # Print number of source files and number of files now in the target;
    # both numbers should match.
    files_new = get_all_nested_midis(midi_dir)
    print(len(files))
    print(len(files_new))

../data/1_midi_sets/10_classic_midi_cont_melodies
29
29
../data/1_midi_sets/11_classic_piano_vienna
112
112
../data/1_midi_sets/12_emotion_classification
196
196
../data/1_midi_sets/13_NES_music
5278
5278
../data/1_midi_sets/14_national_anthems
342
342
../data/1_midi_sets/15_baroque
1114
1114
../data/1_midi_sets/16_classic_piano
292
292
../data/1_midi_sets/17_POP909-Dataset-master
2898
2898
../data/1_midi_sets/1_lakh_full
178561
178561
../data/1_midi_sets/2_lakh_subset
87
87
../data/1_midi_sets/3_lofi_hiphop
93
93
../data/1_midi_sets/4_Jazz_incl_csv
934
934
../data/1_midi_sets/6_anime_music
240
240
../data/1_midi_sets/7_Pop
50
50
../data/1_midi_sets/8_maestro-v3.0.0
1276
1276
../data/1_midi_sets/9_classic_midi_data
92
92


In [5]:
# Dataset 17: delete the versioned copies of the songs and print how many
# midi files remain afterwards.
dir_17 = f"{PATH_MIDI}/17_POP909-Dataset-master"
files = get_all_nested_midis(dir_17)
# NOTE(review): a file counts as "versioned" when its relative path contains
# the letter "v" anywhere -- presumably safe because the remaining names in
# this dataset contain no other "v"; confirm before reusing on another set.
versioned = [name for name in files if "v" in name]
for name in versioned:
    os.remove(f"{dir_17}/{name}")
print(len(get_all_nested_midis(dir_17)))

909
