### This notebook is intended to show how the datasets are modified to unify the format

# MDBDrums dataset

from : https://github.com/CarlSouthall/MDBDrums/

To use the data directly from the repo, the file names need to be converted to URLs.

In [1]:
import os

from github import Github
from scripts.utils import MusicDelta_filenameList_to_urlList
import urllib

# Authenticate against the GitHub API with a personal access token read from the
# GITHUB_TOKEN environment variable, so the secret is never stored in the notebook.
# Each user creates their own token on the GitHub website:
# Settings -> Developer settings -> Personal access tokens.
# NOTE: any token that was ever committed in this cell must be treated as leaked and revoked.
github_token = os.environ.get("GITHUB_TOKEN")
# Without a token the client is created anonymously (subject to GitHub's much lower
# rate limit for unauthenticated requests).
g = Github(github_token) if github_token else Github()

# repo info
repo = g.get_repo("CarlSouthall/MDBDrums")
# Base URL of the raw file contents on the master branch of the same repository.
repo_url = "https://raw.githubusercontent.com/CarlSouthall/MDBDrums/master/"


In [2]:
# Walk the whole repository tree, starting from its root listing, and sort the
# file paths of interest into two lists.
contents = repo.get_contents("")
drum_files, annotation_files = [], []

while contents:
    entry = contents.pop(0)

    if entry.type == "dir":
        # Directories are not recorded themselves; their children are queued instead.
        contents.extend(repo.get_contents(entry.path))
        continue

    path = entry.path
    if "drum_only" in path:
        drum_files.append(path)
    elif "annotations/class" in path:
        annotation_files.append(path)

# Hand each path list to the project helper together with its target .txt file.
# presumably the helper prefixes every path with repo_url and writes the result
# to the given file - confirm against scripts.utils.
for list_path, repo_paths in (
    ('data/MDB-Drums/drum_files.txt', drum_files),
    ('data/MDB-Drums/annotation_files.txt', annotation_files),
):
    MusicDelta_filenameList_to_urlList(list_path, repo_paths, repo_url)

# IDMT-SMT-DRUMS-V2 Dataset

from: https://www.idmt.fraunhofer.de/en/business_units/m2d/smt/drums.html

This dataset is only available via download; a copy can be found in this repo.

To adapt the annotations to the MDBDrums ones, the .xml files need to be rewritten in .txt format, keeping the formatting of the MDBDrums dataset.

In [1]:
import xmltodict
import os
from scripts.utils import get_files_in_dir 

# Root folder of the IDMT-SMT-DRUMS-V2 dataset, relative to the notebook's working directory.
database_dir = 'data/IDMT-SMT-DRUMS-V2/'
# Folder that receives the converted .txt annotations.
txt_anotation_dir = os.path.join(database_dir, 'annotation_txt/')
# Folder holding the dataset's .xml annotations.
xml_annotation_dir = os.path.join(database_dir, 'annotation_xml/')

# presumably a list of the file paths found in the XML annotation folder - confirm
# against get_files_in_dir in scripts.utils.
xml_files = get_files_in_dir(xml_annotation_dir)

In [2]:
# Convert every IDMT XML annotation into a tab-separated .txt file with one
# "<onset>\t\t<instrument>" line per event.

# Create the output folder (and any missing parents); no error if it already exists.
os.makedirs(txt_anotation_dir, exist_ok=True)

for file_path in xml_files:
    # <anything>/<name>.xml -> <txt_anotation_dir>/<name>.txt
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    txt_file_path = os.path.join(txt_anotation_dir, base_name + '.txt')

    # Parse first, so a malformed XML file does not leave an empty .txt behind.
    with open(file_path) as fd:
        doc = xmltodict.parse(fd.read())

    events = doc['instrumentRecording']['transcription']['event']
    # xmltodict yields a single dict (not a list) when the transcription holds
    # exactly one <event>; normalise so the loop below always sees a list.
    if not isinstance(events, list):
        events = [events]

    # The context manager closes the output file even if an event is missing a key.
    with open(txt_file_path, 'w') as new_file:
        for event in events:
            # Only the first 7 characters of the onset time are kept.
            new_file.write(str(event['onsetSec'])[:7] + '\t\t' + str(event['instrument']) + '\n')
        

# MusicSchool jsonDataset

To adapt the annotations to the MDBDrums ones, the .json files need to be rewritten in .txt format, with time steps instead of musical notation, keeping the formatting of the MDBDrums dataset so that the audio processing functions can be reused.


In [13]:
from scripts.drums import *
import json
from scripts.utils import get_files_in_dir 
import os

# Maps the note names found in the JSON annotations to the labels written to the .txt files.
notes_dict = {
    'F4':'KD',
    'B4':'impro',
    'A4':'FT',
    'C5':'SD',
    'D5':'MT',
    'E5':'HT',
    'F5':'CR',
    'G5':'HH',
    'A5':'CY',
    'X':'REST',
    'Y':'note',
    'N':'diff'
}
folder = "data/Test_student"
json_files = get_files_in_dir(folder)
# Manual override to convert a single file instead of the whole folder:
#json_files = ['data/routine.json']
txt_folder = "data/Test_student/"

for file in json_files:
    # The .txt output is written into the same folder that is listed as input, so
    # on a re-run the listing may contain previously generated .txt files.
    # Skip anything that is not a JSON annotation.
    if not file.lower().endswith('.json'):
        continue

    # <anything>/<name>.json -> <txt_folder>/<name>.txt
    base_name = os.path.splitext(os.path.basename(file))[0]
    txt_file_path = os.path.join(txt_folder, base_name + '.txt')
    print(txt_file_path)

    with open(file) as f:
        annotations = json.load(f)

    notes, timesteps, beats = get_time_steps_from_annotations(annotations)

    # The context manager closes the output file even if a note name is unknown.
    with open(txt_file_path, 'w') as new_file:
        for i, item in enumerate(notes):
            # An item may hold several '.'-separated note names; each is mapped
            # through notes_dict and the labels are joined with '+'.
            new = '+'.join(notes_dict[name] for name in item.split('.'))
            # Only the first 7 characters of the time step are kept.
            new_file.write(str(timesteps[i])[:7] +'\t\t' + new + '\n')
    

data/Test_student/test_ex1_100.txt
data/Test_student/test_ex1_140.txt
data/Test_student/test_ex1_180.txt
data/Test_student/test_ex1_220.txt
data/Test_student/test_ex1_60.txt
data/Test_student/test_ex2_100.txt
data/Test_student/test_ex2_60.txt


In [8]:
import numpy as np
# Evenly spaced values from 0 up to (but excluding) 30 with a step of 60/300 = 0.2.
# NOTE(review): looks like a scratch check of a time grid (60 s / 300 over 30 s) - confirm intent.
np.arange(0,30,60/300)

array([ 0. ,  0.2,  0.4,  0.6,  0.8,  1. ,  1.2,  1.4,  1.6,  1.8,  2. ,
        2.2,  2.4,  2.6,  2.8,  3. ,  3.2,  3.4,  3.6,  3.8,  4. ,  4.2,
        4.4,  4.6,  4.8,  5. ,  5.2,  5.4,  5.6,  5.8,  6. ,  6.2,  6.4,
        6.6,  6.8,  7. ,  7.2,  7.4,  7.6,  7.8,  8. ,  8.2,  8.4,  8.6,
        8.8,  9. ,  9.2,  9.4,  9.6,  9.8, 10. , 10.2, 10.4, 10.6, 10.8,
       11. , 11.2, 11.4, 11.6, 11.8, 12. , 12.2, 12.4, 12.6, 12.8, 13. ,
       13.2, 13.4, 13.6, 13.8, 14. , 14.2, 14.4, 14.6, 14.8, 15. , 15.2,
       15.4, 15.6, 15.8, 16. , 16.2, 16.4, 16.6, 16.8, 17. , 17.2, 17.4,
       17.6, 17.8, 18. , 18.2, 18.4, 18.6, 18.8, 19. , 19.2, 19.4, 19.6,
       19.8, 20. , 20.2, 20.4, 20.6, 20.8, 21. , 21.2, 21.4, 21.6, 21.8,
       22. , 22.2, 22.4, 22.6, 22.8, 23. , 23.2, 23.4, 23.6, 23.8, 24. ,
       24.2, 24.4, 24.6, 24.8, 25. , 25.2, 25.4, 25.6, 25.8, 26. , 26.2,
       26.4, 26.6, 26.8, 27. , 27.2, 27.4, 27.6, 27.8, 28. , 28.2, 28.4,
       28.6, 28.8, 29. , 29.2, 29.4, 29.6, 29.8])

In [3]:
# Number of values from 0 up to (but excluding) 30 with a step of 60/100 = 0.6.
len(np.arange(0,30,60/100))

50