### Create CSV from midi sources

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import json
import music21
import glob
import os
from tqdm import tqdm
from IPython.display import Image, Audio
import traceback

In [3]:
# parallel
import concurrent
from concurrent.futures import ProcessPoolExecutor
from functools import partial
from fastprogress.fastprogress import master_bar, progress_bar
from fastprogress.fastprogress import MasterBar, ProgressBar

In [4]:
from midi_data import *

In [5]:
from fastai.data_block import get_files

In [6]:
path = Path('data/midi')
orig_path = path/'midi_sources'
metapath = path/'metadata'
metapath.mkdir(parents=True, exist_ok=True)

In [7]:
directories = [x for x in path.iterdir() if x.is_dir()]; directories

[PosixPath('data/midi/metadata'),
 PosixPath('data/midi/midi_sources'),
 PosixPath('data/midi/transposed')]

## General stream parsing

In [8]:
def get_music21_attr(fp, transpose=True, offset=None):
    try:
        stream = music21.converter.parse(fp)
    except Exception as e:
        print('Could not parse stream', fp, e)
#         traceback.print_exc()
        return {}
    stream_attr = get_stream_attr(stream)
    if transpose:
        transposed_file = transpose_midi2c(fp, stream, halfsteps=offset)
        transposed_stream = music21.converter.parse(fp)
        t_key = transposed_stream.flat.analyze('key')
        transposed_attr = {
            'inferred_keyc': f'{t_key.tonic.name} {t_key.mode}',
            'midi_keyc': str(transposed_file),
        }
        stream_attr = {**stream_attr, **transposed_attr}
    return stream_attr 

In [9]:
def get_stream_attr(s):
    instruments = [i.instrumentName for i in list(s.getInstruments(recurse=True)) if i.instrumentName]
    metronome = list(filter(lambda x: isinstance(x, music21.tempo.MetronomeMark), s.flat))[0]
    bpm = metronome.getQuarterBPM()
    s_flat = s.flat
    key = s_flat.analyze('key')
    time_sig = s_flat.timeSignature.ratioString if hasattr(s_flat.timeSignature, 'ratioString') else None
    return {
        'instruments': instruments,
        'bpm': bpm,
        'inferred_key': f'{key.tonic.name} {key.mode}',
        'seconds': s_flat.seconds,
        'time_signature': time_sig,
    }

In [10]:
def process_parallel(func, arr, key_func=None, max_workers=None):
    "Maps function to an array of objects. Key can me modified by passing a key_func"
    results = {}
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        futures = [ex.submit(func,o) for i,o in enumerate(arr)]
        for f in progress_bar(concurrent.futures.as_completed(futures), total=len(arr)):
            k,v = f.result()
            results[k] = v
    return results

In [40]:
def parse_songs(data):
    fp = data.get('file_path')
    metadata = data.get('metadata', {})
    offset = data.get('offset', None)
    attr = {}
    try: attr = get_music21_attr(fp, offset=offset)
    except Exception as e: print('Midi Exeption:', fp, e)
    return str(fp), {**metadata, **attr}

In [12]:
def parse_midi_dir(files, out_path, meta_func, limit=None, recurse=True):
    file2metadata = load_json(out_path)
    if file2metadata is None: file2metadata = {}
        
    if limit: files = files[:limit]
    files = [meta_func(fp) for fp in files if str(fp) not in file2metadata]
    
    parsed = process_parallel(parse_songs, files)
    file2metadata.update(parsed)
    
    json.dump(file2metadata, open(out_path, 'w'))
    
    return file2metadata

In [13]:
import csv
import pandas as pd

def format_values(d):
    def format_value(v):
        if isinstance(v, list): return ','.join(v)
        return v
    return {k:format_value(v) for k,v in d.items()}

def arr2csv(arr, out_file):
    all_keys = {k for d in arr for k in d.keys()}
    arr = [format_values(x) for x in arr]
    with open(out_file, 'w') as f:
        dict_writer = csv.DictWriter(f, list(all_keys))
        dict_writer.writeheader()
        dict_writer.writerows(arr)

### Hooktheory

In [14]:
def create_paths(dirname):
    dir_path = orig_path/dirname
    json_path = metapath/f'{dirname}_metadata.json'
    csv_path = metapath/f'{dirname}_metadata.csv'
    return dir_path, json_path, csv_path

In [15]:
ht_path, ht_json, ht_csv = create_paths('hooktheory')
ht_song_list = metapath/'hooktheory_key2info.json'

In [16]:
def song_key(s): return '_'.join(s.parts[-3:-1])

In [17]:
ht_key2info = load_json(ht_song_list)

if ht_key2info is None:
    song_info = list((ht_path/'xml').glob('*/*/*/*.json'))
    ht_key2info = {song_key(s):json.load(open(s, 'r')) for s in song_info}
    save_json(ht_key2info, ht_song_list)
    len(song_info)

In [18]:
song_json = list((ht_path/'event').glob('*/*/*/*_key.json')); len(song_json) # using json instead of midi for metadata

19876

In [19]:
def get_hooktheory_attr(fp):
    song_info = ht_key2info[song_key(fp)]
    song_json = json.load(open(fp, 'r'))
    metadata = song_json['metadata']
    artist = fp.parts[-3]
    title = fp.parts[-2]
    section = fp.name.split('_')[0]
    midi_path = str(fp.with_suffix('.mid')).replace('event', 'pianoroll').replace('symbol_', '')
    
    # convert stream here
    metadata = {
        'artist': artist,
        'section': section,
        'original_path': midi_path,
        'parts': song_info['section'],
        'song_url': song_info['song_url'],
        'genres': song_info['genres'],
        'midi_title': metadata['title'],
        'source': 'hooktheory',
        'ht_bpm': metadata['BPM'],
        'ht_mode': metadata['mode'],
        'ht_key': metadata['key'],
        'ht_time_signature': metadata['beats_in_measure']
    }
    mode = metadata['ht_mode']
    if mode is None:
        print('No mode found. Assuming cmajor', fp)
        mode = 'major'
    offset = keyc_offset(metadata['ht_key'], mode)
    return {
        'file_path': midi_path, # midi path not json path
        'metadata': metadata,
        'offset': offset
    }

In [20]:
# sanity check
# hook_out = get_hooktheory_attr(song_json[1000], key2info); hook_out

In [21]:
ht_metadata = parse_midi_dir(song_json, ht_json, meta_func=get_hooktheory_attr)

No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/w/willie-nelson/blue-eyes-cryin/chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/w/weezer/beverly-hills/intro-and-verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/w/wolfgang-amadeus-mozart/symphony-no-25-in-g-minor/intro-and-verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/w/woody-guthrie/this-land-is-your-land/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/w/ween/roses-are-free/chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/j/junior-boys/first-time/chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/j/johnny-cash/folsom-prison-blues/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/j/johnny-cash/i-walk-the-lin

No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/d/david-bowie/ziggy-stardust/intro_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/d/david-bowie/fantastic-voyage/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/s/stephen-malkmus/baby-cmon/intro_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/s/star-control-ii-soundtrack/hyperspace/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/s/star-control-ii-soundtrack/hyperspace/intro_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/s/sam-cook/wonderful-world/intro_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/s/sara-bareilles/love-song/chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/s/sara-bareilles/love-song/verse_symbol_key.json
No mod

No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/taichi-mukai/reset-%2528kaze-ga-tsuyoku-fuiteiru%2529/intro_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/taichi-mukai/reset/_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/the-waifs/london-still/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/the-four-seasons/sherry/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/the-cure/just-like-heaven/solo_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/tom-hanks/big-soundtrack/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/the-ronettes/be-my-baby/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/t/the-ronettes/be-my-baby/pre-chorus_symbol_key.json
No mode found. Assumin

No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/r-kelly/i-believe-i-can-fly/chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/rihanna/where-have-you-been/intro_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/rihanna/love-the-way-you-lie-part-2/verse_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/rihanna/love-the-way-you-lie-part-2/pre-chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/rihanna/unfaithful/pre-chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/rawnald-gregory-erickson-the-second/starfucker/intro_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/radiohead/no-surprises/verse-and-pre-chorus_symbol_key.json
No mode found. Assuming cmajor data/midi/midi_sources/hooktheory/event/r/radiohead/where-i-e

Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/w/weezer/beverly-hills/intro-and-verse_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/w/wolfgang-amadeus-mozart/symphony-no-25-in-g-minor/intro-and-verse_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/y/ylvis/the-fox---what-does-the-fox-say/verse-and-pre-chorus_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/j/jay-z/so-ambitious-feat-pharrel-williams/intro_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/j/johnny-cash/folsom-prison-blues/verse_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/j/jack-johnson/do-you-remember/intro-and-verse_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/j/justin-moore/if-heaven-wasnt-so-f

Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/t/the-waifs/london-still/verse_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/t/take-us-back/alela-diane/verse_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/t/take-us-back/alela-diane/verse-and-pre-chorus_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/t/the-rasmus/in-the-shadows/intro_key.mid list index out of range
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/verse_key.mid File not found or no such format found for: data/midi/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/verse_key.mid
Could not parse stream data/midi/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/intro_key.mid File not found or no such format found for: data/midi/midi_sources/hooktheory/pianoroll/t/tame-impala/piano

#### Save song_list

In [22]:
ht_metadata = load_json(ht_json)
arr2csv(ht_metadata.values(), ht_csv)
df = pd.read_csv(ht_csv); df.head()

Unnamed: 0,ht_bpm,inferred_key,section,ht_key,inferred_keyc,source,parts,artist,genres,ht_time_signature,midi_keyc,midi_title,song_url,ht_mode,instruments,seconds,time_signature,bpm,original_path
0,128,C major,chorus,C,C major,hooktheory,"intro,chorus",wayne-sharpe,,4,data/midi/transposed/hooktheory/pianoroll/w/wa...,yu-gi-oh3,https://www.hooktheory.com/theorytab/view/wayn...,1.0,"Piano,Piano",15.0,4/4,128.0,data/midi/hooktheory/pianoroll/w/wayne-sharpe/...
1,112,A major,bridge,E,A major,hooktheory,"verse,bridge",willie-nelson,,4,data/midi/transposed/hooktheory/pianoroll/w/wi...,On The Road Again,https://www.hooktheory.com/theorytab/view/will...,1.0,Piano,15.0,4/4,112.0,data/midi/hooktheory/pianoroll/w/willie-nelson...
2,112,E major,verse,E,E major,hooktheory,"verse,bridge",willie-nelson,,4,data/midi/transposed/hooktheory/pianoroll/w/wi...,On The Road Again,https://www.hooktheory.com/theorytab/view/will...,1.0,Piano,15.0,4/4,112.0,data/midi/hooktheory/pianoroll/w/willie-nelson...
3,58,E major,chorus,E,E major,hooktheory,chorus,willie-nelson,,4,data/midi/transposed/hooktheory/pianoroll/w/wi...,Blue Eyes Cryin,https://www.hooktheory.com/theorytab/view/will...,,"Piano,Piano",28.965517,4/4,58.0,data/midi/hooktheory/pianoroll/w/willie-nelson...
4,108,C# major,intro,Db,C# major,hooktheory,"intro,verse,chorus",wham,Holiday,4,data/midi/transposed/hooktheory/pianoroll/w/wh...,Last Christmas Intro,https://www.hooktheory.com/theorytab/view/wham...,1.0,"Piano,Piano",17.777778,4/4,108.0,data/midi/hooktheory/pianoroll/w/wham/last-chr...


## FreeMidi

In [31]:
fm_path, _, fm_csv = create_paths('freemidi')
fm_dance_path = metapath/f'freemidi_dance_metadata.json'
fm_pop_path = metapath/f'freemidi_pop_metadata.json'
list(fm_path.glob('*'))

[PosixPath('data/midi/midi_sources/freemidi/genre-disco'),
 PosixPath('data/midi/midi_sources/freemidi/genre-pop'),
 PosixPath('data/midi/midi_sources/freemidi/genre-dance-eletric'),
 PosixPath('data/midi/midi_sources/freemidi/genre-punk'),
 PosixPath('data/midi/midi_sources/freemidi/genre-hip-hop-rap'),
 PosixPath('data/midi/midi_sources/freemidi/genre-rock')]

In [32]:
def parse_freemidi_songs(fp, genre=None, source=None):
    name = fp.with_suffix('').name.split(' - ')
    artist = name[0]
    title = name[-1]
    metadata = {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': str(fp),
        'genre': genre,
        'source': source
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [30]:
d_parse_func = partial(parse_freemidi_songs, genre='dance', source='freemidi')
dir_path = fm_path/'genre-dance-eletric'
file_list = get_files(dir_path, extensions=['.mid'], recurse=True)
fm_dance_list = parse_midi_dir(file_list, fm_dance_path, meta_func=d_parse_func)

In [26]:
p_parse_func = partial(parse_freemidi_songs, genre='pop', source='freemidi')
dir_path = fm_path/'genre-pop'
file_list = get_files(dir_path, extensions=['.mid'], recurse=True)
fm_pop_list = parse_midi_dir(file_list, fm_pop_path, meta_func=p_parse_func)

In [27]:
fmd = load_json(fm_dance_path)
fmp = load_json(fm_pop_path)
fm_all = list(fmd.values())+list(fmp.values())
arr2csv(fm_all, fm_csv_path)
df = pd.read_csv(fm_csv_path); df.head()

EmptyDataError: No columns to parse from file

### Gather Cprato

In [41]:
cp_path, cp_json, cp_csv = create_paths('cprato')
list(cp_path.glob('*'))[:5]

[PosixPath('data/midi/midi_sources/cprato/Basto - Again And Again (midi By Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/The Weeknd ft. Lana Del Rey - Stargirl Interlude  (midi by Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/Two Steps From Hell - Magic of Love  (midi by Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/Bermuda Loverz - My Girl (Ladidada) (Rimini Rockaz Radio Edit) (Midi By Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/midi_sources/cprato/Cascada - Everytime We Touch (Midi By Carlo Prato) (www.cprato.com).mid')]

In [44]:
def parse_cprato_songs(fp, genre=None, source=None):
    name = fp.with_suffix('').name.split(' - ')
    artist = name[0]
    title = name[-1].replace('(midi by Carlo Prato) (www.cprato.com)', '')
    metadata = {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': str(fp),
        'genre': genre,
        'source': source
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [45]:
cp_meta = partial(parse_cprato_songs, genre='EDM,inferred', source='cprato')
file_list = get_files(cp_path, extensions=['.mid'], recurse=True)
cp_md = parse_midi_dir(file_list, cp_json, cp_meta)

#### CSV

In [47]:
cp = load_json(cp_json)
arr2csv(cp.values(), cp_csv)
df = pd.read_csv(cp_csv); df.head()

Unnamed: 0,inferred_key,title,genre,inferred_keyc,artist,midi,time_signature,bpm,source,instruments,seconds,midi_keyc
0,F minor,Morning Dew (Original Mix),"EDM,inferred",F minor,Nora En Pure,data/midi/midi_sources/cprato/Nora En Pure - M...,4/4,122.0,cprato,"Piano,Piano,Piano,Piano,Piano,Piano",51.147541,data/midi/midi_sources/cprato/Nora En Pure - M...
1,G major,Lost Boy,"EDM,inferred",G major,Ruth B,data/midi/midi_sources/cprato/Ruth B - Lost Bo...,4/4,124.0,cprato,"Piano,Piano,Piano,Flute,Flute,Flute,Piano,Pian...",85.16129,data/midi/midi_sources/cprato/Ruth B - Lost Bo...
2,C# minor,Sun Models,"EDM,inferred",C# minor,ODESZA feat. Madelyn Grant,data/midi/midi_sources/cprato/ODESZA feat. Mad...,4/4,120.0,cprato,,34.0,data/midi/midi_sources/cprato/ODESZA feat. Mad...
3,F minor,Remember Magnetic Nord,"EDM,inferred",F minor,BT vs. Sasha,data/midi/midi_sources/cprato/BT vs. Sasha - R...,4/4,134.0,cprato,,59.104478,data/midi/midi_sources/cprato/BT vs. Sasha - R...
4,E- minor,Back 2 U,"EDM,inferred",E- minor,Soundflower,data/midi/midi_sources/cprato/Soundflower - Ba...,,140.0,cprato,,32.571429,data/midi/midi_sources/cprato/Soundflower - Ba...


### Gather MidiWorld

In [74]:
mw_path, mw_json, mw_csv = create_paths('midiworld')
list(mw_path.glob('*'))[:5]

[PosixPath('data/midi/midi_sources/midiworld/named_midi'),
 PosixPath('data/midi/midi_sources/midiworld/unknown_midi')]

In [91]:
def parse_midiworld_songs(fp):
    name = fp.with_suffix('').name.replace('_', ' ').split(' - ')
    artist = name[0]
    title = name[-1]
    metadata = {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': str(fp),
        'genre': 'pop,inferred',
        'source': 'midiworld'
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [94]:
file_list = get_files(mw_path/'named_midi', extensions=['.mid'], recurse=True)
mw_md = parse_midi_dir(file_list, mw_json, parse_midiworld_songs)

















































































Could not parse stream ../data/midi/midiworld_scrape/named_midi/Bomfunk_MCs_-_Uprocking_Beats.mid badly formated midi bytes, got: b'RIFF\xb8\xa7\x00\x00RMIDdata\x04\xa7\x00\x00'
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Tyrian_Mumford_-_Untitled.mid list index out of range




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Polly.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Pennyroyal_Tea.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Radio_Friendly_Unit_Shifter.mid badly formed midi string: missing leading MTrk




Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Papua_New_Guinea.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Veruca_Salt_-_Volcana_Girls.mid list index out of range




Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Australia.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Bomfunk_MCs_-_B-Boys_And_Fly-Girls.mid badly formated midi bytes, got: b'RIFF^\xd8\x00\x00RMIDdata\xa1\xd7\x00\x00'
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nightwish_-_Feel_For_You.mid badly formated midi bytes, got: b'RIFFr\x9a\x00\x00RMIDdatae\x9a\x00\x00'


KeyboardInterrupt: 

Midi Exeption: ../data/midi/midiworld_scrape/named_midi/98_Degrees_-_The_Hardest_Thing.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Leo_Sayer_-_You_make_me_feel_like_Dancn.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Firefall_-_Just_Remember_I_Love_you.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Portugal.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Slovenia.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Rod_Stewart_-_Do_Ya_Think_I'm_Sexy.mid cannot get a seconds durat



Could not parse stream ../data/midi/midiworld_scrape/named_midi/un_debut_au_piano_-_my_first_composition.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Led_Zeppelin_-_Celebration_Day.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/A-Teens_-_Super_Trouper.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Dave_Mathews_-_Lie_In_Our_Graves.mid list index out of range
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Downer.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Itowngameplay_-_bonnies_song.mid badly formated midi bytes, got: b'ID3\x04\x00\x00\x00\x00\x01\x13TXXX\x00\x00\x00\x12\x00\x00'
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Drain_You.mid cannot handle ticks per frame: 77
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Territo



Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Nine_Inch_Nails_-_Sin.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Negative_Creep.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Rod_Stewart_-_Someone_Like_You.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/ZZ_Top_-_Rough_Boy.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Deep_Purple_-_Deep_Cascade.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/rihanna_-_.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Netherlands.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Frances_Farmer_Will_Have_Her_Revenge_On_Seattle.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/KC_and_The_Sunshine_Band_-_Shake_Your_Booty.mid list index out of range




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nightwish_-_Lagoon.mid badly formated midi bytes, got: b'RIFF(h\x00\x00RMIDdata\x1bh\x00\x00'




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Love_Buzz.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/The_Beatles_-_I_Wanna_Be_Your_Man.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.








Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Bulgaria.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Michael_Jackson_-_Heal_The_World.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Tal_Bachman_-_Shes_So_High.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Germany.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Scentless_Apprentice.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Nick_Gilder_-_Hot_Child_in_the_City.mid list index out of range
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Tourette's.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Swap_Meet.mid badly formed midi string: missing leading MTrk




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Turnaround.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/chinami_-_Unfinished.mid list index out of range




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_All_Apologies.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Pakistan.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Smells_Like_Teen_Spirit.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Barbados.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/John_Paul_Young_-_Love_is_in_the_Air.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Sisqo_-_The_Thong_Song.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Paul_Simon_-_Mother_and_Child_Reunion.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/one_direction_-_Night_Changes.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Metallica_-_Until_It_Sleeps.mid cannot g



Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Argentina.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Gabi_Fratucello_-_.mid badly formated midi bytes, got: b'ID3\x04\x00\x00\x00\x00\x01\x00TXXX\x00\x00\x00\x12\x00\x00'






Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Albania.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/The_Offspring_-_No_Hero.mid badly formated midi bytes, got: b'RIFF\x04N\x01\x00RMIDdata\xf7M\x01\x00'




Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Armenia.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Son_Of_A_Gun.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/me__-_test.mid list index out of range




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Dumb.mid index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Spain.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Mr._Moustache.mid badly formed midi string: missing leading MTrk




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Lounge_Act.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Third_Eye_Blind_-_Semi_Charmed_Life.mid badly formated midi bytes, got: b'RIFFV\x0c\x01\x00RMIDdataI\x0c\x01\x00'
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Arun_Maitra_-_mayer-payer-jaba.mid list index out of range
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_On_A_Plain.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Heart-Shaped_Box.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Nintendo_-_Mii_Channel_Song.mid list index out of range
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Garbage_-_Vow.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Mexican_Seafood.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Brunei.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Tori_Amos_-_Smells_Like_Teen_Spirit.mid list index out of range


















































































Midi Exeption: ../data/midi/midiworld_scrape/named_midi/Robert_John_-_Sad_Eyes.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Been_A_Son.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Aneurysm.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/After_School_Shampoo_-_After_School_Shampoo.mid list index out of range
































































































































Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Dive.mid badly formed midi string: missing leading MTrk
Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Sliver.mid badly formed midi string: missing leading MTrk




Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Denmark.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




Could not parse stream ../data/midi/midiworld_scrape/named_midi/Nirvana_-_Milk_It.mid badly formed midi string: missing leading MTrk
Midi Exeption: ../data/midi/midiworld_scrape/named_midi/National_Anthems_-_Japan.mid cannot get a seconds duration when no TempoIndication classes are found in or before this Stream.




In [None]:
mw = load_json(mw_json)
arr2csv(mw.values(), mw_csv)
df = pd.read_csv(mw_csv); df.head()

### Yamaha - piano

In [52]:
ec_path, ec_json, ec_csv = create_paths('ecomp')
ec_song = ec_path/'song_list.json'
list(ec_path.glob('*'))[:5]

[PosixPath('data/midi/midi_sources/ecomp/song_list.json'),
 PosixPath('data/midi/midi_sources/ecomp/2017'),
 PosixPath('data/midi/midi_sources/ecomp/2008'),
 PosixPath('data/midi/midi_sources/ecomp/2006'),
 PosixPath('data/midi/midi_sources/ecomp/2004')]

In [53]:
ec_songs = load_json(ec_path/'song_list.json')

In [57]:
def parse_ecomp_songs(fp):
    song_info = ec_songs[fp.name]
    metadata = {
        'artist': song_info['artist'],
        'title': song_info['title'],
        'midi': str(fp),
        'genre': 'classical',
        'source': 'ecomp'
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [59]:
file_list = get_files(ec_path, extensions=['.mid'], recurse=True)
ec_md = parse_midi_dir(file_list, ec_json, parse_ecomp_songs)

In [69]:
ec = load_json(ec_json)
arr2csv(ec.values(), ec_csv)
df = pd.read_csv(ec_csv); df.head()

Unnamed: 0,inferred_key,title,genre,inferred_keyc,artist,midi,time_signature,bpm,source,instruments,seconds,midi_keyc
0,E major,"Prelude and Fugue in E Major, WTC I, BWV 854",classical,E major,Johann Sebastian Bach,data/midi/midi_sources/ecomp/2017/MiyashitaM01...,4/4,117.0,ecomp,Piano,150.299145,data/midi/midi_sources/ecomp/2017/MiyashitaM01...
1,F# major,Etude Op. 10 No. 5 in G-flat Major,classical,F# major,Frédéric Chopin,data/midi/midi_sources/ecomp/2017/ZhangW03.MID,4/4,117.0,ecomp,Piano,98.162393,data/midi/midi_sources/ecomp/2017/ZhangW03.MID
2,C# minor,Etude Op. 10 No. 4 in C-sharp Minor,classical,C# minor,Frédéric Chopin,data/midi/midi_sources/ecomp/2017/LeeSH03.MID,4/4,117.0,ecomp,Piano,114.871795,data/midi/midi_sources/ecomp/2017/LeeSH03.MID
3,G major,"Sonata No. 16 Op. 31 No. 1 in G Major, I. A...",classical,G major,Ludwig van Beethoven,data/midi/midi_sources/ecomp/2017/BuiJL02.MID,4/4,117.0,ecomp,Piano,280.897436,data/midi/midi_sources/ecomp/2017/BuiJL02.MID
4,E major,Carmen Variations,classical,E major,G. Bizet/V. Horowitz,data/midi/midi_sources/ecomp/2017/MiyashitaM05...,4/4,117.0,ecomp,Piano,230.982906,data/midi/midi_sources/ecomp/2017/MiyashitaM05...


### Classic Piano

In [62]:
clc_path, clc_json, clc_csv = create_paths('classic_piano')
list(clc_path.glob('*'))[:5]

[PosixPath('data/midi/midi_sources/classic_piano/clementi_opus36_2_2_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/mz_333_2_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/haydn_7_1_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/mendel_op30_4_format0.mid'),
 PosixPath('data/midi/midi_sources/classic_piano/liz_et_trans4_format0.mid')]

In [70]:
def parse_classic_songs(fp):
    name = fp.with_suffix('').name.split('_')
    artist = name[0]
    title = ' '.join(name[1:])
    metadata = {
        'artist': artist,
        'title': title,
        'midi': str(fp),
        'genre': 'classical',
        'source': 'classical_piano'
    }
    return {
        'file_path': fp,
        'metadata': metadata
    }

In [71]:
file_list = get_files(clc_path, extensions=['.mid'], recurse=True)
clc_md = parse_midi_dir(file_list, clc_json, parse_classic_songs)

In [72]:
clc = load_json(clc_json)
arr2csv(clc.values(), clc_csv)
df = pd.read_csv(clc_csv); df.head()

Unnamed: 0,inferred_key,title,genre,inferred_keyc,artist,midi,time_signature,bpm,source,instruments,seconds,midi_keyc
0,C major,opus36 2 2 format0,classical,C major,clementi,data/midi/midi_sources/classic_piano/clementi_...,3/4,76.39,classical_piano,"Piano,Piano,Piano",66.018987,data/midi/midi_sources/classic_piano/clementi_...
1,C major,7 1 format0,classical,C major,haydn,data/midi/midi_sources/classic_piano/haydn_7_1...,2/4,88.87,classical_piano,"Piano,Piano,Piano",63.082423,data/midi/midi_sources/classic_piano/haydn_7_1...
2,E minor,im5 format0,classical,E minor,br,data/midi/midi_sources/classic_piano/br_im5_fo...,6/8,80.0,classical_piano,"Piano,Piano,Piano",171.965667,data/midi/midi_sources/classic_piano/br_im5_fo...
3,C# major,format0,classical,C# major,schumm-6,data/midi/midi_sources/classic_piano/schumm-6_...,3/4,128.98,classical_piano,"Piano,Piano",311.398207,data/midi/midi_sources/classic_piano/schumm-6_...
4,B minor,op30 4 format0,classical,B minor,mendel,data/midi/midi_sources/classic_piano/mendel_op...,3/8,102.39,classical_piano,"Piano,Piano,Piano",160.097002,data/midi/midi_sources/classic_piano/mendel_op...


### Creating CSV

In [75]:
all_csvs = [ht_csv, fm_csv, cp_csv, mw_csv, ec_csv, clc_csv]