### Create CSV from midi sources

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import glob
import os
from tqdm import tqdm
from IPython.display import Image, Audio
import traceback

In [3]:
# parallel
from functools import partial
from pathlib import Path

In [4]:
from data_sources import *
from midi_data import *

In [5]:
version = 'v4'
data_path = Path('data/midi')
version_path = data_path/version
orig_path = version_path/'midi_sources'
metapath = version_path/'metadata'
all_csv = metapath/'midi_sources.csv'
metapath.mkdir(parents=True, exist_ok=True)

In [6]:
def create_paths(dirname):
    "Standardize midi_source paths"
    dir_path = orig_path/dirname
    json_path = metapath/f'{dirname}_metadata.json'
    csv_path = metapath/f'{dirname}_metadata.csv'
    return dir_path, json_path, csv_path

In [7]:
sources = ['hooktheory', 'freemidi', 'midiworld', 'ecomp', 'cprato', 'classic_piano', 'wikifonia']

In [8]:
version_path.relative_to(data_path)

PosixPath('v4')

In [9]:
def relative_path(filepath):
    return str(Path(filepath).relative_to(version_path))

### Remove corrupted file - this causes deadlock with music21 processing

In [10]:
corrupted_files = [
    'midiworld/named_midi/NITRO_BRO_-_IT_WONT_DIE.mid',
    'ecomp/2004/MORET02.mid',
    'ecomp/2006/Mordvinov9.MID',
    'ecomp/2006/Na06.MID',
    'ecomp/2008/Cui01.MID',
    'ecomp/2008/Cui02.MID',
    'ecomp/2008/Cui03.MID',
    'ecomp/2008/Cui04.MID',
    'ecomp/2008/Cui05.MID',
    'ecomp/2008/Cui06.MID',
    'ecomp/2008/Cui07.MID',
    'ecomp/2008/Cui08.MID',
    'ecomp/2008/Tan01.MID',
    'ecomp/2008/Tan02.MID',
    'ecomp/2008/Tan03.MID',
    'ecomp/2018/KaszoS14.MID'
]
for f in corrupted_files:
    fp = orig_path/f
    if fp.exists(): fp.unlink()

### Hooktheory

In [11]:
ht_path, ht_json, ht_csv = create_paths('hooktheory')
ht_song_list = metapath/'hooktheory_key2info.json'

In [12]:
def song_key(s): return '_'.join(s.parts[-3:-1])

In [13]:
ht_song_list

PosixPath('data/midi/v4/metadata/hooktheory_key2info.json')

In [14]:
ht_key2info = load_json(ht_song_list)

if ht_key2info is None:
    song_info = list((ht_path/'xml').glob('*/*/*/*.json'))
    ht_key2info = {song_key(s):json.load(open(s, 'r')) for s in song_info}
    save_json(ht_key2info, ht_song_list)
len(ht_key2info)

11873

In [15]:
song_json = list((ht_path/'event').glob('*/*/*/*_key.json')); len(song_json) # using json instead of midi for metadata

19876

In [16]:
def get_ht_midifile(json_file):
    return str(json_file.with_suffix('.mid')).replace('event', 'pianoroll').replace('symbol_', '')

In [17]:
def get_hooktheory_attr(fp):
    song_info = ht_key2info[song_key(fp)]
    song_json = json.load(open(fp, 'r'))
    metadata = song_json['metadata']
    artist = fp.parts[-3]
    title = fp.parts[-2]
    section = fp.name.split('_')[0]
    midi_path = get_ht_midifile(fp)
    
    ht_key = metadata['key']
    ht_mode = metadata['mode']
    if ht_mode is None: ht_mode = 'major'
    ht_offset = keyc_offset(ht_key, ht_mode)
    
    # convert stream here
    return {
        'artist': artist,
        'title': title,
        'midi': relative_path(midi_path),
        'section': section,
        'parts': song_info['section'],
        'song_url': song_info['song_url'],
        'genres': song_info['genres'],
        'midi_title': metadata['title'],
        'source': 'hooktheory',
        'ht_bpm': metadata['BPM'],
        'ht_mode': metadata['mode'],
        'ht_key': metadata['key'],
        'ht_offset': ht_offset,
        'ht_time_signature': metadata['beats_in_measure']
    }

In [18]:
# sanity check
# hook_out = get_hooktheory_attr(song_json[1000]); hook_out

In [19]:
ht_metadata = parse_midi_dir(song_json, ht_json, base_path=version_path, 
                             meta_func=get_hooktheory_attr)

Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/w/weezer/beverly-hills/intro-and-verse_key.mid list index out of range


Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/w/wolfgang-amadeus-mozart/symphony-no-25-in-g-minor/intro-and-verse_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/y/ylvis/the-fox---what-does-the-fox-say/verse-and-pre-chorus_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/j/jay-z/so-ambitious-feat-pharrel-williams/intro_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/j/johnny-cash/folsom-prison-blues/verse_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/j/jack-johnson/do-you-remember/intro-and-verse_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/j/justin-moore/if-heaven-wasnt-so-far-awat/intro-and-verse_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/j/janis-ian/at-spianorolleen/verse_key.mid Cannot 

Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/pre-chorus-and-chorus_key.mid Cannot find file in data/midi/v4/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/pre-chorus-and-chorus_key.mid
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/verse_key.mid Cannot find file in data/midi/v4/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/verse_key.mid
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/intro_key.mid Cannot find file in data/midi/v4/midi_sources/hooktheory/pianoroll/t/tame-impala/pianorollually/intro_key.mid
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/t/the-beatles/drive-my-car/verse_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/t/the-beatles/the-end/solo_key.mid list index out of range
Midi Exeption: data/midi/v4/midi_sources/hooktheory/pianoroll/k/katy-pary/f

#### Save song_list

In [22]:
ht_metadata = load_json(ht_json)
arr2csv(ht_metadata.values(), ht_csv)
df = pd.read_csv(ht_csv); df.head()

Unnamed: 0,source,midi,genres,artist,bpm,parts,instruments,time_signature,section,seconds,...,inferred_offset,quarter_length,ht_time_signature,ht_bpm,ht_offset,ht_mode,title,midi_title,inferred_key,song_url
0,hooktheory,midi_sources/hooktheory/pianoroll/w/wayne-shar...,,wayne-sharpe,128.0,"intro,chorus","Piano,Piano",4/4,chorus,15.0,...,0.0,32.0,4,128,0,1.0,yu-gi-oh-theme-song,yu-gi-oh3,C major,https://www.hooktheory.com/theorytab/view/wayn...
1,hooktheory,midi_sources/hooktheory/pianoroll/w/wayne-shar...,,wayne-sharpe,85.0,"intro,chorus",Piano,3/4,intro,25.411765,...,0.0,36.0,3,85,0,1.0,yu-gi-oh-theme-song,yu-gi-oh,C major,https://www.hooktheory.com/theorytab/view/wayn...
2,hooktheory,midi_sources/hooktheory/pianoroll/w/what-a-day...,Jazz,what-a-day,96.0,chorus,"Piano,Piano",4/4,chorus,10.0,...,0.0,16.0,4,96,-5,6.0,kiefer,kiefer,A minor,https://www.hooktheory.com/theorytab/view/what...
3,hooktheory,midi_sources/hooktheory/pianoroll/w/weebl/donk...,,weebl,140.0,"intro,verse",Piano,4/4,intro,13.714286,...,6.0,32.0,4,140,1,1.0,donkeys,Donkeys Intro,F# major,https://www.hooktheory.com/theorytab/view/weeb...
4,hooktheory,midi_sources/hooktheory/pianoroll/w/wolfgang-g...,,wolfgang-gartner,240.0,intro,"Piano,Piano",4/4,intro,8.0,...,-2.0,32.0,4,240,-2,6.0,undertaker,undertaker,B minor,https://www.hooktheory.com/theorytab/view/wolf...


In [23]:
df.shape

(19876, 21)

## FreeMidi

In [25]:
fm_path, _, fm_csv = create_paths('freemidi')
fm_dance_path = metapath/f'freemidi_dance_metadata.json'
fm_pop_path = metapath/f'freemidi_pop_metadata.json'
list(fm_path.glob('*'))

[PosixPath('data/midi/v4/midi_sources/freemidi/genre-disco'),
 PosixPath('data/midi/v4/midi_sources/freemidi/genre-pop'),
 PosixPath('data/midi/v4/midi_sources/freemidi/genre-dance-eletric'),
 PosixPath('data/midi/v4/midi_sources/freemidi/genre-punk'),
 PosixPath('data/midi/v4/midi_sources/freemidi/genre-hip-hop-rap'),
 PosixPath('data/midi/v4/midi_sources/freemidi/genre-rock')]

In [26]:
def parse_freemidi_songs(fp, genres=None, source=None):
    name = fp.with_suffix('').name.split(' - ')
    artist = name[0]
    title = name[-1]
    return {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': relative_path(fp),
        'genres': genres,
        'source': source
    }

In [28]:
d_parse_func = partial(parse_freemidi_songs, genres='dance', source='freemidi')
dir_path = fm_path/'genre-dance-eletric'
file_list = get_files(dir_path, extensions=['.mid'], recurse=True)
fm_dance_list = parse_midi_dir(file_list, fm_dance_path,
                               base_path=version_path, meta_func=d_parse_func)

Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - I'm Not Givin' You Up.mid badly formated midi bytes, got: b'RIFFB\x8c\x00\x00RMIDdata~\x8b\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-dance-eletric/Atomic Kitten - Whole Again.mid badly formated midi bytes, got: b'RIFF\x08K\x00\x00RMIDdata{J\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-dance-eletric/Apollo 440 - Lost In Space.mid index out of range
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-dance-eletric/Tatu - Ya Tvoy Vrag (I'm Your Enemy).mid badly formated midi bytes, got: b'RIFF,\xa3\x00\x00RMIDdata\xc6\xa2\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Get On Your Feet.mid badly formated midi bytes, got: b'RIFF\x92\xa8\x01\x00RMIDdata\xd8\xa7\x01\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-dance-eletric/Gloria Estefan - Mi Tierra.mid badly formated midi bytes, got: b'RIFFz\r\x

In [29]:
p_parse_func = partial(parse_freemidi_songs, genres='pop', source='freemidi')
dir_path = fm_path/'genre-pop'
file_list = get_files(dir_path, extensions=['.mid'], recurse=True)
fm_pop_list = parse_midi_dir(file_list, fm_pop_path,
                             base_path=version_path, meta_func=p_parse_func)

Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Cyndi Lauper - Whats Going On.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Cyndi Lauper - Who Let In The Rain.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/5th Dimension - One Less Bell To Answere.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Cyndi Lauper - The World Is Stone.mid badly formated midi bytes, got: b'0&\xb2u\x8ef\xcf\x11\xa6\xd9\x00\xaa\x00b\xcel?\x13\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Janet Jackson - Escapade.mid badly formated midi bytes, got: b'error with file'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Cyndi Lauper - Change Of Heart.mid badly formated midi bytes, got: b'0&\

Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Aqua - Turn Back Time.mid index out of range
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Modern Talking - Megamix 2000.mid badly formated midi bytes, got: b'RIFFp\x91\x02\x00RMIDdata\xd1\x90\x02\x00'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Donna Summer - Bad Girls.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Britney Spears - I Wanna Go.mid index out of range
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Gloria Estefan - Abriendo Puertas.mid badly formated midi bytes, got: b'error with file'
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Four Seasons - I've Got You Under My Skin.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/v4/midi_sources/freemidi/genre-pop/Wings - Band On The Run.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/v4/midi_sources

In [30]:
fmd = load_json(fm_dance_path)
fmp = load_json(fm_pop_path)
fm_all = list(fmd.values())+list(fmp.values())
arr2csv(fm_all, fm_csv)
df = pd.read_csv(fm_csv); df.head()

Unnamed: 0,inferred_key,instruments,time_signature,source,seconds,genres,midi,title,artist,bpm,quarter_length,inferred_offset
0,,,,freemidi,,dance,midi_sources/freemidi/genre-dance-eletric/Glor...,I'm Not Givin' You Up,Gloria Estefan,,,
1,G minor,"Piano,Piano",4/4,freemidi,85.263158,dance,midi_sources/freemidi/genre-dance-eletric/Radi...,Fitter Happier,Radiohead,76.0,108.0,2.0
2,F major,,,freemidi,28.732394,dance,midi_sources/freemidi/genre-dance-eletric/Tune...,Bounce,Tune Up,142.0,68.0,-5.0
3,C# minor,"StringInstrument,StringInstrument,StringInstru...",4/4,freemidi,40.0,dance,midi_sources/freemidi/genre-dance-eletric/Daft...,The Grid,Daft Punk,102.0,68.0,-4.0
4,A major,"Flute,Flute,Flute,Flute",4/4,freemidi,,dance,midi_sources/freemidi/genre-dance-eletric/Bjor...,Glora,Bjork,65.0,613/6,3.0


### Gather Cprato

In [31]:
cp_path, cp_json, cp_csv = create_paths('cprato')
list(cp_path.glob('*'))[:5]

[PosixPath('data/midi/v4/midi_sources/cprato/Basto - Again And Again (midi By Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/v4/midi_sources/cprato/The Weeknd ft. Lana Del Rey - Stargirl Interlude  (midi by Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/v4/midi_sources/cprato/Two Steps From Hell - Magic of Love  (midi by Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/v4/midi_sources/cprato/Bermuda Loverz - My Girl (Ladidada) (Rimini Rockaz Radio Edit) (Midi By Carlo Prato) (www.cprato.com).mid'),
 PosixPath('data/midi/v4/midi_sources/cprato/Cascada - Everytime We Touch (Midi By Carlo Prato) (www.cprato.com).mid')]

In [32]:
def parse_cprato_songs(fp, genres=None, source=None):
    name = fp.with_suffix('').name.split(' - ')
    artist = name[0]
    title = name[-1].replace('(midi by Carlo Prato) (www.cprato.com)', '')
    return {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': relative_path(fp),
        'genres': genres,
        'source': source
    }

In [33]:
cp_meta = partial(parse_cprato_songs, genres='EDM,inferred', source='cprato')
file_list = get_files(cp_path, extensions=['.mid'], recurse=True)
cp_md = parse_midi_dir(file_list, cp_json, 
                       base_path=version_path, meta_func=cp_meta)

In [34]:
cp = load_json(cp_json)
arr2csv(cp.values(), cp_csv)
df = pd.read_csv(cp_csv); df.head()

Unnamed: 0,inferred_key,instruments,time_signature,source,seconds,genres,midi,title,artist,bpm,quarter_length,inferred_offset
0,F minor,"Piano,Piano,Piano,Piano,Piano,Piano",4/4,cprato,51.147541,"EDM,inferred",midi_sources/cprato/Nora En Pure - Morning Dew...,Morning Dew (Original Mix),Nora En Pure,122.0,104.0,4
1,B- minor,"Fretless Bass,Fretless Bass,Fretless Bass,Pian...",4/4,cprato,35.478261,"EDM,inferred",midi_sources/cprato/deadmau5 - Cat Thruster (...,Cat Thruster,deadmau5,115.0,68.0,-1
2,F major,,,cprato,29.142857,"EDM,inferred",midi_sources/cprato/Basshunter - So Near So Cl...,So Near So Close (Midi By Carlo Prato) (www.cp...,Basshunter,140.0,68.0,-5
3,F major,,,cprato,29.142857,"EDM,inferred",midi_sources/cprato/The Hitmen - Bass Up (mid...,Bass Up,The Hitmen,140.0,68.0,-5
4,A- major,,,cprato,30.857143,"EDM,inferred",midi_sources/cprato/Cascada - Everytime We Tou...,Everytime We Touch (Midi By Carlo Prato) (www....,Cascada,140.0,72.0,4


### Gather MidiWorld

In [35]:
mw_path, mw_json, mw_csv = create_paths('midiworld')

In [36]:
def parse_midiworld_songs(fp):
    name = fp.with_suffix('').name.replace('_', ' ').split(' - ')
    artist = name[0]
    title = name[-1]
    return {
        'artist': artist.strip(),
        'title': title.strip(),
        'midi': relative_path(fp),
        'genres': 'pop,inferred',
        'source': 'midiworld'
    }

In [37]:
file_list = get_files(mw_path/'named_midi', extensions=['.mid'], recurse=True)
mw_md = parse_midi_dir(file_list, mw_json, base_path=version_path, meta_func=parse_midiworld_songs)

Midi Exeption: data/midi/v4/midi_sources/midiworld/named_midi/Bomfunk_MCs_-_Uprocking_Beats.mid badly formated midi bytes, got: b'RIFF\xb8\xa7\x00\x00RMIDdata\x04\xa7\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/midiworld/named_midi/Nirvana_-_Polly.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/v4/midi_sources/midiworld/named_midi/Nirvana_-_Pennyroyal_Tea.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/v4/midi_sources/midiworld/named_midi/Nirvana_-_Radio_Friendly_Unit_Shifter.mid badly formed midi string: missing leading MTrk
Midi Exeption: data/midi/v4/midi_sources/midiworld/named_midi/Bomfunk_MCs_-_B-Boys_And_Fly-Girls.mid badly formated midi bytes, got: b'RIFF^\xd8\x00\x00RMIDdata\xa1\xd7\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/midiworld/named_midi/Nightwish_-_Feel_For_You.mid badly formated midi bytes, got: b'RIFFr\x9a\x00\x00RMIDdatae\x9a\x00\x00'
Midi Exeption: data/midi/v4/midi_sources/midiworld/named_midi/B

In [38]:
mw = load_json(mw_json)
arr2csv(mw.values(), mw_csv)
df = pd.read_csv(mw_csv); df.head()

Unnamed: 0,inferred_key,instruments,time_signature,source,seconds,genres,midi,title,artist,bpm,quarter_length,inferred_offset
0,B minor,Piano,4/4,midiworld,1.193182,"pop,inferred",midi_sources/midiworld/named_midi/Kona_-_Druml...,Drumloop,Kona,176.0,3.5,-2.0
1,D minor,Piano,4/4,midiworld,2.526316,"pop,inferred",midi_sources/midiworld/named_midi/Cameron_Lee_...,Cue,Cameron Lee Simpson,95.0,4.0,-5.0
2,C major,"Marimba,Acoustic Bass,Trombone,StringInstrumen...",4/4,midiworld,19.25,"pop,inferred",midi_sources/midiworld/named_midi/TV_Themes_-_...,Looney Tunes,TV Themes,160.0,154/3,0.0
3,A major,"Timpani,Taiko,Violin,Contrabass,Voice",4/4,midiworld,41.929134,"pop,inferred",midi_sources/midiworld/named_midi/TV_Themes_-_...,Millenium,TV Themes,127.0,88.75,3.0
4,C# minor,Piano,2/4,midiworld,67.916667,"pop,inferred",midi_sources/midiworld/named_midi/Kaito_-_Cant...,Cantarella,Kaito,144.0,163.0,-4.0


### Gather Wikifonia

In [67]:
wf_path, wf_json, wf_csv = create_paths('wikifonia')

In [71]:
def parse_wikifonia_songs(fp):
    name = fp.with_suffix('').name.replace('_', ' ').split(' - ')
    artist = name[0]
    title = name[-1]
    return {
        'artist': artist.strip(),
        'title': title.strip(),
        'mxl': relative_path(fp),
        'genres': 'pop,inferred',
        'source': 'wikifonia'
    }

In [74]:
# Warning: if you get a deadlock: 
# PosixPath('data/midi/midi_sources/midiworld/named_midi/NITRO_BRO_-_IT_WONT_DIE.mid') is broken
file_list = get_files(wf_path, extensions=['.mxl'], recurse=True)
wf_md = parse_midi_dir(file_list, wf_json, base_path=version_path, meta_func=parse_wikifonia_songs)

Midi Exeption: data/midi/v4/midi_sources/wikifonia/Django Reinhardt - Douce Ambiance.mxl local variable 'lyricLanguage' referenced before assignment
Midi Exeption: data/midi/v4/midi_sources/wikifonia/Charlie Parker - Scrapple From The Apple.mxl Degree not in specified chord: 9


Midi Exeption: data/midi/v4/midi_sources/wikifonia/blah3 - blah.mxl failed to get likely keys for Stream component
Midi Exeption: data/midi/v4/midi_sources/wikifonia/Bach ? - Four Part Choralforwikifoniachannel04quant.mxl local variable 'lyricLanguage' referenced before assignment
Midi Exeption: data/midi/v4/midi_sources/wikifonia/Monty Python - Always look on the bright side of kive.mxl got a negative delta time
Midi Exeption: data/midi/v4/midi_sources/wikifonia/Fraser Smith, Adele Adkins - Set Fire To The Rain.mxl float division by zero
Midi Exeption: data/midi/v4/midi_sources/wikifonia/Spencer, James F. Hanley - Mary Is Talking.mxl failed to get likely keys for Stream component
Midi Exeption: data/midi/v4/midi_sources/wikifonia/Maurice Williams - Stay.mxl unsupported operand type(s) for /: 'int' and 'NoneType'
Midi Exeption: data/midi/v4/midi_sources/wikifonia/Gigi Gryce, Jon Hendricks - Social Call.mxl unsupported operand type(s) for /: 'int' and 'NoneType'
Midi Exeption: data/midi

In [75]:
wf = load_json(wf_json)
arr2csv(wf.values(), wf_csv)
df = pd.read_csv(wf_csv); df.head()

Unnamed: 0,inferred_key,instruments,time_signature,source,seconds,genres,title,artist,bpm,quarter_length,inferred_offset,midi,mxl
0,C major,Voice,4/4,wikifonia,,"pop,inferred",Love Is A Simple Thing,"Arthur Siegel, June Carroll",,76.0,0.0,"midi_sources/from_mxl/wikifonia/Arthur Siegel,...","midi_sources/wikifonia/Arthur Siegel, June Car..."
1,C minor,,2/2,wikifonia,,"pop,inferred",Broadway Melody,"Nacio Herb Brown, Arthur Freed",,132.0,-3.0,midi_sources/from_mxl/wikifonia/Nacio Herb Bro...,"midi_sources/wikifonia/Nacio Herb Brown, Arthu..."
2,A minor,,4/4,wikifonia,,"pop,inferred",O Sacred Head Now Wounded,"Hans Leo Hassler, From the Latin",,64.0,0.0,midi_sources/from_mxl/wikifonia/Hans Leo Hassl...,"midi_sources/wikifonia/Hans Leo Hassler, From ..."
3,C major,,4/4,wikifonia,,"pop,inferred",The Rose,Amanda McBroom,,148.0,0.0,midi_sources/from_mxl/wikifonia/Amanda McBroom...,midi_sources/wikifonia/Amanda McBroom - The Ro...
4,C major,Choir Aahs,4/4,wikifonia,,"pop,inferred",Sunny Side Of The Street,Jimmy McHugh,,129.0,0.0,midi_sources/from_mxl/wikifonia/Jimmy McHugh -...,midi_sources/wikifonia/Jimmy McHugh - Sunny Si...


### Yamaha - piano

In [76]:
ec_path, ec_json, ec_csv = create_paths('ecomp')
ec_song = ec_path/'song_list.json'
list(ec_path.glob('*'))[:5]

[PosixPath('data/midi/v4/midi_sources/ecomp/song_list.json'),
 PosixPath('data/midi/v4/midi_sources/ecomp/2017'),
 PosixPath('data/midi/v4/midi_sources/ecomp/2008'),
 PosixPath('data/midi/v4/midi_sources/ecomp/2006'),
 PosixPath('data/midi/v4/midi_sources/ecomp/2004')]

In [77]:
ec_songs = load_json(ec_path/'song_list.json')

In [54]:
def parse_ecomp_songs(fp):
    song_info = ec_songs[fp.name]
    fp = fp.with_suffix('.mxl')
    return {
        'artist': song_info['artist'],
        'title': song_info['title'],
        'mxl': relative_path(fp),
        'genres': 'classical',
        'source': 'ecomp'
    }

In [55]:
file_list = get_files(ec_path, extensions=['.mid'], recurse=True)

In [56]:
sorted_files = sorted([fp.with_suffix('.mxl').stat().st_size/1000 for fp in file_list])[-200:]

In [None]:
file_list = [fp for fp in file_list if fp.with_suffix('.mxl').stat().st_size/1000 < 430] # over 200mb takes crazy long to analyze
ec_md = parse_midi_dir(file_list, ec_json, base_path=version_path, meta_func=parse_ecomp_songs)

In [78]:
def reformat_json(out_path):
    f2m = load_json(out_path)
    m = {relative_path(k):v for k,v in f2m.items()}
    save_json(m, out_path)
    return m

In [87]:
f2m = load_json(ec_json)

In [88]:
f2m

{'data/midi/midi_sources_fromxml_v3/ecomp/2017/SunY06.mid': {'artist': '\xa0',
  'title': 'II. Allegro con moto',
  'mxl': 'data/midi/midi_sources_v3/ecomp/2017/SunY06.mxl',
  'genres': 'classical',
  'source': 'ecomp',
  'instruments': ['Piano', 'Piano'],
  'bpm': 115.0,
  'inferred_key': 'A major',
  'seconds': 123.13043478260869,
  'quarter_length': '236.0',
  'time_signature': '4/4',
  'inferred_offset': 3,
  'midi': 'data/midi/midi_sources_fromxml_v3/ecomp/2017/SunY06.mid'},
 'data/midi/midi_sources_fromxml_v3/ecomp/2017/TuanS10.mid': {'artist': '\xa0',
  'title': 'VI. Allegro (Fuga)',
  'mxl': 'data/midi/midi_sources_v3/ecomp/2017/TuanS10.mxl',
  'genres': 'classical',
  'source': 'ecomp',
  'instruments': ['Piano', 'Piano'],
  'bpm': 200.0,
  'inferred_key': 'F major',
  'seconds': 128.4,
  'quarter_length': '428.0',
  'time_signature': '4/4',
  'inferred_offset': -5,
  'midi': 'data/midi/midi_sources_fromxml_v3/ecomp/2017/TuanS10.mid'},
 'data/midi/midi_sources_fromxml_v3/ecomp

In [84]:
def reformat_key(k):
    return k.replace('data/midi/midi_sources_fromxml_v3','midi_sources/from_mxl')

In [85]:
def reformat_data(d):
    if 'midi' in d: d['midi'] = reformat_key(d['midi'])
    if 'mxl' in d: d['mxl'] = d['mxl'].replace('data/midi/midi_sources_v3', 'midi_sources')

In [89]:
m = {reformat_data(v)['mxl']:reformat_data(v) for k,v in f2m.items()}

TypeError: 'NoneType' object is not subscriptable

In [60]:
ec

{'data/midi/midi_sources_fromxml_v3/ecomp/2017/SunY06.mid': {'artist': '\xa0',
  'title': 'II. Allegro con moto',
  'mxl': 'data/midi/midi_sources_v3/ecomp/2017/SunY06.mxl',
  'genres': 'classical',
  'source': 'ecomp',
  'instruments': ['Piano', 'Piano'],
  'bpm': 115.0,
  'inferred_key': 'A major',
  'seconds': 123.13043478260869,
  'quarter_length': '236.0',
  'time_signature': '4/4',
  'inferred_offset': 3,
  'midi': 'data/midi/midi_sources_fromxml_v3/ecomp/2017/SunY06.mid'},
 'data/midi/midi_sources_fromxml_v3/ecomp/2017/TuanS10.mid': {'artist': '\xa0',
  'title': 'VI. Allegro (Fuga)',
  'mxl': 'data/midi/midi_sources_v3/ecomp/2017/TuanS10.mxl',
  'genres': 'classical',
  'source': 'ecomp',
  'instruments': ['Piano', 'Piano'],
  'bpm': 200.0,
  'inferred_key': 'F major',
  'seconds': 128.4,
  'quarter_length': '428.0',
  'time_signature': '4/4',
  'inferred_offset': -5,
  'midi': 'data/midi/midi_sources_fromxml_v3/ecomp/2017/TuanS10.mid'},
 'data/midi/midi_sources_fromxml_v3/ecomp

In [None]:
ec = load_json(ec_json)
arr2csv(ec.values(), ec_csv)
df = pd.read_csv(ec_csv); df.head()

### Classic Piano

In [None]:
clc_path, clc_json, clc_csv = create_paths('classic_piano')
list(clc_path.glob('*'))[:5]

In [None]:
def parse_classic_songs(fp):
    name = fp.with_suffix('').name.split('_')
    artist = name[0]
    title = ' '.join(name[1:])
    return {
        'artist': artist,
        'title': title,
        'mxl': relative_path(fp),
        'genres': 'classical',
        'source': 'classical_piano'
    }

In [None]:
file_list = get_files(clc_path, extensions=['.mxl'], recurse=True)

In [None]:
file_list = [fp for fp in file_list if fp.stat().st_size/1000 < 350] # over 200mb takes crazy long to analyze

In [None]:
clc_md = parse_midi_dir(file_list, clc_json, base_path=version_path, meta_func=parse_classic_songs)

In [None]:
clc = load_json(clc_json)
arr2csv(clc.values(), clc_csv)
df = pd.read_csv(clc_csv); df.head()

### Creating CSV

In [9]:
all_csvs = [create_paths(s)[-1] for s in sources]
all_dfs = [pd.read_csv(csv) for csv in all_csvs]

In [10]:
merged_df = pd.concat(all_dfs, sort=False)
merged_df = merged_df.reset_index(drop=True); merged_df

Unnamed: 0,source,title,midi_title,inferred_offset,inferred_key,ht_offset,ht_mode,artist,seconds,bpm,section,ht_key,instruments,time_signature,ht_time_signature,ht_bpm,song_url,midi,parts,genres
0,hooktheory,yu-gi-oh-theme-song,yu-gi-oh3,0.0,C major,0.0,1.0,wayne-sharpe,15.000000,128.00,chorus,C,"Piano,Piano",4/4,4.0,128.0,https://www.hooktheory.com/theorytab/view/wayn...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,chorus",
1,hooktheory,beverly-hills,My New Song,,,0.0,,weezer,,,intro-and-verse,C,,,4.0,128.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,intro-and-verse,
2,hooktheory,falling-for-you,falling for you intro,-3.0,E- major,-3.0,1.0,weezer,11.111111,108.00,intro,Eb,"Piano,Piano",4/4,4.0,108.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,verse,chorus,solo",
3,hooktheory,buddy-holly,Buddy Holly,4.0,A- major,4.0,1.0,weezer,43.388430,121.00,solo,Ab,"Piano,Piano",4/4,4.0,121.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"verse,pre-chorus,chorus,bridge,solo","Pop,Rock"
4,hooktheory,dreams-of-grandeur,dreams of grandeur,-2.0,B minor,3.0,1.0,wavves,21.333333,180.00,verse,A,"Piano,Piano",4/4,4.0,180.0,https://www.hooktheory.com/theorytab/view/wavv...,data/midi/midi_sources/hooktheory/pianoroll/w/...,verse,
5,hooktheory,la-girlz,LA Girlz,4.0,A- major,4.0,1.0,weezer,12.203390,118.00,chorus,Ab,"Piano,Piano",3/4,3.0,118.0,https://www.hooktheory.com/theorytab/view/weez...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"verse,chorus",
6,hooktheory,the-freaking-fcc,Freakin FCC Bridge,0.0,C major,0.0,1.0,walter-murphy,17.142857,126.00,bridge,C,"Piano,Piano",4/4,4.0,126.0,https://www.hooktheory.com/theorytab/view/walt...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro-and-verse,bridge",Soundtrack
7,hooktheory,west-wing-suite,snuffy,5.0,G major,5.0,1.0,wg-snuffy-walden,39.069767,86.00,instrumental,G,"Piano,Piano",4/4,4.0,86.0,https://www.hooktheory.com/theorytab/view/wg-s...,data/midi/midi_sources/hooktheory/pianoroll/w/...,instrumental,
8,hooktheory,family-guy-theme-song,Family Guy - Theme Song,-5.0,F major,-5.0,1.0,walter-murphy,32.578125,128.00,verse,F,"Piano,Piano",4/4,4.0,128.0,https://www.hooktheory.com/theorytab/view/walt...,data/midi/midi_sources/hooktheory/pianoroll/w/...,verse,
9,hooktheory,last-christmas,Last Christmas Verse,-1.0,B- minor,-1.0,1.0,wham,35.555556,108.00,verse,Db,"Piano,Piano",4/4,4.0,108.0,https://www.hooktheory.com/theorytab/view/wham...,data/midi/midi_sources/hooktheory/pianoroll/w/...,"intro,verse,chorus",Holiday


In [11]:
[df.shape for df in all_dfs], merged_df.shape

([(19876, 20), (5797, 11), (4715, 11), (2715, 11), (314, 11), (329, 11)],
 (33746, 20))

In [12]:
merged_df.to_csv(all_csv, index=False)