In [1]:
# Reload imported modules automatically when their source files change.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import os

# Remember the directory the kernel started in so this cell is safe to re-run:
# a bare relative `os.chdir('../../../')` climbs three more levels on every
# execution. After a kernel restart NOTEBOOK_DIR is unset and is taken from the
# fresh working directory again.
NOTEBOOK_DIR = globals().get('NOTEBOOK_DIR') or os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, '../../../'))

In [3]:
from musicautobot.utils.file_processing import *
from musicautobot.utils.midifile import *
from musicautobot.numpy_encode import *

In [4]:
import traceback
import time

## Standardize and reformat raw midi files before encoding to text
- Transform key to C major
- Remove unused instruments
- Combine multiple tracks with the same instrument into a single part
- Melody, Piano, String

### Load midi data

In [5]:
# Dataset version tag and the root directory that holds this version's files.
version = 'v20'
data_path = Path('data') / 'midi'
version_path = data_path.joinpath(version)

In [6]:
import pandas as pd

In [7]:
# duet_only is read by transform_midi(); it also selects which output
# directory (relative to version_path) receives the encoded arrays.
duet_only = True
out_dir = Path('piano_duet' if duet_only else 'midi_encode')

In [8]:
source_dir = 'midi_sources'
# Metadata table that is read as input.
source_csv = version_path.joinpath('metadata', f'{source_dir}.csv')
# Metadata table written by this notebook, inside the output directory.
out_csv = (version_path / out_dir).joinpath(f'{out_dir.name}.csv')
out_csv.parent.mkdir(parents=True, exist_ok=True)
source_csv, out_csv

(PosixPath('data/midi/v20/metadata/midi_sources.csv'),
 PosixPath('data/midi/v20/piano_duet/piano_duet.csv'))

In [9]:
# Filter settings forwarded to compress_midi_file() inside transform_midi().
min_variation = 3  # minimum number of different midi notes played
cutoff = 5         # max instruments

### Encoding midi to numpy

In [10]:
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the mixed-type DtypeWarning.
df = pd.read_csv(source_csv, low_memory=False)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,title,midi,source,parts,mxl,ht_time_signature,midi_title,ht_mode,md5,genres,ht_bpm,ht_key,artist,song_url,section
0,yu-gi-oh-theme-song,midi_sources/hooktheory/pianoroll/w/wayne-shar...,hooktheory,"intro,chorus",,4.0,yu-gi-oh3,1.0,bf1f29e5ff84e3e93e37fb873bfb590e,,128.0,C,wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,chorus
1,yu-gi-oh-theme-song,midi_sources/hooktheory/pianoroll/w/wayne-shar...,hooktheory,"intro,chorus",,3.0,yu-gi-oh,1.0,055f80ad67f64edb14a85ca8fbfe8c29,,85.0,C,wayne-sharpe,https://www.hooktheory.com/theorytab/view/wayn...,intro
2,kiefer,midi_sources/hooktheory/pianoroll/w/what-a-day...,hooktheory,chorus,,4.0,kiefer,6.0,197f96f5d181f6ce1e2c5ab04ac1ff87,Jazz,96.0,D,what-a-day,https://www.hooktheory.com/theorytab/view/what...,chorus
3,senbonzakura,midi_sources/hooktheory/pianoroll/w/whiteflame...,hooktheory,"verse,pre-chorus,chorus",,4.0,senbonzakura - pre-Pre-Chorus,6.0,9e7ce13a35f1314423a9a6d5a5287a4a,"J-Pop,Pop",152.0,D,whiteflame,https://www.hooktheory.com/theorytab/view/whit...,pre-chorus
4,senbonzakura,midi_sources/hooktheory/pianoroll/w/whiteflame...,hooktheory,"verse,pre-chorus,chorus",,4.0,Senbonzakura,6.0,d5aaf79d0989222f1362f9f46c540a27,"J-Pop,Pop",152.0,D,whiteflame,https://www.hooktheory.com/theorytab/view/whit...,verse


In [11]:
# One plain dict per metadata row; this list is what gets processed below.
all_records = df.to_dict('records')
len(all_records)

198757

In [12]:
def process_metadata(metadata):
    """Encode the midi file referenced by one metadata record to a `.npy` file.

    Args:
        metadata: dict describing one record. `metadata['midi']` is the midi
            path relative to `version_path`.

    Returns:
        None when the record has no usable midi path (`midi` is missing or is
        not a str). Otherwise a copy of `metadata`. The copy gains a `numpy`
        key (output path relative to `version_path`) when the encoded file
        already exists or was just written; it is returned without that key
        when the input file is missing or `transform_midi` returns None.
    """
    result = metadata.copy()

    midi_rel = metadata.get('midi')
    if not isinstance(midi_rel, str): return None

    input_path = version_path/midi_rel
    if not input_path.exists():
        print('Input path does not exist:', input_path, metadata)
        return result

    # Mirror the source tree under the output directory and swap the extension
    out_file = Path(str(input_path).replace(f'/{source_dir}/', f'/{out_dir}/'))
    out_file = out_file.with_suffix('.npy')
    if out_file.exists():  # already encoded, skip the conversion
        result['numpy'] = str(out_file.relative_to(version_path))
        return result

    npenc = transform_midi(input_path)
    if npenc is None: return result

    # Create the folder only when there is something to save, so rejected
    # files do not leave empty directories behind.
    out_file.parent.mkdir(parents=True, exist_ok=True)
    np.save(out_file, npenc)
    result['numpy'] = str(out_file.relative_to(version_path))
    return result

In [13]:
def transform_midi(midi_file):
    """Convert one midi file into its numpy encoding.

    Args:
        midi_file: path of the midi file to encode.

    Returns:
        The value produced by `chordarr2npenc`, or None when the file is
        filtered out, cannot be parsed or encoded, or fails `is_valid_npenc`.
    """
    input_path = midi_file

    # Part 1. Filter out midi tracks (drums, repetitive instruments, etc.)
    try:
        if duet_only and num_piano_tracks(input_path) not in [1, 2]: return None
        # remove non note tracks and standardize instruments
        input_file = compress_midi_file(input_path, min_variation=min_variation, cutoff=cutoff)
        if input_file is None: return None
    except Exception as e:
        # ignore badly formatted midi errors
        if any(msg in str(e) for msg in ('badly form', 'out of range')): return None
        print('Error parsing midi', input_path, e)
        return None

    # Part 2. Compress rests and long notes
    # NOTE(review): file2stream sits outside the try block, so its exceptions
    # propagate to the caller.
    stream = file2stream(input_file)
    try:
        chordarr = stream2chordarr(stream) # max_dur = quarter_len * sample_freq (4). 128 = 8 bars
    except Exception as e:
        print('Could not encode to chordarr:', input_path, e)
        print(traceback.format_exc())
        return None

    # Part 3. Compress song rests - Don't want songs with really long pauses
    # (this happens because we filter out midi tracks).
    chordarr = shorten_chordarr_rests(trim_chordarr_rests(chordarr))

    # Part 4. Chord array to numpy
    npenc = chordarr2npenc(chordarr)
    if not is_valid_npenc(npenc, input_path=input_path):
        return None

    return npenc

In [14]:
def try_process_metadata(metadata):
    """Run `process_metadata` on one record without letting it raise.

    Args:
        metadata: record dict forwarded unchanged to `process_metadata`.

    Returns:
        Whatever `process_metadata` returns, or None if it raised an Exception.
    """
    try:
        return process_metadata(metadata)
    except Exception as e:
        # Best effort: one bad file must not abort the whole run, but report
        # which record failed and why instead of dropping it silently.
        midi = metadata.get('midi') if isinstance(metadata, dict) else metadata
        print('Error processing', midi, repr(e))
        return None

In [15]:
# Sanity check: run a few random records through process_metadata directly
# (not through try_process_metadata) so that any exception surfaces here.
import random
# Cap the sample size: random.sample raises ValueError when asked for more
# items than the population holds.
for r in random.sample(all_records, min(10, len(all_records))):
    process_metadata(r)

In [16]:
def timeout_func(data, seconds):
    """Print a timeout notice for one record.

    Handed to `process_all` as its `timeout_func` argument.

    Args:
        data: record dict; only its `midi` entry is reported.
        seconds: value printed after the "Timeout:" label.
    """
    midi_path = data.get('midi')
    print("Timeout:", seconds, midi_path)

In [17]:
# Per-record limit handed to process_all (presumably seconds - TODO confirm).
timeout = 500
processed = process_all(
    try_process_metadata,
    all_records,
    timeout=timeout,
    timeout_func=timeout_func,
)

Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/w/willie-nelson/on-the-road-again/bridge_key_original.mid
Sequence too short: 28 data/midi/v20/midi_sources/hooktheory/pianoroll/w/willie-nelson/on-the-road-again/verse_key_original.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/w/weebl/donkeys/intro_key_original.mid
Sequence too short: 24 data/midi/v20/midi_sources/hooktheory/pianoroll/w/wolfgang-gartner/undertaker/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/w/wavves/way-too-much/intro_key_original.mid
Sequence too short: 16 data/midi/v20/midi_sources/hooktheory/pianoroll/w/womack-and-womack/teardrops/intro-and-verse_key_original.mid
Sequence too short: 18 data/midi/v20/midi_sources/hooktheory/pianoroll/w/wolfgang-amadeus-mozart/confutatis-from-requiem/solo-3_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/w/working-for-a-nuclear-free-city/je-suis-l

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/y/youth-group/forever-young/chorus_key_original.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/y/yes/ive-seen-all-good-people/bridge_key_original.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/y/yes/ive-seen-all-good-people/outro_key_original.mid
Sequence too short: 3 data/midi/v20/midi_sources/hooktheory/pianoroll/y/ylvis/massachusetts/intro_key_original.mid
Sequence too short: 29 data/midi/v20/midi_sources/hooktheory/pianoroll/y/ylvis/massachusetts/pre-chorus_key_original.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/j/jackson-browne/doctor-my-eyes/verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/j/john-legend/love-me-now/bridge_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/j/john-legend/love-me-now/chorus_key_original.mid
Sequence too short: 27

Sequence too short: 18 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lorde/ribs/outro_key_original.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/l/louisa-johnson/so-good/verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/live/lightning-crashes/intro-and-verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lord-huron/the-stranger/verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lord-huron/love-like-ghosts/bridge_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/led-zeppelin/thank-you/intro-and-verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lana-del-rey/never-let-me-go/verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/luiz-bonfa/pernambuco/intro_key_original.mid
Sequence too short: 31 data/

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/booka-shade/love-inc/chorus_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/belle-and-sebastian/the-stars-of-track-and-field/verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/belle-and-sebastian/like-dylan-in-the-movies/chorus_key_original.mid
Sequence too short: 17 data/midi/v20/midi_sources/hooktheory/pianoroll/b/bread/diary/intro_key_original.mid
Sequence too short: 7 data/midi/v20/midi_sources/hooktheory/pianoroll/b/birds-of-tokyo/plans/bridge_key_original.mid
Sequence too short: 25 data/midi/v20/midi_sources/hooktheory/pianoroll/b/bird/bird/verse_key_original.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/b/brad-paisley/time-well-wasted/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/brad-paisley/shes-everything/intro_key_original.mid
Sequence too 

Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dispatch/the-general/verse_key_original.mid
Sequence too short: 11 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dixie-chicks/if-i-fall-youre-going-down-with-me/pre-chorus_key_original.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dungen/festival/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/d/david-guetta/what-i-did-for-love/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/d/d-nox-and-beckers/the-cave---original-mix/instrumental_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/d/duda-beat/todo-carinho/chorus_key_original.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dangelo/aint-that-easy/verse_key_original.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dangelo/aint-that-easy/pre-chorus_ke

Sequence too short: 11 data/midi/v20/midi_sources/hooktheory/pianoroll/s/steve-earle/copperhead-road/intro-and-verse_key_original.mid
Sequence too short: 29 data/midi/v20/midi_sources/hooktheory/pianoroll/s/shinedown/shed-some-light/intro_key_original.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/s/sonia-leigh/put-it-in-your-pocket/chorus_key_original.mid
Sequence too short: 24 data/midi/v20/midi_sources/hooktheory/pianoroll/s/say-anything/shiksa/verse_key_original.mid
Sequence too short: 18 data/midi/v20/midi_sources/hooktheory/pianoroll/s/say-anything/every-man-has-a-molly/intro_key_original.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/c/carne-doce/amigo-dos-bichos/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/c/carne-doce/amiga/bridge_key_original.mid
Sequence too short: 22 data/midi/v20/midi_sources/hooktheory/pianoroll/c/carne-doce/ideia/intro_key_original.mid
Sequence too sh

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/e/eagle-eye-cherry/save-tonight/intro_key_original.mid
Sequence too short: 17 data/midi/v20/midi_sources/hooktheory/pianoroll/n/nicola-conte-/jet-sounds---thievery-corporation-remix/intro_key_original.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/n/naomi/three-stars-no-match/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/n/noiseworks/take-me-back/chorus_key_original.mid
Sequence too short: 28 data/midi/v20/midi_sources/hooktheory/pianoroll/n/ninja-sex-party/orgy-for-one/pre-chorus_key_original.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/n/natalie-imbruglia/torn/intro_key_original.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/n/nine-inch-nails/closer/outro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/e/evermore/running/bridge_key_original.mi

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/p/panic-at-the-disco/new-perspective/pre-chorus_key_original.mid
Sequence too short: 25 data/midi/v20/midi_sources/hooktheory/pianoroll/p/panic-at-the-disco/new-perspective/chorus-lead-out_key_original.mid
Sequence too short: 25 data/midi/v20/midi_sources/hooktheory/pianoroll/p/pablo-acenso/little-monsters/chorus_key_original.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/p/pj-cavanagh/i-took-my-dad-to-see-johnny-cash/chorus_key_original.mid
Sequence too short: 9 data/midi/v20/midi_sources/hooktheory/pianoroll/t/this-town-needs-guns/gibbon/intro_key_original.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-living-end/roll-on/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-allman-brothers/feelin-alright/intro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/t/takuya-ha

Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/ive-just-seen-a-face/verse_key_original.mid
Sequence too short: 4 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/a-hard-days-night/intro-and-verse_key_original.mid
Sequence too short: 3 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/a-hard-days-night/intro_key_original.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/paperback-writer/intro-and-verse_key_original.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/all-my-loving/solo_key_original.mid
Sequence too short: 30 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/glass-onion/bridge_key_original.mid
Sequence too short: 8 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/back-in-the-ussr/intro_key_original.mid
Sequence too short: 16 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/im-so-tired/chorus_key_or

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/mason-vs-princess-superstar/perfect-exceeder/chorus_key_original.mid
Sequence too short: 21 data/midi/v20/midi_sources/hooktheory/pianoroll/m/masashi-kageyama/good-morning---gimmick/outro_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/madeon/home-%2528live%2529/pre-chorus_key_original.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/m/men-at-work/land-down-under/intro_key_original.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/m/metallica/fade-to-black/verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/my-bloody-valetine/loomer/verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/motley-crue/home-sweet-home/intro-and-verse_key_original.mid
Sequence too short: 17 data/midi/v20/midi_sources/hooktheory/pianoroll/m/mac-demarco/brother/p

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/a/arcade-fire/porno/chorus_key_original.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/a/avicii/speed---burn-and-lotus-f1-team-mix/chorus_key_original.mid
Sequence too short: 17 data/midi/v20/midi_sources/hooktheory/pianoroll/a/anamanaguchi/the-dark-one/verse_key_original.mid
Sequence too short: 7 data/midi/v20/midi_sources/hooktheory/pianoroll/a/alan-braxe---fred-falke/rubicon/intro_key_original.mid
Sequence too short: 22 data/midi/v20/midi_sources/hooktheory/pianoroll/a/art-of-trance/blue-owl/intro_key_original.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/a/absynthe-minded/end-of-the-line/intro-and-verse_key_original.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/a/absynthe-minded/papillon/intro-and-verse_key_original.mid
Sequence too short: 11 data/midi/v20/midi_sources/hooktheory/pianoroll/a/audien/wayfarer/intro_key_original.m

Timeout: 50 midi_sources/midiworld/named_midi/OFMG_-_.mid
Error parsing midi data/midi/v20/midi_sources/lmd_clean/Supertramp/Bloody Well Right.mid ord() expected string of length 1, but int found
npenc exceeds max 161 duration: 163 data/midi/v20/midi_sources/lmd_clean/Jovanotti/Penso Positivo.mid
Error parsing midi data/midi/v20/midi_sources/lmd_clean/Bee Gees/Stayin Alive.3.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v20/midi_sources/lmd_clean/Bee Gees/Too Much Heaven.mid ord() expected string of length 1, but int found
Sequence too short: 30 data/midi/v20/midi_sources/130k_reddit/Jazz_www.thejazzpage.de_MIDIRip/085Lead6Voice.mid
Sequence too short: 21 data/midi/v20/midi_sources/130k_reddit/Jazz_www.thejazzpage.de_MIDIRip/014TubularBells.mid
Sequence too short: 25 data/midi/v20/midi_sources/130k_reddit/Jazz_www.thejazzpage.de_MIDIRip/089Pad2Warm.mid
Sequence too short: 29 data/midi/v20/midi_sources/130k_reddit/Jazz_www.thejazzpage.de_MIDIRip/094Pa

Error parsing midi data/midi/v20/midi_sources/130k_reddit/F/F/ffmqbatl.mid cannot handle ticks per frame: 77
Error parsing midi data/midi/v20/midi_sources/130k_reddit/F/F/for_you_blue.mid ord() expected string of length 1, but int found
Error parsing midi data/midi/v20/midi_sources/130k_reddit/F/F/Fight6.mid cannot handle midi file format: <built-in function format>
Error parsing midi data/midi/v20/midi_sources/130k_reddit/F/F/fight.mid cannot handle midi file format: <built-in function format>
Sequence too short: 27 data/midi/v20/midi_sources/130k_reddit/F/F/ff5-1-12-goodnight.mid
Error parsing midi data/midi/v20/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Classical Piano Midis/Varios - Título desconocido/i_o/mywind3.mid cannot handle midi file format: <built-in function format>
Sequence too short: 30 data/midi/v20/midi_sources/130k_reddit/Classical Archives - The Greats (MIDI)/Classical Piano Midis/Varios - Título desconocido/p_z/schumann.mid
Error parsing midi 

Sequence too short: 0 data/midi/v20/midi_sources/130k_reddit/H/H/HiGate_-_Pitchin__iCarroller_20120421201820.mid
Sequence too short: 17 data/midi/v20/midi_sources/130k_reddit/H/H/Hannah_-_FallingAwayOriginalMix__Ameyah_20120319212226.mid
Sequence too short: 31 data/midi/v20/midi_sources/130k_reddit/H/H/Headhunterz_-_The_Sacrafice__Deejayjop_20070904131649.mid
Timeout: 50 midi_sources/130k_reddit/M/M/mthm26b.mid
Sequence too short: 10 data/midi/v20/midi_sources/130k_reddit/9/95SYNTH.MID
Sequence too short: 6 data/midi/v20/midi_sources/130k_reddit/I/I/intro08.mid
Error parsing midi data/midi/v20/midi_sources/130k_reddit/I/I/icantcry.mid ord() expected string of length 1, but int found
Sequence too short: 23 data/midi/v20/midi_sources/130k_reddit/S/S/Super8DJ_Tab_-_Elektra__azrijel_20090115225414.mid
Sequence too short: 0 data/midi/v20/midi_sources/130k_reddit/S/S/SimonPatterson_-_Bulldozer__NXTrance_20120605103302.mid
Error parsing midi data/midi/v20/midi_sources/130k_reddit/S/S/strke254

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/y/youth-group/forever-young/chorus_key_cmajor.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/y/yes/ive-seen-all-good-people/bridge_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/w/willie-nelson/on-the-road-again/bridge_key_cmajor.mid
Sequence too short: 3 data/midi/v20/midi_sources/hooktheory/pianoroll/y/ylvis/massachusetts/intro_key_cmajor.mid
Sequence too short: 29 data/midi/v20/midi_sources/hooktheory/pianoroll/y/ylvis/massachusetts/pre-chorus_key_cmajor.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/y/yes/ive-seen-all-good-people/outro_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/j/john-legend/love-me-now/bridge_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/j/john-legend/love-me-now/verse_key_cmajor.mid
Sequence too short: 27 data/midi/v20

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/louis-the-child/from-here/pre-chorus_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lady-gaga/til-it-happens-to-you/intro_key_cmajor.mid
Sequence too short: 18 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lorde/ribs/outro_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/l/louisa-johnson/so-good/verse_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/live/lightning-crashes/intro-and-verse_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lord-huron/love-like-ghosts/bridge_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lord-huron/the-stranger/verse_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/l/lord-huron/love-will-keep-us-together/bridge_key_cmajor.mid
Sequence too short: 31 dat

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/bent-van-looy/flowers-and-balloons/outro-2_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/booka-shade/love-inc/chorus_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/belle-and-sebastian/like-dylan-in-the-movies/chorus_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/b/belle-and-sebastian/the-stars-of-track-and-field/verse_key_cmajor.mid
Sequence too short: 7 data/midi/v20/midi_sources/hooktheory/pianoroll/b/birds-of-tokyo/plans/bridge_key_cmajor.mid
Sequence too short: 17 data/midi/v20/midi_sources/hooktheory/pianoroll/b/bread/diary/intro_key_cmajor.mid
Sequence too short: 25 data/midi/v20/midi_sources/hooktheory/pianoroll/b/bird/bird/verse_key_cmajor.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/b/brad-paisley/time-well-wasted/intro_key_cmajor.mid
Sequence too short: 3

Sequence too short: 29 data/midi/v20/midi_sources/hooktheory/pianoroll/d/destinys-child/say-my-name---cosmos-midnight-bootleg/verse_key_cmajor.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dispatch/the-general/verse_key_cmajor.mid
Sequence too short: 11 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dixie-chicks/if-i-fall-youre-going-down-with-me/pre-chorus_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/d/duda-beat/todo-carinho/chorus_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dungen/festival/intro_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/d/d-nox-and-beckers/the-cave---original-mix/instrumental_key_cmajor.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dangelo/aint-that-easy/verse_key_cmajor.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/d/dangelo/aint-that-easy/pre-cho

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/s/slint/good-morning-captain/intro_key_cmajor.mid
Sequence too short: 11 data/midi/v20/midi_sources/hooktheory/pianoroll/s/steve-earle/copperhead-road/intro-and-verse_key_cmajor.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/s/sonia-leigh/put-it-in-your-pocket/chorus_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/s/say-anything/attaboy/intro_key_cmajor.mid
Sequence too short: 24 data/midi/v20/midi_sources/hooktheory/pianoroll/s/say-anything/shiksa/verse_key_cmajor.mid
Sequence too short: 18 data/midi/v20/midi_sources/hooktheory/pianoroll/s/say-anything/every-man-has-a-molly/intro_key_cmajor.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/c/carne-doce/amigo-dos-bichos/verse_key_cmajor.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/c/carne-doce/amigo-dos-bichos/intro_key_cmajor.mid
Sequence too sho

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/e/earth-wind-and-fire/fall-in-love-with-me/intro_key_cmajor.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/n/naomi/three-stars-no-match/intro_key_cmajor.mid
Sequence too short: 17 data/midi/v20/midi_sources/hooktheory/pianoroll/n/nicola-conte-/jet-sounds---thievery-corporation-remix/intro_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/e/eagle-eye-cherry/save-tonight/intro_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/n/noiseworks/take-me-back/chorus_key_cmajor.mid
Sequence too short: 28 data/midi/v20/midi_sources/hooktheory/pianoroll/n/ninja-sex-party/orgy-for-one/pre-chorus_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/n/natalie-imbruglia/torn/intro_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/n/nine-inch-nails/closer/outro_key_cm

Sequence too short: 30 data/midi/v20/midi_sources/hooktheory/pianoroll/p/post-malone/rockstar/chorus_key_cmajor.mid
Sequence too short: 30 data/midi/v20/midi_sources/hooktheory/pianoroll/p/post-malone/rockstar/intro_key_cmajor.mid
Sequence too short: 25 data/midi/v20/midi_sources/hooktheory/pianoroll/p/panic-at-the-disco/new-perspective/chorus-lead-out_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/p/panic-at-the-disco/new-perspective/pre-chorus_key_cmajor.mid
Sequence too short: 25 data/midi/v20/midi_sources/hooktheory/pianoroll/p/pablo-acenso/little-monsters/chorus_key_cmajor.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/p/pj-cavanagh/i-took-my-dad-to-see-johnny-cash/chorus_key_cmajor.mid
Sequence too short: 9 data/midi/v20/midi_sources/hooktheory/pianoroll/t/this-town-needs-guns/gibbon/intro_key_cmajor.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-living-end/roll-on/intro_key_cmajo

Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/revolution/chorus_key_cmajor.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/being-for-the-benefit-of-mr-kite/intro_key_cmajor.mid
Sequence too short: 3 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/a-hard-days-night/intro_key_cmajor.mid
Sequence too short: 20 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/ive-just-seen-a-face/chorus_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/ive-just-seen-a-face/verse_key_cmajor.mid
Sequence too short: 4 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/a-hard-days-night/intro-and-verse_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/paperback-writer/intro-and-verse_key_cmajor.mid
Sequence too short: 19 data/midi/v20/midi_sources/hooktheory/pianoroll/t/the-beatles/all-my-loving/

Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/m/manu-chao/bongo-bong/intro_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/mason-vs-princess-superstar/perfect-exceeder/chorus_key_cmajor.mid
Sequence too short: 21 data/midi/v20/midi_sources/hooktheory/pianoroll/m/masashi-kageyama/good-morning---gimmick/outro_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/madeon/home-%2528live%2529/pre-chorus_key_cmajor.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/m/men-at-work/land-down-under/intro_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/my-bloody-valetine/loomer/verse_key_cmajor.mid
Sequence too short: 15 data/midi/v20/midi_sources/hooktheory/pianoroll/m/metallica/fade-to-black/verse_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/m/motley-crue/home-sweet-home/intro-and-verse_key_cmaj

Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/a/arcade-fire/neighborhood-2-laika/chorus_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/a/arcade-fire/here-comes-the-night-time/verse_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/a/arcade-fire/neighborhood-2-laika/verse_key_cmajor.mid
Sequence too short: 23 data/midi/v20/midi_sources/hooktheory/pianoroll/a/arcade-fire/no-cars-go/verse_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/a/arcade-fire/porno/chorus_key_cmajor.mid
Sequence too short: 31 data/midi/v20/midi_sources/hooktheory/pianoroll/a/arcade-fire/haiti/verse_key_cmajor.mid
Sequence too short: 27 data/midi/v20/midi_sources/hooktheory/pianoroll/a/avicii/speed---burn-and-lotus-f1-team-mix/chorus_key_cmajor.mid
Sequence too short: 17 data/midi/v20/midi_sources/hooktheory/pianoroll/a/anamanaguchi/the-dark-one/verse_key_cmajor.mid
Sequence too s

In [18]:
# Write the processed records to the output csv, then show how many there are.
arr2csv(processed, out_csv)
len(processed)

197902

In [19]:
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the mixed-type DtypeWarning.
df = pd.read_csv(out_csv, low_memory=False)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,parts,ht_time_signature,midi,section,midi_title,title,numpy,mxl,ht_bpm,md5,source,artist,ht_key,ht_mode,genres,song_url
0,"intro,chorus",4.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,chorus,yu-gi-oh3,yu-gi-oh-theme-song,piano_duet/hooktheory/pianoroll/w/wayne-sharpe...,,128.0,bf1f29e5ff84e3e93e37fb873bfb590e,hooktheory,wayne-sharpe,C,1.0,,https://www.hooktheory.com/theorytab/view/wayn...
1,"intro,chorus",3.0,midi_sources/hooktheory/pianoroll/w/wayne-shar...,intro,yu-gi-oh,yu-gi-oh-theme-song,piano_duet/hooktheory/pianoroll/w/wayne-sharpe...,,85.0,055f80ad67f64edb14a85ca8fbfe8c29,hooktheory,wayne-sharpe,C,1.0,,https://www.hooktheory.com/theorytab/view/wayn...
2,chorus,4.0,midi_sources/hooktheory/pianoroll/w/what-a-day...,chorus,kiefer,kiefer,piano_duet/hooktheory/pianoroll/w/what-a-day/k...,,96.0,197f96f5d181f6ce1e2c5ab04ac1ff87,hooktheory,what-a-day,D,6.0,Jazz,https://www.hooktheory.com/theorytab/view/what...
3,"verse,pre-chorus,chorus",4.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,pre-chorus,senbonzakura - pre-Pre-Chorus,senbonzakura,piano_duet/hooktheory/pianoroll/w/whiteflame/s...,,152.0,9e7ce13a35f1314423a9a6d5a5287a4a,hooktheory,whiteflame,D,6.0,"J-Pop,Pop",https://www.hooktheory.com/theorytab/view/whit...
4,"verse,pre-chorus,chorus",4.0,midi_sources/hooktheory/pianoroll/w/whiteflame...,verse,Senbonzakura,senbonzakura,piano_duet/hooktheory/pianoroll/w/whiteflame/s...,,152.0,d5aaf79d0989222f1362f9f46c540a27,hooktheory,whiteflame,D,6.0,"J-Pop,Pop",https://www.hooktheory.com/theorytab/view/whit...


In [20]:
# Number of rows whose `midi` value is a string (i.e. not NaN).
sum(isinstance(s, str) for s in df.midi.values)

197902

In [24]:
# Number of rows whose `numpy` value is a string (i.e. not NaN).
sum(isinstance(s, str) for s in df.numpy.values)

113111

In [25]:
from collections import Counter

In [26]:
# Rows per source among records that have a midi path.
has_midi = df.midi.notnull()
Counter(df.loc[has_midi, 'source'].tolist())

Counter({'hooktheory': 20544,
         'freemidi': 5168,
         'midiworld': 4108,
         'ecomp': 2533,
         'cprato': 312,
         'classical_piano': 328,
         'classical_archives': 14647,
         'musescore': 10933,
         'wikifonia': 6346,
         'lmd': 13565,
         'reddit': 98673,
         'hooktheory_c': 20745})

In [27]:
# Rows per source among records that have a numpy path.
has_numpy = df.numpy.notnull()
Counter(df.loc[has_numpy, 'source'].tolist())

Counter({'hooktheory': 19874,
         'freemidi': 1872,
         'midiworld': 1771,
         'ecomp': 2533,
         'cprato': 214,
         'classical_piano': 327,
         'classical_archives': 4883,
         'musescore': 9195,
         'wikifonia': 3912,
         'lmd': 5017,
         'reddit': 43485,
         'hooktheory_c': 20028})