In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel, transform_csv_row

In [4]:
from collections import Counter

In [5]:
import scipy.sparse

## Encode music21 stream to text representation 

This notebook uses the full component format:
- measure separators, instruments, and separate octaves
- Format: note, octave, action type, instrument
- Example note representation: `nG# o4 t1 i0`

### Load midi data

In [6]:
# Dataset layout: everything for this run lives under data/midi/{version}.
version = 'v5'
data_path = Path('data') / 'midi'
version_path = data_path / version

In [7]:
# Stage names double as directory names: each stage keeps its index CSV at
# {version_path}/{stage}/{stage}.csv.
source_dir, out_dir = 'midi_transform', 'midi_npz'
source_csv = version_path / source_dir / f'{source_dir}.csv'
out_csv = version_path / out_dir / f'{out_dir}.csv'

In [8]:
# Load the midi_transform index CSV and preview the first rows.
# NOTE(review): the recorded output shows warning residue for this cell
# (possibly a mixed-dtype warning from read_csv) - confirm and pin dtypes if so.
df = pd.read_csv(source_csv)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,song_url,instruments,ht_offset,time_signature,title,quarter_length,ht_mode,source,artist,ht_key,...,inferred_key,inferred_offset,genres,section,parts,ht_time_signature,ht_bpm,midi_title,mxl,midi_transform
0,https://www.hooktheory.com/theorytab/view/wayn...,Piano,0.0,3/4,yu-gi-oh-theme-song,36.0,1.0,hooktheory,wayne-sharpe,C,...,C major,0.0,,intro,"intro,chorus",3.0,85.0,yu-gi-oh,,
1,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",0.0,4/4,yu-gi-oh-theme-song,32.0,1.0,hooktheory,wayne-sharpe,C,...,C major,0.0,,chorus,"intro,chorus",4.0,128.0,yu-gi-oh3,,midi_transform/hooktheory/pianoroll/w/wayne-sh...
2,https://www.hooktheory.com/theorytab/view/what...,"Piano,Piano",-5.0,4/4,kiefer,16.0,6.0,hooktheory,what-a-day,D,...,A minor,0.0,Jazz,chorus,chorus,4.0,96.0,kiefer,,midi_transform/hooktheory/pianoroll/w/what-a-d...
3,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,D minor,-5.0,"J-Pop,Pop",verse,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...
4,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,C# major,-1.0,Holiday,intro,"intro,verse,chorus",4.0,108.0,Last Christmas Intro,,midi_transform/hooktheory/pianoroll/w/wham/las...


In [9]:
# Keep only the rows whose source_dir column holds a value (non-null).
df_filtered = df[df[source_dir].notna()]
df_filtered.shape

(32963, 23)

In [10]:
def transform_func(file, out_file, row):
    """Convert one source file to a chord array and save it to ``out_file``.

    The file is parsed with ``file2stream`` and converted with
    ``stream2chordarr``. For every source except ``'hooktheory'`` the array is
    additionally passed through ``compress_chordarr``; when that shortens the
    first dimension by more than 100, a message naming the file is printed.

    Args:
        file: Input file, handed to ``file2stream``.
        out_file: Destination, handed to ``save_chordarr``.
        row: CSV row; only its ``source`` attribute is read here.
    """
    chordarr = stream2chordarr(file2stream(file))
    if row.source == 'hooktheory':
        # hooktheory entries are saved as-is, without rest compression
        save_chordarr(out_file, chordarr)
        return
    compressed = compress_chordarr(chordarr)
    delta_trim = chordarr.shape[0] - compressed.shape[0]
    if delta_trim > 100: print(f'Removed {delta_trim} rests from {file}')
    save_chordarr(out_file, compressed)

In [11]:
from functools import partial

# Pre-bind the per-run settings so the resulting callable takes just one
# item from df_filtered.iterrows().
parallel_func = partial(
    transform_csv_row,
    transform_func=transform_func,
    base_path=version_path,
    source_dir=source_dir,
    out_dir=out_dir,
    out_extension='.npz',
)

In [12]:
# Debug aid: uncomment to run the transform serially on the first row only.
# for r in df_filtered.iterrows():
#     parallel_func(r)
#     break

In [13]:
# Run the transform over every filtered row.
# NOTE(review): the result is consumed via .keys()/.values() below, so it is
# presumably a mapping of row index to output path - confirm in data_sources.
transcribed_files = process_parallel(
    parallel_func,
    df_filtered.iterrows(),
    total=len(df_filtered),
)

Compressing rests: 64 -> 32
Compressing rests: 64 -> 32
Compressing rests: 128 -> 32
Removed 176 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Ariana Grande - Problem.mid
Compressing rests: 318 -> 34
Compressing rests: 66 -> 34
Compressing rests: 192 -> 32
Removed 492 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Madonna - Beautiful Stranger.mid
Compressing rests: 39 -> 35
Removed 272 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Madonna - Frozen.mid
Compressing rests: 48 -> 32
Compressing rests: 134 -> 34
Compressing rests: 60 -> 32
Removed 128 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Madonna - American Pie.mid
Compressing rests: 52 -> 32
Compressing rests: 58 -> 34
Compressing rests: 64 -> 32
Removed 204 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Fatboy Slim - Praise You.mid
Compressing rests: 66 -> 34
Compressing rests: 128 -> 32
Removed 128 rests from data/midi/v5/m

Compressing rests: 335 -> 35
Compressing rests: 335 -> 35
Compressing rests: 343 -> 35
Compressing rests: 335 -> 35
Compressing rests: 335 -> 35
Compressing rests: 351 -> 35
Compressing rests: 335 -> 35
Compressing rests: 335 -> 35
Compressing rests: 351 -> 35
Compressing rests: 335 -> 35
Compressing rests: 335 -> 35
Compressing rests: 351 -> 35
Compressing rests: 335 -> 35
Compressing rests: 335 -> 35
Compressing rests: 343 -> 35
Compressing rests: 335 -> 35
Compressing rests: 335 -> 35
Compressing rests: 344 -> 32
Compressing rests: 334 -> 34
Compressing rests: 335 -> 35
Compressing rests: 367 -> 35
Compressing rests: 336 -> 32
Compressing rests: 334 -> 34
Compressing rests: 368 -> 32
Compressing rests: 335 -> 35
Compressing rests: 335 -> 35
Compressing rests: 366 -> 34
Compressing rests: 336 -> 32
Compressing rests: 335 -> 35
Compressing rests: 351 -> 35
Compressing rests: 334 -> 34
Compressing rests: 336 -> 32
Compressing rests: 351 -> 35
Compressing rests: 335 -> 35
Compressing re

Compressing rests: 131 -> 35
Compressing rests: 64 -> 32
Removed 128 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Depeche Mode - Faith Healer.mid
Compressing rests: 64 -> 32
Compressing rests: 129 -> 33
Compressing rests: 54 -> 34
Compressing rests: 256 -> 32
Compressing rests: 256 -> 32
Removed 468 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/New Order - Vanishing Point (2).mid
Compressing rests: 118 -> 34
Removed 116 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Scooter - Back In The UK.mid
Compressing rests: 64 -> 32
Compressing rests: 64 -> 32
Compressing rests: 128 -> 32
Removed 176 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/Ariana Grande - Ariana Grande feat. Iggy Azalea - Problem.mid
Compressing rests: 112 -> 32
Compressing rests: 52 -> 32
Compressing rests: 52 -> 32
Removed 120 rests from data/midi/v5/midi_transform/freemidi/genre-dance-eletric/New Order - Temptation.mid
Removed 112 res

Compressing rests: 92 -> 32
Removed 220 rests from data/midi/v5/midi_transform/freemidi/genre-pop/David Bowie - Dancing In The Street.mid
Compressing rests: 50 -> 34
Compressing rests: 124 -> 32
Removed 156 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Fleetwood Mac - Dreams.mid
Compressing rests: 56 -> 32
Compressing rests: 60 -> 32
Compressing rests: 54 -> 34
Compressing rests: 80 -> 32
Removed 112 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Britney Spears - till the world ends.mid
Compressing rests: 44 -> 32
Compressing rests: 288 -> 32
Removed 384 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Idina Menzel - Let it Go 0.2.mid
Compressing rests: 48 -> 32
Compressing rests: 134 -> 34
Compressing rests: 60 -> 32
Removed 128 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Madonna - American Pie.mid
Compressing rests: 383 -> 35
Compressing rests: 255 -> 35
Compressing rests: 130 -> 34
Removed 224 rests from data/midi/v5/midi_transform/freem

Removed 224 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Evanescence - Farther Away.mid
Compressing rests: 48 -> 32
Removed 912 rests from data/midi/v5/midi_transform/freemidi/genre-pop/John Lennon - Jealous Guy.mid
Removed 112 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Gipsy Kings - Baila Me.mid
Compressing rests: 112 -> 32
Compressing rests: 96 -> 32
Compressing rests: 98 -> 34
Compressing rests: 98 -> 34
Compressing rests: 66 -> 34
Compressing rests: 160 -> 32
Compressing rests: 50 -> 34
Compressing rests: 96 -> 32
Removed 464 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Yazoo - Dont Go.mid
Compressing rests: 80 -> 32
Compressing rests: 88 -> 32
Compressing rests: 70 -> 34
Removed 140 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Wet Wet Wet - Love Is All Around.mid
Compressing rests: 60 -> 32
Compressing rests: 811 -> 35
Compressing rests: 130 -> 34
Removed 792 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Genesis - F

Removed 272 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Destinys Child - Survivor.mid
Removed 160 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Hall And Oats - I Can't Go For That.mid
Compressing rests: 40 -> 32
Removed 384 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Kinks - Have A Cuppa Tea.mid
Removed 1696 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Engelbert Humperdinck - Quando.mid
Compressing rests: 59 -> 35
Compressing rests: 64 -> 32
Removed 128 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Pet Shop Boys - Hedonism.mid
Compressing rests: 52 -> 32
Compressing rests: 61 -> 33
Removed 128 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Barbra Streisand - Over The Rainbow.mid
Removed 128 rests from data/midi/v5/midi_transform/freemidi/genre-pop/Gorillaz - Kids With Guns.mid
Compressing rests: 48 -> 32
Compressing rests: 72 -> 32
Compressing rests: 225 -> 33
Compressing rests: 208 -> 32
Removed 368 rests from

Compressing rests: 192 -> 32
Compressing rests: 204 -> 32
Compressing rests: 444 -> 32
Removed 792 rests from data/midi/v5/midi_transform/midiworld/named_midi/Green_Day_-_Hitching_A_Ride.mid
Compressing rests: 124 -> 32
Compressing rests: 124 -> 32
Removed 184 rests from data/midi/v5/midi_transform/midiworld/named_midi/Hall_and_Oates_-_Family_Man.mid
Compressing rests: 92 -> 32
Compressing rests: 39 -> 35
Compressing rests: 68 -> 32
Compressing rests: 122 -> 34
Removed 120 rests from data/midi/v5/midi_transform/midiworld/named_midi/Genesis_-_Illegal_Alien.mid
Compressing rests: 57 -> 33
Compressing rests: 254 -> 34
Compressing rests: 58 -> 34
Removed 220 rests from data/midi/v5/midi_transform/midiworld/named_midi/Nine_Inch_Nails_-_Heresy.mid
Compressing rests: 65 -> 33
Removed 192 rests from data/midi/v5/midi_transform/midiworld/named_midi/Backstreet_Boys_-_Back_to_Your_Heart.mid
Compressing rests: 68 -> 32
Compressing rests: 78 -> 34
Compressing rests: 121 -> 33
Compressing rests: 106

Compressing rests: 84 -> 32
Removed 172 rests from data/midi/v5/midi_transform/midiworld/named_midi/Coolio_-_Fantastic_Voyage.mid
Compressing rests: 46 -> 34
Removed 176 rests from data/midi/v5/midi_transform/midiworld/named_midi/Sweet_-_Ballroom_Blitz.mid
Compressing rests: 191 -> 35
Removed 172 rests from data/midi/v5/midi_transform/midiworld/named_midi/Vanilla_Fudge_-_You_Keep_Me_Hangin_On.mid
Compressing rests: 126 -> 34
Compressing rests: 128 -> 32
Compressing rests: 128 -> 32
Removed 284 rests from data/midi/v5/midi_transform/midiworld/named_midi/Live_-_Lakinis_Juice.mid
Compressing rests: 50 -> 34
Removed 144 rests from data/midi/v5/midi_transform/midiworld/named_midi/Michael_Jackson_-_Beat_It.mid
Compressing rests: 128 -> 32
Compressing rests: 128 -> 32
Removed 192 rests from data/midi/v5/midi_transform/midiworld/named_midi/Nightwish_-_Stargazers.mid
Compressing rests: 192 -> 32
Compressing rests: 128 -> 32
Removed 256 rests from data/midi/v5/midi_transform/midiworld/named_midi

Compressing rests: 42 -> 34
Removed 128 rests from data/midi/v5/midi_transform/midiworld/named_midi/Deep_Purple_-_Hush.mid
Compressing rests: 64 -> 32
Compressing rests: 64 -> 32
Compressing rests: 56 -> 32
Removed 160 rests from data/midi/v5/midi_transform/midiworld/named_midi/B52s_-_Dry_County.mid
Compressing rests: 51 -> 35
Removed 144 rests from data/midi/v5/midi_transform/midiworld/named_midi/Nine_Inch_Nails_-_Wish.mid
Compressing rests: 135 -> 35
Compressing rests: 175 -> 35
Removed 352 rests from data/midi/v5/midi_transform/midiworld/named_midi/Led_Zeppelin_-_All_My_Love.mid
Compressing rests: 294 -> 34
Removed 276 rests from data/midi/v5/midi_transform/midiworld/named_midi/Arlo_Guthrie_-_City_of_New_Orleans.mid
Removed 128 rests from data/midi/v5/midi_transform/midiworld/named_midi/Nightwish_-_Alpenglow.mid
Compressing rests: 61 -> 33
Compressing rests: 64 -> 32
Removed 176 rests from data/midi/v5/midi_transform/midiworld/named_midi/Marc_Anthony_-_Hasta_Ayer.mid
Removed 512 res

Compressing rests: 43 -> 35
Compressing rests: 111 -> 35
Removed 108 rests from data/midi/v5/midi_transform/midiworld/named_midi/Emerson_Lake_and_Palmer_-_Fanfare_For_The_Common_Man.mid
Compressing rests: 40 -> 32
Removed 200 rests from data/midi/v5/midi_transform/midiworld/named_midi/Nine_Inch_Nails_-_Head_Like_a_Hole.mid
Compressing rests: 39 -> 35
Compressing rests: 112 -> 32
Removed 128 rests from data/midi/v5/midi_transform/midiworld/named_midi/Andrew_Lloyd_Webber_-_Jesus_Christ_Superstar.mid
Compressing rests: 54 -> 34
Compressing rests: 128 -> 32
Removed 496 rests from data/midi/v5/midi_transform/midiworld/named_midi/Jennifer_Lopez_-_Feeling_So_Good.mid
Removed 112 rests from data/midi/v5/midi_transform/midiworld/named_midi/All_Saints_-_Never_Ever.mid
Compressing rests: 62 -> 34
Compressing rests: 157 -> 33
Removed 124 rests from data/midi/v5/midi_transform/midiworld/named_midi/Sister_Hazel_-_All_For_You.mid
Compressing rests: 64 -> 32
Compressing rests: 48 -> 32
Compressing res

Compressing rests: 62 -> 34
Compressing rests: 62 -> 34
Compressing rests: 56 -> 32
Compressing rests: 44 -> 32
Compressing rests: 132 -> 32
Removed 116 rests from data/midi/v5/midi_transform/midiworld/named_midi/The_Beatles_-_Baby_It's_You.mid
Removed 160 rests from data/midi/v5/midi_transform/midiworld/named_midi/Mariah_Carey_-_Fantasy.mid
Compressing rests: 95 -> 35
Compressing rests: 64 -> 32
Removed 236 rests from data/midi/v5/midi_transform/midiworld/named_midi/New_Order_-_Blue_Monday.mid
Removed 160 rests from data/midi/v5/midi_transform/midiworld/named_midi/Dr_Dre_-_Dre_Day.mid
Removed 816 rests from data/midi/v5/midi_transform/midiworld/named_midi/Huey_Lewis_-_Its_Alright.mid
Compressing rests: 60 -> 32
Removed 144 rests from data/midi/v5/midi_transform/midiworld/named_midi/Buffalo_Springfield_-_For_What_Its_Worth.mid
Compressing rests: 52 -> 32
Removed 132 rests from data/midi/v5/midi_transform/midiworld/named_midi/Billie_Myers_-_Kiss_The_Rain.mid
Compressing rests: 247 -> 35

In [14]:
# One-column frame (column named after out_dir) holding each result value,
# indexed by the corresponding key of transcribed_files.
tdf = pd.DataFrame(
    {out_dir: list(transcribed_files.values())},
    index=list(transcribed_files.keys()),
)

In [15]:
# Outer-join on the index so rows without a transform result are kept,
# then show the three shapes as a sanity check.
merged_df = df.join(tdf, how='outer')
tdf.shape, df.shape, merged_df.shape

((32963, 1), (39942, 23), (39942, 24))

In [16]:
# Persist the merged index (without the pandas index column) and preview it.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,song_url,instruments,ht_offset,time_signature,title,quarter_length,ht_mode,source,artist,ht_key,...,inferred_offset,genres,section,parts,ht_time_signature,ht_bpm,midi_title,mxl,midi_transform,midi_npz
0,https://www.hooktheory.com/theorytab/view/wayn...,Piano,0.0,3/4,yu-gi-oh-theme-song,36.0,1.0,hooktheory,wayne-sharpe,C,...,0.0,,intro,"intro,chorus",3.0,85.0,yu-gi-oh,,,
1,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",0.0,4/4,yu-gi-oh-theme-song,32.0,1.0,hooktheory,wayne-sharpe,C,...,0.0,,chorus,"intro,chorus",4.0,128.0,yu-gi-oh3,,midi_transform/hooktheory/pianoroll/w/wayne-sh...,midi_npz/hooktheory/pianoroll/w/wayne-sharpe/y...
2,https://www.hooktheory.com/theorytab/view/what...,"Piano,Piano",-5.0,4/4,kiefer,16.0,6.0,hooktheory,what-a-day,D,...,0.0,Jazz,chorus,chorus,4.0,96.0,kiefer,,midi_transform/hooktheory/pianoroll/w/what-a-d...,midi_npz/hooktheory/pianoroll/w/what-a-day/kie...
3,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,-5.0,"J-Pop,Pop",verse,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...,midi_npz/hooktheory/pianoroll/w/whiteflame/sen...
4,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,-1.0,Holiday,intro,"intro,verse,chorus",4.0,108.0,Last Christmas Intro,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...


### Calculate timesteps

In [17]:
# Reload the merged index from out_csv (the CSV written above).
merged_df = pd.read_csv(out_csv)

In [18]:
def calc_timesteps(idxrow):
    """Return ``(idx, timesteps)`` for one ``(idx, row)`` pair from ``iterrows()``.

    ``timesteps`` is ``shape[0]`` of the chord array that ``load_chordarr``
    reads from ``version_path / row[out_dir]``. It is ``None`` when the row
    holds no string path, when the file does not exist, or when loading fails.
    A load failure is printed rather than raised, so one bad file does not
    abort the whole batch.

    Args:
        idxrow: ``(index, row)`` tuple; ``row[out_dir]`` is the relative path.

    Returns:
        Tuple ``(index, timesteps or None)``.
    """
    idx, row = idxrow
    np_file = row[out_dir]
    # Missing entries are not strings (e.g. NaN after a CSV round trip).
    if not isinstance(np_file, str): return idx, None
    np_path = Path(version_path)/np_file
    if not np_path.exists(): return idx, None
    try:
        return idx, load_chordarr(np_path).shape[0]
    except Exception as e:
        # Best effort: report the bad file and fall through to the None result.
        print(f'Error reading chord array {np_file}: {e}')
    return idx, None

In [19]:
# Debug aid: uncomment to run calc_timesteps serially over every row.
# for r in merged_df.iterrows():
#     calc_timesteps(r)

In [20]:
# Compute the timestep count for every row of the merged index.
file2steps = process_parallel(
    calc_timesteps,
    merged_df.iterrows(),
    total=len(merged_df),
)

In [21]:
# Attach the timestep counts as a new '{out_dir}_timesteps' column.
timesteps_col = f'{out_dir}_timesteps'
len_df = pd.DataFrame(data={timesteps_col: list(file2steps.values())}, index=list(file2steps.keys()))
# out_csv is overwritten with this column further down, so re-running this
# section reloads a frame that already contains it. Drop any stale copy first;
# otherwise join() raises ValueError on the overlapping column name.
merged_len_df = merged_df.drop(columns=[timesteps_col], errors='ignore').join(len_df, how='outer')
len_df.shape, merged_df.shape, merged_len_df.shape

((39942, 1), (39942, 24), (39942, 25))

In [22]:
# Overwrite out_csv with the index that now includes the timesteps column,
# then preview the result.
merged_len_df.to_csv(out_csv, index=False)
merged_len_df.head()

Unnamed: 0,song_url,instruments,ht_offset,time_signature,title,quarter_length,ht_mode,source,artist,ht_key,...,genres,section,parts,ht_time_signature,ht_bpm,midi_title,mxl,midi_transform,midi_npz,midi_npz_timesteps
0,https://www.hooktheory.com/theorytab/view/wayn...,Piano,0.0,3/4,yu-gi-oh-theme-song,36.0,1.0,hooktheory,wayne-sharpe,C,...,,intro,"intro,chorus",3.0,85.0,yu-gi-oh,,,,
1,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",0.0,4/4,yu-gi-oh-theme-song,32.0,1.0,hooktheory,wayne-sharpe,C,...,,chorus,"intro,chorus",4.0,128.0,yu-gi-oh3,,midi_transform/hooktheory/pianoroll/w/wayne-sh...,midi_npz/hooktheory/pianoroll/w/wayne-sharpe/y...,129.0
2,https://www.hooktheory.com/theorytab/view/what...,"Piano,Piano",-5.0,4/4,kiefer,16.0,6.0,hooktheory,what-a-day,D,...,Jazz,chorus,chorus,4.0,96.0,kiefer,,midi_transform/hooktheory/pianoroll/w/what-a-d...,midi_npz/hooktheory/pianoroll/w/what-a-day/kie...,65.0
3,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,"J-Pop,Pop",verse,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...,midi_npz/hooktheory/pianoroll/w/whiteflame/sen...,129.0
4,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,Holiday,intro,"intro,verse,chorus",4.0,108.0,Last Christmas Intro,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,129.0
