In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from encode_data import *
from midi_data import *

In [3]:
from tqdm import tqdm
import pandas as pd
from data_sources import process_parallel, transform_csv_row

In [4]:
from collections import Counter

In [5]:
import scipy.sparse

## Encode music21 stream to text representation 

This notebook uses a full-component format:
- measure separators, instruments, and separated octaves
- Format: note, octave, action type, instrument
- Example note representation: `nG# o4 t1 i0`

### Load midi data

In [6]:
# Dataset version tag: names the sub-directory of data_path under which both
# the source CSV and the output CSV of this notebook live.
version = 'v7'
data_path = Path('data/midi')
version_path = data_path/version

In [7]:
# source_dir / out_dir each play three roles in this notebook: the sub-directory
# under version_path, the stem of the CSV file inside that directory, and the
# DataFrame column holding each row's file path.
source_dir = 'midi_transform'
out_dir = 'midi_npz'
source_csv = version_path/source_dir/f'{source_dir}.csv'
out_csv = version_path/out_dir/f'{out_dir}.csv'

In [8]:
# Load the metadata table that lists every transformed MIDI file.
df = pd.read_csv(source_csv)
df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,song_url,instruments,ht_offset,time_signature,title,quarter_length,ht_mode,source,artist,ht_key,...,inferred_key,inferred_offset,genres,section,parts,ht_time_signature,ht_bpm,midi_title,mxl,midi_transform
0,https://www.hooktheory.com/theorytab/view/wayn...,Piano,0.0,3/4,yu-gi-oh-theme-song,36.0,1.0,hooktheory,wayne-sharpe,C,...,C major,0.0,,intro,"intro,chorus",3.0,85.0,yu-gi-oh,,midi_transform/hooktheory/pianoroll/w/wayne-sh...
1,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",0.0,4/4,yu-gi-oh-theme-song,32.0,1.0,hooktheory,wayne-sharpe,C,...,C major,0.0,,chorus,"intro,chorus",4.0,128.0,yu-gi-oh3,,midi_transform/hooktheory/pianoroll/w/wayne-sh...
2,https://www.hooktheory.com/theorytab/view/what...,"Piano,Piano",-5.0,4/4,kiefer,16.0,6.0,hooktheory,what-a-day,D,...,A minor,0.0,Jazz,chorus,chorus,4.0,96.0,kiefer,,midi_transform/hooktheory/pianoroll/w/what-a-d...
3,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,D minor,-5.0,"J-Pop,Pop",verse,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...
4,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,C# major,-1.0,Holiday,intro,"intro,verse,chorus",4.0,108.0,Last Christmas Intro,,midi_transform/hooktheory/pianoroll/w/wham/las...


In [9]:
# Keep only the rows that actually have a source file path to convert.
df_filtered = df.loc[df[source_dir].notna()]
df_filtered.shape

(38792, 23)

In [10]:
def transform_func(file, out_file, row):
    """Convert one source file into a chord array and save it to ``out_file``.

    Args:
        file: path of the input file, passed to ``file2stream``.
        out_file: destination path, passed to ``save_chordarr``.
        row: metadata row for the file; only ``row.source`` is read.

    Rows whose ``source`` is ``'hooktheory'`` are saved as-is. Every other
    source is passed through ``compress_chordarr`` first, and a message is
    printed when that shortens the array by more than 100 timesteps.
    """
    # max_dur = quarter_len * sample_freq (4). 128 = 8 bars
    chordarr = stream2chordarr(file2stream(file), max_dur=128)

    if row.source != 'hooktheory':
        compressed = compress_chordarr(chordarr)
        n_removed = chordarr.shape[0] - compressed.shape[0]
        if n_removed > 100:
            print(f'Removed {n_removed} rests from {file}')
        chordarr = compressed

    save_chordarr(out_file, chordarr)

In [11]:
from functools import partial

# Pre-bind every argument except the row itself, so the resulting callable can
# be invoked with a single item from df_filtered.iterrows().
parallel_func = partial(
    transform_csv_row,
    transform_func=transform_func,
    base_path=version_path,
    source_dir=source_dir,
    out_dir=out_dir,
    out_extension='.npz',
)

In [12]:
# for r in df_filtered.iterrows():
#     parallel_func(r)
#     break

In [13]:
# Convert every filtered row in parallel; `total` sizes the progress display.
transcribed_files = process_parallel(
    parallel_func,
    df_filtered.iterrows(),
    total=len(df_filtered),
)

Compressing rests: 192 -> 32
Removed 176 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Madonna - Beautiful Stranger.mid
Compressing rests: 128 -> 32
Removed 128 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Eurythmics - Revival.mid
Compressing rests: 56 -> 32
Compressing rests: 48 -> 32
Removed 224 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Madonna - Frozen.mid
Compressing rests: 42 -> 34
Compressing rests: 44 -> 32
Compressing rests: 64 -> 32
Compressing rests: 52 -> 32
Compressing rests: 58 -> 34
Compressing rests: 42 -> 34
Compressing rests: 64 -> 32
Removed 132 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Fatboy Slim - Praise You.mid
Compressing rests: 64 -> 32
Compressing rests: 47 -> 35
Compressing rests: 55 -> 35
Error converting midi to sequence index 127 is out of bounds for axis 2 with size 127
Compressing rests: 95 -> 35
Compressing rests: 64 -> 32
Removed 220 rests from data/midi/v7/

Compressing rests: 134 -> 34
Compressing rests: 126 -> 34
Compressing rests: 86 -> 34
Compressing rests: 87 -> 35
Compressing rests: 46 -> 34
Compressing rests: 138 -> 34
Compressing rests: 58 -> 34
Compressing rests: 64 -> 32
Compressing rests: 144 -> 32
Compressing rests: 144 -> 32
Compressing rests: 45 -> 33
Compressing rests: 37 -> 33
Compressing rests: 43 -> 35
Compressing rests: 42 -> 34
Compressing rests: 40 -> 32
Compressing rests: 139 -> 35
Compressing rests: 69 -> 33
Compressing rests: 188 -> 32
Compressing rests: 61 -> 33
Compressing rests: 134 -> 34
Compressing rests: 61 -> 33
Compressing rests: 61 -> 33
Compressing rests: 63 -> 35
Compressing rests: 128 -> 32
Compressing rests: 183 -> 35
Compressing rests: 55 -> 35
Compressing rests: 55 -> 35
Compressing rests: 106 -> 34
Compressing rests: 55 -> 35
Compressing rests: 48 -> 32
Compressing rests: 57 -> 33
Compressing rests: 46 -> 34
Compressing rests: 165 -> 33
Compressing rests: 44 -> 32
Compressing rests: 86 -> 34
Compress

Compressing rests: 341 -> 33
Compressing rests: 211 -> 35
Compressing rests: 213 -> 33
Compressing rests: 211 -> 35
Compressing rests: 217 -> 33
Compressing rests: 341 -> 33
Compressing rests: 211 -> 35
Compressing rests: 214 -> 34
Compressing rests: 210 -> 34
Compressing rests: 218 -> 34
Compressing rests: 340 -> 32
Compressing rests: 212 -> 32
Compressing rests: 213 -> 33
Compressing rests: 211 -> 35
Compressing rests: 217 -> 33
Compressing rests: 342 -> 34
Compressing rests: 210 -> 34
Compressing rests: 213 -> 33
Compressing rests: 211 -> 35
Compressing rests: 217 -> 33
Compressing rests: 340 -> 32
Compressing rests: 212 -> 32
Compressing rests: 213 -> 33
Compressing rests: 211 -> 35
Compressing rests: 217 -> 33
Compressing rests: 341 -> 33
Compressing rests: 211 -> 35
Compressing rests: 213 -> 33
Compressing rests: 211 -> 35
Compressing rests: 217 -> 33
Compressing rests: 341 -> 33
Compressing rests: 211 -> 35
Compressing rests: 213 -> 33
Compressing rests: 211 -> 35
Compressing re

Compressing rests: 58 -> 34
Error converting midi to sequence cannot reshape array of size 0 into shape (0,newaxis)
Compressing rests: 63 -> 35
Removed 108 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Vengaboys - Boom Boom Boom.mid
Compressing rests: 58 -> 34
Compressing rests: 60 -> 32
Compressing rests: 128 -> 32
Removed 124 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Breathe Carolina - No Vacancy.mid
Compressing rests: 113 -> 33
Compressing rests: 248 -> 32
Compressing rests: 252 -> 32
Removed 548 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Fatboy Slim - Rockafella Skank.mid
Compressing rests: 47 -> 35
Compressing rests: 111 -> 35
Compressing rests: 48 -> 32
Compressing rests: 66 -> 34
Compressing rests: 400 -> 32
Removed 400 rests from data/midi/v7/midi_transform/freemidi/genre-dance-eletric/Prodigy - Breathe.mid
Compressing rests: 640 -> 32
Removed 880 rests from data/midi/v7/midi_transform/freemidi/genre-dance

Compressing rests: 92 -> 32
Removed 108 rests from data/midi/v7/midi_transform/freemidi/genre-pop/David Bowie - Dancing In The Street.mid
Removed 112 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Scorpions - Cant Get Enough.mid
Compressing rests: 48 -> 32
Compressing rests: 288 -> 32
Removed 256 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Idina Menzel - Let it Go 0.2.mid
Compressing rests: 83 -> 35
Removed 112 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Britney Spears - till the world ends.mid
Compressing rests: 383 -> 35
Compressing rests: 255 -> 35
Removed 568 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Toni Braxton - Fable - Dream Version.mid
Compressing rests: 124 -> 32
Compressing rests: 48 -> 32
Compressing rests: 69 -> 33
Compressing rests: 69 -> 33
Removed 104 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Jay-Z - Sunshine.mid
Error converting midi to sequence list index out of range
Compressing rests: 62 -> 34
Co

Removed 176 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Boney M - Marys Boy Child (Oh My Lord).mid
Compressing rests: 63 -> 35
Removed 108 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Vengaboys - Boom Boom Boom.mid
Compressing rests: 124 -> 32
Removed 19952 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Anne Murray - Snowbird.mid
Removed 144 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Oasis - Fuckin In The Bushes.mid
Error converting midi to sequence index 2679 is out of bounds for axis 0 with size 2679
Removed 720 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Corrs - Paddy McCarthy.mid
Error converting midi to sequence list index out of range
Removed 144 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Boney M - Sunny (2).mid
Compressing rests: 94 -> 34
Compressing rests: 81 -> 33
Compressing rests: 95 -> 35
Removed 204 rests from data/midi/v7/midi_transform/freemidi/genre-pop/2 Unlimited - Workaholic.mid
Removed

Compressing rests: 67 -> 35
Compressing rests: 48 -> 32
Compressing rests: 192 -> 32
Compressing rests: 69 -> 33
Removed 212 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Shakira - Waka Waka.mid
Removed 176 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Gloria Estefan - Bad Boy.mid
Compressing rests: 76 -> 32
Compressing rests: 128 -> 32
Removed 176 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Whitney Houston - Free Your Mind.mid
Compressing rests: 48 -> 32
Compressing rests: 52 -> 32
Compressing rests: 108 -> 32
Compressing rests: 244 -> 32
Removed 308 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Bon Jovi - Home Bound Train.mid
Compressing rests: 64 -> 32
Removed 208 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Olivia Newton John - Hopelessly Devoted To You.mid
Removed 112 rests from data/midi/v7/midi_transform/freemidi/genre-pop/Celine Dion - Jattendias.mid
Compressing rests: 48 -> 32
Compressing rests: 128 -> 32
Compressi

Removed 592 rests from data/midi/v7/midi_transform/midiworld/named_midi/Movie_Themes_-_Forrest_Gump.mid
Compressing rests: 61 -> 33
Compressing rests: 112 -> 32
Removed 784 rests from data/midi/v7/midi_transform/midiworld/named_midi/TV_Themes_-_South_Park.mid
Compressing rests: 64 -> 32
Compressing rests: 48 -> 32
Compressing rests: 41 -> 33
Compressing rests: 48 -> 32
Removed 1200 rests from data/midi/v7/midi_transform/midiworld/named_midi/The_Cars_-_Let_the_Good_Time_Roll.mid
Compressing rests: 61 -> 33
Removed 112 rests from data/midi/v7/midi_transform/midiworld/named_midi/REO_Speedwagon_-_In_my_Dreams.mid
Removed 352 rests from data/midi/v7/midi_transform/midiworld/named_midi/Captain_and_Tenille_-_Love_Will_Keep_Us_Together.mid
Error converting midi to sequence 
Compressing rests: 96 -> 32
Removed 112 rests from data/midi/v7/midi_transform/midiworld/named_midi/Village_People_-_In_the_Navy.mid
Compressing rests: 144 -> 32
Removed 128 rests from data/midi/v7/midi_transform/midiworld/

Compressing rests: 96 -> 32
Compressing rests: 128 -> 32
Removed 160 rests from data/midi/v7/midi_transform/midiworld/named_midi/Nightwish_-_Sacrament_Of_Wilderness.mid
Compressing rests: 40 -> 32
Compressing rests: 53 -> 33
Compressing rests: 47 -> 35
Compressing rests: 38 -> 34
Compressing rests: 63 -> 35
Removed 256 rests from data/midi/v7/midi_transform/midiworld/named_midi/TLC_-_Dear_Lie.mid
Compressing rests: 168 -> 32
Removed 152 rests from data/midi/v7/midi_transform/midiworld/named_midi/The_Corrs_-_Erin_Shore.mid
Compressing rests: 38 -> 34
Compressing rests: 41 -> 33
Compressing rests: 47 -> 35
Compressing rests: 44 -> 32
Removed 192 rests from data/midi/v7/midi_transform/midiworld/named_midi/Tears_For_Fears_-_Sowing_the_Seeds_of_Love.mid
Compressing rests: 85 -> 33
Compressing rests: 64 -> 32
Compressing rests: 47 -> 35
Compressing rests: 44 -> 32
Removed 144 rests from data/midi/v7/midi_transform/midiworld/named_midi/Gary_Lewis_and_the_Playboys_-_Shes_Just_My_Style.mid
Comp

Removed 160 rests from data/midi/v7/midi_transform/midiworld/named_midi/ELO_-_Tightrope.mid
Compressing rests: 128 -> 32
Removed 128 rests from data/midi/v7/midi_transform/midiworld/named_midi/The_Beatles_-_Birthday.mid
Compressing rests: 52 -> 32
Compressing rests: 64 -> 32
Compressing rests: 63 -> 35
Compressing rests: 56 -> 32
Compressing rests: 96 -> 32
Compressing rests: 256 -> 32
Compressing rests: 96 -> 32
Removed 480 rests from data/midi/v7/midi_transform/midiworld/named_midi/Marilyn_Manson_-_Deformography.mid
Removed 224 rests from data/midi/v7/midi_transform/midiworld/named_midi/Bjork_-_Bachelorette.mid
Compressing rests: 96 -> 32
Compressing rests: 47 -> 35
Compressing rests: 124 -> 32
Removed 128 rests from data/midi/v7/midi_transform/midiworld/named_midi/Nine_Inch_Nails_-_Gave_Up.mid
Removed 112 rests from data/midi/v7/midi_transform/midiworld/named_midi/Busta_Rhymes_-_Put_Your_Hands_Where_My_Eyes_Could_See.mid
Removed 112 rests from data/midi/v7/midi_transform/midiworld/n

Removed 128 rests from data/midi/v7/midi_transform/midiworld/named_midi/Pat_Benatar_-_Love_Is_A_Battlefield.mid
Compressing rests: 97 -> 33
Compressing rests: 61 -> 33
Compressing rests: 289 -> 33
Compressing rests: 161 -> 33
Compressing rests: 257 -> 33
Removed 144 rests from data/midi/v7/midi_transform/midiworld/named_midi/The_Offspring_-_Change_the_World.mid
Removed 704 rests from data/midi/v7/midi_transform/midiworld/named_midi/Ozzy_Osbourne_-_No_More_Tears.mid
Removed 112 rests from data/midi/v7/midi_transform/midiworld/named_midi/Deep_Purple_-_Highway_Star.mid
Compressing rests: 44 -> 32
Compressing rests: 57 -> 33
Removed 376 rests from data/midi/v7/midi_transform/midiworld/named_midi/Hall_and_Oates_-_I_Cant_Go_for_That.mid
Removed 432 rests from data/midi/v7/midi_transform/midiworld/named_midi/Mike_Oldfield_-_Moonlight_Shadow.mid
Compressing rests: 39 -> 35
Removed 2608 rests from data/midi/v7/midi_transform/midiworld/named_midi/Sam_and_Dave_-_Soul_Man.mid
Compressing rests: 48

Compressing rests: 60 -> 32
Compressing rests: 49 -> 33
Compressing rests: 96 -> 32
Compressing rests: 84 -> 32
Compressing rests: 101 -> 33
Compressing rests: 144 -> 32
Removed 296 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2017/WangH07.mid
Compressing rests: 76 -> 32
Compressing rests: 56 -> 32
Compressing rests: 40 -> 32
Compressing rests: 84 -> 32
Compressing rests: 96 -> 32
Compressing rests: 92 -> 32
Removed 124 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2006/Zusko06.mid
Compressing rests: 76 -> 32
Compressing rests: 60 -> 32
Compressing rests: 64 -> 32
Removed 120 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2006/Hao01.mid
Compressing rests: 78 -> 34
Compressing rests: 67 -> 35
Compressing rests: 39 -> 35
Compressing rests: 40 -> 32
Compressing rests: 48 -> 32
Compressing rests: 44 -> 32
Compressing rests: 112 -> 32
Compressing rests: 56 -> 32
Removed 104 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2006/Zusko05.mid
Compressing rests: 75 -

Compressing rests: 72 -> 32
Compressing rests: 64 -> 32
Removed 200 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2006/Avdeeva06.mid
Compressing rests: 84 -> 32
Compressing rests: 77 -> 33
Compressing rests: 116 -> 32
Compressing rests: 136 -> 32
Removed 104 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2006/Savitski08.mid
Compressing rests: 55 -> 35
Compressing rests: 332 -> 32
Removed 320 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2006/WongDoe05.mid
Compressing rests: 111 -> 35
Compressing rests: 92 -> 32
Compressing rests: 93 -> 33
Removed 196 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2006/Park08.mid
Compressing rests: 40 -> 32
Compressing rests: 48 -> 32
Compressing rests: 56 -> 32
Compressing rests: 40 -> 32
Compressing rests: 236 -> 32
Compressing rests: 212 -> 32
Compressing rests: 264 -> 32
Removed 632 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2004/SCHU07.mid
Compressing rests: 68 -> 32
Compressing rests: 92 -> 32
Compressing r

Compressing rests: 52 -> 32
Compressing rests: 96 -> 32
Compressing rests: 60 -> 32
Compressing rests: 40 -> 32
Compressing rests: 56 -> 32
Compressing rests: 51 -> 35
Compressing rests: 54 -> 34
Compressing rests: 139 -> 35
Removed 124 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2014/WangY03.mid
Compressing rests: 52 -> 32
Compressing rests: 40 -> 32
Compressing rests: 44 -> 32
Compressing rests: 104 -> 32
Compressing rests: 56 -> 32
Compressing rests: 100 -> 32
Removed 292 rests from data/midi/v7/midi_transform/from_mxl/ecomp/2002/victoria07.mid
Compressing rests: 64 -> 32
Compressing rests: 52 -> 32
Compressing rests: 38 -> 34
Compressing rests: 40 -> 32
Compressing rests: 122 -> 34
Compressing rests: 108 -> 32
Compressing rests: 44 -> 32
Compressing rests: 68 -> 32
Compressing rests: 56 -> 32
Compressing rests: 40 -> 32
Compressing rests: 472 -> 32
Compressing rests: 82 -> 34
Compressing rests: 44 -> 32
Removed 500 rests from data/midi/v7/midi_transform/from_mxl/ecomp/201

Compressing rests: 84 -> 32
Compressing rests: 168 -> 32
Compressing rests: 182 -> 34
Compressing rests: 262 -> 34
Compressing rests: 248 -> 32
Compressing rests: 194 -> 34
Compressing rests: 126 -> 34
Compressing rests: 126 -> 34
Compressing rests: 84 -> 32
Compressing rests: 72 -> 32
Removed 1260 rests from data/midi/v7/midi_transform/from_mxl/wikifonia/Luis Miguel - Mexico En La Piel (Trumpet in Bb).mid
Removed 128 rests from data/midi/v7/midi_transform/from_mxl/wikifonia/Taylor Swift - Sparks Fly.mid
Compressing rests: 134 -> 34
Compressing rests: 60 -> 32
Compressing rests: 68 -> 32
Compressing rests: 86304 -> 32
Removed 86272 rests from data/midi/v7/midi_transform/from_mxl/wikifonia/Les Kettley - Don't Ask.mid
Compressing rests: 108 -> 32
Compressing rests: 58 -> 34
Compressing rests: 196 -> 32
Removed 164 rests from data/midi/v7/midi_transform/from_mxl/wikifonia/Bon Jovi - Something for the Pain.mid
Compressing rests: 64 -> 32
Compressing rests: 60 -> 32
Compressing rests: 140 -

In [14]:
# One-column frame of output paths, indexed by the keys of transcribed_files
# so it can be joined back onto df.
tdf = pd.DataFrame(
    {out_dir: list(transcribed_files.values())},
    index=list(transcribed_files.keys()),
)

In [15]:
# Attach the new output-path column to the full metadata table by index.
merged_df = df.join(tdf, how='outer')
tdf.shape, df.shape, merged_df.shape

((38792, 1), (39942, 23), (39942, 24))

In [16]:
# Persist the merged table, then preview it.
merged_df.to_csv(out_csv, index=False)
merged_df.head()

Unnamed: 0,song_url,instruments,ht_offset,time_signature,title,quarter_length,ht_mode,source,artist,ht_key,...,inferred_offset,genres,section,parts,ht_time_signature,ht_bpm,midi_title,mxl,midi_transform,midi_npz
0,https://www.hooktheory.com/theorytab/view/wayn...,Piano,0.0,3/4,yu-gi-oh-theme-song,36.0,1.0,hooktheory,wayne-sharpe,C,...,0.0,,intro,"intro,chorus",3.0,85.0,yu-gi-oh,,midi_transform/hooktheory/pianoroll/w/wayne-sh...,midi_npz/hooktheory/pianoroll/w/wayne-sharpe/y...
1,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",0.0,4/4,yu-gi-oh-theme-song,32.0,1.0,hooktheory,wayne-sharpe,C,...,0.0,,chorus,"intro,chorus",4.0,128.0,yu-gi-oh3,,midi_transform/hooktheory/pianoroll/w/wayne-sh...,midi_npz/hooktheory/pianoroll/w/wayne-sharpe/y...
2,https://www.hooktheory.com/theorytab/view/what...,"Piano,Piano",-5.0,4/4,kiefer,16.0,6.0,hooktheory,what-a-day,D,...,0.0,Jazz,chorus,chorus,4.0,96.0,kiefer,,midi_transform/hooktheory/pianoroll/w/what-a-d...,midi_npz/hooktheory/pianoroll/w/what-a-day/kie...
3,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,-5.0,"J-Pop,Pop",verse,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...,midi_npz/hooktheory/pianoroll/w/whiteflame/sen...
4,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,-1.0,Holiday,intro,"intro,verse,chorus",4.0,108.0,Last Christmas Intro,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...


### Calculate timesteps

In [17]:
# Reload the merged table from out_csv (written by the to_csv call earlier in
# this notebook); this rebinds merged_df to the on-disk version.
merged_df = pd.read_csv(out_csv)

In [18]:
def calc_timesteps(idxrow):
    """Return ``(idx, timesteps)`` for one ``(idx, row)`` pair.

    ``timesteps`` is the first dimension of the chord array loaded from
    ``version_path / row[out_dir]``. It is ``None`` when the row holds no path
    string, when the file does not exist, or when loading raises (the error is
    printed rather than propagated).
    """
    idx, row = idxrow
    np_file = row[out_dir]

    # Rows without an output file carry a non-string value here.
    if not isinstance(np_file, str):
        return idx, None

    npz_path = Path(version_path)/np_file
    if not npz_path.exists():
        return idx, None

    try:
        return idx, load_chordarr(npz_path).shape[0]
    except Exception as e:
        print('Error reading text', e, np_file)
        return idx, None

In [19]:
# for r in merged_df.iterrows():
#     calc_timesteps(r)

In [20]:
# Compute the timestep count for every row in parallel.
file2steps = process_parallel(
    calc_timesteps,
    merged_df.iterrows(),
    total=len(merged_df),
)

In [21]:
# One-column frame of timestep counts, indexed by the keys of file2steps,
# then joined onto the merged table by index.
len_df = pd.DataFrame(
    {f'{out_dir}_timesteps': list(file2steps.values())},
    index=list(file2steps.keys()),
)
merged_len_df = merged_df.join(len_df, how='outer')
len_df.shape, merged_df.shape, merged_len_df.shape

((39942, 1), (39942, 24), (39942, 25))

In [22]:
# Overwrite out_csv with the table that now includes timestep counts, then preview it.
merged_len_df.to_csv(out_csv, index=False)
merged_len_df.head()

Unnamed: 0,song_url,instruments,ht_offset,time_signature,title,quarter_length,ht_mode,source,artist,ht_key,...,genres,section,parts,ht_time_signature,ht_bpm,midi_title,mxl,midi_transform,midi_npz,midi_npz_timesteps
0,https://www.hooktheory.com/theorytab/view/wayn...,Piano,0.0,3/4,yu-gi-oh-theme-song,36.0,1.0,hooktheory,wayne-sharpe,C,...,,intro,"intro,chorus",3.0,85.0,yu-gi-oh,,midi_transform/hooktheory/pianoroll/w/wayne-sh...,midi_npz/hooktheory/pianoroll/w/wayne-sharpe/y...,145.0
1,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",0.0,4/4,yu-gi-oh-theme-song,32.0,1.0,hooktheory,wayne-sharpe,C,...,,chorus,"intro,chorus",4.0,128.0,yu-gi-oh3,,midi_transform/hooktheory/pianoroll/w/wayne-sh...,midi_npz/hooktheory/pianoroll/w/wayne-sharpe/y...,129.0
2,https://www.hooktheory.com/theorytab/view/what...,"Piano,Piano",-5.0,4/4,kiefer,16.0,6.0,hooktheory,what-a-day,D,...,Jazz,chorus,chorus,4.0,96.0,kiefer,,midi_transform/hooktheory/pianoroll/w/what-a-d...,midi_npz/hooktheory/pianoroll/w/what-a-day/kie...,65.0
3,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,"J-Pop,Pop",verse,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...,midi_npz/hooktheory/pianoroll/w/whiteflame/sen...,129.0
4,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,Holiday,intro,"intro,verse,chorus",4.0,108.0,Last Christmas Intro,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,129.0
