In [None]:
import os
import numpy as np
import muspy
from tqdm import tqdm

# muspy.download_musescore_soundfont()


# Number of logical CPUs reported by the OS (may be None if undetermined).
N_CPUS = os.cpu_count()
# Absolute path of the parent of the current working directory.
ROOT_PATH = os.path.abspath(os.pardir)

# Warning: ~10 hours required (presumably for the first-run dataset download/conversion in the next cell -- TODO confirm)

In [None]:
# Directory that holds the Lakh MIDI dataset.
DATASET_PATH = os.path.join(ROOT_PATH, "data/lmd/")

# makedirs also creates the missing parent ("data/") and is a no-op when the
# directory already exists, whereas os.mkdir fails if the parent is absent.
os.makedirs(DATASET_PATH, exist_ok=True)

if len(os.listdir(DATASET_PATH)) == 0:
    # Empty directory: download, extract and convert the dataset, using N_CPUS parallel jobs.
    lmd = muspy.LakhMIDIDataset(DATASET_PATH, download_and_extract=True, convert=True, n_jobs=N_CPUS)
else:
    # Directory already populated: open it without downloading again.
    lmd = muspy.LakhMIDIDataset(DATASET_PATH)

---
## Data exploration

In [None]:
# print(len(lmd[1]))
# print(np.unique(list(track.program for track in lmd[1].tracks)))

# Among the first 10000 songs, print the key signatures of every song that has more than one.
for i in range(10000):
    k_sings = lmd[i].key_signatures
    if len(k_sings) > 1:
        print(k_sings)

# print(i)
# print(lmd[i])

# muspy.write_audio(os.path.join(ROOT_PATH, "data/songs"), lmd[19])

In [None]:
# Print every (numerator, denominator) time signature of the first 100 songs,
# with a "----" separator line before each song.
for i in range(100):
    print("----")
    for time_s in lmd[i].time_signatures:
        print((time_s.numerator, time_s.denominator))
    
# time_signatures = np.unique(((time_s.numerator, time_s.denominator) for time_s in lmd[7].time_signatures)) 
# time_signatures

In [None]:
# Display the `beats` attribute of the song at index 19.
lmd[19].beats

In [None]:
def transform_representation(song: "muspy.music.Music", verbose=0):

    '''
    Transform a song in the muspy format into a list of 12-field tuples (tokens).

    Every token has the layout below; fields that do not apply to a token type are 0.

                            Description                                                     Interval                                                Possible values
    (
        type,               # see below -->                                                 [0, 7] -->                                              8
        measure,            # index of the measure inside the song in interval -->          [0, last_measure] -->                                   512?
        beat,               # index of beat inside measure -->                              [0, numerator of time_signature) -->                    ??
        position,           # index with 1/64 beat length granularity -->                   [0, 63] -->                                             64
        duration,           # TODO: raw note duration in time steps for now, final encoding to be decided after dataset exploration
        pitch,              # height of pitch (128) + drums (another 128) -->               [0, 255] -->                                            256
        instrument_type,    # 128 instrument types + 1 for drums -->                        [0, 128] -->                                            129
        n_instrument,       # same instrument twice in the song for multiple voices -->     [0, 7??] -->                                            8
        velocity,           # amplitude of note, strength of play -->                       [0, 127] -->                                            128
        key_sign,           # root in [0,11], plus 12 when the mode is minor -->            [0, 23] -->                                             24
        time_sign,          # TODO: raw (numerator, denominator) pair for now, final encoding to be decided after dataset exploration
        tempo,              # qpm, geometric progression from 16 to 256 -->                 [0, 48] -->                                             49
    )

    type:
        0: start of song
        1: new instrument
        2: start of events
        3: note
        4: key_signature change event
        5: time_signature change event
        6: tempo change event
        7: end of song

    if type = 0 --> all values 0
    if type = 1 --> only instrument_type and n_instrument are set (n_instrument starts at 0 and grows by 1
                    for every further track using the same instrument) (other values are 0)
    if type = 2 --> all values 0

    then, before ANY type = 3, one of each follows:
        type = 4 --> only key_sign (other values are 0)
        type = 5 --> only time_sign (other values are 0)
        type = 6 --> only tempo (other values are 0)

    if type = 3 --> all values are full, and key_sign, time_sign and tempo are identical to the last 4, 5 or 6 respectively
    if type = 7 --> all values 0, end of the representation

    Parameters:
        song:    object exposing `resolution`, `time_signatures`, `tempos`, `key_signatures` and `tracks`
                 (assumed to follow the muspy attribute names `time`, `numerator`, `denominator`, `qpm`,
                 `root`, `mode`, `program`, `is_drum`, `notes`, `pitch`, `duration`, `velocity`).
        verbose: when truthy, print a one-line summary of the produced representation.

    Returns:
        The list of tokens, starting with a type 0 token and ending with a type 7 token.

    A missing time signature / tempo / key signature falls back to 4/4, 120 qpm and C major.
    A setting change that falls inside a measure is moved back to the beginning of THAT measure.
    '''

    fields = (
        "type", "measure", "beat", "position", "duration", "pitch",
        "instrument_type", "n_instrument", "velocity", "key_sign", "time_sign", "tempo",
    )

    def token(**values):
        # Build one tuple in field order; every field that is not given is 0.
        return tuple(values.get(name, 0) for name in fields)

    def encode_key(k_sign):
        # root in [0, 11], shifted by 12 for minor keys; an unknown root is treated as 0.
        root, mode = k_sign
        return (root or 0) % 12 + (12 if mode == "minor" else 0)

    def encode_tempo(qpm):
        # 49 geometrically spaced values from 16 to 256 qpm = 12 steps per doubling.
        if qpm <= 0:
            return 0
        return int(np.clip(np.rint(12 * np.log2(qpm / 16)), 0, 48))

    resolution = song.resolution  # time steps per quarter note

    # ---- initial settings and later change events -------------------------------------------
    t_signs = sorted(
        (ts for ts in song.time_signatures if ts.numerator > 0 and ts.denominator > 0),
        key=lambda ts: ts.time,
    )
    tempos = sorted(song.tempos, key=lambda tp: tp.time)
    k_signs = sorted(song.key_signatures, key=lambda ks: ks.time)

    # The earliest entry of each kind is the initial setting, the others are change events.
    current = {
        "k_sign": (k_signs[0].root, k_signs[0].mode) if k_signs else (0, "major"),
        "t_sign": (t_signs[0].numerator, t_signs[0].denominator) if t_signs else (4, 4),
        "tempo": tempos[0].qpm if tempos else 120.0,
    }

    events = [(ts.time, "t_sign", (ts.numerator, ts.denominator)) for ts in t_signs[1:]]
    events += [(tp.time, "tempo", tp.qpm) for tp in tempos[1:]]
    events += [(ks.time, "k_sign", (ks.root, ks.mode)) for ks in k_signs[1:]]
    events.sort(key=lambda event: event[0])

    def settings_token(kind):
        # Token announcing the current value of one setting.
        if kind == "k_sign":
            return token(type=4, key_sign=encode_key(current["k_sign"]))
        if kind == "t_sign":
            return token(type=5, time_sign=current["t_sign"])
        return token(type=6, tempo=encode_tempo(current["tempo"]))

    # ---- header: instruments, then the initial settings ------------------------------------
    final_song = [token(type=0)]

    instruments = {}    # track index -> (instrument_type, n_instrument)
    voices_seen = {}    # instrument_type -> number of tracks already using it
    notes = []          # (time, track index, note) over all tracks
    for track_idx, track in enumerate(song.tracks):
        if len(track.notes) == 0:
            continue
        instrument_type = 128 if track.is_drum else track.program
        n_instrument = voices_seen.get(instrument_type, 0)
        voices_seen[instrument_type] = n_instrument + 1
        instruments[track_idx] = (instrument_type, n_instrument)
        final_song.append(token(type=1, instrument_type=instrument_type, n_instrument=n_instrument))
        notes.extend((note.time, track_idx, note) for note in track.notes)
    notes.sort(key=lambda item: item[0])  # stable: simultaneous notes keep track order

    final_song.append(token(type=2))
    final_song.extend(settings_token(kind) for kind in ("k_sign", "t_sign", "tempo"))

    # ---- body: notes interleaved with setting changes --------------------------------------
    region_start = 0    # time step at which the current time signature took effect (a measure start)
    region_measure = 0  # index of the measure that starts at region_start

    def locate(time):
        # (measure, beat, position) of `time` under the current time signature.
        # Everything is scaled by the denominator so the arithmetic stays integral.
        numerator, denominator = current["t_sign"]
        beat_len = 4 * resolution
        scaled = (time - region_start) * denominator
        measure, rest = divmod(scaled, numerator * beat_len)
        beat, rest = divmod(rest, beat_len)
        return region_measure + measure, beat, rest * 64 // beat_len

    def add_notes(note_idx, until):
        # Append every pending note that starts before `until` (all of them if None);
        # return the index of the first note that was not appended.
        while note_idx < len(notes) and (until is None or notes[note_idx][0] < until):
            _, track_idx, note = notes[note_idx]
            instrument_type, n_instrument = instruments[track_idx]
            measure, beat, position = locate(note.time)
            final_song.append(token(
                type=3,
                measure=measure,
                beat=beat,
                position=position,
                duration=note.duration,
                pitch=note.pitch + (128 if instrument_type == 128 else 0),
                instrument_type=instrument_type,
                n_instrument=n_instrument,
                velocity=note.velocity,
                key_sign=encode_key(current["k_sign"]),
                time_sign=current["t_sign"],
                tempo=encode_tempo(current["tempo"]),
            ))
            note_idx += 1
        return note_idx

    current_note_idx = 0
    for time, kind, value in events:
        if value == current[kind]:
            continue  # redundant event, nothing actually changes

        # Events can only happen at the beginning of a measure: if one falls in the middle,
        # move it back to the beginning of THAT measure (not the following one).
        measure, _, _ = locate(time)
        numerator, denominator = current["t_sign"]
        boundary = region_start + (measure - region_measure) * numerator * 4 * resolution // denominator

        # Notes that start before the change still use the old settings.
        current_note_idx = add_notes(current_note_idx, boundary)

        current[kind] = value
        if kind == "t_sign":
            region_start, region_measure = boundary, measure
        final_song.append(settings_token(kind))

    add_notes(current_note_idx, None)
    final_song.append(token(type=7))

    if verbose:
        print(f"{len(notes)} notes, {len(instruments)} instruments, {len(final_song)} tokens")

    return final_song


In [None]:
# Per-song occurrence counts: every counter is incremented at most once per song for a given value.
stats = {
    "n_tracks" : np.zeros(1000),            # how many instruments are playing in a song (can also be 2 of the same instrument)
    "programs" : np.zeros(128),             # which type of instrument
    "velocity" : np.zeros(128),             # how loud (0-127)
    "durations": np.zeros(100000),          # how many quarters does the note last(?)
    "tempos": {},                           # how fast is the song in qpm (quarters per minute) --> can be non integer
    "time_signatures": {},                  # 4/4 or 3/4 etc... store it as a dict
    "key_signatures": np.zeros(100),        # which is the main key
    "key_mode": {"major": 0 , "minor":0}    # if main key is major or minor
}

for song in tqdm(lmd):
    # how many tracks are in the song (tracks = different "instrument voices" but could be 4 pianos)
    stats["n_tracks"][len(song)] += 1

    # how many different instruments and which
    for program in {track.program for track in song.tracks}:
        stats["programs"][program] += 1

    # stats on tempos (maybe not useful, but what do we do if tempo changes mid song?)
    # Read from the song being iterated, not from a stale lmd[i].
    for qpm in {tempo.qpm for tempo in song.tempos}:
        stats["tempos"][qpm] = stats["tempos"].get(qpm, 0) + 1

    # stats on time_signatures --> need to know which exist / are used to create possible tokens
    # A set keeps each (numerator, denominator) pair intact; np.unique would flatten the pairs.
    for time_s in {(ts.numerator, ts.denominator) for ts in song.time_signatures}:
        stats["time_signatures"][time_s] = stats["time_signatures"].get(time_s, 0) + 1

    # TODO: fill stats["key_signatures"] and stats["key_mode"] from song.key_signatures

    # stats on notes; iterating the tracks directly also copes with songs that have no notes,
    # where np.concatenate would raise on an empty list.
    for duration in {note.duration for track in song.tracks for note in track.notes}:
        stats["durations"][duration] += 1
    

    

