In [2]:
import os
import numpy as np
import muspy
from tqdm import tqdm
import config
import utils

# Name of the configuration preset handed to config.Config.
config_string = "standard"
conf = config.Config(config_string)

# muspy.download_musescore_soundfont()

# CPU count reported by the OS; passed as n_jobs when the dataset is converted.
N_CPUS = os.cpu_count()

# Parent directory of the current working directory.
ROOT_PATH = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir)
)

# Warning, ~ 10 hours required

In [3]:
DATASET_PATH = os.path.join(ROOT_PATH, "data/lmd/")

# makedirs (unlike mkdir) also creates a missing parent "data" directory and
# is a no-op when the folder already exists.
os.makedirs(DATASET_PATH, exist_ok=True)

if not os.listdir(DATASET_PATH):
    # Empty folder: download, extract and convert the dataset, using N_CPUS parallel jobs.
    lmd = muspy.LakhMIDIDataset(DATASET_PATH, download_and_extract=True, convert=True, n_jobs=N_CPUS)
else:
    # Folder already has content: open it without downloading or converting.
    lmd = muspy.LakhMIDIDataset(DATASET_PATH)

---
## Data exploration

In [None]:
# print(len(lmd[1]))
# print(np.unique(list(track.program for track in lmd[1].tracks)))

# Print the key signatures of every song, among the first 10000, that declares more than one.
for i in range(10000):
    k_sings = lmd[i].key_signatures
    if len(k_sings) > 1:
        print(k_sings)

# print(i)
# print(lmd[i])

# muspy.write_audio(os.path.join(ROOT_PATH, "data/songs"), lmd[19])

In [None]:
# Print the (numerator, denominator) of every time signature of the first 100 songs;
# each song's group is preceded by a "----" separator line.
for i in range(100):
    print("----")
    for time_s in lmd[i].time_signatures:
        print((time_s.numerator, time_s.denominator))

# time_signatures = np.unique(((time_s.numerator, time_s.denominator) for time_s in lmd[7].time_signatures)) 
# time_signatures

In [7]:
# Display the first song of the dataset.
lmd[0]

Music(metadata=Metadata(schema_version='0.2', title='Perspection (Solar Scape Remix)', source_filename='00000ec8a66b6bd2ef809b0443eeae41.mid', source_format='midi'), resolution=480, tempos=[Tempo(time=0, qpm=148.000148000148)], key_signatures=[KeySignature(time=0, root=0, mode='major')], time_signatures=[TimeSignature(time=0, numerator=4, denominator=4)], tracks=[Track(program=0, is_drum=False, notes=[Note(time=0, pitch=78, duration=120, velocity=127), Note(time=120, pitch=66, duration=120, velocity=127), Note(time=240, pitch=82, duration=120, velocity=127), ...])])

In [8]:
# Tempos, key signatures and time signatures of the first song, one list per line.
print(
    lmd[0].tempos,
    lmd[0].key_signatures,
    lmd[0].time_signatures,
    sep="\n",
)

[Tempo(time=0, qpm=148.000148000148)]
[KeySignature(time=0, root=0, mode='major')]
[TimeSignature(time=0, numerator=4, denominator=4)]


### What are annotations?
https://salu133445.github.io/muspy/doc/muspy.html#muspy.Annotation

In [21]:
# Annotations attached to the first track of song 3.
lmd[3].tracks[0].annotations

[Annotation(time=482, annotation={'number': 121, 'value': 0}, group='control_change'),
 Annotation(time=486, annotation={'number': 10, 'value': 64}, group='control_change'),
 Annotation(time=490, annotation={'number': 93, 'value': 56}, group='control_change'),
 Annotation(time=493, annotation={'number': 91, 'value': 0}, group='control_change'),
 Annotation(time=497, annotation={'number': 11, 'value': 127}, group='control_change'),
 Annotation(time=502, annotation={'number': 7, 'value': 100}, group='control_change'),
 Annotation(time=510, annotation={'number': 0, 'value': 0}, group='control_change')]

In [22]:
# For each of the first 10 songs, print "song_index track_index" of its first
# drum track; songs without a drum track print nothing.
for i in range(10):
    for j, track in enumerate(lmd[i].tracks):
        if track.is_drum:
            print(i, j)
            break

3 9
4 4
6 8
8 8


In [26]:
# Track 9 of song 3 (flagged is_drum).
lmd[3].tracks[9]

Track(program=0, is_drum=True, notes=[Note(time=18480, pitch=40, duration=1, velocity=119), Note(time=18700, pitch=30, duration=156, velocity=93), Note(time=18720, pitch=40, duration=1, velocity=127), ...], annotations=[Annotation(time=806, annotation={'number': 121, 'value': 0}, group='control_change'), Annotation(time=810, annotation={'number': 93, 'value': 10}, group='control_change'), Annotation(time=813, annotation={'number': 91, 'value': 82}, group='control_change'), ...])

In [25]:
# Distinct pitches played on track 4 of song 4.
set(note.pitch for note in lmd[4].tracks[4].notes)

{3, 36, 40, 41, 42, 45, 46, 47, 48, 49}

In [46]:
# First note of track 9 of song 3.
lmd[3].tracks[9].notes[0]

Note(time=18480, pitch=40, duration=1, velocity=119)

In [54]:
# Flatten every note of song 3 into one array with columns
# [time, pitch, duration, velocity], then order the rows by time.
song = lmd[3]
notes = np.zeros((np.sum([len(track) for track in song.tracks]), 4))

i = 0
for track in song.tracks:
    for note in track.notes:
        notes[i] = (
            note.time,
            # drum pitches are shifted by 128 so they do not collide with regular pitches
            note.pitch + 128 if track.is_drum else note.pitch,
            note.duration,
            note.velocity,
        )
        i += 1

# notes = sorted(notes, key = lambda x: x[0])
notes = notes[np.argsort(notes[:, 0])]
print(notes)

[[2.64000e+03 8.80000e+01 1.19200e+03 6.50000e+01]
 [3.84000e+03 6.70000e+01 1.91600e+03 9.30000e+01]
 [3.84000e+03 6.30000e+01 5.96000e+02 1.20000e+02]
 ...
 [1.57443e+05 5.50000e+01 1.91600e+03 8.10000e+01]
 [1.57463e+05 5.80000e+01 1.91600e+03 7.60000e+01]
 [1.57483e+05 6.30000e+01 1.91600e+03 1.11000e+02]]


In [None]:
def _measure_and_beat_length(time_sign, resolution):
    '''Return (timesteps per measure, timesteps per beat) for a time signature.

    `resolution` is the number of timesteps per quarter note, so one beat
    (a 1/denominator note) lasts resolution * 4 / denominator timesteps and a
    measure holds `numerator` such beats.
    '''
    numerator, denominator = time_sign
    beat_length = resolution * 4 / denominator
    return numerator * beat_length, beat_length


def _collect_notes(song):
    '''Return an (n_notes, 4) float array [time, pitch, duration, velocity] sorted by time.

    Pitches of drum tracks are shifted by 128 so they do not collide with the
    pitches of the other tracks.
    '''
    rows = [
        (
            note.time,
            note.pitch + 128 if track.is_drum else note.pitch,
            note.duration,
            note.velocity,
        )
        for track in song.tracks
        for note in track.notes
    ]
    # reshape keeps the (0, 4) shape for songs without any note
    notes = np.array(rows, dtype=float).reshape(-1, 4)
    # stable sort: simultaneous notes keep their track order
    return notes[notes[:, 0].argsort(kind="stable")]


def transform_representation(song: muspy.music.Music, config: config.Config, verbose=0):

    '''
    This function accepts as input a song in the muspy format and transforms it in a series of tuples written in the following way

                            Description                                                     Interval                                                Possible values
    (
        type,               # see below -->                                                 [0, 7] -->                                              8
        measure,            # index of the measure inside the song in interval -->          [0, last_measure] -->                                   512?
        beat,               # index of beat inside measure -->                              [0, numerator of time_signature] -->                    ??
        position,           # index with 1/64 beat length granularity -->                   [0, 63/64] -->                                          64
        duration,           # hierarchical structure? -->                                   ??? better to specify after dataset exploration
        pitch,              # height of pitch (128) + drums (another 128) -->               [0, 255] -->                                            256
        instrument_type,    # 128 instrument types + 1 for drums -->                        [0, 128] -->                                            129
        n_instrument,       # same instrument twice in the song for multiple voices -->     [0, 7??] -->                                            8
        velocity,           # amplitude of note, strength of play -->                       [0, 127] -->                                            128
        key_sign,           # [0,11] (all possible notes) and [maj,min] -->                 [0, 23] -->                                             24
        time_sign,          # denominator pow(2) in [1,64] and numerator int in [0,128] --> ??? better to specify after dataset exploration
        tempo,              # qpm, geometric progression from 16 to 256 -->                 [0, 48] -->                                             49
    )

    type:
        0: start of song
        1: new instrument
        2: start of events
        3: note
        4: key_signature change event
        5: time_signature change event
        6: tempo change event
        7: end of song


    if type = 0 --> all values 0
    if type = 1 --> only instrument_type must be specified (and n_instrument is 1 bigger than the previous identical instrument defined) (other values are 0)
    if type = 2 --> all values 0

    then, before ANY type = 3, MUST FOLLOW at least one of each:
        type = 4 --> only key_sign (other values are 0 except measure)
        type = 5 --> only time_sign (other values are 0 except measure)
        type = 6 --> only tempo (other values are 0 except measure)

    if type = 3 --> all values are full, and key_sign, time_sign and tempo are identical to the last 4, 5 or 6 respectively
    if type = 7 --> all values 0, end of the representation

    NOTE: this function only appends the initial key_sign / time_sign / tempo
    entries, the later change events and whatever utils.add_notes appends;
    it does not emit entries of type 0, 1, 2 or 7 itself.

    Parameters:
        song:    the muspy song to convert
        config:  configuration object forwarded to the utils helpers
        verbose: when truthy, print a message for every event that had to be
                 moved back to the beginning of its measure

    Returns:
        the list of entries produced for the song

    Raises:
        ValueError: if the song has no key signature, time signature or tempo
    '''

    if not (song.key_signatures and song.time_signatures and song.tempos):
        raise ValueError("The song needs at least one key signature, one time signature and one tempo")

    # list of all notes/events
    final_song = []

    # later changes of key_sign / time_sign / tempo, stored as (time, kind, value)
    events = []

    # the first entry of each kind is the "current" one and influences the notes
    # immediately following; the others are applied later and go into "events"
    for i, key_sign in enumerate(song.key_signatures):
        value = (key_sign.root, key_sign.mode)
        if i == 0:
            current_key_sign = value
            final_song.append(utils.key_sign_repr(current_key_sign, measure = 0, config = config))
        else:
            events.append((key_sign.time, "key_sign", value))

    for i, time_sign in enumerate(song.time_signatures):
        value = (time_sign.numerator, time_sign.denominator)
        if i == 0:
            current_time_sign = value
            final_song.append(utils.time_sign_repr(current_time_sign, measure = 0, config = config))
        else:
            events.append((time_sign.time, "time_sign", value))

    for i, tempo in enumerate(song.tempos):
        if i == 0:
            current_tempo = tempo.qpm
            final_song.append(utils.tempo_repr(current_tempo, measure = 0, config = config))
        else:
            events.append((tempo.time, "tempo", tempo.qpm))

    # sort events by timestep (stable, so simultaneous events keep their insertion order)
    events.sort(key = lambda event: event[0])

    # all the notes of the song, sorted by time
    notes = _collect_notes(song)

    resolution = song.resolution
    current_measure_length, current_beat_length = _measure_and_beat_length(current_time_sign, resolution)

    current_settings = {
        "key_sign": current_key_sign,       # tuple (root, mode)
        "time_sign": current_time_sign,     # tuple (numerator, denominator)
        "tempo": current_tempo,             # float
        "resolution": resolution,           # timesteps per quarter note
        "measure": current_measure_length,  # timesteps per measure
        "beat": current_beat_length         # timesteps per beat
    }

    repr_builders = {
        "key_sign": utils.key_sign_repr,
        "time_sign": utils.time_sign_repr,
        "tempo": utils.tempo_repr,
    }

    # index of the measure reached so far
    current_measure_index = 0

    # timestep reached so far; always the beginning of a measure
    current_time = 0

    for event_time, kind, value in events:
        # the dataset is not clean, sometimes the same event is repeated twice
        # or more --> only react when the value actually changes
        if value == current_settings[kind]:
            continue

        # if the event happens in the middle of a measure because the midi is
        # "wrong" --> move it to the beginning of THAT measure (not the following one)
        delta_t = (event_time - current_time) % current_settings["measure"]
        if delta_t and verbose:
            print(f"{kind} event at timestep {event_time} moved back by {delta_t} timesteps to the beginning of its measure")
        event_start = event_time - delta_t

        # add the notes between the current time and the event using the settings
        # that were valid until now
        # NOTE(review): assumes add_notes takes the (non-negative) length of the
        # interval and returns the measure index reached at its end -- confirm against utils
        final_song, current_measure_index = utils.add_notes(
            final_song,
            current_settings,
            current_time,
            current_measure_index,
            event_start - current_time,
            [notes],
            config
        )

        # copy, so that the settings used for the notes above are not modified
        new_settings = dict(current_settings)
        new_settings[kind] = value
        if kind == "time_sign":
            new_settings["measure"], new_settings["beat"] = _measure_and_beat_length(value, resolution)

        final_song.append(repr_builders[kind](value, measure = current_measure_index, config = config))

        current_settings = new_settings
        current_time = event_start

    # append every remaining note, up to one timestep past the end of the last sounding note
    if len(notes) > 0:
        song_finish = (notes[:, 0] + notes[:, 2]).max() + 1
    else:
        song_finish = current_time

    final_song, _ = utils.add_notes(
        final_song,
        current_settings,
        current_time,
        current_measure_index,
        max(0, song_finish - current_time),
        [notes],
        config
    )

    return final_song


In [None]:
stats = {
    "n_tracks" : np.zeros(1000),            # how many instruments are playing in a song (can also be 2 of the same instrument)
    "programs" : np.zeros(128),             # which type of instrument
    "velocity" : np.zeros(128),             # how loud (0-127)
    "durations": np.zeros(100000),          # note duration in timesteps; the last bin collects every longer duration
    "tempos": {},                           # how fast is the song in qpm (quarters per minute) --> can be non integer
    "time_signatures": {},                  # 4/4 or 3/4 etc... stored as {(numerator, denominator): count}
    "key_signatures": np.zeros(100),        # which is the main key
    "key_mode": {"major": 0 , "minor":0}    # if main key is major or minor
}

# Every statistic counts in how many songs a value appears (each value at most once per song).
for song in tqdm(lmd):
    # how many tracks are in the song (tracks = different "instrument voices" but could be 4 pianos)
    stats["n_tracks"][len(song)] += 1

    # how many different instruments and which
    for program in {track.program for track in song.tracks}:
        stats["programs"][program] += 1

    # stats on tempos (maybe not useful, but what do we do if tempo changes mid song?)
    for qpm in {tempo.qpm for tempo in song.tempos}:
        stats["tempos"][qpm] = stats["tempos"].get(qpm, 0) + 1

    # stats on time_signatures --> need to know which exist / are used to create possible tokens
    # (a set keeps each (numerator, denominator) pair as a tuple; np.unique would flatten it)
    for time_s in {(ts.numerator, ts.denominator) for ts in song.time_signatures}:
        stats["time_signatures"][time_s] = stats["time_signatures"].get(time_s, 0) + 1

    # TODO: "key_signatures", "key_mode" and "velocity" are declared above but not populated yet

    # stats on notes; iterating the tracks directly also works for songs without any note
    last_duration_bin = len(stats["durations"]) - 1
    for duration in {note.duration for track in song.tracks for note in track.notes}:
        stats["durations"][min(duration, last_duration_bin)] += 1
    

    

