In [None]:
# Mount Google Drive at /content/drive so later cells can read and write files there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install music21==7.1.0

Collecting music21==7.1.0
  Downloading music21-7.1.0.tar.gz (19.2 MB)
[K     |████████████████████████████████| 19.2 MB 3.0 MB/s 
Collecting jsonpickle
  Downloading jsonpickle-2.0.0-py2.py3-none-any.whl (37 kB)
Collecting webcolors>=1.5
  Downloading webcolors-1.11.1-py3-none-any.whl (9.9 kB)
Building wheels for collected packages: music21
  Building wheel for music21 (setup.py) ... [?25l[?25hdone
  Created wheel for music21: filename=music21-7.1.0-py3-none-any.whl size=21912606 sha256=9b3289135d017981a716556344c94ef1549fbd561e8a90764fba09c63d49064b
  Stored in directory: /root/.cache/pip/wheels/98/3c/67/9e30a8a4c3ac934068c95d2c27e38e1b5e5016a9257dbbf5d0
Successfully built music21
Installing collected packages: webcolors, jsonpickle, music21
  Attempting uninstall: music21
    Found existing installation: music21 5.5.0
    Uninstalling music21-5.5.0:
      Successfully uninstalled music21-5.5.0
Successfully installed jsonpickle-2.0.0 music21-7.1.0 webcolors-1.11.1


In [None]:
import zipfile
import os 
import sys
from tqdm import tqdm
from music21 import converter, instrument, note, chord
import music21
import numpy as np
from imageio import imwrite
from os import path
from collections import Counter

In [None]:
# Confirm the kernel imported the pinned music21 release rather than the preinstalled one.
print(music21.__version__)

7.1.0


In [None]:
# in_file = "/content/drive/MyDrive/CS236/CS236_final_project/giant_midi/midis_v1.1.zip"
# out_file = "/content/drive/MyDrive/CS236/CS236_final_project/giant_midi_out_v2"

# with zipfile.ZipFile(in_file, 'r') as zip_ref:
#     zip_ref.extractall(out_file)

In [None]:
def get_midi_images(input_dir_name, output_dir_name):
    """Convert every file in ``input_dir_name`` to piano-roll images.

    Each directory entry is passed to ``midi2image`` together with a running
    per-composer count and a running overall count (both starting at 1), which
    ``midi2image`` uses to build the output file names.

    Args:
        input_dir_name: Directory whose entries are treated as MIDI files.
        output_dir_name: Directory the PNG images are written to.

    Returns:
        A list with the image matrices of all files that converted
        successfully. Files that fail are reported on stdout and skipped.
    """
    images = []
    composer_count = Counter()
    files = os.listdir(input_dir_name)

    for overall_count, midi in enumerate(tqdm(files), start=1):
        midi_path = os.path.join(input_dir_name, midi)
        composer = get_composer(midi)
        composer_count[composer] += 1
        try:
            # midi2image requires both counters; omitting them raised a
            # TypeError for every file, which the old bare except hid.
            images += midi2image(midi_path, output_dir_name, composer_count[composer], overall_count)
        except Exception as exc:
            # Best effort: report the file and the reason, then keep going.
            # KeyboardInterrupt is deliberately not caught.
            print(f"{midi_path}: {exc!r}")

    return images

In [None]:
def get_name(og_path, num_composer, num_in_song, num_overall):
  """Build the PNG file name for one image cut from a MIDI file.

  The text after the last "/" of ``og_path`` is lower-cased and split on
  spaces. The final space-separated token is dropped, the remaining tokens are
  glued together without separators, and every comma becomes an underscore.
  The three counters are then appended as ``{composer}_{in_song}_{overall}.png``.
  """
  # rpartition yields the whole string as the tail when there is no "/".
  file_name = og_path.rpartition("/")[2].lower()
  tokens = file_name.split(" ")

  # Everything except the last token, with commas turned into underscores.
  prefix = "".join(tokens[:-1]).replace(",", "_")

  return prefix + f"{num_composer}_{num_in_song}_{num_overall}.png"

In [None]:
def get_composer(path):
  """Return the lower-cased composer key of a MIDI file name.

  The key is everything before the first comma, with surrounding whitespace
  removed. For a name such as "Bruch, Max, ..." that is "bruch".

  Splitting on the comma (instead of taking the first space-separated word
  and dropping its last character) keeps multi-word surnames intact:
  "Le Beau, Luise, ..." gives "le beau" rather than "l".

  Args:
    path: File name of the MIDI file, without directories.
      NOTE(review): assumes names start with "Surname, " - confirm for the dataset.

  Returns:
    The lower-cased text before the first comma, or the whole lower-cased
    name when it contains no comma.
  """
  surname, _, _ = path.lower().partition(",")
  return surname.strip()

In [None]:
# Smoke test: show the file name generated for one sample MIDI name.
name = get_name(
    "Bruch, Max, 2 Klavierstücke, Op.14, VjU4n7Gfulw.mid",
    num_composer=1,
    num_in_song=2,
    num_overall=3,
)
print(name)

bruch_max_2klavierstücke_op.14_1_2_3.png


In [None]:
def extractNote(element):
    """Return the pitch-space value (``pitch.ps``) of ``element`` truncated to an int."""
    pitch_space = element.pitch.ps
    return int(pitch_space)

def extractDuration(element):
    """Return the ``quarterLength`` stored on the duration of ``element``."""
    duration = element.duration
    return duration.quarterLength

def get_notes(notes_to_parse):
    """Collect offset, pitch and duration for every note in ``notes_to_parse``.

    Only ``note.Note`` and ``chord.Chord`` elements are considered, and
    elements whose ``isRest`` flag is set are skipped. A chord contributes one
    entry per member note; each entry carries the chord's own offset and
    duration together with that member's pitch.

    Returns:
        A dict with three parallel lists: ``"start"`` (element offsets),
        ``"pitch"`` (integer pitches) and ``"dur"`` (quarter lengths).
    """
    start, notes, durations = [], [], []

    for element in notes_to_parse:
        is_single_note = isinstance(element, note.Note)
        if not is_single_note and not isinstance(element, chord.Chord):
            continue
        if element.isRest:
            continue

        # Treat a single note as a one-member chord so both cases share a loop.
        members = (element,) if is_single_note else element.notes
        for member in members:
            start.append(element.offset)
            notes.append(extractNote(member))
            durations.append(extractDuration(element))

    return {"start":start, "pitch":notes, "dur":durations}


def midi2image(midi_path, output_dir, composer_count, overall_count, max_repetitions = float("inf"), resolution = 0.25, lowerBoundNote = 31, upperBoundNote = 95, maxSongLength = 64):
    """Render one MIDI file as fixed-width piano-roll images and save them as PNGs.

    The file is parsed with music21 and split per instrument where possible.
    Note offsets and durations are quantised to columns of ``resolution``
    quarter notes. The roll of each instrument is cut into consecutive windows
    of ``maxSongLength`` columns; every window that contains at least one note
    is written to ``output_dir`` and returned. Rendering of an instrument
    stops at its first completely empty window.

    Args:
        midi_path: Path of the MIDI file to convert.
        output_dir: Directory the PNGs are written to; created if missing.
        composer_count: Per-composer counter, used only in the file name.
        overall_count: Global file counter, used only in the file name.
        max_repetitions: Maximum number of windows rendered per instrument.
        resolution: Quarter-note length represented by one image column.
        lowerBoundNote: Pitch mapped to row 0.
        upperBoundNote: Exclusive upper pitch bound; the image has
            ``upperBoundNote - lowerBoundNote`` rows.
        maxSongLength: Number of columns per image.

    Returns:
        A list of 2-D numpy arrays (values 0 or 255), one per saved image.
    """
    mid = converter.parse(midi_path)
    images = []

    instruments = instrument.partitionByInstrument(mid)

    data = {}

    if instruments is None:
        data["instrument_0"] = get_notes(mid.flat.notes)
    else:
        try:
            i = 0
            for instrument_i in instruments.parts:
                notes_data = get_notes(instrument_i.recurse())
                if len(notes_data["start"]) == 0:
                    continue

                if instrument_i.partName is None:
                    data["instrument_{}".format(i)] = notes_data
                    i += 1
                else:
                    data[instrument_i.partName] = notes_data
        except Exception:
            # Fall back to the flattened score. Discard whatever parts were
            # collected before the failure so no note is rendered twice.
            data = {"instrument_0": get_notes(mid.flat.notes)}

    if output_dir:
        # Avoid a FileNotFoundError from imwrite when the target folder is missing.
        os.makedirs(output_dir, exist_ok=True)

    num_rows = upperBoundNote - lowerBoundNote

    for values in data.values():
        # Quantise every note once, instead of once per window.
        spans = []
        for dur, start, pitch in zip(values["dur"], values["start"], values["pitch"]):
            row = pitch - lowerBoundNote
            # Skip pitches outside [lowerBoundNote, upperBoundNote). A negative
            # row would otherwise wrap around to the top of the matrix.
            if not 0 <= row < num_rows:
                continue
            first_col = int(start / resolution)
            spans.append((row, first_col, first_col + int(dur / resolution)))

        index = 0
        while index < max_repetitions:
            window_start = index * maxSongLength
            window_end = window_start + maxSongLength
            matrix = np.zeros((num_rows, maxSongLength))

            for row, first_col, end_col in spans:
                # Clip the note to the current window; paint only if they overlap.
                lo = max(first_col, window_start)
                hi = min(end_col, window_end)
                if lo < hi:
                    matrix[row, lo - window_start:hi - window_start] = 255

            if not matrix.any():
                # An empty window ends this instrument.
                break

            # Number images by how many were saved so far for this file. For the
            # first instrument this equals the window index; for further
            # instruments it keeps names unique instead of overwriting files.
            out_name = get_name(midi_path, composer_count, len(images), overall_count)
            images.append(matrix)
            imwrite(os.path.join(output_dir, out_name), matrix.astype(np.uint8))
            index += 1

    return images

In [None]:
num_images = 0
overall_count = 0
max_images = 60000
composer_count = Counter()
output_dir = "drive/MyDrive/CS236/CS236_final_project/giant_midi_out/midi_images_res_0.25"
input_dir = "drive/MyDrive/CS236/CS236_final_project/giant_midi_out/midis_v1.1"
stem = ""
# Same project folder as output_dir. The earlier "CS236 Final Project" spelling
# did not match the other paths, and the run failed with FileNotFoundError as
# soon as the first overflow image had to be written.
overflow_dir = "drive/MyDrive/CS236/CS236_final_project/giant_midi_out/overflow_res_0.25"

# Create both destinations up front so a bad path fails before hours of work.
for target_dir in (output_dir, overflow_dir):
  os.makedirs(target_dir, exist_ok=True)

pbar = tqdm(os.listdir(input_dir))
# The loop variable is not called `path`, which would shadow `from os import path`.
for midi_file in pbar:
  pbar.set_description(f"num processed: {num_images}")
  composer = get_composer(midi_file)
  composer_count[composer] += 1
  overall_count += 1

  # Once more than max_images images exist, later files go to the overflow folder.
  target_dir = overflow_dir if num_images > max_images else output_dir
  imgs = midi2image(os.path.join(input_dir, midi_file), target_dir, composer_count[composer], overall_count, resolution=0.25)

  num_images += len(imgs)

num processed: 60201:  13%|█▎        | 1392/10701 [9:27:35<63:15:44, 24.46s/it]


FileNotFoundError: ignored