In [None]:
import os
import pickle
import numpy
import glob
import time

import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import plot_model
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense, Conv2DTranspose, Reshape, Lambda, Activation, BatchNormalization, LeakyReLU, Dropout, ZeroPadding2D, UpSampling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import backend as K

import math
import matplotlib.pyplot as plt

import pypianoroll
import numpy as np
from pypianoroll import Multitrack, Track

In [None]:
# Sub-folder of 'data' that contains the MIDI files.
music_name = 'midis/'

# Number of time steps each sliced pianoroll has (must be divisible by 16).
pianrollLength = 128

data_folder = os.path.join('data', music_name)

# Minimum pitch span (highest - lowest + 1) a track must cover to be used.
minNumDifferentNotes = 5
# Note bounds; restricting the pitch range makes training faster.
lowestNotePossible = 20
highestNotePossible = 108
# possibleNotes must be divisible by 4, otherwise the architecture needs to be changed.
possibleNotes = highestNotePossible - lowestNotePossible

# Fail early if the constraints documented above are violated, instead of
# producing arrays with a shape that later stages cannot consume.
if pianrollLength % 16 != 0:
    raise ValueError('pianrollLength must be divisible by 16, got ' + str(pianrollLength))
if possibleNotes <= 0 or possibleNotes % 4 != 0:
    raise ValueError('possibleNotes must be a positive multiple of 4, got ' + str(possibleNotes))

In [None]:
def get_Music_List(directory):
    """Return the paths of all MIDI files below ``directory``.

    The directory is searched recursively. Only files whose name ends with
    ``.mid`` are returned.

    Parameters
    ----------
    directory : str
        Root folder to search.

    Returns
    -------
    list of str
        File paths (``directory`` joined with the relative location), sorted
        so that the order - and therefore any positional indexing into the
        list - is the same on every run and every file system. An empty list
        is returned when nothing matches or the directory does not exist.
    """
    # Build the result locally; the function no longer writes a module-level
    # `music_list` as a side effect - callers use the return value.
    found = []
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".mid"):
                found.append(os.path.join(root, file))
    # Source: https://stackoverflow.com/questions/3964681/find-all-files-in-a-directory-with-extension-txt-in-python
    return sorted(found)

In [None]:
# Collect the paths of all MIDI files found below data_folder.
music_list = get_Music_List(data_folder)
# Debugging aids (disabled): print the list / parse the first file.
#print(music_list)
#pypianoroll = Multitrack(music_list[0])

In [None]:
# When True, songs that contain more than one track are skipped.
isOnlyMonoTrackMode = True
pianoRolls = []

# music_list was already built in the previous cell; the folder is not scanned again.
print(len(music_list), 'files in total')

for file in music_list:
    # Named `multitrack` on purpose: the original name `pypianoroll` rebound
    # (and thereby hid) the imported `pypianoroll` module.
    multitrack = Multitrack(file)
    # Filter out multitrack songs (only while mono-track mode is enabled).
    if isOnlyMonoTrackMode and len(multitrack.tracks) > 1:
        continue
    pianoRolls.append(multitrack)
print(len(pianoRolls))

In [None]:
# Visual sanity check: plot the first track of one parsed song.
# The index is clamped so that data sets with fewer than 14 usable songs
# still show a preview instead of raising IndexError.
previewIndex = min(13, len(pianoRolls) - 1)
pianoRolls[previewIndex].tracks[0].plot()

In [None]:
def _slice_pianoroll(roll, sliceLength, lowNote, highNote, minFill=0.8):
    """Cut one pianoroll into fixed-length, pitch-cropped slices.

    Parameters
    ----------
    roll : np.ndarray
        2-D array indexed as [time step, pitch].
    sliceLength : int
        Number of time steps per slice.
    lowNote, highNote : int
        Pitch columns ``lowNote`` (inclusive) to ``highNote`` (exclusive) are kept.
    minFill : float
        A trailing slice shorter than ``sliceLength`` is kept (zero-padded at
        the end) only if it is longer than ``minFill * sliceLength``.

    Returns
    -------
    list of np.ndarray
        Slices of shape ``(sliceLength, highNote - lowNote)``. Slices without
        any active note are left out.
    """
    slices = []
    numNotes = highNote - lowNote
    count = int(math.ceil(len(roll) / sliceLength))
    for i in range(count):
        part = roll[i * sliceLength:(i + 1) * sliceLength, lowNote:highNote]
        # Skip slices in which nothing is played.
        if not part.any():
            continue
        # Only the last slice can be shorter than sliceLength.
        if len(part) < sliceLength:
            if len(part) <= sliceLength * minFill:
                continue
            # Pad with zeros to the required shape; keeping the dtype of the
            # track gives every slice the same dtype.
            padded = np.zeros((sliceLength, numNotes), dtype=roll.dtype)
            padded[:len(part)] = part
            part = padded
        slices.append(part)
    return slices


# Not used in this cell; kept so the names stay defined for any later cell.
tmpPianoRolls = []
pianoTracks = []

# Collected slices, each of shape (pianrollLength, possibleNotes).
# The former `reshape(...)` / `clip(...)` calls were removed: their return
# values were discarded, so they never had an effect. Clipping to the note
# bounds would also have been wrong here, because the array values are not
# pitches.
tmpSplitted = []

for pianoroll in pianoRolls:
    for track in pianoroll.tracks:
        # Filter to get only piano tracks (program numbers 0-7).
        if track.program > 7:
            continue

        # pianoroll: np.ndarray, shape=(num_time_step, 128)
        npPianoroll = np.asarray(track.pianoroll)
        # Skip completely empty tracks before asking for the pitch range.
        # NOTE(review): get_active_pitch_range() appears to raise for an empty
        # pianoroll in some pypianoroll versions - confirm.
        if not npPianoroll.any():
            continue

        lowest, highest = track.get_active_pitch_range()
        # Remove all tracks that do not cover the minimum number of different notes.
        if highest - lowest < (minNumDifferentNotes - 1):
            continue
        # The crop below keeps the columns lowestNotePossible <= pitch < highestNotePossible,
        # so a track reaching highestNotePossible itself must be rejected too;
        # otherwise its top note would be cut off silently.
        # NOTE(review): assumes `highest` is the (inclusive) index of the
        # highest active pitch - confirm against the pypianoroll documentation.
        if lowest < lowestNotePossible or highest >= highestNotePossible:
            continue

        print('\n' + str(npPianoroll.shape))
        tmpSplitted.extend(
            _slice_pianoroll(npPianoroll, pianrollLength, lowestNotePossible, highestNotePossible)
        )
# Source: https://stackoverflow.com/a/56888143/9179624 (Getting parts of an array)
# Source: https://stackoverflow.com/a/23567941/9179624 (Test if an array is empty)
# Source: https://stackoverflow.com/a/35751427/9179624 (Padding of arrays)

In [None]:
# Sanity check: shape of the stacked slices (the first axis is the number of slices).
np.asarray(tmpSplitted).shape

In [None]:
# Binarize the training set: every cell becomes 1 if its value is above the
# threshold and 0 otherwise.
# NOTE(review): the values are presumably MIDI velocities (0-127), not 0..1 as
# the previous comment stated - confirm.
binarizeThreshold = 10

# possibleNotes replaces the hard-coded 88 so that this cell keeps working
# when the note bounds in the configuration cell are changed.
newPianorolls = [
    (np.asarray(data).reshape(pianrollLength, possibleNotes, 1) > binarizeThreshold) * 1
    for data in tmpSplitted
]
print(np.asarray(newPianorolls).shape)

In [None]:
# Stack the binarized slices into a single array; this array is written to disk further below.
reshaped = np.asarray(newPianorolls)

In [None]:
#import sys
#with open('out.txt', 'w') as f:
#    numpy.set_printoptions(threshold=sys.maxsize)
#    print(tmpSplitted[186], file=f)  

In [None]:
# Drop the intermediate lists to free RAM; only `reshaped` is needed from here on.
tmpSplitted = []
splittedPianorolls = []
newPianorolls = []

# Save the array for further use. np.save does not create missing parent
# directories, so the output folder is created first.
outputDir = os.path.join('data', 'preprocessed')
os.makedirs(outputDir, exist_ok=True)
# The note count in the file name follows the configuration instead of a
# hard-coded 88 (identical name for the current settings).
outputName = 'midi_binarized_p' + str(pianrollLength) + '_dn' + str(possibleNotes) + '.npy'
np.save(os.path.join(outputDir, outputName), reshaped)