In [7]:
from datetime import datetime
import itertools
import json
import os
import shutil

from keras import backend as K
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
import numpy as np

from data import *
from midi_util import array_to_midi, print_array

In [8]:
# All the pitches represented in the MIDI data arrays.
# directory.
PITCHES = [36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50, 58, 59, 60, 61, 62, 63, 64, 66]
# The subset of pitches we'll actually use.
IN_PITCHES = [36, 38, 42, 58, 59, 61]#[36, 38, 41, 42, 47, 58, 59, 61]
# The pitches we want to generate (potentially for different drum kit)
OUT_PITCHES = IN_PITCHES#[54, 56, 58, 60, 61, 62, 63, 64]
# The minimum number of hits to keep a drum loop after the types of
# hits have been filtered by IN_PITCHES.
MIN_HITS = 8

In [9]:
########################################################################
# Network architecture parameters.
########################################################################
NUM_HIDDEN_UNITS = 128
# The length of the phrase from which the predict the next symbol.
PHRASE_LEN = 64
# Dimensionality of the symbol space.
SYMBOL_DIM = 2 ** len(IN_PITCHES)
NUM_ITERATIONS = 2
BATCH_SIZE = 128

# VALIDATION_PERCENT = 0.1
VALIDATION_PERCENT = 0.001

BASE_DIR = './'
#BASE_DIR = '/home/ubuntu/neural-beats'

#MIDI_IN_DIR = os.path.join(BASE_DIR, 'midi_arrays/mega/')
#MIDI_IN_DIR = os.path.join(BASE_DIR, 'midi_arrays/mega/Electronic Live 9 SD/Jungle')
MIDI_IN_DIR = os.path.join(BASE_DIR, 'drums midi')
#MIDI_IN_DIR = os.path.join(BASE_DIR, 'midi_arrays/mega/Rock Essentials 2 Live 9 SD/Preview Files/Fills/4-4 Fills')

MODEL_OUT_DIR = os.path.join(BASE_DIR, 'models')
MODEL_NAME = 'drum_generator'
TRIAL_DIR = os.path.join(MODEL_OUT_DIR, MODEL_NAME)

MIDI_OUT_DIR = os.path.join(TRIAL_DIR, 'gen-midi')

LOAD_WEIGHTS = True

In [10]:
# Encode each configuration of p pitches, each on or off, as a
# number between 0 and 2**p-1.
assert len(IN_PITCHES) <= 8, 'Too many configurations for this many pitches!'
encodings = {
    config : i
    for i, config in enumerate(itertools.product([0,1], repeat=len(IN_PITCHES)))
}

decodings = {
    i : config
    for i, config in enumerate(itertools.product([0,1], repeat=len(IN_PITCHES)))
}

In [11]:
def sample(a, temperature=1.0):
    # helper function to sample an index from a probability array
    a = np.log(a) / temperature
    a = a.astype('float64')
    a = np.exp(a) / np.sum(np.exp(a))
    return np.argmax(np.random.multinomial(1, a, 1))


def encode(midi_array):
    '''Encode a folded MIDI array into a sequence of integers.'''
    return [
        encodings[tuple((time_slice>0).astype(int))]
        for time_slice in midi_array
    ]


def decode(config_ids):
    '''Decode a sequence of integers into a folded MIDI array.'''
    velocity = 120
#     velocity = 1
    return velocity * np.vstack(
        [list(decodings[id]) for id in config_ids])

In [12]:
def unfold(midi_array, pitches):
    '''Unfold a folded MIDI array with the given pitches.'''
    # Create an array of all the 128 pitches and fill in the
    # corresponding pitches.
    res = np.zeros((midi_array.shape[0], 128))
    assert midi_array.shape[1] == len(pitches), 'Mapping between unequal number of pitches!'
    for i in range(len(pitches)):
        res[:,pitches[i]] = midi_array[:,i]
    return res

In [13]:
def prepare_data():
    # Load the data.
    # Concatenate all the vectorized midi files.
    num_steps = 0

    # Sequence of configuration numbers representing combinations of
    # active pitches.
    config_sequences = []
    num_dirs = len([x for x in os.walk(MIDI_IN_DIR)])
    assert num_dirs > 0, 'No data found at {}'.format(MIDI_IN_DIR)

    in_pitch_indices = [ PITCHES.index(p) for p in IN_PITCHES ]
    for dir_idx, (root, dirs, files) in enumerate(os.walk(MIDI_IN_DIR)):
        for filename in files:
            if filename.split('.')[-1] != 'npy':
                continue
            array = np.load(os.path.join(root, filename))
            if np.sum(np.sum(array[:, in_pitch_indices]>0)) < MIN_HITS:
                continue
            config_sequences.append(np.array(encode(array[:, in_pitch_indices])))
        print('Loaded {}/{} directories'.format(dir_idx + 1, num_dirs))

    # Construct labeled examples.
    # Use a generator for X and y as the whole dataset may not fit in
    # memory.
    train_generator = SequenceDataGenerator(config_sequences,
                                            phrase_length=PHRASE_LEN,
                                            dim=SYMBOL_DIM,
                                            batch_size=BATCH_SIZE,
                                            is_validation=False,
                                            validation_percent=VALIDATION_PERCENT)

    valid_generator = SequenceDataGenerator(config_sequences,
                                            phrase_length=PHRASE_LEN,
                                            dim=SYMBOL_DIM,
                                            batch_size=BATCH_SIZE,
                                            is_validation=True,
                                            validation_percent=VALIDATION_PERCENT)

    return config_sequences, train_generator, valid_generator

In [14]:
def init_model():
    # Build the model.
    model = Sequential()
    model.add(LSTM(
        NUM_HIDDEN_UNITS,
        return_sequences=True,
        input_shape=(PHRASE_LEN, SYMBOL_DIM)))
    model.add(Dropout(0.3))
    '''
    model.add(LSTM(
        NUM_HIDDEN_UNITS,
        return_sequences=True,
        input_shape=(SYMBOL_DIM, SYMBOL_DIM)))
    model.add(Dropout(0.2))
    '''
    model.add(LSTM(NUM_HIDDEN_UNITS, return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(SYMBOL_DIM))
    model.add(Activation('softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=1e-03, rho=0.9, epsilon=1e-08))
    return model

In [15]:
def generate(model, seed, mid_name, temperature=1.0, length=512, tpq=1000):
    '''Generate sequence using model, seed, and temperature.'''

    generated = []
    phrase = seed

    if not hasattr(temperature, '__len__'):
        temperature = [temperature for _ in range(length)]

    for temp in temperature:
        x = np.zeros((1, PHRASE_LEN, SYMBOL_DIM))
        for t, config_id in enumerate(phrase):
            x[0, t, config_id] = 1
        preds = model.predict(x, verbose=0)[0]
        next_id = sample(preds, temp)

        generated += [next_id]
        phrase = phrase[1:] + [next_id]

    # ticks per quarter has negative correlation with drums speed
    mid = array_to_midi(unfold(decode(generated), OUT_PITCHES), mid_name, ticks_per_quarter=tpq)
    mid.save(os.path.join(MIDI_OUT_DIR, mid_name))
    return mid

In [16]:
def train(config_sequences, train_generator, valid_generator):
    '''Train model and save weights.'''

    # Create the trial directory.
    if not os.path.exists(TRIAL_DIR):
        os.makedirs(TRIAL_DIR)
    # Copy the source file, with a version number, to the trial directory.
#     source_filename = '__file__'
#     versioned_source_filename = ''.join([
#         ''.join(source_filename.split('.')[:-1]),
#         '-' + datetime.strftime(datetime.now(), '%Y%m%d%H%M%S') + '.',
#         source_filename.split('.')[-1]
#     ])
#     shutil.copyfile(
#         source_filename,
#         os.path.join(TRIAL_DIR, versioned_source_filename))


    # Initialize the model.
    model = init_model()
    print(model.summary())

    # Train the model
    if not os.path.exists(MIDI_OUT_DIR):
        os.makedirs(MIDI_OUT_DIR)
    if not os.path.exists(MODEL_OUT_DIR):
        os.makedirs(MODEL_OUT_DIR)
    print('Training the model...')

    if LOAD_WEIGHTS:
        print('Attempting to load previous weights...')
        weights_path = os.path.join(TRIAL_DIR, MODEL_NAME)
        if os.path.exists(weights_path):
            model.load_weights(weights_path)

    best_val_loss = None

    sequence_indices = idx_seq_of_length(config_sequences, PHRASE_LEN + 1)
    n_points = len(sequence_indices)

    nb_val_samples = n_points * VALIDATION_PERCENT
    print('Number of training points: {}'.format(n_points))
    print('Using {} validation batches'.format(nb_val_samples))

    for i in range(NUM_ITERATIONS):
        print('Iteration {}'.format(i))

        history = model.fit(
            train_generator.gen(),
#             steps_per_epoch=BATCH_SIZE*512,
            steps_per_epoch=BATCH_SIZE*128,
            epochs=1,
            validation_data=valid_generator.gen(),
            validation_steps=nb_val_samples)

        val_loss = history.history['val_loss'][-1]
        if best_val_loss is None or val_loss < best_val_loss:
            print('Best validation loss so far. Saving...')
            best_val_loss = val_loss
            model.save_weights(os.path.join(TRIAL_DIR, MODEL_NAME),
                               overwrite=True)
        # Write history.
        with open(os.path.join(TRIAL_DIR, 'history.jsonl'), 'a') as fp:
            json.dump(history.history, fp)
            fp.write('\n')

        # Reset seed so we can compare generated patterns across iterations.
        np.random.seed(0)
        
        sequence_indices = idx_seq_of_length(config_sequences, PHRASE_LEN)
        seq_index, phrase_start_index = sequence_indices[
            np.random.choice(len(sequence_indices))]
        gen_length = 512

        # Generate samples.
        if not (i > 9 and i % 10 == 0):
            continue

        for temperature in [0.5, 0.75, 1.0]:
            generated = []
            phrase = list(
                config_sequences[seq_index][
                    phrase_start_index: phrase_start_index + PHRASE_LEN])

            print('----- Generating with temperature:', temperature)

            generate(model,
                     phrase,
                     'out_{}_{}_{}.mid'.format(gen_length, temperature, i),
                     temperature=temperature,
                     length=gen_length)
    return model

In [17]:
def generate_drums(model, config_sequences, output_path):
    sequence_indices = idx_seq_of_length(config_sequences, PHRASE_LEN)
    seq_index, phrase_start_index = sequence_indices[
                np.random.choice(len(sequence_indices))]
    gen_length = 512
    for temperature in [1.0, 0.5, 0.75, 1.0]:
        generated = []
        phrase = list(
            config_sequences[seq_index][
                phrase_start_index: phrase_start_index + PHRASE_LEN
            ]
        )

        print('----- Generating with temperature:', temperature)

        midi = generate(model,
                        phrase,
                        'out_{}_{}_{}.mid'.format(gen_length, temperature, 0),
                        temperature=temperature,
                        length=gen_length)
        break
        
    midi.save(output_path)

In [18]:
config_sequences, train_generator, valid_generator = prepare_data()

Loaded 1/1 directories


In [29]:
len(train_generator.sequences), len(train_generator.sequences[4])

(1161, 1536)

In [19]:
len(config_sequences[0]),len(config_sequences[1]),len(config_sequences[2])

(1024, 2048, 1024)

In [None]:
%config

In [66]:
m = train(config_sequences, train_generator, valid_generator)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 64, 128)           98816     
_________________________________________________________________
dropout_6 (Dropout)          (None, 64, 128)           0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_7 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8256      
_________________________________________________________________
activation_3 (Activation)    (None, 64)                0         
Total params: 238,656
Trainable params: 238,656
Non-trainable params: 0
________________________________________________

In [67]:
m.save_weights('./model02')

In [87]:
generate_drums(m, config_sequences, 'output_drums3.mid')

----- Generating with temperature: 1.0


  a = np.log(a) / temperature


In [88]:
os.system('fluidsynth -ni Touhou.sf2 output_drums3.mid -F drums_generated_03.wav -r 44100')

0

In [85]:
ls

Drums Generator.ipynb        lofi_output7.mid
README.md                    [34mmidi_songs[m[m/
Simple Model.ipynb           midi_util.py
Touhou.sf2                   model02.data-00000-of-00001
[34m__pycache__[m[m/                 model02.index
checkpoint                   [34mmodels[m[m/
[34mdata[m[m/                        output_drums.midi
data.py                      output_drums2.midi
[34mdrums midi[m[m/                  output_drums3.midi
lofi_output3.mid             [34mpiano models[m[m/
lofi_output4.mid             piano_generated_07.wav
lofi_output5.mid             piano_generated_08.wav
lofi_output6.mid             [34mpiano_model[m[m/


In [72]:
m.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 64, 128)           98816     
_________________________________________________________________
dropout_6 (Dropout)          (None, 64, 128)           0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_7 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8256      
_________________________________________________________________
activation_3 (Activation)    (None, 64)                0         
Total params: 238,656
Trainable params: 238,656
Non-trainable params: 0
________________________________________________