In [1]:
import sys
import logging
import pandas as pd
import seaborn as sns
%matplotlib inline

# Add the parent directory (Midi2Vec) to Python's module search path
sys.path.append('../')

from data_loading.data_loaders import MidiDataLoader
from midi_to_dataframe.note_mapper import NoteMapper
from pipeline.pipeline import Pipeline
from optimization.optimizers import BruteForce

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Route records from the root logger to stdout so they show up in the cell output.
logger = logging.getLogger()
# Use setLevel() rather than assigning `.level`: it is the documented API and, on
# recent Python versions, also invalidates the logging module's cached level checks.
logger.setLevel(logging.INFO)

# Reuse an existing stdout handler when this cell is re-run; attaching a second
# one would print every log line twice.
stream_handler = next(
    (handler for handler in logger.handlers
     if isinstance(handler, logging.StreamHandler)
     and getattr(handler, "stream", None) is sys.stdout),
    None,
)
if stream_handler is None:
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)

In [3]:
# Corpus the semantic encoder model is trained on
encoder_training_docs = "../../midi-embeddings/data/full_1_measure.txt"

# Candidate values for the encoder (doc2vec) stage.
# Each key maps to the list of values handed to the optimizer for that setting.
_doc2vec_sweep = {
    'doc2vec_docs': [encoder_training_docs],
    'doc2vec_dm': [1],
    'doc2vec_dm_mean': [1],
    'doc2vec_epochs': [1, 2],
    # NOTE(review): 'doc2vec_hs' and 'doc2vec_negative' are both 0 -- confirm the
    # encoder still has a training objective with this combination.
    'doc2vec_hs': [0],
    # NOTE(review): the end rate (0.2) is larger than the start rate (0.025) --
    # confirm this is intended.
    'doc2vec_learning_rate_start': [0.025],
    'doc2vec_learning_rate_end': [0.2],
    'doc2vec_min_count': [5, 10],
    'doc2vec_negative': [0],
    'doc2vec_vector_size': [20, 24],
    'doc2vec_window': [1, 2, 3],
}

# Candidate values for the sequence-learning (Keras LSTM) stage.
_nn_sweep = {
    'nn_features': [['bpm', 'measure', 'beat']],
    'nn_batch_size': [25, 50, 75],
    'nn_dense_activation_function': ["linear"],
    'nn_dropout': [0],
    'nn_epochs': [20, 30, 40, 50],
    'nn_hidden_neurons': [15, 20, 30],
    'nn_layers': [15, 20, 25],
    'nn_lstm_activation_function': ["selu"],
    'nn_lstm_n_prev': [12, 16, 20, 24],
}

# Full sweep definition: encoder settings first, then sequence-learning settings.
param_sweep_values = {**_doc2vec_sweep, **_nn_sweep}

In [4]:
# MIDI files used as training documents for sequence learning
training_docs = [
    "../resources/breakbeats/084 Breakthru.mid",
    "../resources/breakbeats/086 Clouds.mid",
    "../resources/breakbeats/089 Get Out.mid",
    "../resources/breakbeats/089 Wrong.mid",
    "../resources/breakbeats/090 Deceive.mid",
    "../resources/breakbeats/090 New York.mid",
    "../resources/breakbeats/090 Radio.mid",
    "../resources/breakbeats/093 Pretender.mid",
    "../resources/breakbeats/093 Right Won.mid",
    "../resources/breakbeats/094 Run.mid",
]

# Note mapper for MIDI file loading, configured from the JSON mapping file
note_mapping_config_path = "../settings/map-to-group.json"
note_mapper = NoteMapper(note_mapping_config_path)

# Data loader that encodes the MIDI-format training files using the mapper above
data_loader = MidiDataLoader(note_mapper)

In [None]:
# Single-row DataFrames appended by `callback`, one per completed run
results_dfs = []
# Run counter kept in a one-element list so `callback` can update it in place
runs_completed = [0]
# Number of completed runs after which `callback` stops the sweep
max_runs = 2

def callback(params, metrics):
    """Record the outcome of one sweep run and stop once ``max_runs`` is reached.

    The parameters and metrics are merged into one record, printed, and
    appended to ``results_dfs`` as a single-row DataFrame.

    Args:
        params: dict of hyperparameter values for the run; must contain
            'nn_features' as an iterable of strings.
        metrics: dict of evaluation results for the same run. On key
            collisions the metric value overrides the parameter value.

    Raises:
        SystemExit: with exit code 0 once ``max_runs`` runs have completed.
            This aborts whatever call invoked the callback, so the caller
            must catch it if it wants to use the collected results.
    """
    runs_completed[0] += 1
    merged = {**params, **metrics}
    # Flatten the feature list into one string so the record fits a single row
    merged['nn_features'] = ', '.join(merged['nn_features'])
    # Print the counter value itself, not the one-element list that holds it
    print("Completed " + str(runs_completed[0]) + " runs.")
    print(merged)
    results_dfs.append(pd.DataFrame(merged, index=[0]))
    if runs_completed[0] >= max_runs:
        print("Max. runcount reached.")
        sys.exit(0)

In [None]:
# Configure the pipeline: MIDI data loader, training documents, 3-fold cross-evaluation
pipeline = Pipeline()
pipeline.set_data_loader(data_loader)
pipeline.set_training_docs(training_docs)
pipeline.set_k_fold_cross_eval(k=3)

# Brute-force sweep over param_sweep_values; `callback` is registered on the optimizer
# (presumably invoked once per evaluated combination -- confirm against BruteForce).
brute_force_param_sweep = BruteForce(params=param_sweep_values)
brute_force_param_sweep.set_callback(callback)
pipeline.set_optimizer(brute_force_param_sweep)

try:
    results_df = pipeline.run()
except SystemExit:
    # `callback` ends the sweep with sys.exit(0) once max_runs is reached, so run()
    # never returns in that case. Rebuild the results from the per-run frames the
    # callback collected instead of leaving `results_df` undefined for later cells.
    if not results_dfs:
        # Nothing was collected, so this exit did not come from a completed run.
        raise
    results_df = pd.concat(results_dfs, ignore_index=True)

BruteForce evaluation of 10368 hyperparameter combinations.
Loaded 100000 documents
Loaded 200000 documents
Loaded 300000 documents
Loaded 400000 documents
Loaded 500000 documents
Loaded 600000 documents
Loaded 700000 documents


In [None]:
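# Fallback when the sweep was stopped early by `callback` (max_runs reached) and
# `results_df` was never assigned: rebuild it from the per-run frames in `results_dfs`.
#results_df = pd.concat(results_dfs)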

In [None]:
# Hyperparameters with more than one candidate value, in sweep-definition order
variables = [
    name
    for name, candidates in param_sweep_values.items()
    if len(candidates) > 1
]

In [None]:
# Median f1 score for each value of every swept hyperparameter
for variable in variables:
    print(results_df[[variable, 'f1']].groupby([variable], as_index=False).median())

In [None]:
import itertools
import math

import matplotlib.pyplot as plt

# One heatmap of the median f1 score per ordered pair of swept hyperparameters
# (heatmap_x on the columns, heatmap_y on the rows), laid out three per row.
# permutations() yields the pairs in the same order as two nested loops that skip x == y.
heatmap_pairs = list(itertools.permutations(variables, 2))
num_plots = len(heatmap_pairs)
dim = math.ceil(num_plots / 3)

# With fewer than two swept hyperparameters there is nothing to plot, and
# plt.subplots() cannot create a grid with zero rows.
if num_plots:
    # squeeze=False keeps `axes` two-dimensional even for a single row
    fig, axes = plt.subplots(dim, 3, sharex=False, sharey=False, squeeze=False)
    # Keep the original 20x12 size for small grids, grow the height for larger ones
    fig.set_size_inches(20, max(12, 4 * dim))
    flat_axes = axes.ravel()

    for axis_index, (heatmap_x, heatmap_y) in enumerate(heatmap_pairs):
        pivoted = (
            # Restrict to the needed columns first: taking the median over the
            # string-valued columns of results_df fails on recent pandas versions.
            results_df[[heatmap_y, heatmap_x, "f1"]]
            .groupby([heatmap_y, heatmap_x], as_index=False)
            .median()
            # pivot() arguments are keyword-only in pandas 2.x
            .pivot(index=heatmap_y, columns=heatmap_x, values="f1")
        )
        ax = flat_axes[axis_index]
        sns.heatmap(pivoted, annot=True, fmt="g", cmap='viridis', ax=ax)
        ax.set_title("median f1: {} vs {}".format(heatmap_y, heatmap_x))

    # Hide grid cells left over when the plot count is not a multiple of three
    for unused_axis in flat_axes[num_plots:]:
        unused_axis.set_visible(False)