In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pretty_midi
import visual_midi
import joblib
from os import listdir
from os.path import getsize
from MIDIComposingAI.utils import piano_roll_to_pretty_midi
from MIDIComposingAI.create_dataset import *
from MIDIComposingAI.get_back_data import *
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.preprocessing import Normalizer, MinMaxScaler

### Get data

In [3]:
path = '../raw_data/pretty_midi'
directory = [file_name for file_name in listdir(path) if getsize(f'{path}/{file_name}') < 300_000]

for i, file_name in enumerate(directory):
    file = joblib.load(f'{path}/{file_name}')
    if i == 0:
        X, y = create_simple_dataset(file, ratio=0.2)
    else:
        try:
            loaded = create_simple_dataset(file, ratio=0.2)
            X = np.concatenate((X, loaded[0]))
            y = np.concatenate((y, loaded[1]))
            del([loaded, file])
        except:
            pass
    if i % 10 == 0:
        print(f'{X.shape[0]} observations')
    if X.shape[0] >= 100:
        break

print('Loading chord feature')

chord = adding_chords_info('../raw_data/chords_midi.csv', X, verbose=1)

print('Chord feature loaded')

X = X.reshape((X.shape[0], -1))
X = np.concatenate((chord, X), axis=1, dtype=np.int8)

print('Chord feature implemented')

y = y.reshape((y.shape[0], -1))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=2)

print('Done')

14 observations
95 observations
Loading chord feature
1 done
101 done
Chord feature loaded
Chord feature implemented
Done


### Make predictions

#### Initiate, train and save our model

In [4]:
# Latest best params

best_params = {
    # 'criterion': 'squared_error',
    # 'max_depth': None,
    # 'min_samples_split': 2,
    # 'min_samples_leaf': 1,
    # 'min_weight_fraction_leaf': 0.0,
    # 'max_leaf_nodes': 128,
    'min_impurity_decrease': 0.5
}

In [5]:
# New best params

# best_params = {
#     # 'criterion': 'absolute_error',
#     'max_depth': 12,
#     'min_samples_split': 5,
#     'min_samples_leaf': 1,
#     'min_weight_fraction_leaf': 0.0,
#     'max_leaf_nodes': 12,
#     'min_impurity_decrease':0.5
#}

In [6]:
# Old best params

# best_params = {
#     'criterion': 'friedman_mse',
#     'max_depth': None,
#     'min_samples_split': 3,
#     'min_samples_leaf': 1,
#     'min_weight_fraction_leaf': 0.0,
#     'max_leaf_nodes': 128,
#     'min_impurity_decrease': 0.8
# }

In [7]:
tree = DecisionTreeRegressor(**best_params)
tree.fit(X_train, y_train)
# joblib.dump(tree, '../Models/3rdbest_params_tree.joblib')
# tree = joblib.load('../Models/tree.joblib')

DecisionTreeRegressor(min_impurity_decrease=0.5)

#### Attempt to debug it

In [8]:
# example_file = []
# for file in directory:
#     if 200_000 > getsize(f'{path}/{file}'): # We don't want too big or too little files
#         example_file.append(joblib.load(f'{path}/{file}'))
#         break

In [9]:
# dir = 'first_pres_dataset'
# for i, file in enumerate(example_file):
#     create_nparray_dataset(file, dir, name=f'chose{i}')

In [10]:
# X_test, y_test = joblib.load(f'../raw_data/pandas_dataframes/first_pres_dataset/chose0')

In [11]:
# X_test, y_test = create_nparray_dataset(example_file[0], 'f', 'n', store=False)

In [12]:
# X_test = X_test.reshape((X_test.shape[0], -1))
# y_test = y_test.reshape((y_test.shape[0], -1))

#### Predictions

In [13]:
# Making our predictions
# X_test_scale = scaler.transform(X_test)
predictions = tree.predict(X_test)

In [14]:
# Test for the "false predictions" problem
for i, pred in enumerate(predictions):
    if i % 10 == 0:
        print((pred == y_test[i]).mean())

0.5


In [15]:
predicted_melodies = np.array([assembled_target_to_melody(prediction) for prediction in predictions])
true_melodies = np.array([assembled_target_to_melody(melody) for melody in y_test])

In [16]:
# They should be different

In [17]:
X_test = np.delete(X_test, np.s_[0:24], axis=1)
X_test = X_test.reshape(-1, 128, 500)

In [18]:
%%capture --no-display

coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(piano_roll_to_pretty_midi(X_test[1]))

In [19]:
%%capture --no-display

coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(piano_roll_to_pretty_midi(predicted_melodies[1]))

In [20]:
piano_roll_to_pretty_midi(predicted_melodies[1]).write('test.mid')

In [21]:
%%capture --no-display

coloring = visual_midi.presets.Coloring.INSTRUMENT
plotter = visual_midi.Plotter(coloring=coloring)
plotter.show_notebook(piano_roll_to_pretty_midi(true_melodies[1]))

In [22]:
copy = X_test.copy()
copy_2 = X_test.copy()

In [23]:
assembled_predicted_music = np.array(
    [assemblate_accompaniment_melody(test, predicted_melody) for test, predicted_melody in zip(copy, predicted_melodies)],
)

assembled_true_music = np.array(
    [assemblate_accompaniment_melody(test, true_melody) for test, true_melody in zip(copy_2, true_melodies)],
)

In [24]:
for i, (pred, true) in enumerate(zip(assembled_predicted_music, assembled_true_music)):
    try:
        piano_roll_to_pretty_midi(pred, fs=50).write(f'../PredVSTrue/ManualTuning/{i+2}_pred.mid')
        piano_roll_to_pretty_midi(true, fs=50).write(f'../PredVSTrue/ManualTuning/{i+2}_true.mid')
    except:
        pass