In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from sklearn.metrics import make_scorer
from MIDIComposingAI.create_dataset import *
from MIDIComposingAI.get_back_data import *
from MIDIComposingAI.utils import piano_roll_to_pretty_midi
import joblib
import pretty_midi
from scipy.stats import entropy
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer
from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
from sklearn.model_selection import train_test_split
from statistics import mean
from os import listdir
from os.path import getsize

In [3]:
file = joblib.load('../raw_data/pretty_midi/(Day Dream) Prayer')
file2 = pretty_midi.PrettyMIDI('../raw_data/1.mid')

In [11]:
def pattern_recognition(acc, mel):

    # Create list of pitches for each frame in accompaniment's piano roll
    acc_pitches = []
    
    for frame in acc.T:
        pitches = []
        for vel in frame:
            if vel > 0:
                pitches.append(list(frame).index(vel))
        if not pitches:
            pitches.append(0)
        acc_pitches.append(pitches)
    
    # Take the mean pitch of each frame
    mean_pitches_acc = [(np.sum(pitches) / len(pitches)) for pitches in acc_pitches]
    
    # Get only the pitches (not the velocities)
    melody_pitches = mel[:500]
    
    
    # Create the pattern for accompaniment
    acc_passed_pitch = []
    pattern_acc = [0]
    
    for pitch in mean_pitches_acc:
        if acc_passed_pitch:
            if pitch != acc_passed_pitch[-1]:
                relative_pitch = pitch - acc_passed_pitch[-1]
                pattern_acc.append(relative_pitch)
            else:
                pattern_acc.append(pattern_acc[-1])
        acc_passed_pitch.append(pitch)
    
    # Create the pattern for melody
    mel_passed_pitch = []
    pattern_mel = [0]
    
    for pitch in melody_pitches:
        if mel_passed_pitch:
            if pitch != mel_passed_pitch[-1]:
                relative_pitch = pitch - mel_passed_pitch[-1]
                pattern_mel.append(relative_pitch)
            else:
                pattern_mel.append(pattern_mel[-1])
        mel_passed_pitch.append(pitch)
        
    return pattern_acc, pattern_mel

def custom_metric(acc, pred):
    
    # Get the pitch pattern for both accompaniment and melody
    pattern_acc, pattern_mel = pattern_recognition(acc, pred)
    
    # Compute the mean of velocities for both accompaniment and melody
    list_mean_vel_acc = [
        np.mean(
            [vel for vel in frame if vel > 0]
        )
        for frame in acc.T
        if np.sum(frame) > 0]
    
    # Check if the list is empty
    if not list_mean_vel_acc:
        list_mean_vel_acc = [0]
    
    mean_vel_acc = np.mean(list_mean_vel_acc)
    
    list_mean_vel_pred = [pred for pred in pred[500:] if pred > 0]
    
    # Check if the list is empty
    if not list_mean_vel_pred:
        list_mean_vel_pred = [0]
        
    mean_vel_pred = np.mean(list_mean_vel_pred)
    
    # Compute the diff beetween the two velocities mean
    velocity_diff = abs(mean_vel_acc - mean_vel_pred)
    
    # Compute the "diff pattern" beetween accompaniment and melody
    diff_pattern = np.array([abs(acc - mel) for acc, mel in zip(pattern_acc, pattern_mel)]).reshape(-1, 1)
    
    # Compute the entropy of the diff pattern
    if np.sum(diff_pattern) == 0:
        entropy_score = 0
    else:
        entropy_score = entropy(diff_pattern)[0]
    
    # Compute the final score
    # score = (velocity_diff, entropy_score)
    
    return np.mean([entropy_score, velocity_diff])

### Compute the score within a grid search

In [5]:
tree = DecisionTreeRegressor()

grid = {
    # 'criterion':               ["squared_error","friedman_mse","absolute_error","poisson"],
    'max_depth':               [None, 2, 12, 128],
    'min_samples_split':       [2, 3, 5, 10],
    'min_samples_leaf':        [1, 2, 3, 4],
    'min_weight_fraction_leaf':[0.0, 0.2, 0.4, 0.5],
    'max_leaf_nodes':          [None, 128, 12, 2],
    # 'min_impurity_decrease':   [0.0, 0.2, 0.5, 0.8],
}

In [6]:
params = [{
    # 'criterion':crit,
           'max_depth':max_d,
           'min_samples_split':min_ss,
           'min_samples_leaf':min_sl,
           'min_weight_fraction_leaf':min_w,
           'max_leaf_nodes':max_l}
           # 'min_impurity_decrease':min_i}
          # for crit in grid['criterion']
          for max_d in grid['max_depth']
          for min_ss in grid['min_samples_split']
          for min_sl in grid['min_samples_leaf']
          for min_w in grid['min_weight_fraction_leaf']
          for max_l in grid['max_leaf_nodes']]
          # for min_i in grid['min_impurity_decrease']]

In [7]:
len(params)

1024

In [9]:
path = '../raw_data/pretty_midi'
directory = [file_name for file_name in listdir(path) if getsize(f'{path}/{file_name}') < 300_000]

for i, file_name in enumerate(directory):
    file = joblib.load(f'{path}/{file_name}')
    if i == 0:
        X, y = create_simple_dataset(file, ratio=0.2)
    else:
        try:
            loaded = create_simple_dataset(file, ratio=0.2)
            X = np.concatenate((X, loaded[0]))
            y = np.concatenate((y, loaded[1]))
            del([loaded, file])
        except:
            pass
    if i % 10 == 0:
        print(f'{X.shape[0]} observations')
    if X.shape[0] >= 0:
        break

chord = adding_chords_info('../raw_data/chords_midi.csv', X)

X_reshaped = X.reshape((X.shape[0], -1))
X_reshaped = np.concatenate((chord, X_reshaped), axis=1, dtype=np.int8)

y = y.reshape((y.shape[0], -1))

X_train, X_test, X_reshaped_train, X_reshaped_test, y_train, y_test = train_test_split(X, X_reshaped, y, test_size=0.1, random_state=2)

14 observations


In [12]:
params_and_scores = []

for i, param in enumerate(params):
    
    tree = DecisionTreeRegressor(**param)
    tree.fit(X_reshaped_train, y_train)
    predictions = tree.predict(X_reshaped_test)
    scores = [custom_metric(test, pred) for test, pred in zip(X_test, predictions)]
    score = np.mean(scores)
    params_and_scores.append({'params':param, 'score':score})
    if i % 10 == 0:
        print(f'{i+1} done')

1 done
11 done
21 done
31 done
41 done
51 done
61 done
71 done
81 done
91 done
101 done
111 done
121 done
131 done
141 done
151 done
161 done
171 done
181 done
191 done
201 done
211 done
221 done
231 done
241 done
251 done
261 done
271 done
281 done
291 done
301 done
311 done
321 done
331 done
341 done
351 done
361 done
371 done
381 done
391 done
401 done
411 done
421 done
431 done
441 done
451 done
461 done
471 done
481 done
491 done
501 done
511 done
521 done
531 done
541 done
551 done
561 done
571 done
581 done
591 done
601 done
611 done
621 done
631 done
641 done
651 done
661 done
671 done
681 done
691 done
701 done
711 done
721 done
731 done
741 done
751 done
761 done
771 done
781 done
791 done
801 done
811 done
821 done
831 done
841 done
851 done
861 done
871 done
881 done
891 done
901 done
911 done
921 done
931 done
941 done
951 done
961 done
971 done
981 done
991 done
1001 done
1011 done
1021 done


In [18]:
ps = [i, params['score'] for i, params in enumerate(params_and_scores)]

SyntaxError: invalid syntax (890067043.py, line 1)

In [23]:
ps = []
for params in params_and_scores:
    ps.append(params['score'])

In [27]:
best_params = params_and_scores[ps.index(np.min(ps))]
worst_params = params_and_scores[ps.index(np.max(ps))]

In [28]:
best_params

{'params': {'max_depth': None,
  'min_samples_split': 3,
  'min_samples_leaf': 1,
  'min_weight_fraction_leaf': 0.0,
  'max_leaf_nodes': 128},
 'score': 4.371726592684219}

In [29]:
worst_params

{'params': {'max_depth': None,
  'min_samples_split': 3,
  'min_samples_leaf': 4,
  'min_weight_fraction_leaf': 0.2,
  'max_leaf_nodes': None},
 'score': 11.102578913240704}

In [30]:
# best params
{'params': {'max_depth': None,
  'min_samples_split': 3,
  'min_samples_leaf': 1,
  'min_weight_fraction_leaf': 0.0,
  'max_leaf_nodes': 128},
 'score': 4.371726592684219}

{'params': {'max_depth': None,
  'min_samples_split': 3,
  'min_samples_leaf': 1,
  'min_weight_fraction_leaf': 0.0,
  'max_leaf_nodes': 128},
 'score': 4.371726592684219}

In [None]:
# worst params
{'params': {'max_depth': None,
  'min_samples_split': 3,
  'min_samples_leaf': 4,
  'min_weight_fraction_leaf': 0.2,
  'max_leaf_nodes': None},
 'score': 11.102578913240704}