In [1]:
import pandas as pd
import numpy as np
import ast
from collections import Counter, OrderedDict
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from utilities import *
from music21.harmony import chordSymbolFigureFromChord as figureChord
from music21.chord import Chord

from tensorflow.keras.datasets import imdb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.python.keras.layers.embeddings import Embedding
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import model_from_json

import warnings

warnings.filterwarnings(action="once")

In [2]:
def prep_data(df):
    SAMPLE_LEN = 8
    data = [ast.literal_eval(chords_string) for chords_string in df["encoded_chords"]]
    # Remove too short songs
    data = [chords for chords in data if len(chords) > SAMPLE_LEN]
    x = []
    y = []
    for chords in data:
        for i in range(len(chords)):
            if i < len(chords) - SAMPLE_LEN:
                x.append(chords[i : i + SAMPLE_LEN - 1])
            else:
                x.append(
                    chords[i : len(chords) - 1]
                    + chords[: (i + SAMPLE_LEN) % len(chords)]
                )
            y.append(chords[(i + SAMPLE_LEN) % len(chords)])
    x = np.array(x)
    y = np.array(y)
    print(x.shape, y.shape)
    return train_test_split(x, y, test_size=0.33, random_state=42)


def decode(chord):
    if not all(isinstance(x, np.int32) for x in chord):
        raise Exception(
            "Expected chord to be array([0, 1, 0, 0 , 1, ...]) instead got: ", chord
        )
    decoded_chord = []
    for counter, value in enumerate(chord):
        if value == 1:
            decoded_chord.append(counter)
    return decoded_chord


def print_chords(encoded_chords):
    chords_symbols = [figureChord(Chord(decode(chord))) for chord in encoded_chords]
    print(*chords_symbols, sep="  | ")


def threshold_prediction(pred, notes_num):
    thresholded_pred = np.zeros(pred.shape, dtype=np.int32)
    selected_notes = sorted(range(len(pred)), key=lambda i: pred[i], reverse=True)[
        :notes_num
    ]
    thresholded_pred[selected_notes] = 1
    return thresholded_pred


def save_model(model):
    model_json = model.to_json()
    with open("models/model.json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("models/model.h5")
    print("Saved model to disk")


def load_model(json_path, h5_path):
    json_file = open(json_path, "r")
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(h5_path)
    print("Loaded model from disk")
    return loaded_model

def predict_chords(model, input_chords):
    pred = model.predict(input_chords[np.newaxis, :, :])[0]
    pred = threshold_prediction(pred, 4)
    print_chords(np.vstack((input_chords[:3],pred)))

In [3]:
df = pd.read_csv("songs_and_chords.csv")
X_train, X_test, y_train, y_test = prep_data(df)

(103631, 7, 12) (103631, 12)


In [4]:
print(X_train[0][0])
print(decode(X_train[0][0]))

[0 0 1 0 0 1 0 1 0 0 0 1]
[2, 5, 7, 11]


LSTM requirements:

- The LSTM input layer must be 3D.
- LSTMs don’t like sequences of more than 200-400 time steps, so the data will need to be split into samples.
- If you have a long sequence of thousands of observations, you must split it into samples and then reshape it for your LSTM model.
- The LSTM needs data with the format of [samples, time steps and features].
- The LSTM input layer is defined by the input_shape argument on the first hidden layer.
- The input_shape argument takes a tuple of two values that define the number of time steps and features.
- The number of samples is assumed to be 1 or more.
- The reshape() function on NumPy arrays can be used to reshape your 1D or 2D data to be 3D.
- The reshape() function takes a tuple as an argument that defines the new shape.


# Train Model

In [5]:
# model = Sequential()
# model.add(LSTM(32, input_shape=(7, 12)))
# model.add(Dense(12, activation='sigmoid'))
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# print(model.summary())
# model.fit(X_train, y_train, epochs=3, batch_size=8)
# scores = model.evaluate(X_test, y_test, verbose=0)
# print("Accuracy: %.2f%%" % (scores[1]*100))

# Save Model

In [6]:
# save_model(model)

# Load Model

In [7]:
loaded_model = load_model('models/model.json', "models/model.h5")

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Loaded model from disk


# Test Model

In [8]:
loaded_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print(loaded_model.summary())
scores = loaded_model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 32)                5760      
_________________________________________________________________
dense_6 (Dense)              (None, 12)                396       
Total params: 6,156
Trainable params: 6,156
Non-trainable params: 0
_________________________________________________________________
None
Accuracy: 72.68%


In [10]:
for i in range(20):
    predict_chords(loaded_model, X_test[i])

A-maj7/C  | A#m7/C#  | A-maj7/C  | Fm7/C
Fm/C  | Fm/C  | C7  | Fm/CaddB-
Gm/D  | B-7/D  | Fm/C  | Gm7/D
E-  | A-7/C  | A-7/C  | A-/CaddA
Gm7/D  | C7  | F/C  | F/CaddB-
E-  | Gm/D  | Cm  | Cm7
C7  | F-+/CaddB-  | F/C  | B-/C
B-/D  | E-+  | C7  | Chord Symbol Cannot Be Identified
F7/C  | B-/D  | B-/D  | B-/C
E-m  | A#m7/C#  | E-m  | Chord Symbol Cannot Be Identified
Gm7/D  | C7  | F/C  | B-/C
D/o7/C  | E-+addF  | Cm7  | B-/C
F9/CaddB  | B-7/D  | Cm7addD-,A-,F  | D-maj7/C
A7/C#  | D7/C  | D7/C  | Chord Symbol Cannot Be Identified
Fm7/C  | B-7/D  | E-maj7/D  | D/o7/C
Am7/C  | E7/D  | E7/D  | CaddD
B-7/D  | E-maj7/D  | B-/D  | Cm7
B-/D  | A7/C#  | B-/D  | E-addF
Gm7/D  | Gm7/D  | Cm7  | B-/C
A7/C#  | Dm  | G7/D  | Chord Symbol Cannot Be Identified
