In [29]:
import pickle
import numpy as np
# import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM,SimpleRNN,Dense,Dropout,Bidirectional,Input,Flatten, Conv1D, MaxPooling1D

In [2]:
# Load the pickled dataset. It is iterated below as a list of songs, each a
# list of note dicts with 'spectrogram' and 'pitch' keys.
# NOTE(review): pickle.load can execute arbitrary code; only load this file
# if it comes from a trusted source.
with open("piano_data_spectrogram.pkl", "rb") as pkl_file:
    data = pickle.load(pkl_file)

# Report how many top-level entries were loaded (after the file is closed).
print('data:', len(data))

data: 329


In [3]:
# create test set: hold out 29 whole songs; everything else stays in `data`
random.seed(42)

# Sample *positions* instead of the songs themselves. In CPython,
# random.sample picks by position and depends only on the population length,
# so for the same seed this holds out the same songs as
# random.sample(data, 29).
test_positions = random.sample(range(len(data)), 29)
held_out = set(test_positions)

data_test = [data[i] for i in test_positions]

# Filter by position rather than `elem not in data_test`. That membership test
# deep-compares songs with ==, which is slow, would also drop any song that
# merely equals a held-out one, and may raise if the spectrograms are numpy
# arrays (elementwise == has no single truth value) -- TODO confirm the type.
# NOTE: this cell rebinds `data`, so re-running it without reloading the
# pickle samples again from the already-reduced list.
data = [song for i, song in enumerate(data) if i not in held_out]

In [None]:
# measurer = []
# for note in X:
#     measurer.append(len(note))
# print(sorted(measurer)[0])
# print(sorted(measurer)[-1])

# import matplotlib.pyplot as plt

# plt.boxplot(measurer)
# plt.title('Box Plot')
# plt.ylabel('Values')
# plt.show()

In [10]:
# Flatten the song -> note nesting into per-note samples, keeping song order
# and note order. Each note contributes its 'spectrogram' as the input and its
# 'pitch' as the target.

# train/val samples
train_notes = [note for song in data for note in song]
X = [note['spectrogram'] for note in train_notes]
y = [note['pitch'] for note in train_notes]

# held-out test samples
test_notes = [note for song in data_test for note in song]
X_test = [note['spectrogram'] for note in test_notes]
y_test = [note['pitch'] for note in test_notes]

In [11]:
# Bring every spectrogram to a common length and convert everything to numpy.

maxlen = 85 # desired sequence length

# Shorter sequences are zero-padded at the end and longer ones are cut at the
# end ('post' for both), stored as float32.
pad_kwargs = dict(maxlen=maxlen, dtype='float32', padding='post', truncating='post')

X_padded = pad_sequences(X, **pad_kwargs)
X_test_padded = pad_sequences(X_test, **pad_kwargs)

# Rebind the working names to arrays (inputs and targets).
X, X_test = np.array(X_padded), np.array(X_test_padded)
y, y_test = np.array(y), np.array(y_test)

In [12]:
# train-val split: 80% train / 20% validation, fixed seed for repeatability.
# The arrays are passed in the conventional (X, y) order; every array is split
# on the same shuffled indices, so the partition is unchanged.
# NOTE: this split is per note, whereas the test set above is held out per song.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [16]:
# scale: min-max scale each feature column to [0, 1] using training statistics
scaler = MinMaxScaler()

# Flatten each training sample to one row, then fit the scaler on the training
# data only and transform it in the same step.
X_train = np.asarray(X_train).reshape(len(X_train), -1)
X_train = scaler.fit_transform(X_train)

# Apply the training-fitted scaler to the validation samples (no refit, so
# validation statistics never influence the scaling).
X_val = np.asarray(X_val).reshape(len(X_val), -1)
X_val = scaler.transform(X_val)

# Restore the per-sample shape. Use `maxlen` (set in the padding cell) instead
# of repeating the literal 85, so a changed sequence length cannot silently
# regroup values into the wrong number of rows here.
X_train = X_train.reshape(-1, maxlen)
X_val = X_val.reshape(-1, maxlen)

# NOTE(review): X_test is not scaled in this cell. It must go through
# scaler.transform before being given to a model trained on the scaled data --
# confirm this happens wherever the test set is evaluated.

In [18]:
# Sanity check: shape of a single scaled training sample.
np.shape(X_train[0])

(85,)

In [19]:
# define input shape for models: (sequence length, 1 feature per step).
# Derived from `maxlen` (set in the padding cell) so the models always match
# the padded sequence length instead of duplicating the literal 85.
input_shape = (maxlen, 1)

In [20]:
# model 0: SimpleRNN
# Two stacked ReLU SimpleRNN layers (128 units returning the full sequence,
# then 64 units returning the last state) feeding a single-unit output.
model = tf.keras.Sequential([
    Input(shape=input_shape),
    SimpleRNN(128, activation='relu', return_sequences=True),
    SimpleRNN(64, activation='relu'),
    Dense(1),
])

# Train on mean squared error; also report mean absolute error.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [27]:
# model 1: LSTM with Dropout
# Two LSTM layers (128 units returning the full sequence, then 64 units), each
# followed by 20% dropout, feeding a single-unit output.
model1 = tf.keras.Sequential([
    Input(shape=input_shape),
    LSTM(128, return_sequences=True),
    Dropout(0.2),
    LSTM(64),
    Dropout(0.2),
    Dense(1),
])

# Train on mean squared error; also report mean absolute error.
model1.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [None]:
# model 2: Stacked LSTM
# Three LSTM layers of decreasing width (128 -> 64 -> 32); the first two pass
# the full sequence on, the last returns only its final state to the
# single-unit output.
model2 = tf.keras.Sequential([
    Input(shape=input_shape),
    LSTM(128, return_sequences=True),
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dense(1),
])

# Train on mean squared error; also report mean absolute error.
model2.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [40]:
# model 3: Bidirectional LSTM
# Two bidirectional LSTM layers (128 units returning the full sequence, then
# 64 units) feeding a single-unit output.
model3 = tf.keras.Sequential([
    Input(shape=input_shape),
    Bidirectional(LSTM(128, return_sequences=True)),
    Bidirectional(LSTM(64)),
    Dense(1),
])

# Train on mean squared error; also report mean absolute error.
model3.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [33]:
# model 4: Simple CNN
# One Conv1D layer (32 filters, kernel size 3, ReLU) with max-pooling of 2,
# flattened into a 64-unit ReLU dense layer and a single-unit output.
model4 = tf.keras.Sequential([
    Input(shape=input_shape),
    Conv1D(32, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1),
])

# NOTE(review): this is the only model compiled with loss='mae'; the others
# use 'mse', so its reported loss is not directly comparable to theirs --
# confirm this is intentional.
model4.compile(optimizer='adam', loss='mae', metrics=['mae'])

In [36]:
# model 5: Deep CNN
# Three Conv1D layers (32, 64, 128 filters; kernel size 3; ReLU) with
# max-pooling of 2 after the second and third, flattened into a 64-unit ReLU
# dense layer and a single-unit output.
model5 = tf.keras.Sequential([
    Input(shape=input_shape),
    Conv1D(32, 3, activation='relu'),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1),
])

# Train on mean squared error; also report mean absolute error.
model5.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [42]:
# Train model3 on the scaled training samples for 10 epochs with batches of
# 64, evaluating on the validation split after every epoch.
# (The recorded run below was interrupted by hand during the first epoch.)
model3.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    validation_data=(X_val, y_val),
)

Epoch 1/10
[1m 257/8896[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:29[0m 59ms/step - loss: 1840.9836 - mae: 40.2857

KeyboardInterrupt: 

In [39]:
# Persist model5 in the native Keras format.
# NOTE(review): no cell visible in this notebook fits model5 (the only fit
# call shown trains model3), and this cell's execution count predates that
# fit. On a fresh Restart & Run All this would save an untrained model --
# confirm model5 is trained before this cell runs.
model5.save(filepath='piano_deepCNN.keras')