Code referenced from https://www.youtube.com/watch?v=CbTU92pbDKw

In [None]:
from helpers import *
from preprocessor import *

# Load the MIDI file into `df` via the project helper `midi_get_df`
# (brought in by the star imports above), then preview the first rows.
# NOTE(review): later cells read the 'start_time', 'note' and 'duration'
# columns from this frame — confirm midi_get_df always provides them.
df = midi_get_df('classical-piano-type0_chpn-p19.mid')
df.head()

In [None]:
import matplotlib.pyplot as plt

# Scatter of note (x axis) against duration (y axis); "*" draws markers only,
# with no connecting line.
plt.plot(df['note'], df['duration'], "*")

In [None]:
# Keep only the three columns used by the windowing step below.
# The explicit .copy() makes `df` an independent frame rather than a selection
# tied to the original one, so adding a column to it later does not raise
# pandas' SettingWithCopyWarning.
# start_time stays a regular column (it is not moved into the index) because
# later cells read it by name.
df = df[['start_time', 'note', 'duration']].copy()
df

In [None]:
import matplotlib.pyplot as plt
# Plot the note values against the frame's index (no x values are passed),
# markers only.
plt.plot(df["note"], "*")

In [None]:
def prepare_note_duration_data(df, start_idx=0, end_idx=None, samples_per_row=3):
    """Build a sliding-window table for next-note prediction.

    Each output row holds the duration and note of ``samples_per_row``
    consecutive events plus the duration and note of the event that
    immediately follows them (the prediction target).

    The input frame is never modified; all work is done on a copy.

    Args:
        df: DataFrame with ``start_time`` and ``note`` columns. A ``duration``
            column is used when present; otherwise it is derived as the gap
            between an event's ``start_time`` and the next event's.
        start_idx: Positional index of the first event to use (applied after
            rows without a following event have been dropped).
        end_idx: Positional index one past the last event to use, or ``None``
            to run to the end.
        samples_per_row: Number of past events packed into each row.

    Returns:
        DataFrame with columns ``start_time`` (start of the first event in the
        window), ``duration_1..k``, ``note_1..k``, ``target_duration`` and
        ``target_note``, where ``k == samples_per_row``. It is empty (but has
        the full column set) when there are not more than ``samples_per_row``
        usable events.

    Raises:
        KeyError: If ``start_time`` or ``note`` is missing from ``df``.
    """
    # Work on a private copy so the caller's frame does not gain an
    # 'end_time' column as a side effect.
    events = df.copy()

    # end_time of an event is the start_time of the next one. The final event
    # has no successor, so its end_time is NaN and the row is dropped.
    events["end_time"] = events["start_time"].shift(-1)
    events = events.dropna(subset=["end_time"])

    # Slice based on start/end index.
    events = events.iloc[start_idx:end_idx].reset_index(drop=True)

    # start_time and end_time always exist at this point, so a missing
    # duration can always be derived from them.
    if "duration" not in events.columns:
        events["duration"] = events["end_time"] - events["start_time"]

    # Pull the columns out once; plain list slicing is much cheaper than
    # repeated per-element .iloc lookups inside the loop.
    starts = events["start_time"].tolist()
    durations = events["duration"].tolist()
    notes = events["note"].tolist()

    rows = []
    for first in range(len(events) - samples_per_row):
        target = first + samples_per_row
        rows.append(
            [starts[first]]  # start time of the first event in the window
            + durations[first:target]
            + notes[first:target]
            + [durations[target], notes[target]]
        )

    # Column names
    duration_cols = [f"duration_{j+1}" for j in range(samples_per_row)]
    note_cols = [f"note_{j+1}" for j in range(samples_per_row)]
    cols = ["start_time"] + duration_cols + note_cols + ["target_duration", "target_note"]

    return pd.DataFrame(rows, columns=cols)

# Build the sliding-window table with the function's defaults
# (start_idx=0, end_idx=None, samples_per_row=3) and display it.
windowed_df = prepare_note_duration_data(df)
windowed_df

# duration_1..3 and note_1..3 hold the previous three notes (the model inputs); target_duration and target_note hold the note that follows them (the model output).

In [None]:
# Separate the windowed table into its three roles:
#   time - start time of the first note in each window (used as the plot x axis)
#   X    - the six inputs, ordered duration_1, note_1, duration_2, note_2, ...
#   y    - the two prediction targets
time = windowed_df["start_time"]
X = windowed_df[
    [f"{field}_{step}" for step in (1, 2, 3) for field in ("duration", "note")]
]
y = windowed_df[["target_duration", "target_note"]]

# Quick shape check of the three pieces.
(time.shape, X.shape, y.shape)

In [None]:
# Ordered (unshuffled) 80 / 10 / 10 split: the first 80 % of the windows are
# used for training, the next 10 % for validation and the last 10 % for testing.
q_80 = int(len(time) * 0.8)
q_90 = int(len(time) * 0.9)

times_train, X_train, y_train = time[:q_80], X[:q_80], y[:q_80]
times_val, X_val, y_val = time[q_80:q_90], X[q_80:q_90], y[q_80:q_90]
times_test, X_test, y_test = time[q_90:], X[q_90:], y[q_90:]

# y has two columns (target_duration, target_note), so every plot call draws
# two lines. Draw both lines of a split in one colour and hand the legend
# exactly one handle per split; otherwise the three labels would be attached
# to the first three of the six lines and mislabel them.
split_handles = []
for split_color, split_times, split_targets in (
    ("C0", times_train, y_train),
    ("C1", times_val, y_val),
    ("C2", times_test, y_test),
):
    split_lines = plt.plot(split_times, split_targets, "*", color=split_color)
    split_handles.append(split_lines[0])

plt.legend(split_handles, ['Train', 'Validation', 'Test'])

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers

# Assemble the network layer by layer.
model = Sequential()
# Input is read as a sequence of 6 steps with 1 feature each: the six columns
# of X (three duration/note pairs).
# NOTE(review): X_train is passed to fit() as a 2-D frame with 6 columns;
# presumably Keras adds the trailing feature axis itself — confirm.
model.add(layers.Input((6, 1)))
model.add(layers.LSTM(64))
model.add(layers.Dense(32, activation='swish'))
model.add(layers.Dense(32, activation='swish'))
# Two outputs, matching the two target columns in y (duration and note).
model.add(layers.Dense(2))

# Mean-squared-error loss, with mean absolute error reported as a metric.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mean_absolute_error'],
)

# Train for 100 epochs, evaluating on the validation split after each epoch.
model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
)


In [None]:
# The model ends in Dense(2), so predict() yields one (duration, note) pair per
# sample. Keep that two-column shape: flattening it would give twice as many
# values as there are entries in times_train, and plotting would fail with an
# x/y length mismatch.
train_predictions = model.predict(X_train)

# Each call draws two lines (one per output column); colour them per group and
# give the legend one handle per group so the labels match what is drawn.
pred_lines = plt.plot(times_train, train_predictions, "*", color="C0")
obs_lines = plt.plot(times_train, y_train, "*", color="C1")
plt.legend([pred_lines[0], obs_lines[0]],
           ['Training Predictions', 'Training Observations'])

Recall that we use the previous 3 samples (notes) to predict the current point. This is characteristic of a lagging controller, and it shows in the plots, where the predictions lag behind the actual values.

In [None]:
# The model ends in Dense(2), so predict() yields one (duration, note) pair per
# sample. Keep that two-column shape: flattening it would give twice as many
# values as there are entries in times_val, and plotting would fail with an
# x/y length mismatch.
val_predictions = model.predict(X_val)

# Each call draws two lines (one per output column); colour them per group and
# give the legend one handle per group so the labels match what is drawn.
pred_lines = plt.plot(times_val, val_predictions, "*", color="C0")
obs_lines = plt.plot(times_val, y_val, "*", color="C1")
plt.legend([pred_lines[0], obs_lines[0]],
           ['Validation Predictions', 'Validation Observations'])

In [None]:
# The model ends in Dense(2), so predict() yields one (duration, note) pair per
# sample. Keep that two-column shape: flattening it would give twice as many
# values as there are entries in times_test, and plotting would fail with an
# x/y length mismatch.
test_predictions = model.predict(X_test)

# Each call draws two lines (one per output column); colour them per group and
# give the legend one handle per group so the labels match what is drawn.
pred_lines = plt.plot(times_test, test_predictions, "*", color="C0")
obs_lines = plt.plot(times_test, y_test, "*", color="C1")
plt.legend([pred_lines[0], obs_lines[0]],
           ['Testing Predictions', 'Testing Observations'])

In [None]:
# Overlay predictions and observations for all three splits on one figure.
combined_handles = []
combined_labels = []
for split_no, (split_name, split_times, split_preds, split_obs) in enumerate((
    ('Training', times_train, train_predictions, y_train),
    ('Validation', times_val, val_predictions, y_val),
    ('Testing', times_test, test_predictions, y_test),
)):
    # The model emits two values per sample (duration, note). reshape(-1, 2)
    # guarantees one row per sample even if the predictions were stored
    # flattened, so they line up with the time axis.
    split_preds = split_preds.reshape(-1, 2)

    # Each call draws two lines (one per output column); use one colour per
    # group and keep a single handle so the six legend labels match six groups.
    pred_lines = plt.plot(split_times, split_preds, "*", color=f"C{2 * split_no}")
    obs_lines = plt.plot(split_times, split_obs, "*", color=f"C{2 * split_no + 1}")

    combined_handles += [pred_lines[0], obs_lines[0]]
    combined_labels += [f"{split_name} Predictions", f"{split_name} Observations"]

plt.legend(combined_handles, combined_labels)