# Model Training for Piano Arrangement

This notebook processes audio files, generates spectrograms, trains a machine learning model, and generates an arrangement from the trained model.

In [None]:
# Importing necessary libraries
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import librosa
import librosa.display

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, TimeDistributed, Reshape
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Defining directories
# BASE_DIR is the dataset root; OUTPUT_DIR holds the spectrogram arrays saved as .npy files.
BASE_DIR = './dataset/instrumental_only/'
OUTPUT_DIR = os.path.join(BASE_DIR, 'spectrograms')
# process_audios() saves into OUTPUT_DIR/original and OUTPUT_DIR/piano, and
# np.save() does not create missing directories, so create both up front
# (makedirs also creates OUTPUT_DIR itself along the way).
for _subdir in ('original', 'piano'):
    os.makedirs(os.path.join(OUTPUT_DIR, _subdir), exist_ok=True)

In [None]:
# Function to generate and save spectrogram
def generate_mel_spectrogram(audio_path, save_path, sr=22050):
    """Compute a dB-scaled mel spectrogram for an audio file and save it as .npy.

    Args:
        audio_path: Path of the audio file to load.
        save_path: Destination of the array, written with ``np.save``. Missing
            parent directories are created.
        sr: Sampling rate the audio is resampled to on load. Defaults to
            22050, the rate that ``spectrogram_to_audio`` and the final WAV
            export assume; pass ``None`` to keep the file's native rate.

    Returns:
        The mel spectrogram (128 mel bands) in dB relative to its own peak.
    """
    # Load every file at one fixed rate: the mel filter bank depends on the
    # sampling rate, so spectrograms built at a native rate such as 44.1 kHz
    # would be inverted with the wrong pitch and speed at 22050 Hz.
    y, sr = librosa.load(audio_path, sr=sr)
    # librosa >= 0.10 accepts the signal only as a keyword argument
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    spectrogram_db = librosa.power_to_db(S, ref=np.max)

    # np.save() does not create missing directories
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    np.save(save_path, spectrogram_db)
    return spectrogram_db

In [None]:
# Function to display spectrogram
def plot_spectrogram(spectrogram_db, title):
    """Draw a dB-scaled mel spectrogram in a new figure and show it.

    Args:
        spectrogram_db: Spectrogram in decibels, passed to
            ``librosa.display.specshow``.
        title: Text placed above the plot.
    """
    figure_size = (10, 4)
    plt.figure(figsize=figure_size)

    # Time on the x axis, mel-scaled frequency on the y axis
    specshow_options = {'x_axis': 'time', 'y_axis': 'mel', 'cmap': 'viridis'}
    librosa.display.specshow(spectrogram_db, **specshow_options)

    # Colour bar labelled in signed decibels
    plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()

In [None]:
# Processing audio files
def process_audios(original_dir, piano_dir):
    """Build paired mel spectrograms for original tracks and piano versions.

    Files are paired by position after sorting each directory listing by
    name, so matching recordings are assumed to sort into the same order in
    both directories (e.g. identical file names) -- TODO confirm against the
    dataset layout.

    Args:
        original_dir: Directory holding the original recordings (.mp3/.wav).
        piano_dir: Directory holding the piano arrangements (.mp3/.wav).

    Returns:
        Tuple ``(original_specs, piano_specs)`` of NumPy arrays in which
        entry ``i`` of each array comes from pair ``i``. Every spectrogram is
        cropped along its last axis to the shortest one found, so both arrays
        are rectangular and share one shape. The .npy files written under
        ``OUTPUT_DIR`` keep their full, uncropped length.
    """
    import warnings

    audio_extensions = ('.mp3', '.wav')
    # os.listdir() returns names in arbitrary order; sort both listings so
    # that pairing by position is deterministic.
    original_files = sorted(f for f in os.listdir(original_dir) if f.endswith(audio_extensions))
    piano_files = sorted(f for f in os.listdir(piano_dir) if f.endswith(audio_extensions))

    if len(original_files) != len(piano_files):
        # zip() below stops at the shorter listing; make that visible
        warnings.warn(
            f'Found {len(original_files)} original and {len(piano_files)} piano files; '
            'unpaired trailing files are ignored.'
        )

    # np.save() does not create missing directories
    original_out = os.path.join(OUTPUT_DIR, 'original')
    piano_out = os.path.join(OUTPUT_DIR, 'piano')
    os.makedirs(original_out, exist_ok=True)
    os.makedirs(piano_out, exist_ok=True)

    original_specs = []
    piano_specs = []

    for original_file, piano_file in zip(original_files, piano_files):
        original_spec = generate_mel_spectrogram(
            os.path.join(original_dir, original_file),
            os.path.join(original_out, f'{original_file}.npy'),
        )
        piano_spec = generate_mel_spectrogram(
            os.path.join(piano_dir, piano_file),
            os.path.join(piano_out, f'{piano_file}.npy'),
        )
        original_specs.append(original_spec)
        piano_specs.append(piano_spec)

    # Recordings of different durations give spectrograms of different
    # widths, which np.array() cannot stack into a regular array. Crop all of
    # them to the shortest width so inputs and targets share one shape.
    if original_specs:
        n_frames = min(spec.shape[-1] for spec in original_specs + piano_specs)
        original_specs = [spec[..., :n_frames] for spec in original_specs]
        piano_specs = [spec[..., :n_frames] for spec in piano_specs]

    return np.array(original_specs), np.array(piano_specs)

In [None]:
# Defining the model (CNN + LSTM)
def build_model(input_shape):
    """Build and compile the CNN + LSTM spectrogram-to-spectrogram model.

    Args:
        input_shape: ``(rows, cols)`` shape of a single input spectrogram,
            without the batch axis.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss) whose
        per-sample output has shape ``(rows, cols)``.
    """
    rows, cols = input_shape[0], input_shape[1]
    # The dense bottleneck is reshaped into `rows` sequence steps. A fixed
    # 256-unit layer only reshapes cleanly when `rows` divides 256, so size
    # it from `rows` instead; this is still 256 units for 128-band input.
    features_per_step = 2

    model = Sequential([
        # Add a trailing channel axis for the 2D convolutions
        Reshape((*input_shape, 1), input_shape=input_shape),
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(rows * features_per_step, activation='relu'),
        # NOTE(review): the LSTM steps over the `rows` axis of the input,
        # not over its `cols` axis -- confirm this is the intended sequence axis.
        Reshape((rows, features_per_step)),
        LSTM(128, return_sequences=True),
        # Project every step back to `cols` values so the output matches the target shape
        TimeDistributed(Dense(cols))
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

In [None]:
# Conversion of spectrogram to audio
def spectrogram_to_audio(spectrogram_db, sr=22050):
    """Turn a dB-scaled mel spectrogram back into an audio signal.

    Args:
        spectrogram_db: Mel spectrogram in decibels.
        sr: Sampling rate handed to the mel inversion (default 22050).

    Returns:
        The audio reconstructed by ``librosa.feature.inverse.mel_to_audio``.
    """
    # Undo the dB scaling first, then invert the resulting power spectrogram
    mel_power = librosa.db_to_power(spectrogram_db)
    return librosa.feature.inverse.mel_to_audio(mel_power, sr=sr)

In [None]:
# Input directories
original_dir, piano_dir = (os.path.join(BASE_DIR, name) for name in ('original', 'piano'))

# Processing audio files
original_specs, piano_specs = process_audios(original_dir, piano_dir)

# Same seed for both splits so the partitioning is reproducible
split_seed = 42

# Dividing data into train and test (10% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    original_specs,
    piano_specs,
    test_size=0.1,
    random_state=split_seed,
)

# Dividing data into train and validation (20% of the remaining pairs)
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=split_seed,
)

# Checking the length of the data
tuple(len(part) for part in (X_train, X_val, X_test))

In [None]:
# Building and summarizing the model
# Drop the leading sample axis: the model is built for one spectrogram's shape
sample_shape = X_train.shape[1:]
model = build_model(sample_shape)
model.summary()

In [None]:
# Early stopping
# Watch the validation loss, allow 10 epochs without improvement, and
# restore the best weights seen when training stops
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

In [None]:
# Training the model
# 200 epochs is the upper bound; the early-stopping callback may end training sooner
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=200,
    callbacks=[early_stopping],
)

In [None]:
# Generating an arrangement
predicted_spec = model.predict(X_test)
# Only the first test sample is converted back to audio
generated_audio = spectrogram_to_audio(predicted_spec[0])

# Saving the generated audio
# librosa.output.write_wav was removed in librosa 0.8, so write the file with
# SciPy instead. The rate matches the default used by spectrogram_to_audio,
# and float32 samples are stored as a 32-bit floating-point WAV.
from scipy.io import wavfile

wavfile.write('generated_arrangement.wav', 22050, np.asarray(generated_audio, dtype=np.float32))