## Analyze data preprocessed using Constant-Q Transform and create several preprocessed datasets with different hyperparameters (in particular, different number of frequency bins per octave)

In [1]:
from src.preprocessing.data_preprocessing import generate_note_with_instrument_labels
from src.preprocessing.data_preprocessing import calculate_cqt_stats
from src.preprocessing.data_preprocessing import process_files_using_cqt
from src.preprocessing.data_preprocessing import calculate_stft_stats
from src.preprocessing.data_preprocessing import process_files_using_stft

In [2]:
from src.preprocessing.data_utils import find_max_length
from src.preprocessing.data_utils import pad_data
from src.preprocessing.data_utils import save_preprocessed_data

In [3]:
import numpy as np

In [4]:
# Paths to the directories
train_data_dir = '../Data/raw/musicnet/train_data'
train_labels_dir = '../Data/raw/musicnet/train_labels'
test_data_dir = '../Data/raw/musicnet/test_data'
test_labels_dir = '../Data/raw/musicnet/test_labels'

### Create preprocessed dataset with CQT frequency bins per octave = 24

#### Initially process audio files to spectrograms and generate labels

In [5]:
train_mean, train_std_dev = calculate_cqt_stats(train_data_dir, bins_per_octave=24, data_size=320)
X_train, y_train = process_files_using_cqt(train_data_dir, train_labels_dir, generate_note_with_instrument_labels, mean=train_mean, std=train_std_dev, bins_per_octave=24, data_size=320)

test_mean, test_std_dev = calculate_cqt_stats(test_data_dir, bins_per_octave=24, data_size=10)
X_test, y_test = process_files_using_cqt(test_data_dir, test_labels_dir, generate_note_with_instrument_labels, mean=test_mean, std=test_std_dev, bins_per_octave=24, data_size=10)

Calculating CQT stats: 100%|██████████| 320/320 [02:44<00:00,  1.95it/s]
Processing files in '../Data/raw/musicnet/train_data' and '../Data/raw/musicnet/train_labels': 100%|██████████| 320/320 [03:19<00:00,  1.60it/s]
Calculating CQT stats: 100%|██████████| 10/10 [00:02<00:00,  4.62it/s]
Processing files in '../Data/raw/musicnet/test_data' and '../Data/raw/musicnet/test_labels': 100%|██████████| 10/10 [00:02<00:00,  3.84it/s]


In [6]:
print('Number of training examples:', len(X_train))
print('Number of training labels:', len(y_train))
print(f"X_train[0] shape: {X_train[0].shape}")
print(f"y_train[0] shape: {y_train[0].shape}")

Number of training examples: 320
Number of training labels: 320
X_train[0] shape: (19254, 168)
y_train[0] shape: (19254, 1056)


#### Pad data to the same length

In [7]:
max_length = max(find_max_length(X_train), find_max_length(X_test))
print('Max length:', max_length)

Max length: 46040


In [8]:
X_train_padded = pad_data(X_train, max_length)
y_train_padded = pad_data(y_train, max_length, return_type=np.int8)
X_test_padded = pad_data(X_test, max_length)
y_test_padded = pad_data(y_test, max_length, return_type=np.int8)

In [9]:
print('X_train_padded shape:', X_train_padded.shape)
print('y_train_padded shape:', y_train_padded.shape)
print('X_test_padded shape:', X_test_padded.shape)
print('y_test_padded shape:', y_test_padded.shape)

X_train_padded shape: (320, 46040, 168)
y_train_padded shape: (320, 46040, 1056)
X_test_padded shape: (10, 46040, 168)
y_test_padded shape: (10, 46040, 1056)


#### Reshape data

In [10]:
X_train = X_train_padded.reshape(-1, X_train_padded.shape[2])
y_train = y_train_padded.reshape(-1, 88 * 12)  # 88 notes * 11 instruments + 1 note was not played
X_test = X_test_padded.reshape(-1, X_test_padded.shape[2])
y_test = y_test_padded.reshape(-1, 88 * 12)  # 88 notes * 11 instruments + 1 note was not played

In [11]:
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

X_train shape: (14732800, 168)
y_train shape: (14732800, 1056)
X_test shape: (460400, 168)
y_test shape: (460400, 1056)


#### Save preprocessed data

In [12]:
save_preprocessed_data(X_train, y_train, '../Data/processed/final_data/cqt_bins_24/', 'train.h5')
save_preprocessed_data(X_test, y_test, '../Data/processed/final_data/cqt_bins_24/', 'test.h5')

Starting to save preprocessed data...
Data successfully saved to ../Data/processed/final_data/cqt_bins_24/train.h5
Starting to save preprocessed data...
Data successfully saved to ../Data/processed/final_data/cqt_bins_24/test.h5


### Create preprocessed dataset with CQT frequency bins per octave = 36

#### Initially process audio files to spectrograms and generate labels

In [13]:
train_mean, train_std_dev = calculate_cqt_stats(train_data_dir, bins_per_octave=36, data_size=320)
X_train, y_train = process_files_using_cqt(train_data_dir, train_labels_dir, generate_note_with_instrument_labels, mean=train_mean, std=train_std_dev, bins_per_octave=36, data_size=320)

test_mean, test_std_dev = calculate_cqt_stats(test_data_dir, bins_per_octave=36, data_size=10)
X_test, y_test = process_files_using_cqt(test_data_dir, test_labels_dir, generate_note_with_instrument_labels, mean=test_mean, std=test_std_dev, bins_per_octave=36, data_size=10)

Calculating CQT stats: 100%|██████████| 320/320 [05:08<00:00,  1.04it/s]
Processing files in '../Data/raw/musicnet/train_data' and '../Data/raw/musicnet/train_labels':  52%|█████▏    | 166/320 [02:41<02:30,  1.03it/s]


KeyboardInterrupt: 

In [None]:
print('Number of training examples:', len(X_train))
print('Number of training labels:', len(y_train))
print(f"X_train[0] shape: {X_train[0].shape}")
print(f"y_train[0] shape: {y_train[0].shape}")

#### Pad data to the same length

In [None]:
max_length = max(find_max_length(X_train), find_max_length(X_test))
print('Max length:', max_length)

In [None]:
X_train_padded = pad_data(X_train, max_length)
y_train_padded = pad_data(y_train, max_length, return_type=np.int8)
X_test_padded = pad_data(X_test, max_length)
y_test_padded = pad_data(y_test, max_length, return_type=np.int8)

In [None]:
print('X_train_padded shape:', X_train_padded.shape)
print('y_train_padded shape:', y_train_padded.shape)
print('X_test_padded shape:', X_test_padded.shape)
print('y_test_padded shape:', y_test_padded.shape)

#### Reshape data

In [None]:
X_train = X_train_padded.reshape(-1, X_train_padded.shape[2])
y_train = y_train_padded.reshape(-1, 88 * 12)  # 88 notes * 11 instruments + 1 note was not played
X_test = X_test_padded.reshape(-1, X_test_padded.shape[2])
y_test = y_test_padded.reshape(-1, 88 * 12)  # 88 notes * 11 instruments + 1 note was not played

In [None]:
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

#### Save preprocessed data

In [None]:
save_preprocessed_data(X_train, y_train, '../Data/processed/final_data/cqt_bins_36/', 'train.h5')
save_preprocessed_data(X_test, y_test, '../Data/processed/final_data/cqt_bins_36/', 'test.h5')