## Analyze data preprocessed using Constant-Q Transform and create several preprocessed datasets with different hyperparameters (in particular, different number of frequency bins per octave)

In [1]:
from src.preprocessing.data_preprocessing import generate_note_labels
from src.preprocessing.data_preprocessing import calculate_cqt_stats
from src.preprocessing.data_preprocessing import process_files_using_cqt

In [2]:
from src.preprocessing.data_utils import find_note_range
from src.preprocessing.data_utils import find_max_length
from src.preprocessing.data_utils import pad_data
from src.preprocessing.data_utils import save_preprocessed_data
from src.preprocessing.data_utils import spectrogram_row_to_image

In [3]:
# Paths to the directories
train_data_dir = '../Data/raw/musicnet/train_data'
train_labels_dir = '../Data/raw/musicnet/train_labels'
test_data_dir = '../Data/raw/musicnet/test_data'
test_labels_dir = '../Data/raw/musicnet/test_labels'

### Explore notes range in dataset

In [4]:
all_label_paths = [train_labels_dir, test_labels_dir]
min_note_value, max_note_value = find_note_range(all_label_paths)
note_range = max_note_value - min_note_value

In [5]:
print('Min note value:', min_note_value)
print('Max note value:', max_note_value)
print('Note range:', note_range)

Min note value: 21
Max note value: 105
Note range: 84


### Create preprocessed dataset with frequency bins per octave = 12

#### Initially process audio files to spectrograms and generate labels

In [6]:
train_mean, train_std_dev = calculate_cqt_stats(train_data_dir, bins_per_octave=12, data_size=320)
X_train, y_train = process_files_using_cqt(train_data_dir, train_labels_dir, generate_note_labels, mean=train_mean, std=train_std_dev, bins_per_octave=12, data_size=320)

test_mean, test_std_dev = calculate_cqt_stats(test_data_dir, bins_per_octave=12, data_size=10)
X_test, y_test = process_files_using_cqt(test_data_dir, test_labels_dir, generate_note_labels, mean=test_mean, std=test_std_dev, bins_per_octave=12, data_size=10)

Calculating CQT stats: 100%|██████████| 320/320 [01:42<00:00,  3.13it/s]
Processing files in '../Data/raw/musicnet/train_data' and '../Data/raw/musicnet/train_labels': 100%|██████████| 320/320 [02:03<00:00,  2.58it/s]
Calculating CQT stats: 100%|██████████| 10/10 [00:01<00:00,  7.51it/s]
Processing files in '../Data/raw/musicnet/test_data' and '../Data/raw/musicnet/test_labels': 100%|██████████| 10/10 [00:01<00:00,  6.24it/s]


In [7]:
print('Number of training examples:', len(X_train))
print('Number of training labels:', len(y_train))
print(f"X_train[0] shape: {X_train[0].shape}")
print(f"y_train[0] shape: {y_train[0].shape}")

Number of training examples: 320
Number of training labels: 320
X_train[0] shape: (19254, 84)
y_train[0] shape: (19254, 88)


In [8]:
for i, sample in enumerate(X_train):
    print(f"Sample {i + 1} shape: {sample.shape}")

Sample 1 shape: (19254, 84)
Sample 2 shape: (10816, 84)
Sample 3 shape: (19144, 84)
Sample 4 shape: (15872, 84)
Sample 5 shape: (23515, 84)
Sample 6 shape: (14024, 84)
Sample 7 shape: (30790, 84)
Sample 8 shape: (21122, 84)
Sample 9 shape: (39817, 84)
Sample 10 shape: (29991, 84)
Sample 11 shape: (33772, 84)
Sample 12 shape: (20486, 84)
Sample 13 shape: (15621, 84)
Sample 14 shape: (9897, 84)
Sample 15 shape: (15999, 84)
Sample 16 shape: (30604, 84)
Sample 17 shape: (20173, 84)
Sample 18 shape: (28234, 84)
Sample 19 shape: (25854, 84)
Sample 20 shape: (18875, 84)
Sample 21 shape: (29139, 84)
Sample 22 shape: (18406, 84)
Sample 23 shape: (12527, 84)
Sample 24 shape: (27880, 84)
Sample 25 shape: (17979, 84)
Sample 26 shape: (13238, 84)
Sample 27 shape: (24902, 84)
Sample 28 shape: (31271, 84)
Sample 29 shape: (27111, 84)
Sample 30 shape: (22102, 84)
Sample 31 shape: (19868, 84)
Sample 32 shape: (13948, 84)
Sample 33 shape: (15140, 84)
Sample 34 shape: (20362, 84)
Sample 35 shape: (21745,

#### Pad data to the same length

In [9]:
max_length = max(find_max_length(X_train), find_max_length(X_test))
print('Max length:', max_length)

Max length: 46040


In [10]:
X_train_padded = pad_data(X_train, max_length)
y_train_padded = pad_data(y_train, max_length)
X_test_padded = pad_data(X_test, max_length)
y_test_padded = pad_data(y_test, max_length)

In [11]:
print('X_train_padded shape:', X_train_padded.shape)
print('y_train_padded shape:', y_train_padded.shape)
print('X_test_padded shape:', X_test_padded.shape)
print('y_test_padded shape:', y_test_padded.shape)


X_train_padded shape: (320, 46040, 84)
y_train_padded shape: (320, 46040, 88)
X_test_padded shape: (10, 46040, 84)
y_test_padded shape: (10, 46040, 88)


#### Reshape data

In [12]:
X_train = X_train_padded.reshape(-1, X_train_padded.shape[2])
y_train = y_train_padded.reshape(-1, 88)
X_test = X_test_padded.reshape(-1, X_test_padded.shape[2])
y_test = y_test_padded.reshape(-1, 88)

In [13]:
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

X_train shape: (14732800, 84)
y_train shape: (14732800, 88)
X_test shape: (460400, 84)
y_test shape: (460400, 88)


#### Save preprocessed data

In [14]:
save_preprocessed_data(X_train, y_train, '../Data/processed/cqt/bins_12/', 'train.h5')
save_preprocessed_data(X_test, y_test, '../Data/processed/cqt/bins_12/', 'test.h5')

Starting to save preprocessed data...
Data successfully saved to ../Data/processed/cqt/bins_12/train.h5
Starting to save preprocessed data...
Data successfully saved to ../Data/processed/cqt/bins_12/test.h5


### Create preprocessed dataset with frequency bins per octave = 24

In [15]:
train_mean, train_std_dev = calculate_cqt_stats(train_data_dir, bins_per_octave=24, data_size=320)
X_train, y_train = process_files_using_cqt(train_data_dir, train_labels_dir, generate_note_labels, mean=train_mean, std=train_std_dev, bins_per_octave=24, data_size=320)

test_mean, test_std_dev = calculate_cqt_stats(test_data_dir, bins_per_octave=24, data_size=10)
X_test, y_test = process_files_using_cqt(test_data_dir, test_labels_dir, generate_note_labels, mean=test_mean, std=test_std_dev, bins_per_octave=24, data_size=10)

Calculating CQT stats: 100%|██████████| 320/320 [02:51<00:00,  1.86it/s]
Processing files in '../Data/raw/musicnet/train_data' and '../Data/raw/musicnet/train_labels': 100%|██████████| 320/320 [03:39<00:00,  1.46it/s]
Calculating CQT stats: 100%|██████████| 10/10 [00:02<00:00,  4.35it/s]
Processing files in '../Data/raw/musicnet/test_data' and '../Data/raw/musicnet/test_labels': 100%|██████████| 10/10 [00:02<00:00,  3.80it/s]


#### Pad data to the same length

In [16]:
print('Number of training examples:', len(X_train))
print('Number of training labels:', len(y_train))
print(f"X_train[0] shape: {X_train[0].shape}")
print(f"y_train[0] shape: {y_train[0].shape}")

Number of training examples: 320
Number of training labels: 320
X_train[0] shape: (19254, 168)
y_train[0] shape: (19254, 88)


In [17]:
max_length = max(find_max_length(X_train), find_max_length(X_test))
print('Max length:', max_length)

Max length: 46040


In [18]:
X_train_padded = pad_data(X_train, max_length)
y_train_padded = pad_data(y_train, max_length)
X_test_padded = pad_data(X_test, max_length)
y_test_padded = pad_data(y_test, max_length)

In [19]:
print('X_train_padded shape:', X_train_padded.shape)
print('y_train_padded shape:', y_train_padded.shape)
print('X_test_padded shape:', X_test_padded.shape)
print('y_test_padded shape:', y_test_padded.shape)

X_train_padded shape: (320, 46040, 168)
y_train_padded shape: (320, 46040, 88)
X_test_padded shape: (10, 46040, 168)
y_test_padded shape: (10, 46040, 88)


#### Reshape data

In [20]:
X_train = X_train_padded.reshape(-1, X_train_padded.shape[2])
y_train = y_train_padded.reshape(-1, 88)
X_test = X_test_padded.reshape(-1, X_test_padded.shape[2])
y_test = y_test_padded.reshape(-1, 88)

In [21]:
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

X_train shape: (14732800, 168)
y_train shape: (14732800, 88)
X_test shape: (460400, 168)
y_test shape: (460400, 88)


#### Save preprocessed data

In [22]:
save_preprocessed_data(X_train, y_train, '../Data/processed/cqt/bins_24/', 'train.h5')
save_preprocessed_data(X_test, y_test, '../Data/processed/cqt/bins_24/', 'test.h5')

Starting to save preprocessed data...
Data successfully saved to ../Data/processed/cqt/bins_24/train.h5
Starting to save preprocessed data...
Data successfully saved to ../Data/processed/cqt/bins_24/test.h5


### Create preprocessed dataset with frequency bins per octave = 36

In [23]:
train_mean, train_std_dev = calculate_cqt_stats(train_data_dir, bins_per_octave=36, data_size=320)
X_train, y_train = process_files_using_cqt(train_data_dir, train_labels_dir, generate_note_labels, mean=train_mean, std=train_std_dev, bins_per_octave=36, data_size=320)

test_mean, test_std_dev = calculate_cqt_stats(test_data_dir, bins_per_octave=36, data_size=10)
X_test, y_test = process_files_using_cqt(test_data_dir, test_labels_dir, generate_note_labels, mean=test_mean, std=test_std_dev, bins_per_octave=36, data_size=10)

Calculating CQT stats: 100%|██████████| 320/320 [04:58<00:00,  1.07it/s]
Processing files in '../Data/raw/musicnet/train_data' and '../Data/raw/musicnet/train_labels': 100%|██████████| 320/320 [05:35<00:00,  1.05s/it]
Calculating CQT stats: 100%|██████████| 10/10 [00:04<00:00,  2.40it/s]
Processing files in '../Data/raw/musicnet/test_data' and '../Data/raw/musicnet/test_labels': 100%|██████████| 10/10 [00:04<00:00,  2.17it/s]


In [24]:
print('Number of training examples:', len(X_train))
print('Number of training labels:', len(y_train))
print(f"X_train[0] shape: {X_train[0].shape}")
print(f"y_train[0] shape: {y_train[0].shape}")

Number of training examples: 320
Number of training labels: 320
X_train[0] shape: (19254, 252)
y_train[0] shape: (19254, 88)


#### Pad data to the same length

In [25]:
max_length = max(find_max_length(X_train), find_max_length(X_test))
print('Max length:', max_length)

Max length: 46040


In [26]:
X_train_padded = pad_data(X_train, max_length)
y_train_padded = pad_data(y_train, max_length)
X_test_padded = pad_data(X_test, max_length)
y_test_padded = pad_data(y_test, max_length)

In [27]:
print('X_train_padded shape:', X_train_padded.shape)
print('y_train_padded shape:', y_train_padded.shape)
print('X_test_padded shape:', X_test_padded.shape)
print('y_test_padded shape:', y_test_padded.shape)

X_train_padded shape: (320, 46040, 252)
y_train_padded shape: (320, 46040, 88)
X_test_padded shape: (10, 46040, 252)
y_test_padded shape: (10, 46040, 88)


#### Reshape data

In [28]:
X_train = X_train_padded.reshape(-1, X_train_padded.shape[2])
y_train = y_train_padded.reshape(-1, 88)
X_test = X_test_padded.reshape(-1, X_test_padded.shape[2])
y_test = y_test_padded.reshape(-1, 88)

In [29]:
print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

X_train shape: (14732800, 252)
y_train shape: (14732800, 88)
X_test shape: (460400, 252)
y_test shape: (460400, 88)


#### Save preprocessed data

In [30]:
save_preprocessed_data(X_train, y_train, '../Data/processed/cqt/bins_36/', 'train.h5')
save_preprocessed_data(X_test, y_test, '../Data/processed/cqt/bins_36/', 'test.h5')

Starting to save preprocessed data...
Data successfully saved to ../Data/processed/cqt/bins_36/train.h5
Starting to save preprocessed data...
Data successfully saved to ../Data/processed/cqt/bins_36/test.h5
