In [1]:
import sys
from pathlib import Path
sys.path.append(str(Path().resolve().parent))

In [4]:
from utils.notes_processing import generate_spectrogram
from models.dataset import GoodSoundsDatabase, GoodSoundsDataset

In [5]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

# Initial CNN test with just trumpet recordings

We are first going to train a small-scale CNN on soley trumpet note spectrograms to see what kind of results we get.

In [6]:
#Get dataframe for trumpet sounds
db = GoodSoundsDatabase("/Users/dhanush/documents/musaic/good-sounds")
trumpet_df = db.get_sounds_by_instrument("trumpet")
trumpet_df.head()

INFO:models.dataset:Connected to database: /Users/dhanush/documents/musaic/good-sounds/database.sqlite


Unnamed: 0,id,instrument,note,octave,dynamics,recorded_at,location,player,bow_velocity,bridge_position,...,decay,sustain,release,offset,reference,klass,comments,semitone,pitch_reference,file_path
0,1020,trumpet,A#,5,mf,2014-04-24 11:30:00.000000,upf studio,ramon,,,...,,,,,1,good-sound,,70,440.0,/Users/dhanush/documents/musaic/good-sounds/so...
1,1021,trumpet,A,5,mf,2014-04-24 11:30:00.000000,upf studio,ramon,,,...,9195.0,,199445.0,213500.0,1,good-sound,,69,440.0,/Users/dhanush/documents/musaic/good-sounds/so...
2,1022,trumpet,G#,5,mf,2014-04-24 11:30:00.000000,upf studio,ramon,,,...,12271.0,,169546.0,184641.0,1,good-sound,,68,440.0,/Users/dhanush/documents/musaic/good-sounds/so...
3,1023,trumpet,G,5,mf,2014-04-24 11:30:00.000000,upf studio,ramon,,,...,,,,,1,good-sound,,67,440.0,/Users/dhanush/documents/musaic/good-sounds/so...
4,1024,trumpet,F#,5,mf,2014-04-24 11:30:00.000000,upf studio,ramon,,,...,22495.0,,187446.0,201000.0,1,good-sound,,66,440.0,/Users/dhanush/documents/musaic/good-sounds/so...


In [9]:
#Analyze label distribution

print(f"\n Label Distribution:")
print("-" * 40)

label_counts = trumpet_df['klass'].value_counts()
total = len(trumpet_df)

for label, count in label_counts.items():
    percentage = (count / total) * 100
    print(f"{label}: {count} samples ({percentage:.1f}%)")


 Label Distribution:
----------------------------------------
good-sound: 90 samples (14.2%)
bad-dynamics-stability-errors: 62 samples (9.8%)
bad-timbre-stability-errors: 53 samples (8.4%)
bad-pitch: 45 samples (7.1%)
good-attack: 32 samples (5.1%)
bad-attack: 32 samples (5.1%)
bad-attack-air: 29 samples (4.6%)
too-much-air: 29 samples (4.6%)
bad-attack-pitch-up: 29 samples (4.6%)
good-attack-no-picat: 29 samples (4.6%)
bad-pitch-stability-errors: 29 samples (4.6%)
bad-pitch-stability-bend: 29 samples (4.6%)
bad-dynamics-stability-decrescendo: 29 samples (4.6%)
bad-dynamics-stability-crescendo: 27 samples (4.3%)
bad-richness: 26 samples (4.1%)
bad-dynamics-stability-tremolo: 25 samples (4.0%)
bad-attack-pitch-down: 24 samples (3.8%)
good-attack-soft: 13 samples (2.1%)


In [10]:
#Create training, validation, and testing sets
test_size=0.2
val_size=0.2
random_state = 42
stratify = True

# First split: separate test set
stratify_col = trumpet_df['klass'] if stratify else None

train_val_df, test_df = train_test_split(
    trumpet_df, 
    test_size=test_size, 
    random_state=random_state,
    stratify=stratify_col
)

# Second split: separate validation from training
stratify_col_remaining = train_val_df['klass'] if stratify else None

train_df, val_df = train_test_split(
    train_val_df,
    test_size=val_size,
    random_state=random_state,
    stratify=stratify_col_remaining
)

print(f"Data split summary:")
print(f"Total samples: {len(trumpet_df)}")
print(f"Training: {len(train_df)} ({len(train_df)/len(trumpet_df)*100:.1f}%)")
print(f"Validation: {len(val_df)} ({len(val_df)/len(trumpet_df)*100:.1f}%)")
print(f"Testing: {len(test_df)} ({len(test_df)/len(trumpet_df)*100:.1f}%)")

Data split summary:
Total samples: 632
Training: 404 (63.9%)
Validation: 101 (16.0%)
Testing: 127 (20.1%)


In [11]:
#Create label encoder

label_encoder = LabelEncoder()
label_encoder.fit(train_df['klass'])

print(f"\nLabel Encoding:")
print("-" * 20)
for i, label in enumerate(label_encoder.classes_):
    print(f"{i}: {label}")


Label Encoding:
--------------------
0: bad-attack
1: bad-attack-air
2: bad-attack-pitch-down
3: bad-attack-pitch-up
4: bad-dynamics-stability-crescendo
5: bad-dynamics-stability-decrescendo
6: bad-dynamics-stability-errors
7: bad-dynamics-stability-tremolo
8: bad-pitch
9: bad-pitch-stability-bend
10: bad-pitch-stability-errors
11: bad-richness
12: bad-timbre-stability-errors
13: good-attack
14: good-attack-no-picat
15: good-attack-soft
16: good-sound
17: too-much-air


In [12]:
spectrogram_function = generate_spectrogram
cache_spectrograms = False
cache_dir = None
batch_size=32
num_workers=4

# Create datasets
train_dataset = GoodSoundsDataset(
    train_df, spectrogram_function, label_encoder,
    cache_spectrograms=cache_spectrograms, cache_dir=cache_dir
)

val_dataset = GoodSoundsDataset(
    val_df, spectrogram_function, label_encoder,
    cache_spectrograms=cache_spectrograms, cache_dir=cache_dir
)

test_dataset = GoodSoundsDataset(
    test_df, spectrogram_function, label_encoder,
    cache_spectrograms=cache_spectrograms, cache_dir=cache_dir
)

# Create data loaders
train_loader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, 
    num_workers=num_workers, pin_memory=True
)

val_loader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False,
    num_workers=num_workers, pin_memory=True
)

test_loader = DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False,
    num_workers=num_workers, pin_memory=True
)

In [13]:
#Test that data loading works correctly
data_loader = train_loader
num_samples = 3
    
print(f"\nTesting data loading with {num_samples} samples...")

for i, batch in enumerate(data_loader):
    if i >= num_samples:
        break
        
    spectrograms = batch['spectrogram']
    labels = batch['label']
    
    print(f"Batch {i+1}:")
    print(f"  Spectrogram shape: {spectrograms.shape}")
    print(f"  Labels shape: {labels.shape}")
    print(f"  Label values: {labels.numpy()}")
    print(f"  Spectrogram range: [{spectrograms.min():.3f}, {spectrograms.max():.3f}]")


Testing data loading with 3 samples...


  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mma

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/Users/dhanush/miniconda3/envs/musaic_env/lib/python3.9/site-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/Users/dhanush/miniconda3/envs/musaic_env/lib/python3.9/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
  File "/Users/dhanush/miniconda3/envs/musaic_env/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 398, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
  File "/Users/dhanush/miniconda3/envs/musaic_env/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 171, in collate
    {
  File "/Users/dhanush/miniconda3/envs/musaic_env/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 172, in <dictcomp>
    key: collate(
  File "/Users/dhanush/miniconda3/envs/musaic_env/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 155, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
  File "/Users/dhanush/miniconda3/envs/musaic_env/lib/python3.9/site-packages/torch/utils/data/_utils/collate.py", line 271, in collate_tensor_fn
    out = elem.new(storage).resize_(len(batch), *list(elem.size()))
RuntimeError: Trying to resize storage that is not resizable


  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mmap=True)
  file_sample_rate, signal = wavfile.read(filename, mma