# Enhanced Preprocessing for Ensemble Learning

This notebook extends the existing preprocessing to generate multiple feature types for ensemble learning.

In [11]:
import os
import uuid
from tqdm import tqdm
import random
from collections import defaultdict
import numpy as np
import librosa
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
from modules.PostgresDBHandler import PostgresDBHandler
import warnings

In [12]:
# Configuration
base_dir = "./"
intermediate_dir = "ensemble_intermediate_results"
fixedLength = 128
dbParams = {
    "dbname": "mydatabase",
    "user": "myuser",
    "password": "mypassword",
    "host": "postgres_server",
    "port": "5432",
}

# Feature types to generate
FEATURE_TYPES = [
    'mel_spectrogram', 'mfcc', 'chromagram', 'spectral_contrast',
    'tonnetz', 'constant_q', 'cqt', 'stft', 'harmonic_percussive', 'onset_strength'
]

# Create directories for each feature type
for feature_type in FEATURE_TYPES:
    os.makedirs(os.path.join(intermediate_dir, feature_type), exist_ok=True)

In [13]:
# Initialize database and ensure feature types exist
db = PostgresDBHandler(**dbParams)
db.connect()

# Check if feature types exist, if not create them
existing_feature_types = [ft['name'] for ft in db.get_all_feature_types()]

FEATURE_SHAPES = {
    'mel_spectrogram': (64, 128),
    'mfcc': (8, 128),
    'chromagram': (8, 128),
    'spectral_contrast': (3, 128),
    'tonnetz': (6, 128),
    'constant_q': (42, 128),
    'cqt': (42, 128),
    'stft': (512, 128),
    'harmonic_percussive': (1025, 128),
    'onset_strength': (1, 128)
}

feature_type_params = {
    'mel_spectrogram': {'n_mels': 64, 'fmin': 0, 'fmax': None},
    'mfcc': {'n_mfcc': 8, 'n_mels': 64},
    'chromagram': {'n_chroma': 8},
    'spectral_contrast': {'n_bands': 2},  # To get 3 bands in output (n_bands+1)
    'tonnetz': {},
    'constant_q': {'bins_per_octave': 6, 'n_bins': 42},  # 7 octaves x 6 bins/octave
    'cqt': {'bins_per_octave': 12, 'n_bins': 84},  # 7 octaves x 12 bins/octave (standard musical scale)
    'stft': {'n_fft': 512, 'hop_length': 256},
    'harmonic_percussive': {'margin': 3.0, 'n_fft': 2048, 'hop_length': 1024},
    'onset_strength': {'hop_length': 128}
}



for feature_type, params in feature_type_params.items():
    if feature_type not in existing_feature_types:
        description = f"{feature_type.replace('_', ' ').title()} features"
        db.insert_feature_type(feature_type, description, params)
        print(f"Created feature type: {feature_type}")

db.close()

In [None]:
def pad_or_truncate(array, fixed_length):
    # If fixed_length is a tuple (e.g., (freq_bins, time_steps)), get the time_steps
    if isinstance(fixed_length, tuple):
        fixed_length = fixed_length[1]

    # Defensive check: if array has fewer than 2 dims, raise error
    if array.ndim < 2:
        raise ValueError("Input array must have at least 2 dimensions")

    current_length = array.shape[1]

    if current_length > fixed_length:
        return array[:, :fixed_length]
    elif current_length < fixed_length:
        pad_width = fixed_length - current_length
        return np.pad(array, ((0, 0), (0, pad_width)), mode="constant")
    else:
        return array


In [15]:
def augment_audio(audio_data, sr):
    """Apply audio augmentation techniques."""
    # Time-stretching
    stretched = librosa.effects.time_stretch(audio_data, rate=1.1)
    
    # Pitch-shifting
    pitched = librosa.effects.pitch_shift(audio_data, sr=sr, n_steps=2)
    
    # Adding noise
    noise = np.random.randn(len(audio_data))
    audio_data_noisy = audio_data + 0.005 * noise
    
    # Ensure all augmented data have the same dtype
    audio_data_noisy = audio_data_noisy.astype(np.float32)
    stretched = stretched.astype(np.float32)
    pitched = pitched.astype(np.float32)
    
    augmented_data = {
        "time_stretch": stretched,
        "pitch_shifting": pitched,
        "noise": audio_data_noisy,
    }
    
    return augmented_data

In [16]:
def extract_feature_in_chunks(audio_data, sr, chunk_size=3.0, hop_size=1.5):

    feature_outputs = defaultdict(list)
    total_length = len(audio_data)
    chunk_sample_length = int(chunk_size * sr)
    hop_sample_length = int(hop_size * sr)

    def process_chunk(start):
        end = start + chunk_sample_length
        if end > total_length:
            chunk = np.pad(audio_data[start:], (0, end - total_length))
        else:
            chunk = audio_data[start:end]

        if len(chunk) < 64 or np.max(np.abs(chunk)) < 1e-5:
            return None

        chunk_features = {}

        for feature_type, params in feature_type_params.items():
            try:
                local_params = dict(params)
                if 'n_fft' in local_params:
                    local_params['n_fft'] = min(local_params['n_fft'], len(chunk))

                if feature_type == 'mel_spectrogram':
                    feat = librosa.feature.melspectrogram(y=chunk, sr=sr, **local_params)
                elif feature_type == 'mfcc':
                    feat = librosa.feature.mfcc(y=chunk, sr=sr, **local_params)
                elif feature_type == 'chromagram':
                    feat = librosa.feature.chroma_stft(y=chunk, sr=sr, **local_params)
                elif feature_type == 'spectral_contrast':
                    feat = librosa.feature.spectral_contrast(y=chunk, sr=sr, **local_params)
                elif feature_type == 'tonnetz':
                    y_harmonic = librosa.effects.harmonic(chunk)
                    feat = librosa.feature.tonnetz(y=y_harmonic, sr=sr)
                elif feature_type == 'constant_q':
                    local_params['n_bins'] = 42
                    local_params['bins_per_octave'] = 42
                    feat = np.abs(librosa.cqt(y=chunk, sr=sr, **local_params))
                elif feature_type == 'cqt':
                    feat = librosa.feature.chroma_cqt(y=chunk, sr=sr)
                elif feature_type == 'stft':
                    feat = np.abs(librosa.stft(y=chunk, **local_params))
                elif feature_type == 'harmonic_percussive':
                    harmonic, percussive = librosa.effects.hpss(chunk)
                    feat = np.vstack([harmonic, percussive])
                elif feature_type == 'onset_strength':
                    feat = librosa.onset.onset_strength(y=chunk, sr=sr, **local_params)
                    feat = feat[np.newaxis, :]
                else:
                    continue

                chunk_features[feature_type] = feat

            except Exception as e:
                print(f"[WARN] Feature extraction failed for {feature_type} on chunk starting at {start}: {e}")
                continue

        return chunk_features

    # Run parallel processing on chunks
    chunk_results = Parallel(n_jobs=-1)(
        delayed(process_chunk)(start)
        for start in range(0, total_length, hop_sample_length)
    )

    for chunk_result in chunk_results:
        if chunk_result is None:
            continue
        for feature_type, feat in chunk_result.items():
            feature_outputs[feature_type].append(feat)

    processed_features = {}
    for feature_name, chunks in feature_outputs.items():
        if not chunks:
            continue
        feat = np.concatenate(chunks, axis=1)
        target_shape = FEATURE_SHAPES[feature_name]
        feat = pad_or_truncate(feat, target_shape)
        processed_features[feature_name] = feat

    return processed_features

In [21]:
def process_file_ensemble(audio_index, db_params):
    import traceback
    db = PostgresDBHandler(**db_params)
    db.connect()
    warnings.filterwarnings("ignore")
    try:
        db_data = db.get_audio_file(audio_index)
        audio_path = db_data['filePath']
        sr = db_data["sampleRate"]

        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        y, _ = librosa.load(audio_path, sr=None)

        if y is None or len(y) == 0:
            raise ValueError(f"Empty audio data: {audio_path}")

        feature_type_ids = {ftype: db.get_feature_type_id(ftype) for ftype in FEATURE_TYPES}
        features = extract_feature_in_chunks(y, sr)

        for feature_type, data in features.items():
            unique_id = uuid.uuid4()
            feature_path = os.path.join(intermediate_dir, feature_type, f"{unique_id}_{feature_type}.npy")
            os.makedirs(os.path.dirname(feature_path), exist_ok=True)
            np.save(feature_path, data)

            db.insert_processed_audio(
                db_data["instrumentID"],
                db_data["audioID"],
                fixedLength,
                feature_type_ids[feature_type],
                feature_path,
                "original",
            )

        return True

    except Exception as e:
        print(f"Error processing audio {audio_index}: {e}")
        traceback.print_exc()
        return False
    finally:
        db.close()


In [22]:
def apply_ensemble_preprocessing(audios_ids, db_params, n_jobs=-1):
    results = Parallel(n_jobs=n_jobs)(
        delayed(process_file_ensemble)(audio_index, db_params) for audio_index in tqdm(audios_ids)
    )
    successful = sum(results)
    total = len(audios_ids)
    print(f"Successfully processed {successful}/{total} audio files")
    return results


In [23]:
# Get all audio IDs and process them
db = PostgresDBHandler(**dbParams)
db.connect()
audio_ids = db.get_all_audio_ids()
print(f"Found {len(audio_ids)} audio files to process")
db.close()

Found 100 audio files to process


In [24]:
apply_ensemble_preprocessing(audio_ids, dbParams)


  0%|          | 0/100 [00:00<?, ?it/s]Traceback (most recent call last):
  File "/tmp/ipykernel_301/2951497862.py", line 20, in process_file_ensemble
  File "/tmp/ipykernel_301/1889092299.py", line 80, in extract_feature_in_chunks
  File "/tmp/ipykernel_301/3454397435.py", line 2, in pad_or_truncate
TypeError: '>' not supported between instances of 'int' and 'tuple'
 32%|███▏      | 32/100 [00:02<00:05, 13.04it/s]Traceback (most recent call last):
  File "/tmp/ipykernel_301/2951497862.py", line 20, in process_file_ensemble
  File "/tmp/ipykernel_301/1889092299.py", line 80, in extract_feature_in_chunks
  File "/tmp/ipykernel_301/3454397435.py", line 2, in pad_or_truncate
TypeError: '>' not supported between instances of 'int' and 'tuple'
Traceback (most recent call last):
  File "/tmp/ipykernel_301/2951497862.py", line 20, in process_file_ensemble
  File "/tmp/ipykernel_301/1889092299.py", line 80, in extract_feature_in_chunks
  File "/tmp/ipykernel_301/3454397435.py", line 2, in pad_

Successfully processed 0/100 audio files


Traceback (most recent call last):
  File "/tmp/ipykernel_301/2951497862.py", line 20, in process_file_ensemble
  File "/tmp/ipykernel_301/1889092299.py", line 80, in extract_feature_in_chunks
  File "/tmp/ipykernel_301/3454397435.py", line 2, in pad_or_truncate
TypeError: '>' not supported between instances of 'int' and 'tuple'
Traceback (most recent call last):
  File "/tmp/ipykernel_301/2951497862.py", line 20, in process_file_ensemble
  File "/tmp/ipykernel_301/1889092299.py", line 80, in extract_feature_in_chunks
  File "/tmp/ipykernel_301/3454397435.py", line 2, in pad_or_truncate
TypeError: '>' not supported between instances of 'int' and 'tuple'
Traceback (most recent call last):
  File "/tmp/ipykernel_301/2951497862.py", line 20, in process_file_ensemble
  File "/tmp/ipykernel_301/1889092299.py", line 80, in extract_feature_in_chunks
  File "/tmp/ipykernel_301/3454397435.py", line 2, in pad_or_truncate
TypeError: '>' not supported between instances of 'int' and 'tuple'


[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]

In [None]:
# Visualization function to compare different feature types
def visualize_features_comparison(audio_file_path, sr=22050):
    """Visualize all feature types for a single audio file."""
    features = {}
    for feature_type in FEATURE_TYPES:
        feature_data = extract_feature_in_chunks(audio_file_path, feature_type, sr)
        if feature_data is not None:
            features[feature_type] = feature_data
    
    n_features = len(features)
    cols = 3
    rows = (n_features + cols - 1) // cols
    
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5*rows))
    axes = axes.flatten() if rows > 1 else [axes] if cols == 1 else axes
    
    for i, (feature_name, feature_data) in enumerate(features.items()):
        ax = axes[i]
        
        if feature_name in ['mel_spectrogram', 'constant_q', 'stft', 'harmonic_percussive']:
            librosa.display.specshow(feature_data, sr=sr, x_axis='time', y_axis='mel', ax=ax)
            ax.set_title(f'{feature_name.replace("_", " ").title()}')
        elif feature_name == 'mfcc':
            librosa.display.specshow(feature_data, x_axis='time', ax=ax)
            ax.set_title('MFCC')
        elif feature_name in ['chromagram', 'cqt']:
            librosa.display.specshow(feature_data, x_axis='time', y_axis='chroma', ax=ax)
            ax.set_title(f'{feature_name.replace("_", " ").title()}')
        elif feature_name == 'spectral_contrast':
            librosa.display.specshow(feature_data, x_axis='time', ax=ax)
            ax.set_title('Spectral Contrast')
        elif feature_name == 'tonnetz':
            librosa.display.specshow(feature_data, x_axis='time', ax=ax)
            ax.set_title('Tonal Centroids')
        elif feature_name == 'onset_strength':
            ax.plot(feature_data[0])
            ax.set_title('Onset Strength')
            ax.set_ylabel('Strength')
            ax.set_xlabel('Time')
    
    # Hide empty subplots if any
    for i in range(n_features, len(axes)):
        axes[i].set_visible(False)
    
    plt.tight_layout()
    plt.show()

In [None]:
db = PostgresDBHandler(**dbParams)
db.connect()
audio_files = [db.get_audio_file(audio_id)['filePath'] for audio_id in db.get_all_audio_ids()]
db.close()

random_audio_path = random.choice(audio_files)
visualize_features_comparison(random_audio_path, sr=22050)

In [None]:
# Statistics about processed data
def print_processing_stats():
    """Print statistics about the processed data."""
    db = PostgresDBHandler(**dbParams)
    db.connect()
    
    print("=== Processing Statistics ===")
    
    # Get counts for each feature type
    for feature_type in FEATURE_TYPES:
        processed_data = db.get_processed_data_by_feature_type(feature_type)
        print(f"{feature_type}: {len(processed_data)} samples")
    
    # Get augmentation statistics
    query = """
    SELECT augmentation, COUNT(*) as count 
    FROM Processed 
    GROUP BY augmentation
    """
    db.execute_query(query)
    augmentation_stats = db.fetchall()
    
    print("\n=== Augmentation Statistics ===")
    for aug, count in augmentation_stats:
        print(f"{aug}: {count} samples")
    
    db.close()

print_processing_stats()