# Enhanced Preprocessing for Ensemble Learning

This notebook extends the existing preprocessing to generate multiple feature types for ensemble learning.

In [None]:
import os
import uuid
import numpy as np
import librosa
from joblib import Parallel, delayed
import matplotlib.pyplot as plt
from modules.PostgresDBHandler import PostgresDBHandler

In [None]:
# Configuration shared by every cell below
base_dir = "./"
intermediate_dir = "ensemble_intermediate_results"
# Length value recorded with each processed sample (passed to db.insert_processed_audio)
fixedLength = 128
# NOTE(review): credentials are hard-coded; presumably placeholders — confirm before sharing.
dbParams = dict(
    dbname="mydatabase",
    user="myuser",
    password="mypassword",
    host="postgres_server",
    port="5432",
)

# Every feature representation this notebook generates, one output folder each
FEATURE_TYPES = [
    'mel_spectrogram',
    'mfcc',
    'chromagram',
    'spectral_contrast',
    'tonnetz',
    'constant_q',
    'cqt',
    'stft',
    'harmonic_percussive',
    'onset_strength',
]

# Make sure <intermediate_dir>/<feature_type>/ exists for each feature type
for feature_type in FEATURE_TYPES:
    os.makedirs(
        os.path.join(intermediate_dir, feature_type),
        exist_ok=True,
    )

In [None]:
# Register every feature type in the database if it is not there yet.

# Parameters stored alongside each feature type.
# NOTE(review): extract_feature_in_chunks hard-codes its own librosa arguments
# and does not read this dict; some values here (e.g. stft n_fft/hop_length,
# onset_strength hop_length) differ from what that function computes —
# confirm which is intended.
feature_type_params = {
    'mel_spectrogram': {'n_mels': 64, 'fmin': 0, 'fmax': None},
    'mfcc': {'n_mfcc': 8, 'n_mels': 64},
    'chromagram': {'n_chroma': 8},
    'spectral_contrast': {'n_bands': 3},
    'tonnetz': {},
    'constant_q': {'bins_per_octave': 6, 'n_bins': 42},
    'cqt': {'bins_per_octave': 6, 'n_chroma': 42},
    'stft': {'n_fft': 512, 'hop_length': 256},
    'harmonic_percussive': {'margin': 3.0},
    'onset_strength': {'hop_length': 128}
}

db = PostgresDBHandler(**dbParams)
db.connect()
try:
    # Names already present, so existing rows are not inserted twice
    existing_feature_types = [ft['name'] for ft in db.get_all_feature_types()]

    for feature_type, params in feature_type_params.items():
        if feature_type not in existing_feature_types:
            description = f"{feature_type.replace('_', ' ').title()} features"
            db.insert_feature_type(feature_type, description, params)
            print(f"Created feature type: {feature_type}")
finally:
    # Always release the connection, even when a query above fails
    db.close()

In [None]:
def pad_or_truncate(array, fixed_length):
    """Force the second axis of a 2-D array to exactly ``fixed_length`` columns.

    Longer inputs are cut after ``fixed_length`` columns, shorter ones are
    zero-padded on the right, and inputs that already match are returned
    unchanged (the very same object).
    """
    n_columns = array.shape[1]

    if n_columns == fixed_length:
        return array

    if n_columns > fixed_length:
        return array[:, :fixed_length]

    # Too short: append zero columns on the right only
    missing = fixed_length - n_columns
    return np.pad(array, ((0, 0), (0, missing)), mode="constant")

In [None]:
def augment_audio(audio_data, sr):
    """Build three augmented variants of an audio signal.

    Returns a dict with the keys ``"time_stretch"`` (rate 1.1),
    ``"pitch_shifting"`` (+2 steps) and ``"noise"`` (additive Gaussian noise
    scaled by 0.005). Every value is cast to ``float32``.
    """
    # Evaluated in this order: stretch, pitch shift, then the random noise draw
    variants = {
        "time_stretch": librosa.effects.time_stretch(audio_data, rate=1.1),
        "pitch_shifting": librosa.effects.pitch_shift(audio_data, sr=sr, n_steps=2),
        "noise": audio_data + 0.005 * np.random.randn(len(audio_data)),
    }

    # Give every variant the same dtype
    return {label: signal.astype(np.float32) for label, signal in variants.items()}

In [None]:
def _compute_chunk_feature(chunk, feature_type, sr, n_fft, hop_length):
    """Compute the requested feature matrix for one audio chunk.

    Raises:
        ValueError: if ``feature_type`` is not one of the supported names.
    """
    if feature_type == 'mel_spectrogram':
        mel_spectrogram = librosa.feature.melspectrogram(y=chunk, sr=sr, n_mels=64)
        return librosa.power_to_db(mel_spectrogram, ref=np.max)

    if feature_type == 'mfcc':
        return librosa.feature.mfcc(y=chunk, sr=sr, n_mfcc=8)

    if feature_type == 'chromagram':
        return librosa.feature.chroma_stft(y=chunk, sr=sr, n_chroma=8)

    if feature_type == 'spectral_contrast':
        return librosa.feature.spectral_contrast(y=chunk, sr=sr, n_bands=3)

    if feature_type == 'tonnetz':
        y_harmonic = librosa.effects.harmonic(chunk)
        return librosa.feature.tonnetz(y=y_harmonic, sr=sr)

    if feature_type == 'constant_q':
        cqt = librosa.cqt(y=chunk, sr=sr, bins_per_octave=6, n_bins=42)
        return librosa.amplitude_to_db(np.abs(cqt), ref=np.max)

    if feature_type == 'cqt':
        # NOTE(review): librosa documents that bins_per_octave must be an
        # integer multiple of n_chroma; 6 and 42 do not satisfy that, so this
        # call may always raise — confirm the intended values.
        return librosa.feature.chroma_cqt(y=chunk, sr=sr, bins_per_octave=6, n_chroma=42)

    if feature_type == 'stft':
        stft = librosa.stft(y=chunk, n_fft=n_fft, hop_length=hop_length)
        return librosa.amplitude_to_db(np.abs(stft), ref=np.max)

    if feature_type == 'harmonic_percussive':
        # Only the harmonic component is kept
        y_harmonic, _ = librosa.effects.hpss(chunk, margin=3.0)
        harmonic_stft = librosa.stft(y_harmonic, n_fft=n_fft, hop_length=hop_length)  # Match STFT settings
        return librosa.amplitude_to_db(np.abs(harmonic_stft), ref=np.max)

    if feature_type == 'onset_strength':
        onset_env = librosa.onset.onset_strength(y=chunk, sr=sr, hop_length=hop_length)
        # Reshape to a single row so it concatenates like the other features
        return onset_env.reshape(1, -1)

    raise ValueError(f"Unsupported feature type: {feature_type!r}")


def extract_feature_in_chunks(audio_path, feature_type, sr, chunk_size=10):
    """Extract one feature type from an audio file, chunk by chunk.

    The file is loaded once and split into consecutive chunks. Silent chunks
    are skipped, and the per-chunk features are concatenated along the last
    axis.

    Args:
        audio_path: Path of the audio file, passed to ``librosa.load``.
        feature_type: One of 'mel_spectrogram', 'mfcc', 'chromagram',
            'spectral_contrast', 'tonnetz', 'constant_q', 'cqt', 'stft',
            'harmonic_percussive', 'onset_strength'.
        sr: Sample rate requested from ``librosa.load``. The rate returned by
            the loader is the one used afterwards, so ``None`` is accepted.
        chunk_size: Chunk duration in seconds.

    Returns:
        The concatenated feature array, or ``None`` when loading or extraction
        fails, the feature type is unsupported, or no non-silent chunk exists.
        The error is printed in those cases.
    """
    try:
        # Use the rate librosa actually loaded at
        audio_data, sr = librosa.load(audio_path, sr=sr, offset=0, duration=None)

        audio_length = len(audio_data)
        if audio_length == 0:
            raise ValueError("audio file contains no samples")

        # chunk_size is a duration, so convert it to a sample count. Clips
        # shorter than one chunk are processed whole rather than padded out
        # with seconds of silence.
        chunk_samples = min(int(chunk_size * sr), audio_length)
        if chunk_samples <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

        # All chunks share one length, so these are fixed for the whole file.
        # n_fft is never larger than the chunk.
        n_fft = min(2048, chunk_samples)
        hop_length = max(1, n_fft // 2)  # Half-overlap

        chunk_features = []
        for start in range(0, audio_length, chunk_samples):
            chunk = audio_data[start:start + chunk_samples]
            if len(chunk) < chunk_samples:
                # Pad the tail so every chunk has the same length and produces
                # the same number of frequency bins for concatenation
                chunk = np.pad(chunk, (0, chunk_samples - len(chunk)), mode="constant")

            # Skip chunks without any non-silent interval
            if len(librosa.effects.split(chunk, top_db=30)) == 0:
                continue

            chunk_features.append(
                _compute_chunk_feature(chunk, feature_type, sr, n_fft, hop_length)
            )

        if not chunk_features:
            raise ValueError("no non-silent audio found")

        # Concatenate features across chunks
        return np.concatenate(chunk_features, axis=-1)

    except Exception as e:
        # Callers rely on None meaning "skip this feature", so nothing is re-raised
        print(f"Error extracting features from {audio_path}: {e}")
        return None

In [None]:
def process_file_ensemble(audio_index, db_params):
    """Generate and store every feature type for one audio file.

    For each entry of ``FEATURE_TYPES`` the feature is extracted, padded or
    truncated to ``fixedLength`` columns, saved as a ``.npy`` file under
    ``intermediate_dir/<feature_type>/`` and recorded in the database with the
    augmentation label ``"original"``.

    Args:
        audio_index: Identifier passed to ``db.get_audio_file``.
        db_params: Keyword arguments used to construct ``PostgresDBHandler``.

    Returns:
        ``True`` when the file was processed without raising, ``False`` when
        an exception occurred (the error is printed). Feature types whose
        extraction returned ``None`` are skipped and do not change the result.
    """
    db = PostgresDBHandler(**db_params)
    db.connect()

    try:
        db_data = db.get_audio_file(audio_index)
        audio_path = db_data['filePath']
        sr = db_data["sampleRate"]

        # Resolve every feature type ID before any extraction work starts
        feature_type_ids = {
            feature_type: db.get_feature_type_id(feature_type)
            for feature_type in FEATURE_TYPES
        }

        skipped = []
        for feature_type in FEATURE_TYPES:
            print(f"Processing feature: {feature_type}")
            feature_data = extract_feature_in_chunks(audio_path, feature_type, sr)

            if feature_data is None:
                skipped.append(feature_type)
                continue

            # fixedLength is recorded with the sample below, so the stored
            # array must really have that many columns
            feature_data = pad_or_truncate(feature_data, fixedLength)

            unique_id = uuid.uuid4()
            feature_path = os.path.join(
                intermediate_dir, feature_type, f"{unique_id}_{feature_type}.npy"
            )
            np.save(feature_path, feature_data)

            # Insert processed feature into the database
            db.insert_processed_audio(
                db_data["instrumentID"],
                db_data["audioID"],
                fixedLength,
                feature_type_ids[feature_type],
                feature_path,
                "original",
            )

        if skipped:
            print(f"Skipped features for audio {audio_index}: {', '.join(skipped)}")

        return True

    except Exception as e:
        # Report failure to the caller instead of aborting the whole batch
        print(f"Error processing audio {audio_index}: {e}")
        return False
    finally:
        db.close()

In [None]:
def apply_ensemble_preprocessing(audios_ids, db_params, n_jobs=-1):
    """Run ``process_file_ensemble`` for every audio ID through joblib.

    Prints how many files were processed successfully and returns the list of
    per-file results in the order produced by ``Parallel``.
    """
    run_one = delayed(process_file_ensemble)
    pool = Parallel(n_jobs=n_jobs)
    results = pool(run_one(audio_index, db_params) for audio_index in audios_ids)

    # Each result is the boolean returned by process_file_ensemble
    total = len(audios_ids)
    successful = sum(results)
    print(f"Successfully processed {successful}/{total} audio files")

    return results

In [None]:
# Fetch the IDs of every audio file that should be processed
db = PostgresDBHandler(**dbParams)
db.connect()
try:
    audio_ids = db.get_all_audio_ids()
finally:
    # Release the connection even if the query fails
    db.close()
print(f"Found {len(audio_ids)} audio files to process")

In [None]:
# Process every fetched audio ID with the default n_jobs=-1
apply_ensemble_preprocessing(audio_ids, dbParams)


In [None]:
# Visualization function to compare different feature types
def visualize_features_comparison(audio_file_path, sr=22050):
    """Plot every feature type of one audio file in a grid of subplots.

    Each entry of ``FEATURE_TYPES`` is extracted with
    ``extract_feature_in_chunks``. Feature types whose extraction returns
    ``None`` are left out of the figure.

    Args:
        audio_file_path: Path of the audio file to visualise.
        sr: Sample rate used for extraction and for the plot axes.
    """
    # librosa.display is not imported at the top of the file
    import librosa.display

    # One extraction call per feature type; the extractor takes a path
    features = {}
    for feature_type in FEATURE_TYPES:
        feature_data = extract_feature_in_chunks(audio_file_path, feature_type, sr)
        if feature_data is not None:
            features[feature_type] = feature_data

    if not features:
        # plt.subplots cannot build a grid with zero rows
        print(f"No features could be extracted from {audio_file_path}")
        return

    # Create subplots
    n_features = len(features)
    cols = 3
    rows = (n_features + cols - 1) // cols

    # squeeze=False always yields a 2-D array of axes, whatever the grid size
    fig, axes = plt.subplots(rows, cols, figsize=(15, 5*rows), squeeze=False)
    axes = axes.flatten()

    # Frequency-axis settings per spectrogram-like feature
    # NOTE(review): the time axis uses specshow's default hop_length, which may
    # differ from the hop used during extraction — confirm if timing matters.
    spectrogram_axes = {
        'mel_spectrogram': {'y_axis': 'mel'},
        'constant_q': {'y_axis': 'cqt_hz', 'bins_per_octave': 6},
        'stft': {'y_axis': 'linear'},
        'harmonic_percussive': {'y_axis': 'linear'},
    }
    plain_titles = {
        'mfcc': 'MFCC',
        'spectral_contrast': 'Spectral Contrast',
        'tonnetz': 'Tonal Centroids',
    }

    for ax, (feature_name, feature_data) in zip(axes, features.items()):
        if feature_name in spectrogram_axes:
            librosa.display.specshow(
                feature_data, sr=sr, x_axis='time', ax=ax, **spectrogram_axes[feature_name]
            )
            ax.set_title(f'{feature_name.replace("_", " ").title()}')
        elif feature_name in ['chromagram', 'cqt']:
            librosa.display.specshow(feature_data, x_axis='time', y_axis='chroma', ax=ax)
            ax.set_title(f'{feature_name.replace("_", " ").title()}')
        elif feature_name in plain_titles:
            librosa.display.specshow(feature_data, x_axis='time', ax=ax)
            ax.set_title(plain_titles[feature_name])
        elif feature_name == 'onset_strength':
            # Stored as a single-row matrix; plot that row as a curve
            ax.plot(feature_data[0])
            ax.set_title('Onset Strength')
            ax.set_ylabel('Strength')
            ax.set_xlabel('Time')

    # Hide empty subplots
    for ax in axes[n_features:]:
        ax.set_visible(False)

    plt.tight_layout()
    plt.show()

In [None]:
# Statistics about processed data
def print_processing_stats():
    """Print how many processed samples exist per feature type and per augmentation.

    Opens its own database connection from the module-level ``dbParams`` and
    always closes it, even when a query fails.
    """
    db = PostgresDBHandler(**dbParams)
    db.connect()

    try:
        print("=== Processing Statistics ===")

        # Get counts for each feature type
        for feature_type in FEATURE_TYPES:
            processed_data = db.get_processed_data_by_feature_type(feature_type)
            print(f"{feature_type}: {len(processed_data)} samples")

        # Get augmentation statistics
        query = """
    SELECT augmentation, COUNT(*) as count 
    FROM Processed 
    GROUP BY augmentation
    """
        db.execute_query(query)
        augmentation_stats = db.fetchall()

        print("\n=== Augmentation Statistics ===")
        for aug, count in augmentation_stats:
            print(f"{aug}: {count} samples")
    finally:
        # Release the connection on success and on error
        db.close()

# Print the per-feature-type and per-augmentation sample counts
print_processing_stats()