# Audio Instrument and Note Classifier - Data Exploration

This notebook helps explore and visualize the audio data used for training.


In [None]:
import sys
sys.path.append('..')

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
import os
import pandas as pd
from IPython.display import Audio

import config
from audio_processor import AudioProcessor

sns.set_style('whitegrid')
%matplotlib inline


## 1. Data Statistics


In [None]:
# Count samples per instrument and split
def count_samples(data_dir, instruments=None,
                  extensions=('.wav', '.mp3', '.ogg', '.flac')):
    """Count audio files per instrument inside one dataset split directory.

    Args:
        data_dir: Path of the split directory. Each instrument is looked up
            in a sub-directory named after it.
        instruments: Iterable of instrument (sub-directory) names to count.
            Defaults to ``config.INSTRUMENTS`` when omitted.
        extensions: File extensions (with leading dot) that count as audio
            samples. Matching is case-insensitive.

    Returns:
        dict mapping each instrument name to its number of audio files.
        Instruments without a directory get a count of 0.
    """
    if instruments is None:
        instruments = config.INSTRUMENTS
    suffixes = tuple(ext.lower() for ext in extensions)

    counts = {}
    for instrument in instruments:
        instrument_dir = os.path.join(data_dir, instrument)
        # isdir (not exists): a stray regular file named like an instrument
        # would otherwise make os.listdir raise NotADirectoryError.
        if not os.path.isdir(instrument_dir):
            counts[instrument] = 0
            continue
        counts[instrument] = sum(
            1
            for entry in os.listdir(instrument_dir)
            # Only regular files are samples; a sub-directory that happens
            # to be called "something.wav" must not be counted.
            if entry.lower().endswith(suffixes)
            and os.path.isfile(os.path.join(instrument_dir, entry))
        )
    return counts

# Tally every split in turn: train, validation, test
train_counts, val_counts, test_counts = (
    count_samples(split_dir)
    for split_dir in (config.TRAIN_DIR, config.VAL_DIR, config.TEST_DIR)
)

# Assemble a table with one row per instrument and one column per split
df_counts = pd.DataFrame(
    {'Train': train_counts, 'Validation': val_counts, 'Test': test_counts}
)

# Per-column sums first, then the sum of those sums = grand total
total_samples = df_counts.sum().sum()

print("Dataset Statistics:")
print(df_counts)
print(f"\nTotal samples: {total_samples}")


In [None]:
# Grouped bar chart: one cluster per instrument, one bar per split
ax = df_counts.plot.bar(figsize=(12, 6))
ax.set_title('Sample Distribution by Instrument and Split')
ax.set_xlabel('Instrument')
ax.set_ylabel('Number of Samples')
ax.legend(title='Split')
ax.figure.tight_layout()
plt.show()


## 2. Audio Waveform Visualization


In [None]:
# Shared AudioProcessor instance. The functions defined in the cells below use
# it to load audio files, extract mel spectrograms and apply augmentations.
processor = AudioProcessor()

def visualize_audio(audio_path):
    """Plot one file's waveform and mel spectrogram and return a player.

    Args:
        audio_path: Path of the audio file to load through ``processor``.

    Returns:
        An ``IPython.display.Audio`` widget for the loaded samples.

    Note:
        The time axis and the playback rate both use ``config.SAMPLE_RATE``;
        this assumes ``processor.load_audio`` returns samples at that rate
        (TODO confirm against AudioProcessor).
    """
    signal = processor.load_audio(audio_path)
    spectrogram = processor.extract_mel_spectrogram(signal)

    # Two stacked panels: waveform on top, spectrogram underneath
    fig, (wave_ax, spec_ax) = plt.subplots(2, 1, figsize=(14, 8))

    # Sample index divided by the sample rate gives seconds
    seconds = np.arange(len(signal)) / config.SAMPLE_RATE
    wave_ax.plot(seconds, signal)
    wave_ax.set_title(f'Waveform: {os.path.basename(audio_path)}')
    wave_ax.set_xlabel('Time (s)')
    wave_ax.set_ylabel('Amplitude')
    wave_ax.grid(True)

    # The colorbar is labelled in dB; presumably extract_mel_spectrogram
    # returns dB-scaled values (verify in AudioProcessor).
    mesh = librosa.display.specshow(
        spectrogram,
        x_axis='time',
        y_axis='mel',
        sr=config.SAMPLE_RATE,
        hop_length=config.HOP_LENGTH,
        ax=spec_ax,
        cmap='viridis',
    )
    spec_ax.set_title('Mel Spectrogram')
    fig.colorbar(mesh, ax=spec_ax, format='%+2.0f dB')

    plt.tight_layout()
    plt.show()

    # Hand back a playable widget so the caller can display() it
    return Audio(signal, rate=config.SAMPLE_RATE)

# Example: visualize one sample from each instrument
for instrument in config.INSTRUMENTS:
    instrument_dir = os.path.join(config.TRAIN_DIR, instrument)
    # isdir rather than exists: a regular file with an instrument's name
    # would make os.listdir fail.
    if not os.path.isdir(instrument_dir):
        continue
    # os.listdir returns entries in arbitrary order; sorting makes the chosen
    # sample the same on every run and every machine.
    files = sorted(f for f in os.listdir(instrument_dir)
                   if f.lower().endswith('.wav'))[:1]
    if files:
        audio_path = os.path.join(instrument_dir, files[0])
        print(f"\n{instrument.upper()}:")
        display(visualize_audio(audio_path))


## 3. Note Distribution Analysis


In [None]:
# Analyze note distribution
def analyze_note_distribution(data_dir, instruments=None, notes=None):
    """Count audio files per note across all instruments of one split.

    The note is taken from the file name: the part before the first
    underscore, after the file extension has been removed (so both
    ``C4_001.wav`` and ``C4.wav`` count towards ``C4``).

    Args:
        data_dir: Path of the split directory containing one sub-directory
            per instrument.
        instruments: Iterable of instrument (sub-directory) names to scan.
            Defaults to ``config.INSTRUMENTS`` when omitted.
        notes: Iterable of note names to tally. Defaults to ``config.NOTES``
            when omitted. Files whose note token is not listed are ignored.

    Returns:
        dict mapping every note in ``notes`` to its file count (0 if none).
    """
    if instruments is None:
        instruments = config.INSTRUMENTS
    if notes is None:
        notes = config.NOTES

    audio_suffixes = ('.wav', '.mp3', '.ogg', '.flac')
    note_counts = {note: 0 for note in notes}

    for instrument in instruments:
        instrument_dir = os.path.join(data_dir, instrument)
        # isdir rather than exists: listing a regular file would raise.
        if not os.path.isdir(instrument_dir):
            continue

        for filename in os.listdir(instrument_dir):
            if not filename.lower().endswith(audio_suffixes):
                continue

            # Drop the extension before splitting; otherwise a name without
            # an underscore ('C4.wav') yields the token 'C4.wav' and is missed.
            stem = os.path.splitext(filename)[0]
            note = stem.split('_')[0]
            if note in note_counts:
                note_counts[note] += 1

    return note_counts

train_note_dist = analyze_note_distribution(config.TRAIN_DIR)

# Bar chart of per-note counts, drawn through the explicit Axes interface
fig, ax = plt.subplots(figsize=(20, 6))
ax.bar(train_note_dist.keys(), train_note_dist.values())
ax.set_title('Note Distribution in Training Set')
ax.set_xlabel('Note')
ax.set_ylabel('Count')
plt.xticks(rotation=45)
ax.grid(axis='y')
fig.tight_layout()
plt.show()

# Ascending by count, so the rarest notes come first
print("\nNotes with fewest samples:")
sorted_notes = list(train_note_dist.items())
sorted_notes.sort(key=lambda pair: pair[1])
for note, count in sorted_notes[:5]:
    print(f"  {note}: {count} samples")


## 4. Spectrogram Comparison


In [None]:
# Compare spectrograms of the same note from different instruments
def compare_instruments(note='C4'):
    """Plot mel spectrograms of the same note for every configured instrument.

    For each instrument in ``config.INSTRUMENTS`` the first (alphabetically)
    audio file in its training directory whose name starts with ``note`` is
    loaded and drawn in its own panel. Instruments without a matching file
    get an empty panel labelled as such.

    Args:
        note: Note-name prefix to look for at the start of file names.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    audio_suffixes = ('.wav', '.mp3', '.ogg', '.flac')
    instruments = list(config.INSTRUMENTS)
    if not instruments:
        # plt.subplots cannot build a grid with zero rows.
        print("No instruments configured; nothing to compare")
        return

    # squeeze=False always returns a 2-D array of Axes, so indexing works
    # even when only a single instrument is configured.
    fig, axes = plt.subplots(len(instruments), 1, figsize=(14, 12),
                             squeeze=False)

    for ax, instrument in zip(axes[:, 0], instruments):
        instrument_dir = os.path.join(config.TRAIN_DIR, instrument)

        candidates = []
        if os.path.isdir(instrument_dir):
            # Keep audio files only (a stray 'A4_notes.txt' must not reach
            # load_audio) and sort so the chosen file is reproducible.
            candidates = sorted(
                f for f in os.listdir(instrument_dir)
                if f.startswith(note) and f.lower().endswith(audio_suffixes)
            )

        if not candidates:
            # Make the gap explicit instead of leaving a blank, untitled panel.
            ax.set_title(f'{instrument.upper()} - Note {note} (no sample found)')
            ax.axis('off')
            continue

        audio_path = os.path.join(instrument_dir, candidates[0])
        audio = processor.load_audio(audio_path)
        mel_spec = processor.extract_mel_spectrogram(audio)

        img = librosa.display.specshow(mel_spec, x_axis='time', y_axis='mel',
                                       sr=config.SAMPLE_RATE,
                                       hop_length=config.HOP_LENGTH,
                                       ax=ax, cmap='viridis')
        ax.set_title(f'{instrument.upper()} - Note {note}')
        fig.colorbar(img, ax=ax, format='%+2.0f dB')

    plt.tight_layout()
    plt.show()

# Run the comparison for note 'A4' (overrides the function's default of 'C4')
compare_instruments('A4')


## 5. Data Augmentation Examples


In [None]:
# Demonstrate data augmentation
def show_augmentation_effects(audio_path):
    """Plot mel spectrograms of one file before and after each augmentation.

    The file is loaded once through ``processor``; the unmodified signal and
    three augmented variants (time stretch, added noise, volume change) are
    each drawn in their own panel of a single stacked figure.

    Args:
        audio_path: Path of the audio file to load and augment.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    base_audio = processor.load_audio(audio_path)
    file_label = os.path.basename(audio_path)

    # Panel label -> signal; dict order is the top-to-bottom panel order
    variants = {
        'Original': base_audio,
        'Time Stretch': processor.augment_time_stretch(base_audio, rate=1.1),
        'Add Noise': processor.augment_add_noise(base_audio, snr_db=30),
        'Volume Change': processor.augment_volume(base_audio, gain_db=10)
    }

    fig, axes = plt.subplots(len(variants), 1, figsize=(14, 12))

    for panel, (label, signal) in zip(axes, variants.items()):
        spec = processor.extract_mel_spectrogram(signal)
        mesh = librosa.display.specshow(
            spec,
            x_axis='time',
            y_axis='mel',
            sr=config.SAMPLE_RATE,
            hop_length=config.HOP_LENGTH,
            ax=panel,
            cmap='viridis',
        )
        panel.set_title(f'{label} - {file_label}')
        fig.colorbar(mesh, ax=panel, format='%+2.0f dB')

    plt.tight_layout()
    plt.show()

# Example: show augmentations for one file of the first instrument only
for instrument in config.INSTRUMENTS[:1]:
    instrument_dir = os.path.join(config.TRAIN_DIR, instrument)
    # isdir rather than exists: listing a regular file would raise.
    if os.path.isdir(instrument_dir):
        # os.listdir order is arbitrary; sort so the same file is used on
        # every run and the figure is reproducible.
        files = sorted(f for f in os.listdir(instrument_dir)
                       if f.lower().endswith('.wav'))[:1]
        if files:
            show_augmentation_effects(os.path.join(instrument_dir, files[0]))


## 6. Load Metadata (if available)


In [None]:
# Show the training metadata CSV when it exists; otherwise say how to create it
metadata_path = os.path.join(config.DATA_DIR, 'train_metadata.csv')
if not os.path.exists(metadata_path):
    print(f"Metadata file not found: {metadata_path}")
    print("Run prepare_data.py to create metadata files")
else:
    df_metadata = pd.read_csv(metadata_path)
    print("Training Metadata:")
    print(df_metadata.head(10))
    print(f"\nTotal samples: {len(df_metadata)}")

    # Row count for each distinct value of the 'instrument' column
    per_instrument = df_metadata.groupby('instrument').size()
    print("\nSamples per instrument:")
    print(per_instrument)

    # Same for the 'note' column, truncated to the first ten groups
    per_note = df_metadata.groupby('note').size()
    print("\nSamples per note (first 10):")
    print(per_note.head(10))
