# Pitch Detection Algorithm Analysis

This notebook helps you analyze audio samples and test different pitch detection algorithms.

## Setup

First, make sure you've installed the required packages:
```bash
pip install -r ../requirements.txt
```

In [None]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import signal
from scipy.fft import fft, fftfreq
import soundfile as sf
from pathlib import Path

# Set style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*".
# Fall back to the pre-3.6 name so this cell also runs on older installs
# instead of raising on an unknown style.
_preferred_style = 'seaborn-v0_8-darkgrid'
plt.style.use(
    _preferred_style if _preferred_style in plt.style.available else 'seaborn-darkgrid'
)
sns.set_palette("husl")

print("Libraries loaded successfully!")

## 1. Load Audio Sample

Load a recorded audio sample from the samples folder.

In [None]:
# List available samples
samples_dir = Path('../samples')

# rglob() yields entries in filesystem order, which can differ between
# machines and runs. Sorting inside each extension group keeps the original
# webm -> wav -> mp3 grouping while making the printed indices stable, so
# "audio_files[N]" in the next cell always refers to the same recording.
audio_files = [
    path
    for pattern in ('*.webm', '*.wav', '*.mp3')
    for path in sorted(samples_dir.rglob(pattern))
]

print("Available audio samples:")
for i, file in enumerate(audio_files):
    print(f"{i}: {file.relative_to(samples_dir)}")

# If no files found
if not audio_files:
    print("\nNo audio samples found. Please record some using the Sound2Score app!")

In [None]:
# Pick one recording from the list above and decode it.
# Edit the index below (or assign a path directly) to analyse another file.
if audio_files:
    audio_path = audio_files[0]  # Change index to load different file

    # sr=None asks librosa to keep the file's own sample rate
    y, sr = librosa.load(audio_path, sr=None)

    # Short summary of what was loaded
    print(
        f"Loaded: {audio_path.name}",
        f"Sample rate: {sr} Hz",
        f"Duration: {len(y) / sr:.2f} seconds",
        f"Samples: {len(y)}",
        sep="\n",
    )

## 2. Visualize Audio Signal

In [None]:
if audio_files:
    # One figure with three stacked panels: waveform, spectrogram, chromagram.
    fig, axes = plt.subplots(3, 1, figsize=(14, 10))
    wave_ax, spec_ax, chroma_ax = axes

    # Panel 1: amplitude over time
    librosa.display.waveshow(y, sr=sr, ax=wave_ax)
    wave_ax.set(title='Waveform', xlabel='Time (s)', ylabel='Amplitude')

    # Panel 2: STFT magnitude converted to dB (ref=np.max)
    stft_magnitude = np.abs(librosa.stft(y))
    D = librosa.amplitude_to_db(stft_magnitude, ref=np.max)
    img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=spec_ax)
    spec_ax.set_title('Spectrogram')
    spec_ax.set_ylim(0, 4000)  # Focus on piano frequency range
    fig.colorbar(img, ax=spec_ax, format='%+2.0f dB')

    # Panel 3: chromagram (shows musical notes over time)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    img2 = librosa.display.specshow(chroma, sr=sr, x_axis='time', y_axis='chroma', ax=chroma_ax)
    chroma_ax.set_title('Chromagram')
    fig.colorbar(img2, ax=chroma_ax)

    plt.tight_layout()
    plt.show()

## 3. Pitch Detection Methods

### Method 1: Autocorrelation (Current JS Implementation)

In [None]:
def autocorrelation_pitch(audio_buffer, sample_rate, min_freq=27.5, max_freq=4186):
    """
    Autocorrelation-based pitch detection (similar to current JS implementation).

    Parameters
    ----------
    audio_buffer : 1-D array-like of float
        Samples of a single analysis window.
    sample_rate : int or float
        Sampling rate of ``audio_buffer`` in Hz.
    min_freq, max_freq : float
        Lowest / highest fundamental frequency (Hz) that may be reported.
        They bound the lag range that is searched.

    Returns
    -------
    float or None
        Estimated fundamental frequency in Hz, or ``None`` when the window is
        too short, too quiet (RMS < 0.01), or has no sufficiently strong
        periodicity (normalized correlation <= 0.5).
    """
    samples = np.asarray(audio_buffer, dtype=float)
    size = len(samples)
    if size < 2:
        # Nothing to correlate; np.correlate would raise on an empty window.
        return None

    # Calculate RMS and reject near-silent windows
    rms = np.sqrt(np.mean(samples ** 2))
    if rms < 0.01:
        return None

    # Autocorrelation for non-negative lags, normalized so that lag 0 == 1
    correlation = np.correlate(samples, samples, mode='full')[size - 1:]
    correlation = correlation / correlation[0]

    # Lag bounds corresponding to the allowed frequency band
    min_period = max(1, int(sample_rate / max_freq))
    max_period = min(int(sample_rate / min_freq), size)

    # Skip the zero-lag lobe. The autocorrelation starts at its global maximum
    # (lag 0) and decays from there, so a plain argmax starting at min_period
    # picks a point on that initial slope for low notes and reports a far too
    # high frequency. Start the search at the first dip instead, i.e. the
    # first lag after which the correlation rises again.
    rising = np.flatnonzero(np.diff(correlation) > 0)
    if rising.size == 0:
        # Monotonically decaying correlation: no periodicity in this window.
        return None
    search_start = max(min_period, int(rising[0]))
    if search_start >= max_period:
        return None

    # Search for peak
    peak_idx = int(np.argmax(correlation[search_start:max_period])) + search_start

    if correlation[peak_idx] > 0.5:  # Threshold for confidence
        return sample_rate / peak_idx

    return None

if audio_files:
    # Try the detector on just the opening samples of the recording
    window_size = 4096
    test_window = y[:window_size]

    freq = autocorrelation_pitch(test_window, sr)
    if not freq:
        print("No pitch detected")
    else:
        print(f"Detected frequency (Autocorrelation): {freq:.2f} Hz")
        # Map the frequency to its note name
        note = librosa.hz_to_note(freq)
        print(f"Note: {note}")

### Method 2: YIN Algorithm (Current Python Backend)

In [None]:
def yin_pitch(audio_buffer, sample_rate, threshold=0.1):
    """
    YIN algorithm for pitch detection.

    Parameters
    ----------
    audio_buffer : 1-D array-like of float
        Samples of a single analysis window. Lags up to half its length
        are examined.
    sample_rate : int or float
        Sampling rate of ``audio_buffer`` in Hz.
    threshold : float
        A lag is accepted once its cumulative-mean-normalized difference
        drops below this value.

    Returns
    -------
    float or None
        ``sample_rate / tau`` for the selected lag ``tau``, or ``None`` when
        no lag falls below ``threshold`` or the buffer is too short.
    """
    samples = np.asarray(audio_buffer, dtype=float)
    half_size = len(samples) // 2
    if half_size < 2:
        # No lag >= 1 can be evaluated (and an empty buffer would otherwise
        # fail when indexing the normalized difference).
        return None

    # Step 1: Calculate difference function.
    # The inner per-sample loop is expressed as one array operation per lag;
    # a pure-Python double loop makes this unusably slow on long buffers.
    head = samples[:half_size]
    diff = np.zeros(half_size)
    for tau in range(1, half_size):
        delta = head - samples[tau:tau + half_size]
        diff[tau] = np.dot(delta, delta)

    # Step 2: Cumulative mean normalized difference.
    # cmnd[0] is 1 by definition; lags whose running sum is still zero (e.g.
    # digital silence) also stay at 1 instead of producing 0/0.
    cmnd = np.ones(half_size)
    running_sum = np.cumsum(diff[1:])
    taus = np.arange(1, half_size)
    np.divide(diff[1:], running_sum / taus, out=cmnd[1:], where=running_sum > 0)

    # Step 3: Find first minimum below threshold
    tau = 1
    while tau < half_size:
        if cmnd[tau] < threshold:
            # Walk downhill to the bottom of this dip
            while tau + 1 < half_size and cmnd[tau + 1] < cmnd[tau]:
                tau += 1
            return sample_rate / tau
        tau += 1

    return None

if audio_files:
    # Run YIN on the same window used for the autocorrelation demo
    freq = yin_pitch(test_window, sr)
    if not freq:
        print("No pitch detected")
    else:
        print(f"Detected frequency (YIN): {freq:.2f} Hz")
        # Map the frequency to its note name
        note = librosa.hz_to_note(freq)
        print(f"Note: {note}")

### Method 3: FFT-based Harmonic Product Spectrum

In [None]:
def hps_pitch(audio_buffer, sample_rate, num_harmonics=5):
    """
    Harmonic Product Spectrum method.

    The magnitude spectrum is multiplied by copies of itself decimated by
    2..num_harmonics, so a bin whose integer multiples also carry energy
    is reinforced.

    Parameters
    ----------
    audio_buffer : 1-D array-like of float
        Samples of a single analysis window.
    sample_rate : int or float
        Sampling rate of ``audio_buffer`` in Hz.
    num_harmonics : int
        Highest decimation factor used in the product (values below 1 are
        treated as 1, i.e. the plain spectrum).

    Returns
    -------
    float or None
        Frequency in Hz of the strongest product bin strictly between 27.5 Hz
        and 4186 Hz, or ``None`` when the buffer is too short or that band
        contains no energy.
    """
    samples = np.asarray(audio_buffer, dtype=float)
    n = len(samples)
    if n < 2:
        # The FFT of an empty buffer raises; a single sample has no spectrum.
        return None
    num_harmonics = max(1, int(num_harmonics))

    # Compute FFT and keep only the positive frequencies
    positive_fft = np.abs(fft(samples))[:n // 2]
    positive_freqs = fftfreq(n, 1 / sample_rate)[:n // 2]

    # Only bins that exist in every decimated copy receive the full product.
    # Bins beyond that point would be multiplied fewer times and therefore
    # live on a different scale, so they must not compete in the peak search.
    usable = len(positive_fft[::num_harmonics])
    hps = positive_fft[:usable].copy()

    # Multiply downsampled versions
    for h in range(2, num_harmonics + 1):
        hps *= positive_fft[::h][:usable]

    # Search for the peak inside the accepted band only, so that a strong
    # out-of-band bin (e.g. DC offset) cannot hide a valid in-band pitch.
    band_freqs = positive_freqs[:usable]
    candidates = np.flatnonzero((band_freqs > 27.5) & (band_freqs < 4186))
    if candidates.size == 0:
        return None

    peak_idx = candidates[np.argmax(hps[candidates])]
    if hps[peak_idx] <= 0:
        # All-zero product (e.g. silence): nothing to report.
        return None
    return float(band_freqs[peak_idx])

if audio_files:
    # Run HPS on the same window used for the earlier demos
    freq = hps_pitch(test_window, sr)
    if not freq:
        print("No pitch detected")
    else:
        print(f"Detected frequency (HPS): {freq:.2f} Hz")
        # Map the frequency to its note name
        note = librosa.hz_to_note(freq)
        print(f"Note: {note}")

### Method 4: Using librosa's piptrack

In [None]:
if audio_files:
    # Let librosa track candidate pitches for every STFT frame
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, fmin=27.5, fmax=4186)

    # In the first frame, take the bin with the largest magnitude
    first_frame_magnitudes = magnitudes[:, 0]
    pitch_idx = first_frame_magnitudes.argmax()
    pitch = pitches[pitch_idx, 0]

    if not pitch > 0:
        print("No pitch detected")
    else:
        print(f"Detected frequency (librosa piptrack): {pitch:.2f} Hz")
        note = librosa.hz_to_note(pitch)
        print(f"Note: {note}")

## 4. Compare Methods 1-3 on Full Audio

In [None]:
if audio_files:
    # Analyze audio in frames
    frame_size = 4096
    hop_length = 2048
    
    results = {
        'autocorrelation': [],
        'yin': [],
        'hps': [],
        'times': []
    }
    
    # range() excludes its stop value, so "+ 1" is required to include the
    # last full frame when (len(y) - frame_size) is a multiple of hop_length.
    for i in range(0, len(y) - frame_size + 1, hop_length):
        frame = y[i:i+frame_size]
        time = i / sr
        
        results['times'].append(time)
        results['autocorrelation'].append(autocorrelation_pitch(frame, sr))
        results['yin'].append(yin_pitch(frame, sr))
        results['hps'].append(hps_pitch(frame, sr))
    
    # Plot comparison
    fig, ax = plt.subplots(figsize=(14, 6))
    
    for method in ['autocorrelation', 'yin', 'hps']:
        # Frames without a detection become NaN so matplotlib leaves a gap
        freqs = [f if f else np.nan for f in results[method]]
        ax.plot(results['times'], freqs, label=method.upper(), marker='o', markersize=3, alpha=0.7)
    
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Frequency (Hz)')
    ax.set_title('Pitch Detection Methods Comparison')
    ax.legend()
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()
    
    print("\nMethod Statistics:")
    for method in ['autocorrelation', 'yin', 'hps']:
        total_frames = len(results[method])
        if total_frames == 0:
            # Audio shorter than one frame: avoid dividing by zero below.
            print(f"{method.upper()}: no frames analyzed (audio shorter than {frame_size} samples)")
            continue
        valid_detections = [f for f in results[method] if f is not None]
        detection_rate = len(valid_detections) / total_frames * 100
        print(f"{method.upper()}: {detection_rate:.1f}% detection rate")

## 5. Analyze Specific Segment

Pick a time segment to analyze in detail.

In [None]:
if audio_files:
    # Select time range (in seconds)
    start_time = 0.5
    end_time = 1.5
    
    start_sample = int(start_time * sr)
    end_sample = int(end_time * sr)
    # Slicing clips silently, so the segment may be shorter than requested
    # (or empty) when the recording ends before end_time.
    segment = y[start_sample:end_sample]
    
    if len(segment) == 0:
        # fft() and the detectors cannot work on an empty array.
        print(f"Selected range {start_time}-{end_time} s lies outside the recording "
              f"({len(y) / sr:.2f} s long); choose an earlier segment.")
    else:
        # Visualize segment
        fig, axes = plt.subplots(2, 1, figsize=(14, 8))
        
        # Time domain.
        # Derive each sample's timestamp from its index, so the axis stays
        # correct even when the segment was clipped at the end of the file
        # (stretching a linspace to end_time would mislabel it).
        time_axis = (start_sample + np.arange(len(segment))) / sr
        axes[0].plot(time_axis, segment)
        axes[0].set_title('Audio Segment (Time Domain)')
        axes[0].set_xlabel('Time (s)')
        axes[0].set_ylabel('Amplitude')
        
        # Frequency domain
        fft_data = np.abs(fft(segment))
        freqs = fftfreq(len(segment), 1/sr)
        
        # Plot only positive frequencies up to 2000 Hz
        positive_mask = (freqs > 0) & (freqs < 2000)
        axes[1].plot(freqs[positive_mask], fft_data[positive_mask])
        axes[1].set_title('Frequency Spectrum')
        axes[1].set_xlabel('Frequency (Hz)')
        axes[1].set_ylabel('Magnitude')
        axes[1].grid(True, alpha=0.3)
        
        plt.tight_layout()
        plt.show()
        
        # Detect pitch in segment
        print("\nPitch Detection Results for Segment:")
        for method_name, method_func in [('Autocorrelation', autocorrelation_pitch), 
                                           ('YIN', yin_pitch), 
                                           ('HPS', hps_pitch)]:
            freq = method_func(segment, sr)
            if freq:
                note = librosa.hz_to_note(freq)
                print(f"{method_name:15s}: {freq:7.2f} Hz -> {note}")
            else:
                print(f"{method_name:15s}: No pitch detected")

## 6. Save Your Findings

Document what you learned and save visualizations to the results folder.

In [None]:
# Save your notes and findings
# `notes` is a fill-in template: replace the [DATE] / [FILENAME] placeholders
# and complete the bullet lists before saving.
notes = """
Experiment Date: [DATE]
Audio File: [FILENAME]

Findings:
- 
- 

Best performing method:
- 

Next steps:
- 
"""

# Uncomment to save
# (mode 'a' appends, so each run adds a new entry rather than overwriting
# earlier notes)
# with open('../results/experiment_notes.txt', 'a') as f:
#     f.write(notes)

print("Remember to document your findings!")