# TranscribeAI - Data Exploration

This notebook explores audio data and demonstrates basic audio processing techniques used in the TranscribeAI project.

In [None]:
# Import required libraries
import sys
# Extend the module search path so the project-local `audio_processing` import
# below can be resolved (presumably it lives in ../src -- confirm against the
# repo layout). The relative path is resolved against the kernel's working directory.
sys.path.append('../src')

import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from audio_processing import AudioLoader, FeatureExtractor, PitchDetector, OnsetDetector

# IPython magic (not Python syntax): render figures inline in the notebook output
%matplotlib inline
# Apply a Matplotlib style sheet to every figure created after this point
# NOTE(review): the 'seaborn-v0_8-*' style names exist only in newer Matplotlib
# releases -- confirm the pinned version supports it
plt.style.use('seaborn-v0_8-darkgrid')

## 1. Load Audio File

In [None]:
# Project loader, configured with a 22050 Hz sample rate
audio_loader = AudioLoader(sample_rate=22050)

# Clip to explore -- point this at your own recording
audio_path = '../data/raw/sample.wav'
audio, sr = audio_loader.load_audio(audio_path)

# Summarize what was loaded: array shape, sample rate, and length in seconds
print(
    f"Audio shape: {audio.shape}",
    f"Sample rate: {sr} Hz",
    f"Duration: {len(audio)/sr:.2f} seconds",
    sep="\n",
)

## 2. Visualize Waveform

In [None]:
# Draw the raw signal on an explicitly created axes instead of pyplot's implicit one
wave_fig, wave_ax = plt.subplots(figsize=(14, 4))
librosa.display.waveshow(audio, sr=sr, ax=wave_ax)
wave_ax.set(title='Audio Waveform', xlabel='Time (s)', ylabel='Amplitude')
wave_fig.tight_layout()
plt.show()

## 3. Extract Features

In [None]:
# Feature extractor operating at the sample rate of the loaded clip
feature_extractor = FeatureExtractor(sample_rate=sr)

# Mel spectrogram of the whole signal
mel_spec = feature_extractor.extract_mel_spectrogram(audio)

# Show it with time on the x-axis and a mel-scaled frequency axis.
# NOTE(review): hop_length=512 and the dB colorbar format assume the extractor
# uses the same hop and returns dB-scaled values -- confirm in FeatureExtractor.
mel_fig, mel_ax = plt.subplots(figsize=(14, 5))
mel_img = librosa.display.specshow(
    mel_spec, sr=sr, hop_length=512, x_axis='time', y_axis='mel', ax=mel_ax
)
mel_fig.colorbar(mel_img, ax=mel_ax, format='%+2.0f dB')
mel_ax.set_title('Mel Spectrogram')
mel_fig.tight_layout()
plt.show()

## 4. Pitch Detection

In [None]:
# Pitch detector operating at the clip's sample rate
pitch_detector = PitchDetector(sample_rate=sr)

# Smoothed pitch contour; the result is indexed below by 'times' and 'f0'
pitch_info = pitch_detector.extract_pitch_contour(audio, smooth=True)

# Plot the contour on an explicit axes
pitch_fig, pitch_ax = plt.subplots(figsize=(14, 4))
pitch_ax.plot(pitch_info['times'], pitch_info['f0'], label='Pitch (Hz)')
pitch_ax.set(xlabel='Time (s)', ylabel='Frequency (Hz)', title='Pitch Contour')
pitch_ax.legend()
pitch_ax.grid(True, alpha=0.3)
pitch_fig.tight_layout()
plt.show()

# Print every summary statistic the detector reports for the contour
stats = pitch_detector.get_pitch_statistics(pitch_info['f0'])
print("\nPitch Statistics:")
for key, value in stats.items():
    print(f"{key}: {value}")

## 5. Onset Detection

In [None]:
# Onset detector operating at the clip's sample rate
onset_detector = OnsetDetector(sample_rate=sr)

# Onset positions, requested in time units
onsets = onset_detector.detect_onsets(audio, units='time')

# Report how many onsets were found and preview the first 10
print(
    f"Detected {len(onsets)} onsets",
    f"Onset times: {onsets[:10]}...",
    sep="\n",
)

# Overlay the onsets as dashed vertical lines on the waveform
onset_fig, onset_ax = plt.subplots(figsize=(14, 4))
librosa.display.waveshow(audio, sr=sr, alpha=0.6, ax=onset_ax)
onset_ax.vlines(onsets, -1, 1, color='r', alpha=0.8, linestyle='--', label='Onsets')
onset_ax.set(
    title='Audio Waveform with Detected Onsets',
    xlabel='Time (s)',
    ylabel='Amplitude',
)
onset_ax.legend()
onset_fig.tight_layout()
plt.show()

# Summary statistics need the clip length in seconds
duration = len(audio) / sr
onset_stats = onset_detector.get_onset_statistics(onsets, duration)
print("\nOnset Statistics:")
for key, value in onset_stats.items():
    print(f"{key}: {value}")

## 6. Combined Visualization

In [None]:
# Three stacked panels: waveform + onsets, mel spectrogram, pitch contour
fig, axes = plt.subplots(3, 1, figsize=(14, 10))
ax_wave, ax_mel, ax_pitch = axes

# Top panel: waveform with onsets drawn as dashed red lines
librosa.display.waveshow(audio, sr=sr, ax=ax_wave, alpha=0.6)
ax_wave.vlines(onsets, -1, 1, color='r', alpha=0.8, linestyle='--')
ax_wave.set(title='Waveform with Onsets', xlabel='Time (s)')

# Middle panel: mel spectrogram with its own colorbar
img = librosa.display.specshow(
    mel_spec,
    sr=sr,
    hop_length=512,
    x_axis='time',
    y_axis='mel',
    ax=ax_mel,
)
ax_mel.set_title('Mel Spectrogram')
fig.colorbar(img, ax=ax_mel, format='%+2.0f dB')

# Bottom panel: pitch contour
ax_pitch.plot(pitch_info['times'], pitch_info['f0'], color='blue', linewidth=2)
ax_pitch.set(title='Pitch Contour', xlabel='Time (s)', ylabel='Frequency (Hz)')
ax_pitch.grid(True, alpha=0.3)

fig.tight_layout()
plt.show()

## 7. Convert to MIDI Notes

In [None]:
# Quantize pitch to MIDI notes. Coerce to a float array so the NaN mask below
# works even if the detector hands back a plain sequence.
midi_notes = np.asarray(
    pitch_detector.quantize_pitch(pitch_info['midi_notes']), dtype=float
)

# Filter out NaN values
valid_mask = ~np.isnan(midi_notes)
valid_midi = midi_notes[valid_mask]

if valid_midi.size == 0:
    # np.min / np.max raise ValueError on an empty array, so a clip with no
    # detected notes would otherwise abort the notebook here.
    print("No valid MIDI notes detected; skipping note range and histogram.")
else:
    lowest_midi = np.min(valid_midi)
    highest_midi = np.max(valid_midi)

    print(f"Detected MIDI note range: {int(lowest_midi)} to {int(highest_midi)}")
    print(f"Note range: {pitch_detector.midi_to_note_name(lowest_midi)} to {pitch_detector.midi_to_note_name(highest_midi)}")

    # One bin per semitone, centered on the integer MIDI numbers. A fixed
    # bins=50 puts edges at arbitrary fractions of the range, so some bars
    # would cover two notes and others none.
    semitone_edges = np.arange(np.floor(lowest_midi) - 0.5, np.ceil(highest_midi) + 1.5)

    # Plot MIDI notes histogram
    plt.figure(figsize=(12, 4))
    plt.hist(valid_midi, bins=semitone_edges, edgecolor='black', alpha=0.7)
    plt.xlabel('MIDI Note Number')
    plt.ylabel('Frequency')
    plt.title('Distribution of Detected MIDI Notes')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## Summary

This notebook demonstrated:
1. Loading and visualizing audio data
2. Extracting audio features (mel spectrogram)
3. Detecting pitch using the pYIN algorithm
4. Detecting note onsets
5. Converting pitch to MIDI notes

These techniques form the foundation of the TranscribeAI transcription pipeline.