# LibriSpeech Data Exploration

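This notebook explores a small sample of the LibriSpeech dataset and walks through the first step of the data pipeline for the LMU-based ASR system: loading an utterance, inspecting its raw audio, and converting it to a log-mel spectrogram.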

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
import torchaudio
from datasets import load_dataset

%matplotlib inline

## Load LibriSpeech Dataset

In [None]:
# Load only the first 10 examples of the "train.100" split of the "clean" config.
# NOTE(review): the `[:10]` slice presumably applies after the split has been
# prepared, so the first run may still download far more than 10 utterances — confirm.
dataset = load_dataset(
    "librispeech_asr",
    "clean",
    split="train.100[:10]",
)

# Quick sanity check: number of rows and the column schema.
print(
    f"Dataset size: {len(dataset)}",
    f"Features: {dataset.features}",
    sep="\n",
)

## Explore Audio Features

In [None]:
# Take the first example; the mel spectrogram cell below reuses `sample`.
sample = dataset[0]

# Show the transcript, the waveform array's shape, and its sampling rate.
print(
    f"Text: {sample['text']}",
    f"Audio shape: {sample['audio']['array'].shape}",
    f"Sample rate: {sample['audio']['sampling_rate']}",
    sep="\n",
)

## Mel Spectrogram Conversion

In [None]:
# Convert the sample's waveform to a log-mel spectrogram and plot it.
audio_array = sample['audio']['array']
sample_rate = sample['audio']['sampling_rate']

# Framing in samples: 25 ms analysis window advanced in 10 ms steps.
# Kept in variables so the plot below uses exactly the same hop as the analysis.
hop_length = int(0.010 * sample_rate)
win_length = int(0.025 * sample_rate)

# Extract mel features (80 mel bands).
# n_fft is left at librosa's default, so each windowed frame is zero-padded
# up to that FFT size before the transform.
mel_features = librosa.feature.melspectrogram(
    y=audio_array,
    sr=sample_rate,
    n_mels=80,
    hop_length=hop_length,
    win_length=win_length,
)

# Convert to log scale (dB relative to the maximum value, so the peak sits at 0 dB).
log_mel = librosa.power_to_db(mel_features, ref=np.max)

print(f"Mel spectrogram shape: {log_mel.shape}")

# Plot the mel spectrogram.
fig, ax = plt.subplots(figsize=(12, 4))
img = librosa.display.specshow(
    log_mel,
    sr=sample_rate,
    # Must match the hop used for analysis; otherwise specshow falls back to
    # its own default hop and the time axis is mis-scaled.
    hop_length=hop_length,
    x_axis='time',
    y_axis='mel',
    ax=ax,
)
# Bind the colorbar to the image returned by specshow rather than relying on
# pyplot's implicit "current image".
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title('Mel Spectrogram')
plt.show()