In [3]:
import os
import librosa
import csv
from tqdm import tqdm

# Build a CSV manifest (audio_path, transcript, duration) for every .flac
# utterance found under DATA_DIR, which is laid out as
# <speaker>/<chapter>/<utterance>.flac with one *.trans.txt file per chapter.
DATA_DIR = "../data/librispeech/LibriSpeech/dev-clean"
OUT_CSV = "../data/librispeech/dev_clean_manifest.csv"


def _list_subdirs(parent):
    """Return the sub-directories of `parent` as (name, path) pairs, sorted by name.

    Plain files (e.g. OS metadata files) are skipped so that a stray entry does
    not crash the nested directory walk. Sorting makes the manifest row order
    reproducible, since os.listdir() returns entries in arbitrary order.
    """
    candidates = ((name, os.path.join(parent, name)) for name in sorted(os.listdir(parent)))
    return [(name, path) for name, path in candidates if os.path.isdir(path)]


def _read_transcripts(chapter_path):
    """Return {utterance_id: transcript} read from the *.trans.txt files in `chapter_path`.

    Each non-blank line is "<utterance_id> <text>". Blank lines are ignored and
    a line holding only an id maps to an empty transcript. A chapter without
    any transcript file yields an empty dict instead of raising IndexError.
    """
    transcripts = {}
    for name in sorted(os.listdir(chapter_path)):
        if not name.endswith(".trans.txt"):
            continue
        # Explicit encoding: the platform default (e.g. cp1252 on Windows)
        # would otherwise decide how the text is decoded.
        with open(os.path.join(chapter_path, name), "r", encoding="utf-8") as f:
            for line in f:
                utt_id, _, text = line.strip().partition(" ")
                if utt_id:
                    transcripts[utt_id] = text
    return transcripts


rows = []
missing_transcripts = []  # audio files that have no transcript entry

for _, speaker_path in tqdm(_list_subdirs(DATA_DIR)):
    for _, chapter_path in _list_subdirs(speaker_path):
        transcripts = _read_transcripts(chapter_path)

        # iterate audio files
        for audio_file in sorted(os.listdir(chapter_path)):
            if not audio_file.endswith(".flac"):
                continue

            # splitext removes only the trailing extension; str.replace would
            # also strip ".flac" occurring elsewhere in the file name.
            utt_id = os.path.splitext(audio_file)[0]
            audio_path = os.path.join(chapter_path, audio_file)

            if utt_id not in transcripts:
                # Record and skip instead of aborting the whole run on a KeyError.
                missing_transcripts.append(audio_path)
                continue

            # sr=None keeps the file's native sampling rate, so the duration is
            # the number of decoded samples divided by that rate.
            # NOTE(review): this decodes the full waveform only to measure its
            # length; a header-only duration query would be faster.
            y, sr = librosa.load(audio_path, sr=None)
            duration = len(y) / sr

            rows.append([
                # Absolute path with forward slashes, also on Windows.
                os.path.abspath(audio_path).replace("\\", "/"),
                transcripts[utt_id],
                round(duration, 3)
            ])

# write CSV (newline="" as required by the csv module; UTF-8 matches the
# default encoding pandas.read_csv uses when the manifest is loaded back)
with open(OUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["audio_path", "transcript", "duration"])
    writer.writerows(rows)

print(f"Manifest saved to: {OUT_CSV}")
print("Total entries:", len(rows))
if missing_transcripts:
    print("Skipped audio files without a transcript:", len(missing_transcripts))


100%|██████████| 40/40 [00:28<00:00,  1.39it/s]

Manifest saved to: ../data/librispeech/dev_clean_manifest.csv
Total entries: 2703





In [4]:
import pandas as pd

# Read the manifest CSV back from disk and preview its first five rows.
df = pd.read_csv(filepath_or_buffer="../data/librispeech/dev_clean_manifest.csv")
df.head(n=5)


Unnamed: 0,audio_path,transcript,duration
0,c:/Users/Asus/Desktop/Git-Hub Projects/ASR/dat...,MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CL...,5.855
1,c:/Users/Asus/Desktop/Git-Hub Projects/ASR/dat...,NOR IS MISTER QUILTER'S MANNER LESS INTERESTIN...,4.815
2,c:/Users/Asus/Desktop/Git-Hub Projects/ASR/dat...,HE TELLS US THAT AT THIS FESTIVE SEASON OF THE...,12.485
3,c:/Users/Asus/Desktop/Git-Hub Projects/ASR/dat...,HE HAS GRAVE DOUBTS WHETHER SIR FREDERICK LEIG...,9.9
4,c:/Users/Asus/Desktop/Git-Hub Projects/ASR/dat...,LINNELL'S PICTURES ARE A SORT OF UP GUARDS AND...,29.4
