In [3]:
! apt-get update && apt-get install -y libsndfile1 ffmpeg
! pip install Cython
! pip install packaging
! pip -q install nemo_toolkit['asr'] Cython packaging
! pip install pandas torch

In [4]:
import os
import torch
import pandas as pd
from nemo.collections.asr.models import EncDecCTCModel

In [5]:
# Checkpoint file (.nemo) that holds the model to restore.
model_path = "best_model_checkpoint/quartznet_15x5_final.nemo"

# Rebuild the CTC model from the checkpoint; `quartznet` is the module-level
# model object that transcribe_audio_files() relies on.
quartznet = EncDecCTCModel.restore_from(model_path)

# If a CUDA device is available, report it and move the model onto it;
# otherwise the model stays on whatever device restore_from placed it on.
if torch.cuda.is_available():
    print('Cuda')
    quartznet.to('cuda')

In [6]:
def transcribe_audio_files(folder_path, output_csv):
    """Transcribe every ``.wav`` file in a folder and write the results to a CSV.

    The module-level ``quartznet`` model is used for inference. All matching
    files are passed to ``quartznet.transcribe`` in a single call, and each
    returned transcription is paired with the file it was produced from.

    Args:
        folder_path: Directory to scan (non-recursively) for files whose name
            ends with ``.wav``.
        output_csv: Destination path of the CSV file. It gets two columns:
            ``audio`` (file name without its final extension) and
            ``transcript``.

    Raises:
        ValueError: If the model returns a different number of transcriptions
            than the number of files it was given.
    """
    # List the directory exactly once and keep only WAV files. The same list
    # drives both the model input and the output rows, so every transcription
    # stays attached to its own file even when the folder contains other file
    # types. Sorting makes the row order deterministic.
    wav_names = sorted(
        name for name in os.listdir(folder_path) if name.endswith(".wav")
    )
    file_paths = [os.path.join(folder_path, name) for name in wav_names]

    if file_paths:
        # Inference only: no gradients are needed.
        with torch.no_grad():
            transcriptions = quartznet.transcribe(file_paths)
    else:
        # Nothing to transcribe; skip the model call and emit a header-only CSV.
        transcriptions = []

    if len(transcriptions) != len(file_paths):
        # zip() would silently truncate and hide the mismatch.
        raise ValueError(
            "Expected {} transcriptions but the model returned {}".format(
                len(file_paths), len(transcriptions)
            )
        )

    results = []
    for name, transcription in zip(wav_names, transcriptions):
        results.append({
            # splitext drops only the final extension ("take.wav.wav" -> "take.wav"),
            # whereas str.replace would remove every ".wav" occurrence.
            'audio': os.path.splitext(name)[0],
            # NOTE(review): the NeMo install is unpinned; some releases appear to
            # return Hypothesis-like objects exposing `.text` rather than plain
            # strings. Use `.text` when present, otherwise the value as returned.
            'transcript': getattr(transcription, 'text', transcription),
        })

    # Explicit columns keep the header present even when there are no rows.
    df = pd.DataFrame(results, columns=['audio', 'transcript'])
    df.to_csv(output_csv, index=False)

In [None]:
# Input directory scanned for .wav files (placeholder path: set before running).
folder_path = "path/to/audio_folder"
# CSV file that receives one row per transcribed audio file.
output_csv = "transcriptions.csv"

# Run the batch transcription and write the CSV.
transcribe_audio_files(folder_path=folder_path, output_csv=output_csv)