In [10]:
import os
import pandas as pd
import torch
import whisper
from tqdm import tqdm

# Location of the FFmpeg executable used to decode the audio files.
ffmpeg_exe = "C:/ffmpeg/bin/ffmpeg.exe"

# Kept for backward compatibility with anything else that reads this variable.
# NOTE(review): openai-whisper itself does not appear to consult
# WHISPER_FFMPEG; it runs the bare "ffmpeg" command, so the executable's
# folder must be on PATH for the setting above to have any effect.
os.environ["WHISPER_FFMPEG"] = ffmpeg_exe

# Make the configured FFmpeg build discoverable through PATH. The directory
# check keeps this a no-op on machines where that folder does not exist.
ffmpeg_dir = os.path.normpath(os.path.dirname(ffmpeg_exe))
path_entries = os.environ.get("PATH", "").split(os.pathsep)
if os.path.isdir(ffmpeg_dir) and ffmpeg_dir not in path_entries:
    os.environ["PATH"] = os.pathsep.join([ffmpeg_dir] + path_entries)

# Use CPU as the device
device = "cpu"

# Models to choose from (values as listed in the Whisper README):
#
#   size    params  English-only  multilingual  memory  relative speed
#   tiny    39 M    tiny.en       tiny          ~1 GB   ~32x
#   base    74 M    base.en       base          ~1 GB   ~16x
#   small   244 M   small.en      small         ~2 GB   ~6x
#   medium  769 M   medium.en     medium        ~5 GB   ~2x
print(f"Using device: {device}")
model = whisper.load_model("small.en", device=device)

# Half precision is not available on CPU; requesting FP32 explicitly avoids
# the per-file fallback warning. Other devices keep Whisper's default (FP16).
use_fp16 = device != "cpu"

# Directory containing the .mp3 files
audio_dir = "audio test"

# Collect the (id, file name) pairs up front so that the progress bar counts
# only the files that will really be transcribed and a badly named file is
# reported before any expensive work starts, instead of crashing mid-run.
audio_files = []
for file_name in os.listdir(audio_dir):
    stem, extension = os.path.splitext(file_name)
    if extension.lower() != ".mp3":
        continue
    if not os.path.isfile(os.path.join(audio_dir, file_name)):
        continue
    try:
        # The id is the numeric file name (e.g., "1.mp3" -> 1)
        file_id = int(stem)
    except ValueError:
        print(f"Skipping {file_name!r}: file name is not a numeric id")
        continue
    audio_files.append((file_id, file_name))

# os.listdir returns entries in arbitrary order; sort by id so the resulting
# CSV is the same from run to run.
audio_files.sort()

# List to store the transcription data
transcriptions = []

for file_id, file_name in tqdm(audio_files, desc="processing mp3 files"):
    file_path = os.path.join(audio_dir, file_name)

    # Transcribe the audio file and keep only the recognised text
    result = model.transcribe(file_path, fp16=use_fp16)
    transcriptions.append({"id": file_id, "transcription": result["text"]})

# Create a DataFrame from the transcriptions list; the explicit column list
# keeps the header intact even when no file was transcribed.
df = pd.DataFrame(transcriptions, columns=["id", "transcription"])

# Save the DataFrame to a CSV file
df.to_csv("transcriptions.csv", index=False)

print(df)

Using device: cpu


processing mp3 files: 100%|██████████████████████| 4/4 [25:10<00:00, 377.59s/it]

   id                                      transcription
0   1   The story of two toys. There seems to be no s...
1   2   I'm glad you decided to buy this place. I'm s...
2   3   All right. Pat. Crankier. My door was as ugly...
3   4   The one man I love is marrying God a kid. Are...



