In [None]:
# %%
!pip install transformers pandas gdown

# %%
import os

import gdown
import pandas as pd
import torch
from transformers import pipeline

# Download prompts CSV
PROMPTS_URL = 'https://drive.google.com/uc?id=18kFuhe47eKXJpKrESkPdGzisNtGnBp0q'
gdown.download(PROMPTS_URL, 'prompts_asr.csv', quiet=False)

# Unzip uploaded audio archive into audio_files/
os.makedirs('audio_files', exist_ok=True)
!unzip -q private-test-data-asr.zip -d audio_files

# Read prompt paths
df = pd.read_csv('prompts_asr.csv')

# %%
# Load Whisper ASR pipeline
# Use the first GPU when PyTorch can actually see one, otherwise run on CPU
# (-1). The CUDA_VISIBLE_DEVICES environment variable is not a reliable
# signal: it is normally unset on GPU machines (which would force CPU) and
# may be set on machines without a usable CUDA runtime.
device = 0 if torch.cuda.is_available() else -1
asr = pipeline(
    task='automatic-speech-recognition',
    model='vinai/PhoWhisper-small',
    device=device
)

In [None]:
# Spot-check: transcribe a reproducible random sample of (at most) 10 prompts
# and write one "<path> | <transcript>" line per file.
# Cap n at len(df) so a prompt list shorter than 10 rows does not raise.
sample_df = df.sample(n=min(10, len(df)), random_state=42)

audio_root = 'audio_files/private-test-data-asr'
output_path = 'sample_transcripts.txt'
# Explicit UTF-8: the transcripts may contain non-ASCII characters, and the
# platform default encoding is not guaranteed to be able to encode them.
with open(output_path, 'w', encoding='utf-8') as fout:
    for rel_path in sample_df['path']:
        result = asr(os.path.join(audio_root, rel_path))
        # Lower-case, trim whitespace, drop trailing periods, then trim again
        # so text like "abc ." does not leave a dangling space.
        text = result['text'].lower().strip().rstrip('.').rstrip()
        fout.write(f"{rel_path} | {text}\n")

print(f"✅ Sample transcription complete. Results saved to {output_path}")

In [None]:
# %%
# Transcribe and write to transcripts.txt
# One lower-cased, whitespace-trimmed transcript per line, in the same order
# as the rows of df.
# NOTE(review): the sample cell additionally strips trailing periods; confirm
# whether the final output is expected to do the same.
audio_root = 'audio_files/private-test-data-asr'
output_path = 'transcripts.txt'
# Explicit UTF-8: the transcripts may contain non-ASCII characters, and the
# platform default encoding is not guaranteed to be able to encode them.
with open(output_path, 'w', encoding='utf-8') as fout:
    for rel_path in df['path']:
        result = asr(os.path.join(audio_root, rel_path))
        text = result['text'].lower().strip()
        fout.write(text + '\n')

print(f"✅ Transcription complete. Results saved to {output_path}")