In [19]:
import os
import pandas as pd
from pydub import AudioSegment
from faster_whisper import WhisperModel


AUDIO_DIR = '../audios-wav/12-audios-ar-en'
DIARIZATION_OUTPUT_DIR = '../results/Task 2 Results'
GROUND_TRUTH_TRANSCRIPTS_DIR = '../data'
OUTPUT_TRANSCRIPTS_DIR = '../Task 3 Results'


# Hasan Run This only

In [8]:
model_size = "large-v2" 
compute_type = "int8" 

print(f"Loading FasterWhisper model: {model_size} with compute type: {compute_type} for CPU...")
try:
    model = WhisperModel(model_size, device="cpu", compute_type=compute_type)
    print("FasterWhisper model loaded successfully!")
except Exception as e:
    print(f"Error loading FasterWhisper model: {e}")
    print("If you encounter 'out of memory' errors with 'large-v2', try 'medium' or 'small'.")
    print("Please ensure you have enough disk space and a stable internet connection if downloading the model weights.")



Loading FasterWhisper model: large-v2 with compute type: int8 for CPU...


KeyboardInterrupt: 

In [21]:
import glob
import json

# Print resolved paths for debugging
print(f"Resolved AUDIO_DIR: {os.path.abspath(AUDIO_DIR)}")
print(f"Resolved DIARIZATION_OUTPUT_DIR: {os.path.abspath(DIARIZATION_OUTPUT_DIR)}")

# List all audio files in both subfolders of 12-audios-ar-en (recursively)
subfolders = ['6-audios-ar', '6-audios-en']
audio_files = []
for sub in subfolders:
    sub_path = os.path.join(AUDIO_DIR, sub)
    found = glob.glob(os.path.join(sub_path, '**', '*.wav'), recursive=True)
    audio_files.extend(found)

audio_files = sorted(audio_files)

if not audio_files:
    print(f"No WAV audio files found in subfolders of '{AUDIO_DIR}' (recursive search). Please check the path and file extensions.")
else:
    print(f"Found {len(audio_files)} audio files:")
    for af in audio_files:
        print(f"- {os.path.relpath(af, AUDIO_DIR)}")

print("\nAttempting to load central diarization summary from 'pyannote_summary.csv'...")

summary_csv_path = os.path.join(DIARIZATION_OUTPUT_DIR, 'pyannote_summary.csv')

if os.path.exists(summary_csv_path):
    diarization_summary_df = pd.read_csv(summary_csv_path)
    print("Diarization summary 'pyannote_summary.csv' loaded successfully!")
    print(diarization_summary_df.head())

    if 'output_file' in diarization_summary_df.columns and not diarization_summary_df.empty:
        sample_json_path = diarization_summary_df['output_file'].iloc[0]
        # If path is already relative to project root, use as-is
        if sample_json_path.startswith("../results/"):
            full_sample_json_path = os.path.normpath(sample_json_path)
        else:
            full_sample_json_path = os.path.normpath(os.path.join(DIARIZATION_OUTPUT_DIR, sample_json_path))
        print(f"Resolved sample JSON path: {full_sample_json_path}")

        if os.path.exists(full_sample_json_path):
            print(f"\nSuccessfully located a sample JSON file: {full_sample_json_path}")
        else:
            print(f"\nWARNING: Could not find sample JSON file at '{full_sample_json_path}'.")
            print("Please ensure 'output_file' paths in pyannote_summary.csv are correct relative to the project root or DIARIZATION_OUTPUT_DIR.")
    else:
        print("\nWARNING: 'output_file' column not found or summary CSV is empty.")

else:
    print(f"ERROR: 'pyannote_summary.csv' not found at '{summary_csv_path}'.")
    print("Please ensure your Task 2 output structure matches this expectation.")

Resolved AUDIO_DIR: /Users/s.n.h/Voice-AI/Audio-AI/audios-wav/12-audios-ar-en
Resolved DIARIZATION_OUTPUT_DIR: /Users/s.n.h/Voice-AI/Audio-AI/results/Task 2 Results
Found 12 audio files:
- 6-audios-ar/1_speaker_ar/solo10_ar.wav
- 6-audios-ar/1_speaker_ar/solo3_ar.wav
- 6-audios-ar/2_speakers_ar/two_speakers10_ar.wav
- 6-audios-ar/2_speakers_ar/two_speakers7_ar.wav
- 6-audios-ar/3_speakers_ar/three_speakers1_ar.wav
- 6-audios-ar/3_speakers_ar/three_speakers5_ar.wav
- 6-audios-en/1_speaker_en/solo2_en.wav
- 6-audios-en/1_speaker_en/solo3_en.wav
- 6-audios-en/2_speakers_en/two_speakers7_en.wav
- 6-audios-en/2_speakers_en/two_speakers8_en.wav
- 6-audios-en/3_speakers_en/three_speakers2_en.wav
- 6-audios-en/3_speakers_en/three_speakers8_en.wav

Attempting to load central diarization summary from 'pyannote_summary.csv'...
Diarization summary 'pyannote_summary.csv' loaded successfully!
                                               audio  n_segments  runtime_sec  \
0  ../audios-wav/12-audios-

In [22]:
os.makedirs(OUTPUT_TRANSCRIPTS_DIR, exist_ok=True)
print(f"Ensured output directory for transcripts exists: {os.path.abspath(OUTPUT_TRANSCRIPTS_DIR)}")

# Iterate through each audio file found
for audio_path in audio_files:
    audio_filename = os.path.basename(audio_path)
    print(f"\nProcessing audio file: {audio_filename}")

    # Define output path for the transcript
    transcript_output_path = os.path.join(OUTPUT_TRANSCRIPTS_DIR, f"{os.path.splitext(audio_filename)[0]}.txt")

    try:
        # Transcribe the audio file
        segments, info = model.transcribe(audio_path, beam_size=5)

        # Prepare text for saving
        full_transcript = []
        for segment in segments:
            full_transcript.append(segment.text)

        # Join segments and save to a text file
        with open(transcript_output_path, 'w', encoding='utf-8') as f:
            f.write("\n".join(full_transcript))

        print(f"Transcription for '{audio_filename}' completed and saved to: {transcript_output_path}")

    except Exception as e:
        print(f"Error transcribing '{audio_filename}': {e}")

print("\nTranscription process complete for all found audio files.")


Ensured output directory for transcripts exists: /Users/s.n.h/Voice-AI/Audio-AI/Task 3 Results

Processing audio file: solo10_ar.wav
Error transcribing 'solo10_ar.wav': name 'model' is not defined

Processing audio file: solo3_ar.wav
Error transcribing 'solo3_ar.wav': name 'model' is not defined

Processing audio file: two_speakers10_ar.wav
Error transcribing 'two_speakers10_ar.wav': name 'model' is not defined

Processing audio file: two_speakers7_ar.wav
Error transcribing 'two_speakers7_ar.wav': name 'model' is not defined

Processing audio file: three_speakers1_ar.wav
Error transcribing 'three_speakers1_ar.wav': name 'model' is not defined

Processing audio file: three_speakers5_ar.wav
Error transcribing 'three_speakers5_ar.wav': name 'model' is not defined

Processing audio file: solo2_en.wav
Error transcribing 'solo2_en.wav': name 'model' is not defined

Processing audio file: solo3_en.wav
Error transcribing 'solo3_en.wav': name 'model' is not defined

Processing audio file: two_s