In [2]:
import os
from jiwer import wer

speakers = ['F01', 'F03', 'F04', 'M01', 'M02', 'M03', 'M04', 'M05']

# Identifier of the evaluated model; it names both the run directory and the
# per-speaker transcript files.
# NOTE(review): `id` shadows the builtin of the same name. The name is kept
# because other notebook cells may read it -- rename once that is confirmed.
# id = "wav2vec2"
id = "wav2vec2-TORGO-original"

# Loop-invariant: every speaker's files live in the same run directory.
output_dir = f'runs/{id}'
os.makedirs(output_dir, exist_ok=True)


def _read_stripped_lines(path):
    """Return the lines of the text file at *path*, stripped of surrounding whitespace."""
    with open(path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]


def _corpus_wer(references, hypotheses):
    """Return jiwer's WER over the pairs, or NaN when there is nothing to score.

    An empty reference list has no words to normalise by, so the call is
    skipped instead of being handed to jiwer.
    """
    if not references:
        return float('nan')
    return wer(references, hypotheses)


def _write_pairs(path, hypotheses, references):
    """Write each recognized/ground-truth pair to *path* as a two-line record."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(
            f"Recognized: {hyp}\nGround Truth: {ref}\n\n"
            for hyp, ref in zip(hypotheses, references)
        )


# Evaluate each speaker separately: split the utterances into isolated words
# and sentences (by ground-truth word count), score both groups, and save the
# aligned pairs for inspection.
for speaker_id in speakers:
    recognized_texts_path = f'{output_dir}/{id}_{speaker_id}_recognized_texts.txt'
    ground_truth_texts_path = f'{output_dir}/{id}_{speaker_id}_ground_truth_texts.txt'

    recognized_texts = _read_stripped_lines(recognized_texts_path)
    ground_truth_texts = _read_stripped_lines(ground_truth_texts_path)

    # The two files are paired line by line; zip() would silently drop the
    # surplus lines and report a WER over a truncated (possibly misaligned) set.
    if len(recognized_texts) != len(ground_truth_texts):
        raise ValueError(
            f"Line count mismatch for speaker {speaker_id}: "
            f"{len(recognized_texts)} recognized vs "
            f"{len(ground_truth_texts)} ground truth lines"
        )

    recognized_words = []
    recognized_sentences = []
    ground_truth_words = []
    ground_truth_sentences = []
    skipped_empty_references = 0

    for recognized_text, ground_truth_text in zip(recognized_texts, ground_truth_texts):
        reference_word_count = len(ground_truth_text.split())
        if reference_word_count == 0:
            # A blank ground-truth line (e.g. a trailing empty line) carries no
            # reference words; jiwer is expected to reject empty references, so
            # the pair is left out of both groups.
            skipped_empty_references += 1
        elif reference_word_count == 1:
            recognized_words.append(recognized_text)
            ground_truth_words.append(ground_truth_text)
        else:
            recognized_sentences.append(recognized_text)
            ground_truth_sentences.append(ground_truth_text)

    if skipped_empty_references:
        print(
            f"Skipped {skipped_empty_references} pair(s) with an empty ground truth "
            f"for speaker {speaker_id}"
        )

    # Calculate WER for word-level and sentence-level cases
    word_wer = _corpus_wer(ground_truth_words, recognized_words)
    sentence_wer = _corpus_wer(ground_truth_sentences, recognized_sentences)

    print(f"WER for speaker {speaker_id} using {id}:")
    print(f"WER for word-level data: {word_wer}")
    print(f"WER for sentence-level data: {sentence_wer}")

    # Save the aligned pairs of each group next to the input files
    words_file_path = f'{output_dir}/{id}_{speaker_id}_words.txt'
    _write_pairs(words_file_path, recognized_words, ground_truth_words)

    sentences_file_path = f'{output_dir}/{id}_{speaker_id}_sentences.txt'
    _write_pairs(sentences_file_path, recognized_sentences, ground_truth_sentences)

    # Output the saved paths and lengths
    print(f"Saved recognized words and ground truth words to: {words_file_path}")
    print(f"Length of recognized words: {len(recognized_words)}")
    print(f"Length of ground truth words: {len(ground_truth_words)}")

    print(f"Saved recognized sentences and ground truth sentences to: {sentences_file_path}")
    print(f"Length of recognized sentences: {len(recognized_sentences)}")
    print(f"Length of ground truth sentences: {len(ground_truth_sentences)}")


WER for speaker F01 using wav2vec2-TORGO-original:
WER for word-level data: 0.9574468085106383
WER for sentence-level data: 0.6820652173913043
Saved recognized words and ground truth words to: runs/wav2vec2-TORGO-original/wav2vec2-TORGO-original_F01_words.txt
Length of recognized words: 188
Length of ground truth words: 188
Saved recognized sentences and ground truth sentences to: runs/wav2vec2-TORGO-original/wav2vec2-TORGO-original_F01_sentences.txt
Length of recognized sentences: 40
Length of ground truth sentences: 40
WER for speaker F03 using wav2vec2-TORGO-original:
WER for word-level data: 0.8295739348370927
WER for sentence-level data: 0.2967644084934277
Saved recognized words and ground truth words to: runs/wav2vec2-TORGO-original/wav2vec2-TORGO-original_F03_words.txt
Length of recognized words: 798
Length of ground truth words: 798
Saved recognized sentences and ground truth sentences to: runs/wav2vec2-TORGO-original/wav2vec2-TORGO-original_F03_sentences.txt
Length of recogniz