In [None]:
# Step 1: Mount Google Drive so the project folder is reachable from Colab
import os

from google.colab import drive

drive.mount('/content/drive')

# Step 2: Set Working Directory
# All evaluation inputs and outputs live in the project's `results` folder.
BASE_DIR = '/content/drive/MyDrive/speech_understanding_project'
RESULTS_DIR = os.path.join(BASE_DIR, 'results')
os.makedirs(RESULTS_DIR, exist_ok=True)  # no-op when the folder already exists

# Text files consumed by the loading step further down.
REF_FILE = os.path.join(RESULTS_DIR, 'refs.txt')
PRED_FILE = os.path.join(RESULTS_DIR, 'preds.txt')

In [None]:
# Step 3: Install Required Packages
!pip install sacrebleu jiwer

In [None]:
import sacrebleu
from jiwer import wer


In [None]:
# Step 4: Load Predictions and References
# Format: One sentence per line in each file

def load_lines(path):
    """Read a UTF-8 text file and return its lines as a list of strings.

    Each line has leading and trailing whitespace (including the newline)
    removed. Blank lines are kept as empty strings, so the position of every
    entry matches its line number in the file.
    """
    cleaned = []
    with open(path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            cleaned.append(raw_line.strip())
    return cleaned

# Read the predictions first, then the references; later cells pair them
# up by line index.
predictions, references = load_lines(PRED_FILE), load_lines(REF_FILE)

# Fail fast when the two files are misaligned, before any metric is computed.
assert len(references) == len(predictions), "Mismatch in number of predictions and references"
print(f"Loaded {len(predictions)} prediction-reference pairs.")


In [None]:
# 📍 Step 5: BLEU Score
# Corpus-level BLEU over all loaded prediction/reference pairs.
# sacrebleu takes a list of reference streams; there is one reference per
# prediction here, so `references` is wrapped in a one-element list.
bleu = sacrebleu.corpus_bleu(predictions, [references])
print(f" BLEU Score: {bleu.score:.2f}")


In [None]:
# 📍 Step 6: CHRF Score
# Corpus-level chrF over the same pairs used for BLEU.
# As with corpus_bleu, the single reference stream is passed as a
# one-element list.
chrf = sacrebleu.corpus_chrf(predictions, [references])
print(f"CHRF Score: {chrf.score:.2f}")


In [None]:
# 📍 Step 7: Word Error Rate (WER)
# Corpus-level WER: total word edits divided by total reference words.
# Passing the full lists lets jiwer aggregate over the whole corpus, so long
# utterances are weighted by their length. Averaging per-sentence WER instead
# would give a one-word utterance the same weight as a thirty-word one, and
# would disagree with how phoneme_error_rate aggregates in Step 8.
total_wer = wer(references, predictions)
print(f"Word Error Rate (WER): {total_wer:.3f}")


In [None]:
# Step 8: Phoneme Error Rate (PER)

def _edit_distance(ref_seq, pred_seq):
    """Levenshtein distance between two token sequences (pure-Python fallback)."""
    # The distance is symmetric, so iterate over the longer sequence and keep
    # the dynamic-programming row as short as possible.
    if len(ref_seq) < len(pred_seq):
        ref_seq, pred_seq = pred_seq, ref_seq
    previous = list(range(len(pred_seq) + 1))
    for i, ref_tok in enumerate(ref_seq, start=1):
        current = [i]
        for j, pred_tok in enumerate(pred_seq, start=1):
            substitution = previous[j - 1] + (0 if ref_tok == pred_tok else 1)
            current.append(min(previous[j] + 1, current[j - 1] + 1, substitution))
        previous = current
    return previous[-1]


def phoneme_error_rate(refs, preds):
    """Compute the corpus-level phoneme error rate.

    Each reference/prediction string is split on whitespace into tokens. The
    result is the summed token-level edit distance over all pairs divided by
    the total number of reference tokens.

    Args:
        refs: Iterable of reference strings with whitespace-separated tokens.
        preds: Iterable of predicted strings, aligned one-to-one with ``refs``.

    Returns:
        Total edit distance divided by total reference length, or 0 when the
        references contain no tokens at all.

    Raises:
        ValueError: If ``refs`` and ``preds`` contain a different number of
            entries (``zip`` would otherwise silently drop the extras).
    """
    # Prefer the C-backed `editdistance` package when it is installed; fall
    # back to the pure-Python helper so the metric still works without it.
    try:
        from editdistance import eval as token_distance
    except ImportError:
        token_distance = _edit_distance

    refs, preds = list(refs), list(preds)
    if len(refs) != len(preds):
        raise ValueError(
            f"Mismatch in number of references ({len(refs)}) and predictions ({len(preds)})"
        )

    total_dist = 0
    total_len = 0
    for ref, pred in zip(refs, preds):
        # split() with no arguments already ignores leading/trailing whitespace.
        ref_seq = ref.split()
        pred_seq = pred.split()
        total_dist += token_distance(ref_seq, pred_seq)
        total_len += len(ref_seq)
    return total_dist / total_len if total_len > 0 else 0


# NOTE(review): `phoneme_refs` and `phoneme_preds` are not defined in any cell
# visible in this notebook. Unless they are created elsewhere, this line raises
# NameError on a fresh kernel — confirm where the phoneme sequences are loaded.
# presumably each entry is a string of whitespace-separated phonemes, since
# phoneme_error_rate splits its inputs on whitespace — verify against the data.
per = phoneme_error_rate(refs=phoneme_refs, preds=phoneme_preds)
print(f"Phoneme Error Rate (PER): {per:.3f}")


In [None]:
# Step 9: Save Evaluation Report
# One "NAME: value" line per metric, in the order the metrics were computed.
report_lines = [
    f"BLEU: {bleu.score:.2f}",
    f"CHRF: {chrf.score:.2f}",
    f"WER: {total_wer:.3f}",
]

# PER was previously left out of the report even though Step 8 computes it.
# It is appended only when `per` exists: the Step 8 cell depends on
# phoneme_refs / phoneme_preds, which may not be available in every run, and
# the other metrics should still be saved in that case.
if "per" in globals():
    report_lines.append(f"PER: {per:.3f}")

report_path = os.path.join(RESULTS_DIR, "evaluation_report.txt")
# Explicit encoding, matching how load_lines reads the input files.
with open(report_path, "w", encoding="utf-8") as f:
    f.write("\n".join(report_lines) + "\n")
