

>Evaluating the performance of the models using WER, CER, and BLEU.



In [None]:
!pip install jiwer

In [None]:
def load_transcriptions(filename):
    """Load saved transcriptions from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The parsed JSON content, or an empty dict when ``filename`` does not
        exist.
    """
    # Imported locally so the function also works on a fresh kernel where no
    # earlier cell has imported these modules.
    import json
    import os

    # A missing file is treated as "no transcriptions" rather than an error.
    if not os.path.exists(filename):
        return {}
    # Read explicitly as UTF-8: the platform default encoding can differ and
    # would garble or reject non-ASCII transcription text.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

# Path of the saved Wav2Vec2-BERT transcriptions file
WAV2VEC2BERT_TRANSCRIPTIONS_PATH = "/content/drive/MyDrive/STT/wav2vec2bert_transcriptions20240828-095455.json"

# Read the stored predictions; load_transcriptions gives back {} if the file is absent
wav2vec2bert_transcriptions = load_transcriptions(WAV2VEC2BERT_TRANSCRIPTIONS_PATH)

In [None]:
# Sanity check: dump everything that was loaded (an empty dict means the file was not found).
print(wav2vec2bert_transcriptions)

In [None]:
# Registry of model name -> that model's transcriptions; the evaluation loop
# walks over every entry, so further models can be added as extra keys.
transcriptions_dict = {"Wav2Vec2-BERT": wav2vec2bert_transcriptions}

In [None]:
# Show the full model-name -> transcriptions mapping.
print(transcriptions_dict)

Accuracy metrics: upload the validation data (a TSV file with the reference transcriptions).

In [None]:
from datasets import load_metric
import pandas as pd
from google.colab import files

# Load the TSV file with reference transcriptions.
# The keys of the mapping returned by files.upload() are used as the file names to read.
uploaded = files.upload()
file_paths = list(uploaded.keys())
if not file_paths:
    # Nothing was uploaded: fail with a clear message instead of an opaque
    # IndexError on file_paths[0] below.
    raise ValueError("No file was uploaded; please upload the TSV file with reference transcriptions.")
# Only the first uploaded file is read; it must contain 'path' and 'sentence' columns.
data = pd.read_csv(file_paths[0], delimiter='\t')
# Map each 'path' value to its reference sentence (a repeated path keeps the last row).
reference_transcriptions = dict(zip(data['path'], data['sentence']))

In [None]:
# Show the full path -> reference sentence mapping built from the TSV.
print(reference_transcriptions)

In [None]:
# Print both id sets so key mismatches between predictions and references are visible
print(set(transcriptions_dict['Wav2Vec2-BERT']))
print(set(reference_transcriptions))


In [None]:
# Show the model-name -> transcriptions mapping again before scoring.
print(transcriptions_dict)

In [None]:
# Create the three scorers used by evaluate_transcriptions, loaded in the order wer, bleu, cer.
# NOTE(review): `datasets.load_metric` is believed to be deprecated in recent
# `datasets` releases in favour of the `evaluate` package — confirm against the installed version.
wer_metric, bleu_metric, cer_metric = [
    load_metric(metric_name) for metric_name in ("wer", "bleu", "cer")
]

In [None]:
def evaluate_transcriptions(transcriptions, reference_transcriptions, verbose=True):
    """Score each predicted transcription against its reference.

    Uses the module-level ``wer_metric``, ``bleu_metric`` and ``cer_metric``
    objects, which must be initialised before this function is called.

    Args:
        transcriptions: Mapping of sentence id -> predicted transcription.
        reference_transcriptions: Mapping of sentence id -> reference
            transcription.
        verbose: When True (the default, matching the previous behaviour),
            print every evaluated id with its prediction and reference.

    Returns:
        A ``(wer_scores, bleu_scores, cer_scores)`` tuple of dicts keyed by
        sentence id. Ids without a usable reference appear in none of them.
    """
    wer_scores, bleu_scores, cer_scores = {}, {}, {}
    for sentence_id, predicted_transcription in transcriptions.items():
        reference_transcription = reference_transcriptions.get(sentence_id)
        # Skip ids with no usable reference: missing, empty/whitespace-only, or
        # a non-string value such as a float NaN. NaN is truthy, so a plain
        # truthiness check would let it through and `.split()` would then fail.
        if not isinstance(reference_transcription, str) or not reference_transcription.strip():
            continue

        if verbose:
            print(f"Evaluating: {sentence_id}")
            print(f"Predicted: {predicted_transcription}")
            print(f"Reference: {reference_transcription}")

        wer = wer_metric.compute(predictions=[predicted_transcription], references=[reference_transcription])
        cer = cer_metric.compute(predictions=[predicted_transcription], references=[reference_transcription])

        predicted_tokens = predicted_transcription.split()
        if predicted_tokens:
            bleu = bleu_metric.compute(
                predictions=[predicted_tokens],
                references=[[reference_transcription.split()]],
            )['bleu']
        else:
            # An empty hypothesis cannot match any n-gram, so score it 0.0
            # directly instead of passing a zero-length prediction to the
            # metric, which some BLEU implementations cannot handle.
            bleu = 0.0

        wer_scores[sentence_id] = wer
        bleu_scores[sentence_id] = bleu
        cer_scores[sentence_id] = cer
    return wer_scores, bleu_scores, cer_scores

In [None]:
# Score every registered model and collect its per-sentence metrics by model name
results = {}

for model_name, transcriptions in transcriptions_dict.items():
    print(f"Model: {model_name}, Transcriptions: {transcriptions}")
    wer_scores, bleu_scores, cer_scores = evaluate_transcriptions(
        transcriptions, reference_transcriptions
    )
    # One {sentence id -> score} dict per metric
    results[model_name] = {"WER": wer_scores, "BLEU": bleu_scores, "CER": cer_scores}

In [None]:
# Show the set of ids for which a reference sentence is available.
print(f"Reference Transcriptions Keys: {set(reference_transcriptions.keys())}")


In [None]:
# Collect the ids on each side; only ids present in both can be scored
transcription_ids = set(transcriptions_dict['Wav2Vec2-BERT'])
reference_ids = set(reference_transcriptions)

# An empty "Common IDs" set means the prediction keys and the TSV paths do not line up
print(f"Transcription IDs: {transcription_ids}")
print(f"Reference IDs: {reference_ids}")
print(f"Common IDs: {transcription_ids.intersection(reference_ids)}")

In [None]:
import pandas as pd

def create_comparison_table(results):
    """Flatten per-model metric dicts into one row per (model, sentence).

    Args:
        results: Mapping of model name -> ``{"WER": {...}, "BLEU": {...},
            "CER": {...}}``, where each inner dict maps sentence id -> score.

    Returns:
        A DataFrame with columns ``Model``, ``Sentence ID``, ``WER``, ``BLEU``
        and ``CER``. Rows follow the sentence ids found under ``"WER"``; a
        score missing from ``"BLEU"`` or ``"CER"`` is left empty. An empty
        ``results`` gives an empty frame that still has these columns.
    """
    columns = ["Model", "Sentence ID", "WER", "BLEU", "CER"]
    records = [
        {
            "Model": model_name,
            "Sentence ID": sentence_id,
            "WER": metrics["WER"].get(sentence_id),
            "BLEU": metrics["BLEU"].get(sentence_id),
            "CER": metrics["CER"].get(sentence_id),
        }
        for model_name, metrics in results.items()
        for sentence_id in metrics["WER"]
    ]
    # Build the frame once, after all records are collected. Building it inside
    # the per-model loop left the result undefined when `results` was empty.
    return pd.DataFrame(records, columns=columns)

# Print the raw nested results dict, flatten it into the per-sentence
# comparison table, and print that table.
print(results)
comparison_df = create_comparison_table(results)
print(comparison_df)

# Write the table to 'model_comparison.csv' in the current working directory,
# without the DataFrame index column.
comparison_df.to_csv('model_comparison.csv', index=False)
