### Evaluation of transcriptions

- 5 random transcriptions chosen
- Correct lyrics researched online
- Each transcription paired with its correct (reference) lyrics
- Metrics calculated for each pair comparison:
    - Word Error Rate
    - ROUGE score (ROUGE-1 and ROUGE-L F-measure)
    - Cosine Similarity

In [3]:
from rouge_score import rouge_scorer
import jiwer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Initialise lyric pairs as five (transcribed, reference) tuples.
# Placeholder values only: `lyric_pairs` is rebuilt from the lyric files
# further down in this cell.
lyric_pairs = [
    (f"transcribed lyrics {n}", f"reference lyrics {n}")
    for n in range(1, 6)
]

# Helper that loads the full contents of a .txt file
def read_file(file_path: str) -> str:
    """Return the entire text of the file at ``file_path``, decoded as UTF-8."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents

# Define file paths for the transcribed and reference lyrics.
# Forward slashes are used because they resolve on Windows as well as on
# Linux/macOS, whereas backslash-separated paths only resolve on Windows.
TRANSCRIPTIONS_DIR = '../transcriptions'
lyric_stems = ['favourites/03', 'favourites/04', 'recommend/007', 'recommend/012', 'Doomsday']

# Each reference file sits next to its transcription with a " reference" suffix,
# so deriving both lists from one list of stems keeps the pairs aligned.
transcribed_files = [f'{TRANSCRIPTIONS_DIR}/{stem}.txt' for stem in lyric_stems]
reference_files = [f'{TRANSCRIPTIONS_DIR}/{stem} reference.txt' for stem in lyric_stems]

# Add lyric pairs to the list as (transcribed, reference) tuples
lyric_pairs = []

for t_file, r_file in zip(transcribed_files, reference_files):
    transcribed_lyrics = read_file(t_file)
    reference_lyrics = read_file(r_file)
    lyric_pairs.append((transcribed_lyrics, reference_lyrics))


In [None]:
# Word Error Rate (WER) per lyric pair -- lower is better

# Walk through the pairs, numbering them from 1, and report each WER
for pair_number, lyrics in enumerate(lyric_pairs, start=1):
    transcribed, reference = lyrics
    # jiwer.wer takes the reference text first, then the transcription under test
    error_rate = jiwer.wer(reference, transcribed)

    print(f"Pair {pair_number}:")
    print(f"  WER: {error_rate:.2f}")

Pair 1:
  WER: 0.86
Pair 2:
  WER: 0.88
Pair 3:
  WER: 0.36
Pair 4:
  WER: 0.52
Pair 5:
  WER: 0.30


In [None]:
# Calculate ROUGE scores for each pair (higher is better)

# RougeScorer's default tokenizer lowercases the text and drops punctuation
# before matching, so case/punctuation differences do not lower the scores.

# Initialise the scorer under its own name. Rebinding `rouge_scorer` would
# shadow the imported module and make this cell fail when it is run again.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

# Iterate over each pair of lyrics and calculate ROUGE scores
for i, (transcribed, reference) in enumerate(lyric_pairs, start=1):
    # score() expects (target, prediction): the reference lyrics are the target.
    # The F-measure printed below is symmetric, but precision and recall are not.
    rouge_scores = scorer.score(reference, transcribed)

    print(f"Pair {i}:")
    print(f"  ROUGE-1: {rouge_scores['rouge1'].fmeasure:.2f}, ROUGE-L: {rouge_scores['rougeL'].fmeasure:.2f}")

Pair 1:
  ROUGE-1: 0.63, ROUGE-L: 0.58
Pair 2:
  ROUGE-1: 0.69, ROUGE-L: 0.61
Pair 3:
  ROUGE-1: 0.88, ROUGE-L: 0.86
Pair 4:
  ROUGE-1: 0.84, ROUGE-L: 0.82
Pair 5:
  ROUGE-1: 0.90, ROUGE-L: 0.90


In [None]:
# Cosine similarity per lyric pair -- higher is better

# For every pair, fit TF-IDF on just those two texts and compare the two vectors
for i, (transcribed, reference) in enumerate(lyric_pairs, start=1):
    tfidf_matrix = TfidfVectorizer().fit_transform([transcribed, reference])
    dense_rows = tfidf_matrix.toarray()
    # Row 0 is the transcription, row 1 the reference; slicing keeps each row 2-D
    similarity = cosine_similarity(dense_rows[0:1], dense_rows[1:2])
    cosine_sim = similarity[0][0]

    print(f"Pair {i}:")
    print(f"  Cosine Similarity: {cosine_sim:.2f}\n")

Pair 1:
  Cosine Similarity: 0.61

Pair 2:
  Cosine Similarity: 0.66

Pair 3:
  Cosine Similarity: 0.91

Pair 4:
  Cosine Similarity: 0.91

Pair 5:
  Cosine Similarity: 0.98

