In [None]:
def _edit_distance(reference, hypothesis):
    """Return the Levenshtein distance between two sequences.

    Counts the minimum number of single-item insertions, deletions and
    substitutions that turn ``reference`` into ``hypothesis``. Only two rows
    of the dynamic-programming table are kept in memory at a time.
    """
    previous = list(range(len(hypothesis) + 1))
    for i, ref_item in enumerate(reference, start=1):
        current = [i]
        for j, hyp_item in enumerate(hypothesis, start=1):
            cost = 0 if ref_item == hyp_item else 1
            current.append(min(
                previous[j] + 1,         # deletion
                current[j - 1] + 1,      # insertion
                previous[j - 1] + cost,  # substitution (or match)
            ))
        previous = current
    return previous[-1]


def character_error_rate(original_text, predicted_text):
    """Return the character error rate of a prediction, as a percentage.

    The rate is the case-insensitive character edit distance between the two
    texts divided by the number of characters in ``original_text``. Comparing
    character *sets* (as an earlier revision did) ignores order and
    repetition, so e.g. "abc" vs "cba" wrongly scored 0.

    Args:
        original_text: Reference string.
        predicted_text: Predicted string to score against the reference.

    Returns:
        The error rate in percent. It can exceed 100 when the prediction is
        much longer than the reference. Returns 0.0 when the reference is
        empty, to avoid dividing by zero.
    """
    reference = original_text.lower()
    hypothesis = predicted_text.lower()

    # Avoid division by zero
    if not reference:
        return 0.0

    return (_edit_distance(reference, hypothesis) / len(reference)) * 100

# Demo: score one predicted sentence against its reference.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)
cer = character_error_rate(original_text, predicted_text)

print(f"Character Error Rate: {cer}%")


In [None]:
def _edit_distance(reference, hypothesis):
    """Return the Levenshtein distance between two sequences.

    Counts the minimum number of single-item insertions, deletions and
    substitutions that turn ``reference`` into ``hypothesis``. Only two rows
    of the dynamic-programming table are kept in memory at a time.
    """
    previous = list(range(len(hypothesis) + 1))
    for i, ref_item in enumerate(reference, start=1):
        current = [i]
        for j, hyp_item in enumerate(hypothesis, start=1):
            cost = 0 if ref_item == hyp_item else 1
            current.append(min(
                previous[j] + 1,         # deletion
                current[j - 1] + 1,      # insertion
                previous[j - 1] + cost,  # substitution (or match)
            ))
        previous = current
    return previous[-1]


def character_error_rate_list(original_texts, predicted_texts):
    """Return the pooled character error rate over text pairs, as a percentage.

    For every pair the case-insensitive character edit distance is added to
    a running error total. The total is divided by the summed length of the
    longer string of each pair, which keeps the result within 0-100.
    Counting characters by membership (as an earlier revision did) ignores
    order and repetition, so it is not an edit count.

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The pooled error rate in percent, or 0.0 when there are no
        characters to compare.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    total_errors = 0
    total_chars = 0

    for original_text, predicted_text in zip(original_texts, predicted_texts):
        reference = original_text.lower()
        hypothesis = predicted_text.lower()

        total_errors += _edit_distance(reference, hypothesis)
        total_chars += max(len(reference), len(hypothesis))

    # Avoid division by zero
    if total_chars == 0:
        return 0.0

    return (total_errors / total_chars) * 100

# Demo: the list variant scored on a single reference/prediction pair.
original_texts, predicted_texts = (
    ["This is the original text."],
    ["This is the predicted text."],
)
average_cer = character_error_rate_list(original_texts, predicted_texts)

print(f"Average Character Error Rate: {average_cer}%")


In [None]:
def levenshtein_distance(original_text, predicted_text):
    """Return the Levenshtein (edit) distance between two strings.

    A full (len(original_text) + 1) x (len(predicted_text) + 1) table is
    filled row by row; each cell holds the cheapest way to turn a prefix of
    ``original_text`` into a prefix of ``predicted_text``.
    """
    row_count = len(original_text) + 1
    col_count = len(predicted_text) + 1
    table = [[0] * col_count for _ in range(row_count)]

    # Empty source prefix: every target character has to be inserted.
    for col in range(col_count):
        table[0][col] = col

    for row in range(1, row_count):
        # Empty target prefix: every source character has to be deleted.
        table[row][0] = row
        for col in range(1, col_count):
            same = original_text[row - 1] == predicted_text[col - 1]
            delete_cost = table[row - 1][col] + 1
            insert_cost = table[row][col - 1] + 1
            replace_cost = table[row - 1][col - 1] + (0 if same else 1)
            table[row][col] = min(delete_cost, insert_cost, replace_cost)

    return table[-1][-1]

def character_error_rate(original_text, predicted_text):
    """Return the character error rate of one prediction, as a percentage.

    The edit distance from ``levenshtein_distance`` is divided by the length
    of ``original_text``. Both intermediate values are printed before the
    result is computed. An empty reference yields 0.0.
    """
    total_chars = len(original_text)
    distance = levenshtein_distance(original_text, predicted_text)

    # Diagnostic output, emitted even when the reference is empty.
    print(f"Levenshtein Distance (Single): {distance}")
    print(f"Total Characters (Single): {total_chars}")

    # The conditional guards the division against an empty reference.
    return 0.0 if total_chars == 0 else (distance / total_chars) * 100

def character_error_rate_list(original_texts, predicted_texts):
    """Return the pooled character error rate over text pairs, as a percentage.

    Edit distances from ``levenshtein_distance`` are summed over all pairs
    and divided by the summed length of the longer string of each pair
    (not by the reference length alone).

    Raises:
        ValueError: If the two lists differ in length.
    """
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    edit_total = 0
    length_total = 0
    for reference, hypothesis in zip(original_texts, predicted_texts):
        pair_distance = levenshtein_distance(reference, hypothesis)
        length_total += max(len(reference), len(hypothesis))
        edit_total += pair_distance

    # Nothing to compare: report 0.0 rather than dividing by zero.
    if length_total == 0:
        return 0.0
    return (edit_total / length_total) * 100

# Demo: score one sentence pair, then the same pair wrapped in lists.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)

cer_single = character_error_rate(original_text, predicted_text)
print(f"Character Error Rate (Single): {cer_single}%")

original_texts, predicted_texts = (
    ["This is the original text."],
    ["This is the predicted text."],
)

# Alternative sample pair kept for manual experiments:
# original_text = ["kitten is on the wall"]
# predicted_text = ["sitting is on the car"]

average_cer = character_error_rate_list(original_texts, predicted_texts)
print(f"Average Character Error Rate: {average_cer}%")


In [None]:
from torchmetrics.text import CharErrorRate
# Reference and prediction are each wrapped in a single-element list.
original_text = ["This is the original text."]
predicted_text = ["This is the predicted text."]
# NOTE: this rebinds the name `cer` to a CharErrorRate metric object.
cer = CharErrorRate()
# The prediction is passed first and the reference second; the bare expression
# is presumably what the notebook displays as the cell result.
cer(predicted_text, original_text)

In [None]:
def _edit_distance(reference, hypothesis):
    """Return the Levenshtein distance between two sequences.

    Counts the minimum number of single-item insertions, deletions and
    substitutions that turn ``reference`` into ``hypothesis``. Only two rows
    of the dynamic-programming table are kept in memory at a time.
    """
    previous = list(range(len(hypothesis) + 1))
    for i, ref_item in enumerate(reference, start=1):
        current = [i]
        for j, hyp_item in enumerate(hypothesis, start=1):
            cost = 0 if ref_item == hyp_item else 1
            current.append(min(
                previous[j] + 1,         # deletion
                current[j - 1] + 1,      # insertion
                previous[j - 1] + cost,  # substitution (or match)
            ))
        previous = current
    return previous[-1]


def word_error_rate(original_text, predicted_text):
    """Return the word error rate of a prediction, as a percentage.

    Both texts are lowercased and split on whitespace. The rate is the
    word-level edit distance divided by the number of reference words.
    Comparing word *sets* (as an earlier revision did) ignores word order
    and repeated words, so it is not an edit count.

    Args:
        original_text: Reference string.
        predicted_text: Predicted string to score against the reference.

    Returns:
        The error rate in percent. It can exceed 100 when the prediction has
        many extra words. Returns 0.0 when the reference has no words, to
        avoid dividing by zero.
    """
    original_words = original_text.lower().split()
    predicted_words = predicted_text.lower().split()

    # Avoid division by zero
    if not original_words:
        return 0.0

    errors = _edit_distance(original_words, predicted_words)
    return (errors / len(original_words)) * 100

# Demo: word-level error rate for one reference/prediction pair.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)
wer = word_error_rate(original_text, predicted_text)

print(f"Word Error Rate: {wer}%")


In [None]:
def _edit_distance(reference, hypothesis):
    """Return the Levenshtein distance between two sequences.

    Counts the minimum number of single-item insertions, deletions and
    substitutions that turn ``reference`` into ``hypothesis``. Only two rows
    of the dynamic-programming table are kept in memory at a time.
    """
    previous = list(range(len(hypothesis) + 1))
    for i, ref_item in enumerate(reference, start=1):
        current = [i]
        for j, hyp_item in enumerate(hypothesis, start=1):
            cost = 0 if ref_item == hyp_item else 1
            current.append(min(
                previous[j] + 1,         # deletion
                current[j - 1] + 1,      # insertion
                previous[j - 1] + cost,  # substitution (or match)
            ))
        previous = current
    return previous[-1]


def calculate_wer(original_texts, predicted_texts):
    """Return the pooled word error rate over text pairs, as a fraction.

    Each text is split on whitespace (case-sensitively) and the word-level
    edit distance of every pair is summed, then divided by the total number
    of reference words.

    An earlier revision computed deletions as ``len(ref) - len(hyp)`` and
    insertions as ``len(hyp) - len(ref)``. Those always cancelled, so any
    difference in length went uncounted; the edit distance counts
    substitutions, deletions and insertions together.

    Args:
        original_texts: Iterable of reference strings.
        predicted_texts: Iterable of predicted strings. Pairs are formed
            with ``zip``, so surplus items on either side are ignored.

    Returns:
        Errors divided by reference words (a fraction, not a percentage),
        or 0.0 when there are no reference words.
    """
    total_errors = 0
    total_words = 0

    for original_text, predicted_text in zip(original_texts, predicted_texts):
        ref_words = original_text.split()
        hyp_words = predicted_text.split()

        total_errors += _edit_distance(ref_words, hyp_words)
        total_words += len(ref_words)

    # Avoid division by zero
    if total_words == 0:
        return 0.0

    return total_errors / total_words

average_wer = calculate_wer(original_texts, predicted_texts)

# calculate_wer returns a fraction, so the format spec converts it to a percentage.
print(f"Average Word Error Rate: {average_wer:.2%}")

In [None]:
import numpy as np

def calculate_wer_list(original_texts, predicted_texts):
    """Return the pooled word error rate over text pairs, as a fraction.

    For each pair the texts are split on whitespace and a word-level edit
    distance is computed in a NumPy table. The distances are summed and
    divided by the total number of reference words. Returns 0.0 when there
    are no reference words.
    """
    error_sum = 0
    word_sum = 0

    for reference, hypothesis in zip(original_texts, predicted_texts):
        ref_words = reference.split()
        hyp_words = hypothesis.split()
        n_ref = len(ref_words)
        n_hyp = len(hyp_words)

        # One extra row and column cover the empty-prefix cases.
        d = np.zeros((n_ref + 1, n_hyp + 1))

        # First row: reaching j hypothesis words from nothing costs j edits.
        for j in range(n_hyp + 1):
            d[0, j] = j

        for i in range(1, n_ref + 1):
            # First column: reducing i reference words to nothing costs i edits.
            d[i, 0] = i
            for j in range(1, n_hyp + 1):
                if ref_words[i - 1] == hyp_words[j - 1]:
                    # Matching words carry the diagonal cost over unchanged.
                    d[i, j] = d[i - 1, j - 1]
                    continue
                # Otherwise pay one edit on top of the cheapest neighbour:
                # diagonal = substitution, left = insertion, up = deletion.
                d[i, j] = min(d[i - 1, j - 1] + 1, d[i, j - 1] + 1, d[i - 1, j] + 1)

        # The bottom-right cell is the distance for the full pair.
        error_sum += d[n_ref, n_hyp]
        word_sum += n_ref

    if word_sum == 0:
        return 0.0
    return error_sum / word_sum

# Score with calculate_wer_list, the function defined directly above.
average_wer = calculate_wer_list(original_texts, predicted_texts)

# The result is a fraction, so the format spec converts it to a percentage.
print(f"Average Word Error Rate: {average_wer:.2%}")

In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu

def calculate_bleu(original_text, predicted_text):
    """Return the sentence-level BLEU score of a prediction.

    Both texts are lowercased and tokenized with ``nltk.word_tokenize``.
    ``original_text`` is passed to ``sentence_bleu`` as the only reference
    and ``predicted_text`` as the hypothesis.
    """
    reference_tokens = nltk.word_tokenize(original_text.lower())
    hypothesis_tokens = nltk.word_tokenize(predicted_text.lower())

    # sentence_bleu takes a list of references; only one is supplied here.
    references = [reference_tokens]
    return sentence_bleu(references, hypothesis_tokens)

# Demo: BLEU for one reference/prediction pair.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)
bleu_score = calculate_bleu(original_text, predicted_text)

print(f"BLEU Score: {bleu_score}")

In [None]:
import nltk
from nltk.translate.bleu_score import sentence_bleu

def calculate_bleu_list(original_texts, predicted_texts):
    """Return the mean sentence-level BLEU score over text pairs.

    Each pair is lowercased, tokenized with ``nltk.word_tokenize`` and
    scored with ``sentence_bleu`` using the original text as the only
    reference. The per-pair scores are then averaged.

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The average BLEU score, or 0.0 for empty input (previously this
        raised ZeroDivisionError).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    # Avoid division by zero when there is nothing to average
    if len(original_texts) == 0:
        return 0.0

    total_bleu = 0.0

    for original_text, predicted_text in zip(original_texts, predicted_texts):
        # Tokenize the texts into lists of words
        original_tokens = nltk.word_tokenize(original_text.lower())
        predicted_tokens = nltk.word_tokenize(predicted_text.lower())

        # sentence_bleu takes a list of references; only one is supplied here
        total_bleu += sentence_bleu([original_tokens], predicted_tokens)

    return total_bleu / len(original_texts)

# Demo: average BLEU over two reference/prediction pairs.
original_texts, predicted_texts = (
    ["This is the original text.", "Another example."],
    ["This is the predicted text.", "Different example."],
)
average_bleu = calculate_bleu_list(original_texts, predicted_texts)

print(f"Average BLEU Score: {average_bleu}")


In [None]:
def jaccard_similarity(original_text, predicted_text):
    """Return the Jaccard similarity of the two texts' character sets.

    Each text is lowercased and reduced to its set of distinct characters;
    the score is the size of the intersection over the size of the union.
    Two empty texts score 0.0.
    """
    reference_chars = set(original_text.lower())
    predicted_chars = set(predicted_text.lower())

    combined = reference_chars | predicted_chars
    if not combined:
        # Both texts are empty, so there is nothing to divide by.
        return 0.0

    shared = reference_chars & predicted_chars
    return len(shared) / len(combined)

# Demo: character-set Jaccard similarity for one pair.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)
jaccard_score = jaccard_similarity(original_text, predicted_text)

print(f"Jaccard Similarity: {jaccard_score}")

In [None]:
def levenshtein_distance(original_text, predicted_text):
    """Return the Levenshtein distance and a normalized error percentage.

    Args:
        original_text: Reference string.
        predicted_text: Predicted string.

    Returns:
        A ``(distance, error_rate_percentage)`` tuple. ``distance`` is the
        minimum number of single-character insertions, deletions and
        substitutions. ``error_rate_percentage`` is that distance divided
        by the length of the longer string, times 100. Two empty strings
        give ``(0, 0.0)`` (previously this raised ZeroDivisionError).
    """
    m = len(original_text)
    n = len(predicted_text)

    # Initialize a matrix to store distances
    distance_matrix = [[0] * (n + 1) for _ in range(m + 1)]

    # Initialize the first row and column
    for i in range(m + 1):
        distance_matrix[i][0] = i
    for j in range(n + 1):
        distance_matrix[0][j] = j

    # Fill in the matrix
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if original_text[i - 1] == predicted_text[j - 1] else 1
            distance_matrix[i][j] = min(
                distance_matrix[i - 1][j] + 1,        # Deletion
                distance_matrix[i][j - 1] + 1,        # Insertion
                distance_matrix[i - 1][j - 1] + cost  # Substitution
            )

    # The bottom-right cell contains the Levenshtein distance. The local is
    # deliberately not named after the function, to avoid shadowing it.
    distance = distance_matrix[m][n]

    # Normalize by the longer string; guard the both-empty case
    max_length = max(m, n)
    if max_length == 0:
        error_rate_percentage = 0.0
    else:
        error_rate_percentage = (distance / max_length) * 100

    return distance, error_rate_percentage

# Demo: edit distance between two short sentences.
original_text, predicted_text = (
    "kitten is on the wall",
    "sitting is on the car",
)
# levenshtein_distance returns a (distance, error percentage) pair, so the
# whole tuple is what gets printed below.
distance = levenshtein_distance(original_text, predicted_text)

print(f"Levenshtein Distance: {distance}")


In [17]:
import Levenshtein

def text_similarity_evaluation(original_texts, predicted_texts, threshold=0.8):
    """Score predictions as hits or misses by normalized edit similarity.

    For each pair the similarity is ``1 - distance / longer_length``, using
    ``Levenshtein.distance``. A pair at or above ``threshold`` counts as a
    true positive, otherwise as a false positive. False negatives are the
    references that did not yield a true positive. Each similarity score is
    printed as it is computed.

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings. Pairs are formed
            with ``zip``, so surplus items on either side are ignored.
        threshold: Minimum similarity for a pair to count as a match.

    Returns:
        A ``(precision, recall, f1_score)`` tuple. Any ratio whose
        denominator is zero is reported as 0.0 (previously empty input or
        zero matches raised ZeroDivisionError).
    """
    tp, fp = 0, 0

    for label, pred in zip(original_texts, predicted_texts):
        longest = max(len(label), len(pred))
        if longest == 0:
            # Two empty strings are identical; avoid dividing by zero.
            similarity_score = 1.0
        else:
            similarity_score = 1 - Levenshtein.distance(label, pred) / longest
        print(similarity_score)
        if similarity_score >= threshold:
            tp += 1
        else:
            fp += 1

    fn = len(original_texts) - tp

    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0.0

    return precision, recall, f1_score

# Demo: five reference lines and their predictions, two of which contain errors.
original_texts = [
    "Imagine a vast sheet of paper on which straight",
    "Lines Triangles Squares Pentagons Hexagons and other",
    "figures instead of remaining fixed in their places",
    "move freely about on or in the surface but without",
    "the power of rising above or sinking below it",
]

predicted_texts = [
    "Imagine a vast shell of paper on which straight",
    "dines Triangle Squares Pentagons Halagous and older",
    "figures instead of remaining fixed in their places",
    "move freely about on or in the surface but without",
    "the power of rising above or sinking below it",
]

precision, recall, f1_score = text_similarity_evaluation(
    original_texts,
    predicted_texts,
    threshold=0.8,
)
print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1_score)

0.9574468085106383
0.8653846153846154
1.0
1.0
1.0
Precision: 1.0
Recall: 1.0
F1-Score: 1.0


In [None]:
def calculate_f1(original_text, predicted_text):
    """Return the F1 score between the unique lowercase words of two texts.

    Words are obtained by lowercasing and splitting on whitespace, then
    de-duplicated. Precision and recall are computed from the set overlap;
    any zero denominator is replaced by 1 so the score falls back to 0.0.
    """
    reference_vocab = set(original_text.lower().split())
    predicted_vocab = set(predicted_text.lower().split())

    hits = len(reference_vocab & predicted_vocab)
    spurious = len(predicted_vocab - reference_vocab)
    missed = len(reference_vocab - predicted_vocab)

    # `or 1` swaps a zero denominator for 1 instead of raising.
    precision = hits / ((hits + spurious) or 1)
    recall = hits / ((hits + missed) or 1)

    return 2 * (precision * recall) / ((precision + recall) or 1)

# Demo: word-level F1 for one reference/prediction pair.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)
f1_score = calculate_f1(original_text, predicted_text)

print(f"F1 Score: {f1_score:.2f}")


In [None]:
def calculate_f1_list(original_texts, predicted_texts):
    """Return the mean word-level F1 score over text pairs.

    For each pair, both texts are lowercased, split on whitespace and
    de-duplicated. F1 is computed from the set overlap, with any zero
    denominator replaced by 1 so that pair scores 0.0.

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The average F1 score, or 0.0 for empty input (previously this
        raised ZeroDivisionError).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    # Avoid division by zero when there is nothing to average
    if len(original_texts) == 0:
        return 0.0

    total_f1 = 0.0

    for original_text, predicted_text in zip(original_texts, predicted_texts):
        # Tokenize the texts into sets of words
        original_words = set(original_text.lower().split())
        predicted_words = set(predicted_text.lower().split())

        true_positives = len(original_words & predicted_words)
        false_positives = len(predicted_words - original_words)
        false_negatives = len(original_words - predicted_words)

        # `or 1` swaps a zero denominator for 1 instead of raising
        precision = true_positives / ((true_positives + false_positives) or 1)
        recall = true_positives / ((true_positives + false_negatives) or 1)

        total_f1 += 2 * (precision * recall) / ((precision + recall) or 1)

    # Calculate the average F1 across all pairs
    return total_f1 / len(original_texts)

# Demo: average word-level F1 over two pairs.
original_texts, predicted_texts = (
    ["This is the original text.", "Another example."],
    ["This is the predicted text.", "Different example."],
)
average_f1 = calculate_f1_list(original_texts, predicted_texts)

print(f"Average F1 Score: {average_f1:.2f}")


In [None]:
def calculate_f1_char(original_text, predicted_text):
    """Return the F1 score between the distinct lowercase characters of two texts.

    Each text is lowercased and reduced to its set of characters. Precision
    and recall are computed from the set overlap; any zero denominator is
    replaced by 1 so the score falls back to 0.0.
    """
    reference_chars = set(original_text.lower())
    candidate_chars = set(predicted_text.lower())

    hits = len(reference_chars & candidate_chars)
    spurious = len(candidate_chars - reference_chars)
    missed = len(reference_chars - candidate_chars)

    # `or 1` swaps a zero denominator for 1 instead of raising.
    precision = hits / ((hits + spurious) or 1)
    recall = hits / ((hits + missed) or 1)

    return 2 * (precision * recall) / ((precision + recall) or 1)

def calculate_f1_list_char(original_texts, predicted_texts):
    """Return the mean character-level F1 score over text pairs.

    Each pair is scored with ``calculate_f1_char`` and the scores are
    averaged.

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The average F1 score, or 0.0 for empty input (previously this
        raised ZeroDivisionError).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    # Avoid division by zero when there is nothing to average
    if len(original_texts) == 0:
        return 0.0

    total_f1 = sum(
        calculate_f1_char(original_text, predicted_text)
        for original_text, predicted_text in zip(original_texts, predicted_texts)
    )

    # Calculate the average F1 across all pairs
    return total_f1 / len(original_texts)

# Demo: average character-level F1 over two pairs.
original_texts, predicted_texts = (
    ["This is the original text.", "Another example."],
    ["This is the predicted text.", "Different example."],
)
average_f1_char = calculate_f1_list_char(original_texts, predicted_texts)

print(f"Average F1 Score (Character): {average_f1_char:.2f}")


In [None]:
def calculate_precision(original_text, predicted_text):
    """Return the word-level precision of a prediction.

    Both texts are lowercased, split on whitespace and de-duplicated.
    Precision is the share of predicted words that also occur in the
    reference; a prediction with no words scores 0.0.
    """
    reference_vocab = set(original_text.lower().split())
    predicted_vocab = set(predicted_text.lower().split())

    hits = len(reference_vocab & predicted_vocab)
    spurious = len(predicted_vocab - reference_vocab)

    # `or 1` swaps a zero denominator for 1 instead of raising.
    return hits / ((hits + spurious) or 1)

# Demo: word-level precision for one reference/prediction pair.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)
precision = calculate_precision(original_text, predicted_text)

print(f"Precision: {precision:.2f}")

In [None]:
def calculate_precision_list(original_texts, predicted_texts):
    """Return the mean word-level precision over text pairs.

    For each pair, both texts are lowercased, split on whitespace and
    de-duplicated. Precision is the share of predicted words that also
    occur in the reference (0.0 when the prediction has no words).

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The average precision, or 0.0 for empty input (previously this
        raised ZeroDivisionError).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    # Avoid division by zero when there is nothing to average
    if len(original_texts) == 0:
        return 0.0

    total_precision = 0.0

    for original_text, predicted_text in zip(original_texts, predicted_texts):
        # Tokenize the texts into sets of words
        original_words = set(original_text.lower().split())
        predicted_words = set(predicted_text.lower().split())

        true_positives = len(original_words & predicted_words)
        false_positives = len(predicted_words - original_words)

        # `or 1` swaps a zero denominator for 1 instead of raising
        total_precision += true_positives / ((true_positives + false_positives) or 1)

    # Calculate the average precision across all pairs
    return total_precision / len(original_texts)

# Demo: average word-level precision over two pairs.
original_texts, predicted_texts = (
    ["This is the original text.", "Another example."],
    ["This is the predicted text.", "Different example."],
)
average_precision = calculate_precision_list(original_texts, predicted_texts)

print(f"Average Precision: {average_precision:.2f}")


In [None]:
def calculate_precision_char(original_text, predicted_text):
    """Return the character-level precision of a prediction.

    Each text is lowercased and reduced to its set of characters. Precision
    is the share of predicted characters that also occur in the reference;
    an empty prediction scores 0.0.
    """
    reference_chars = set(original_text.lower())
    candidate_chars = set(predicted_text.lower())

    hits = len(reference_chars & candidate_chars)
    spurious = len(candidate_chars - reference_chars)

    # `or 1` swaps a zero denominator for 1 instead of raising.
    return hits / ((hits + spurious) or 1)

def calculate_precision_list_char(original_texts, predicted_texts):
    """Return the mean character-level precision over text pairs.

    Each pair is scored with ``calculate_precision_char`` and the scores
    are averaged.

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The average precision, or 0.0 for empty input (previously this
        raised ZeroDivisionError).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    # Avoid division by zero when there is nothing to average
    if len(original_texts) == 0:
        return 0.0

    total_precision = sum(
        calculate_precision_char(original_text, predicted_text)
        for original_text, predicted_text in zip(original_texts, predicted_texts)
    )

    # Calculate the average precision across all pairs
    return total_precision / len(original_texts)

# Demo: average character-level precision over two pairs.
original_texts, predicted_texts = (
    ["This is the original text.", "Another example."],
    ["This is the predicted text.", "Different example."],
)
average_precision_char = calculate_precision_list_char(original_texts, predicted_texts)

print(f"Average Precision (Character): {average_precision_char:.2f}")


In [None]:
def calculate_recall(original_text, predicted_text):
    """Return the word-level recall of a prediction.

    Both texts are lowercased, split on whitespace and de-duplicated.
    Recall is the share of reference words that also occur in the
    prediction; a reference with no words scores 0.0.
    """
    reference_vocab = set(original_text.lower().split())
    predicted_vocab = set(predicted_text.lower().split())

    hits = len(reference_vocab & predicted_vocab)
    missed = len(reference_vocab - predicted_vocab)

    # `or 1` swaps a zero denominator for 1 instead of raising.
    return hits / ((hits + missed) or 1)

# Demo: word-level recall for one reference/prediction pair.
original_text, predicted_text = (
    "This is the original text.",
    "This is the predicted text.",
)
recall = calculate_recall(original_text, predicted_text)

print(f"Recall: {recall:.2f}")


In [None]:
def calculate_recall_list(original_texts, predicted_texts):
    """Return the mean word-level recall over text pairs.

    For each pair, both texts are lowercased, split on whitespace and
    de-duplicated. Recall is the share of reference words that also occur
    in the prediction (0.0 when the reference has no words).

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The average recall, or 0.0 for empty input (previously this raised
        ZeroDivisionError).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    # Avoid division by zero when there is nothing to average
    if len(original_texts) == 0:
        return 0.0

    total_recall = 0.0

    for original_text, predicted_text in zip(original_texts, predicted_texts):
        # Tokenize the texts into sets of words
        original_words = set(original_text.lower().split())
        predicted_words = set(predicted_text.lower().split())

        true_positives = len(original_words & predicted_words)
        false_negatives = len(original_words - predicted_words)

        # `or 1` swaps a zero denominator for 1 instead of raising
        total_recall += true_positives / ((true_positives + false_negatives) or 1)

    # Calculate the average recall across all pairs
    return total_recall / len(original_texts)

# Demo: average word-level recall over two pairs.
original_texts, predicted_texts = (
    ["This is the original text.", "Another example."],
    ["This is the predicted text.", "Different example."],
)
average_recall = calculate_recall_list(original_texts, predicted_texts)

print(f"Average Recall: {average_recall:.2f}")


In [None]:
def calculate_recall_char(original_text, predicted_text):
    """Return the character-level recall of a prediction.

    Each text is lowercased and reduced to its set of characters. Recall is
    the share of reference characters that also occur in the prediction;
    an empty reference scores 0.0.
    """
    reference_chars = set(original_text.lower())
    candidate_chars = set(predicted_text.lower())

    hits = len(reference_chars & candidate_chars)
    missed = len(reference_chars - candidate_chars)

    # `or 1` swaps a zero denominator for 1 instead of raising.
    return hits / ((hits + missed) or 1)

def calculate_recall_list_char(original_texts, predicted_texts):
    """Return the mean character-level recall over text pairs.

    Each pair is scored with ``calculate_recall_char`` and the scores are
    averaged.

    Args:
        original_texts: Sequence of reference strings.
        predicted_texts: Sequence of predicted strings, same length.

    Returns:
        The average recall, or 0.0 for empty input (previously this raised
        ZeroDivisionError).

    Raises:
        ValueError: If the two sequences differ in length.
    """
    # Ensure the input lists have the same length
    if len(original_texts) != len(predicted_texts):
        raise ValueError("Input lists must have the same length.")

    # Avoid division by zero when there is nothing to average
    if len(original_texts) == 0:
        return 0.0

    total_recall = sum(
        calculate_recall_char(original_text, predicted_text)
        for original_text, predicted_text in zip(original_texts, predicted_texts)
    )

    # Calculate the average recall across all pairs
    return total_recall / len(original_texts)

# Demo: average character-level recall over two pairs.
original_texts, predicted_texts = (
    ["This is the original text.", "Another example."],
    ["This is the predicted text.", "Different example."],
)
average_recall_char = calculate_recall_list_char(original_texts, predicted_texts)

print(f"Average Recall (Character): {average_recall_char:.2f}")


In [None]:
from evaluate import load
# Load the metric registered as "cer"; NOTE: this rebinds the name `cer` to the loaded metric.
cer = load("cer")
# Reference and prediction are each wrapped in a single-element list.
original_text = ["This is the original text."]
predicted_text = ["This is the predicted text."]
# Predictions and references are passed by keyword, so their order cannot be mixed up.
cer_score = cer.compute(predictions=predicted_text, references=original_text)
print(cer_score)

In [None]:
from evaluate import load
# Load the metric registered as "wer"; NOTE: this rebinds the name `wer` to the loaded metric.
wer = load("wer")
# Two prediction/reference pairs, matched by position.
predictions = ["this is the prediction", "there is an other sample"]
references = ["this is the reference", "there is another one"]
# Predictions and references are passed by keyword, so their order cannot be mixed up.
wer_score = wer.compute(predictions=predictions, references=references)
print(wer_score)