# Function for GLEU

In [1]:
import csv
import nltk.translate.gleu_score as gleu

# Function to calculate GLEU score
def calculate_gleu(original, corrected, reference):
    """Compute the GLEU score of one corrected sentence against one reference.

    Both sentences are tokenised on whitespace and scored with
    ``gleu.corpus_gleu`` over a corpus that contains just this one sentence.

    Parameters
    ----------
    original : str
        The uncorrected input sentence. It does not take part in the score
        (``corpus_gleu`` is only given the reference and the hypothesis); the
        parameter is kept so existing callers keep working.
    corrected : str
        The model's corrected sentence (the hypothesis).
    reference : str
        The ground-truth correction (the single reference).

    Returns
    -------
    The value returned by ``corpus_gleu`` for this sentence pair.
    """
    corrected_tokens = corrected.split()
    # corpus_gleu expects, for every hypothesis, a list of reference token
    # lists; here there is exactly one reference for the one hypothesis.
    reference_tokens = [reference.split()]

    return gleu.corpus_gleu([reference_tokens], [corrected_tokens])

# Function to get the gleu score from the given csv with format:
- $1^{st}$ column (Input Sentence): the sentence passed for correction, which may contain errors
- $2^{nd}$ column (Ground Truth): the reference correction of the corresponding input sentence
- $3^{rd}$ column (Generated Sentence): the model's corrected sentence

In [2]:
# Read CSV file
def gleu_from_csv(csv_file: str) -> float:
    """Average the per-sentence GLEU score over the rows of a CSV file.

    The first line of the file is always treated as a header and skipped.
    Every following non-blank row must provide, in this order:

    1. Input sentence: the sentence passed for correction (may contain errors).
    2. Ground truth: the reference correction of that input sentence.
    3. Generated sentence: the model's corrected sentence.

    Parameters
    ----------
    csv_file : str
        Path to the CSV file (read as UTF-8).

    Returns
    -------
    float
        Mean of the per-row GLEU scores, or 0.0 when the file contains no
        data rows (empty file, header only, or only blank lines).
    """
    scores = []

    with open(csv_file, newline='', encoding='utf-8') as csvfile:
        csv_reader = csv.reader(csvfile)
        # Skip the header line; the default keeps a completely empty file
        # from raising StopIteration.
        next(csv_reader, None)
        for row in csv_reader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to score.
                continue
            original, reference, corrected = row[0], row[1], row[2]
            scores.append(calculate_gleu(original, corrected, reference))

    if not scores:
        # Nothing to average: avoid dividing by zero.
        return 0.0
    return sum(scores) / len(scores)

In [3]:
# Report the average GLEU for the predictions stored in norvig_pred.csv
# (the path is relative to the notebook's working directory).
print("Average GLEU score for norvig spell checking:", gleu_from_csv("norvig_pred.csv"))

Average GLEU score for norvig spell checking: 0.20518247006575785


# Precision, Recall and F1 Score

In [7]:
def calculate_metrics(ground_truth, input_sentence, generated_sentence):
    """Score how well the generated output fixes the errors in the input.

    The three sequences are compared position by position with ``==`` /
    ``!=``; every element is treated as one unit. If the elements are whole
    sentences, a sentence only counts as corrected when it matches the ground
    truth exactly (tokenise beforehand for word-level scores).

    For each position:

    * it *needs correction* when the input differs from the ground truth;
    * it was *changed by the model* when the generated value differs from the
      input;
    * it was *corrected* when it needed correction and the generated value
      equals the ground truth.

    Precision is ``corrected / changed by the model``, recall is
    ``corrected / needs correction`` and F1 is their harmonic mean. Any ratio
    with a zero denominator is reported as 0.

    Parameters
    ----------
    ground_truth, input_sentence, generated_sentence
        Equal-length sequences. Objects exposing ``.tolist()`` (e.g. pandas
        Series) are converted with it; any other iterable is materialised
        with ``list()``.

    Returns
    -------
    dict
        Keys ``"Total Wrong Words"``, ``"Corrected by Model"``,
        ``"Precision"``, ``"Recall"`` and ``"F1 Score"``.

    Raises
    ------
    ValueError
        If the three sequences do not have the same length.
    """
    ground_truth, input_sentence, generated_sentence = (
        seq.tolist() if hasattr(seq, "tolist") else list(seq)
        for seq in (ground_truth, input_sentence, generated_sentence)
    )

    if not (len(ground_truth) == len(input_sentence) == len(generated_sentence)):
        raise ValueError(
            "ground_truth, input_sentence and generated_sentence must have the same length"
        )

    total_wrong_words = 0
    changed_by_model = 0
    corrected_by_model = 0

    # Walk the three sequences in lockstep so each input is compared with the
    # generated value at the same position (a lookup by value would pick the
    # first occurrence and mis-score duplicated inputs).
    for gt_item, input_item, generated_item in zip(
        ground_truth, input_sentence, generated_sentence
    ):
        needs_correction = gt_item != input_item
        if needs_correction:
            total_wrong_words += 1
        if generated_item != input_item:
            changed_by_model += 1
        if needs_correction and generated_item == gt_item:
            corrected_by_model += 1

    precision = corrected_by_model / changed_by_model if changed_by_model > 0 else 0.0
    recall = corrected_by_model / total_wrong_words if total_wrong_words > 0 else 0.0
    if precision + recall > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0.0

    return {
        "Total Wrong Words": total_wrong_words,
        "Corrected by Model": corrected_by_model,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1_score
    }

In [8]:
import pandas as pd
# Load the predictions file (the path is relative to the working directory).
df = pd.read_csv("norvig_pred.csv")

# Column labels are looked up verbatim in the CSV header.
# NOTE(review): "Grount Truth" looks like a typo for "Ground Truth"; presumably
# it matches the header actually stored in the file — confirm before changing it.
ground_truth = df["Grount Truth"]
input_sentence = df["Input Sentence"]
generated_sentence = df["Generated Sentence"]

# Compute the metrics over the three columns and print the resulting dict.
metrics = calculate_metrics(ground_truth, input_sentence, generated_sentence)
print(metrics)

{'Total Wrong Words': 5372, 'Corrected by Model': 0, 'Precision': 0.0, 'Recall': 0.0, 'F1 Score': 0}
