In [19]:
import re
from metaphone import doublemetaphone

def load_dictionary(file_path):
  """Load reference words and precompute their phonetic codes.

  Reads one word per line from ``file_path``. Blank (whitespace-only) lines
  are skipped so that an empty string never becomes a reference entry.

  Args:
    file_path: Path to a UTF-8 text file with one reference word per line.

  Returns:
    dict mapping each lowercased word to
    ``{"original": <word as written, stripped>, "phonetic": <primary code>}``.
    If the same word appears more than once (ignoring case), the last
    occurrence wins.
  """
  dictionary = {}
  # Explicit encoding: matches write_output and avoids depending on the
  # platform's default locale encoding.
  with open(file_path, 'r', encoding='utf-8') as f:
    for line in f:
      original = line.strip()
      if not original:
        # A blank line would otherwise register "" (with an empty phonetic
        # code) as a valid reference word.
        continue
      word = original.lower()
      dictionary[word] = {
          "original" : original,
          # First element of doublemetaphone's result is the primary code.
          "phonetic" : doublemetaphone(word)[0]
      }
  return dictionary

def load_errors(file_path):
  """Read the misspelled words to correct, one per line.

  Args:
    file_path: Path to a UTF-8 text file with one word per line.

  Returns:
    list of words in file order, with surrounding whitespace removed and
    blank lines dropped. Case is preserved.
  """
  # Explicit encoding: matches write_output and avoids depending on the
  # platform's default locale encoding.
  with open(file_path, 'r', encoding='utf-8') as f:
    stripped_lines = (line.strip() for line in f)
    return [word for word in stripped_lines if word]

In [20]:
from fuzzywuzzy import fuzz

def get_best_correction(word, dictionary):
  """Pick the reference word that best matches a misspelled word.

  Each candidate is scored as ``0.6 * phonetic + 0.4 * edit`` where
  ``phonetic`` is 100 when the primary Double Metaphone codes are equal
  (else 0) and ``edit`` is ``fuzz.ratio`` between the two spellings.

  Args:
    word: The misspelled word. Compared case-insensitively.
    dictionary: Mapping of lowercased reference word to a dict with
      ``"original"`` and ``"phonetic"`` keys (the shape built by
      ``load_dictionary``).

  Returns:
    The ``"original"`` spelling of the highest-scoring reference word, or
    ``None`` if the dictionary is empty or no candidate scores above 0.
    On ties the first candidate in iteration order is kept.
  """
  # Reference keys are lowercased when the dictionary is loaded, while
  # fuzz.ratio compares characters case-sensitively; normalise the input so
  # capitalised misspellings are not penalised.
  normalized = word.lower()
  word_phonetic = doublemetaphone(normalized)[0]
  best_match = None
  highest_score = 0

  for ref_word, ref_data in dictionary.items():
    # All-or-nothing phonetic agreement on the primary code.
    phonetic_score = 100 if word_phonetic == ref_data["phonetic"] else 0

    edit_score = fuzz.ratio(normalized, ref_word)

    final_score = 0.6 * phonetic_score + 0.4 * edit_score

    # Strict '>' keeps the earliest candidate when scores tie.
    if final_score > highest_score:
      highest_score = final_score
      best_match = ref_data["original"]

  return best_match


In [21]:
def correct_words(error_words, dictionary):
  """Pair every misspelled word with its best correction.

  Args:
    error_words: Iterable of misspelled words.
    dictionary: Reference dictionary passed through to
      ``get_best_correction``.

  Returns:
    list of ``(word, corrected)`` tuples in input order; ``corrected`` is
    whatever ``get_best_correction`` returned for that word.
  """
  return [
      (misspelled, get_best_correction(misspelled, dictionary))
      for misspelled in error_words
  ]

In [22]:
def write_output(corrections, output_file = "corrected_output.txt"):
  """Write the corrections as a tab-separated report.

  The file starts with a header row, followed by one row per pair. A falsy
  correction (``None`` or an empty string) is written as ``<NO MATCH>``.

  Args:
    corrections: Iterable of ``(error, corrected)`` pairs.
    output_file: Destination path; written as UTF-8 and overwritten if it
      already exists.
  """
  with open(output_file, 'w', encoding='utf-8') as report:
    report.write("File_Error\tCorrected\n")
    for error, corrected in corrections:
      row = f"{error}\t{corrected}\n" if corrected else f"{error}\t<NO MATCH>\n"
      report.write(row)


# Pipeline entry point: load the inputs, correct every word, write the report.
if __name__ == "__main__":
    # Reference vocabulary, one word per line, read from the working directory.
    reference_dict = load_dictionary('reference.txt')
    # Misspelled words to correct, one per line.
    error_words = load_errors('errors.txt')
    # List of (misspelled, best correction) pairs.
    corrections = correct_words(error_words, reference_dict)
    # No path is passed, so write_output uses its default "corrected_output.txt".
    write_output(corrections)
    print("✅ Correction completed. Output saved to 'corrected_output.txt'.")

✅ Correction completed. Output saved to 'corrected_output.txt'.
