In [None]:
import pandas as pd
from metaphone import doublemetaphone

In [None]:
# Locations of the JSON input to read and the JSON output to write.
# Both values are a single space here (presumably placeholders -- set them
# to real paths before running the cells below).
input_file_path: str = " "
output_file_path: str = " "

In [None]:
# Load the input JSON into a DataFrame and verify that the columns used by
# the pronunciation comparison are present. Any failure is reported and
# stops the run with a non-zero exit status.
try:
    data = pd.read_json(input_file_path)
    required_columns = {'original', 'transformed'}

    # Compute the difference once so the check and the message agree.
    missing_columns = required_columns - set(data.columns)
    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

except Exception as e:
    # Top-level boundary: report whatever went wrong (unreadable file,
    # malformed JSON, missing columns) before stopping.
    print(f"Error reading input file: {e}")
    # `exit()` is a convenience injected by the `site` module for interactive
    # sessions and, called without arguments, ends with status 0. Raising
    # SystemExit directly always works and signals the failure to the caller.
    raise SystemExit(1) from None

In [None]:
def is_similar_pronunciation(word1, word2):
    """
    Determine if two words have similar pronunciation using the Double Metaphone algorithm.

    Args:
        word1 (str): The first word to compare.
        word2 (str): The second word to compare.

    Returns:
        bool: True if the words share at least one non-empty phonetic code,
            False otherwise.

    Explanation:
        - Uses the Double Metaphone algorithm to generate primary and secondary phonetic codes for each word.
        - Considers the words similar if any primary or secondary code of one
          word equals any primary or secondary code of the other.
        - Empty codes are ignored. The `metaphone` package is documented to
          report an absent code as an empty string (confirm against the
          installed version); comparing those directly would make any two
          words without a secondary code look similar.
        - As a consequence, a word that yields no code at all (for example an
          empty string) is never considered similar to anything.
    """
    # Keep only the codes that actually exist for each word.
    codes1 = {code for code in doublemetaphone(word1) if code}
    codes2 = {code for code in doublemetaphone(word2) if code}
    # Similar when the two sets of codes have at least one element in common.
    return not codes1.isdisjoint(codes2)

In [None]:
# Add similarity result
def _pair_is_similar(original, transformed):
    """Return True when both values are present and sound alike.

    Missing values (None/NaN) are reported as not similar instead of being
    stringified to "None"/"nan" and compared as if they were words.
    Non-string values are converted with str() before the comparison.
    """
    for value in (original, transformed):
        # is_scalar guards pd.isna against list-like cells, for which it
        # would return an array instead of a single bool.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return False
    return is_similar_pronunciation(str(original), str(transformed))


# Walk the two columns in lockstep rather than applying a function row by
# row: this avoids building a Series per row, and the explicit bool dtype
# keeps the column a valid boolean mask even when the frame has no rows.
data['Pronunciation_Similar'] = pd.Series(
    [
        _pair_is_similar(original, transformed)
        for original, transformed in zip(data['original'], data['transformed'])
    ],
    index=data.index,
    dtype=bool,
)

In [None]:
# Keep just the rows whose 'Pronunciation_Similar' flag is set
filtered_data = data[data['Pronunciation_Similar']]

# Nothing matched: say so and leave the output file unwritten.
# Otherwise write the matching rows as one JSON record per line.
if filtered_data.empty:
    print("No pronunciation matches found. Output file was not created.")
else:
    filtered_data.to_json(output_file_path, orient='records', lines=True)
    print(f"Results saved to {output_file_path}.")