# In [1]:
import json

def convert_nupe_dictionary(txt_file_path, json_file_path):
    """
    Convert a Nupe dictionary text file into a JSON list of entries.

    Each non-empty, non-header line is split on runs of two or more spaces.
    A line that yields three or more fields starts a new entry with the keys
    "Nupe", "PoS", "English" and "Additional Info" (any fields past the third,
    joined by single spaces). A line with fewer than three fields is treated
    as a continuation and appended to the "Additional Info" of the entry
    currently being built; such lines are ignored if no entry has started yet.

    :param txt_file_path: Path to the input .txt file (read as UTF-8)
    :param json_file_path: Path to save the output .json file (written as UTF-8)
    :return: None. The entries are written to ``json_file_path`` and a
        confirmation message is printed.
    """
    header_marker = "Nupe  PoS  C  English"
    entries = []
    current_entry = None  # Entry under construction; receives continuation lines

    with open(txt_file_path, "r", encoding="utf-8") as file:
        # Stream the file line by line rather than loading it all with readlines().
        for raw_line in file:
            line = raw_line.strip()

            # Skip blank lines and the column-header line.
            if not line or header_marker in line:
                continue

            # Columns are separated by two or more spaces; dropping the empty
            # fragments lets longer runs of spaces act as a single separator.
            parts = [p.strip() for p in line.split("  ") if p.strip()]

            if len(parts) >= 3:  # New dictionary entry found
                if current_entry is not None:
                    entries.append(current_entry)

                # NOTE(review): the header lists a "C" column between PoS and
                # English, but the third field is stored as "English" here.
                # Confirm against the raw data that this mapping is intended.
                current_entry = {
                    "Nupe": parts[0],
                    "PoS": parts[1],
                    "English": parts[2],
                    "Additional Info": " ".join(parts[3:]),
                }

            elif current_entry is not None:
                # Continuation line: extend the current entry's additional info.
                # Only insert the separating space when there is existing text,
                # so the field never starts with a stray leading space.
                existing = current_entry["Additional Info"]
                current_entry["Additional Info"] = (
                    f"{existing} {line}" if existing else line
                )

    # Save the last entry if it exists
    if current_entry is not None:
        entries.append(current_entry)

    # ensure_ascii=False keeps non-ASCII characters readable in the output.
    with open(json_file_path, "w", encoding="utf-8") as json_file:
        json.dump(entries, json_file, indent=4, ensure_ascii=False)

    print(f"Conversion complete! JSON saved at: {json_file_path}")

# Example run: convert the raw dictionary text file into JSON.
txt_file_path = "data/nupe_dictionary_raw.txt"  # Input text file; adjust to your location
json_file_path = "data/nupe_dictionary.json"  # Destination for the generated JSON
convert_nupe_dictionary(
    txt_file_path=txt_file_path,
    json_file_path=json_file_path,
)

# Output: Conversion complete! JSON saved at: data/nupe_dictionary.json
