# Imports

In [31]:
import json
from pathlib import Path

# Helpers

In [32]:
def load_json_or_init(path: Path):
    """
    Return ``(data, created)`` for the vocabulary JSON at *path*.

    When the file is present its parsed content is returned with
    ``created`` set to False. When it is absent, a fresh skeleton with
    empty tags and an empty ``data`` list is returned with ``created``
    set to True; nothing is written to disk.
    """
    # Guard clause: no file yet, so hand back an empty skeleton.
    if not path.exists():
        skeleton = {"term_tag": "", "defn_tag": "", "data": []}
        return skeleton, True

    with path.open("r", encoding="utf-8") as handle:
        loaded = json.load(handle)
    return loaded, False


def load_txt_or_init(path: Path):
    """
    Load a Quizlet TXT file if it exists.

    Each non-blank line is expected to be ``term<TAB>rest``. Two layouts
    of ``rest`` are understood:

    * ``defn [comment]`` -- the layout normalize_terms writes. Everything
      before the first ``" ["`` is the definition (so it may contain
      spaces) and the text inside the brackets is the comment.
    * ``defn comment`` -- no trailing bracket group. The first
      space-separated word is the definition and the remainder is the
      comment.

    In both layouts ``", "`` in the comment is turned into a newline.
    A line without a TAB is kept as a term with empty definition and
    comment instead of raising.

    Returns a dict keyed by term; each value is a dict with the keys
    ``term``, ``defn``, ``comment`` and ``printed`` (always False).
    A missing file yields an empty dict. If a term occurs on several
    lines, the last one wins.
    """
    terms = {}
    if not path.exists():
        return terms

    with path.open("r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            # partition() never raises: with no TAB, `rest` is just "".
            term, _, rest = line.rstrip("\n").partition("\t")
            if rest.endswith("]") and " [" in rest:
                # Bracketed layout: drop the closing "]" and split on the
                # first " [" so multi-word definitions stay intact and the
                # brackets do not leak into the stored comment.
                defn, _, comment = rest[:-1].partition(" [")
            else:
                # Unbracketed layout: first word is the definition.
                defn, _, comment = rest.partition(" ")
            terms[term] = {
                "term": term,
                "defn": defn,
                "comment": comment.replace(", ", "\n"),
                "printed": False,
            }
    return terms

def normalize_terms(input_json_path, input_txt_path):
    """
    Merge a vocabulary JSON file with a Quizlet TXT file and rewrite both.

    The entries of both sources are combined into one list sorted by
    term. When a term appears in both files the JSON entry is kept and
    the TXT entry is ignored. Either file may be missing; a missing
    JSON file starts from a structure with empty tags.

    Both paths are then (over)written:

    * JSON: the loaded top-level structure (so existing tags are
      preserved) with ``data`` replaced by the merged list.
    * TXT: one ``term<TAB>defn`` line per entry, followed by
      `` [comment]`` when the comment is non-empty; newlines in the
      comment are flattened to ``", "``.

    Parameters are anything ``pathlib.Path`` accepts. Returns None.
    """
    json_path = Path(input_json_path)
    txt_path = Path(input_txt_path)

    # The "created" flag is not needed here; only the data is used.
    json_data, _ = load_json_or_init(json_path)
    txt_terms = load_txt_or_init(txt_path)

    json_terms = {item["term"]: item for item in json_data["data"]}

    # Overlay JSON entries on top of TXT entries so JSON wins on conflicts.
    # Every term comes from one of the two dicts, so no fallback entry
    # is ever required.
    merged = {**txt_terms, **json_terms}
    normalized = [merged[term] for term in sorted(merged)]

    # Write JSON (preserve tags if file existed)
    json_data["data"] = normalized
    with json_path.open("w", encoding="utf-8") as f:
        json.dump(json_data, f, ensure_ascii=False, indent=2)

    # Write Quizlet TXT
    with txt_path.open("w", encoding="utf-8") as f:
        for item in normalized:
            # Comments are stored multi-line; Quizlet lines must be single-line.
            comment = item.get("comment", "").replace("\n", ", ")
            line = f"{item['term']}\t{item.get('defn', '')}"
            if comment:
                line += f" [{comment}]"
            f.write(line + "\n")

# Normalize

In [33]:
# Merge the verbs JSON with its Quizlet TXT file; both files are rewritten in place.
normalize_terms("vocab/verbs.json", "vocab/verbs.txt")