In [7]:
import pandas as pd
import re
from collections import defaultdict

# Load the CSV.
# keep_default_na=False: by default pandas parses cells spelled "null", "nan",
# "NA", "None", ... as missing values. In a dictionary those spellings can be
# legitimate headwords or definition text, so every cell is kept as the text
# that is actually in the file (empty cells are read as "").
df = pd.read_csv("dictionary.csv", keep_default_na=False)

# Predicate used to keep only single, purely alphabetic words
def is_single_lowercase_word(word):
    """Return True if *word*, once lowercased, is made only of the letters a-z.

    The argument is converted with ``str()`` and lowercased before matching,
    so mixed-case input such as "Apple" is accepted. Anything containing
    spaces, digits, punctuation or non-ASCII letters is rejected, as is the
    empty string.
    """
    lowered = str(word).lower()
    return re.fullmatch(r"[a-z]+", lowered) is not None

# Normalise the Word column before filtering:
#  * rows whose Word is missing are dropped first, because astype(str) would
#    turn NaN into the text "nan", which passes the letters-only filter and
#    would create a bogus "nan" headword;
#  * surrounding whitespace is stripped so the filter sees the same text that
#    is used as the map key (otherwise padded words are silently discarded).
# dropna/assign return new frames, so no chained-assignment warning is raised.
df = df.dropna(subset=['Word'])
df = df.assign(Word=df['Word'].astype(str).str.strip())
df = df[df['Word'].apply(is_single_lowercase_word)]

# Collect word-meaning map: lowercased word -> list of lowercased, stripped
# definitions, in file order.
word_meaning_map = defaultdict(list)
# Iterating the two columns directly avoids building a Series per row,
# which is what iterrows() does.
for raw_word, raw_meaning in zip(df['Word'], df['Definition']):
    # Looking the key up registers the word even if it has no usable
    # definition, so the set of emitted words does not shrink.
    meanings = word_meaning_map[raw_word.lower()]
    if pd.isna(raw_meaning):
        # A missing definition would otherwise be stored as the text "nan".
        continue
    meaning = str(raw_meaning).strip().lower()
    if meaning:
        meanings.append(meaning)

# Escape for C++
# Translation table mapping code points to their C++ string-literal escapes.
# Every ASCII control character (and DEL) gets a 3-digit octal escape: octal
# escapes consume at most three digits, so a following digit in the text can
# never be absorbed into the escape (unlike \x.., which is open-ended).
_CPP_ESCAPE_TABLE = {code: f"\\{code:03o}" for code in (*range(0x20), 0x7F)}
# The common characters get their readable named escapes instead.
_CPP_ESCAPE_TABLE.update({
    ord('\\'): '\\\\',
    ord('"'): '\\"',
    ord('\n'): '\\n',
    ord('\r'): '\\r',
    ord('\t'): '\\t',
})


def cpp_escape(s):
    """Escape *s* for use inside a C++ double-quoted string literal.

    Backslash, double quote and newline are escaped as before. In addition,
    carriage return, tab and every other ASCII control character are escaped,
    because a raw control character (notably a CR from a CRLF line break
    inside a CSV cell) would otherwise end up verbatim inside the literal and
    break the generated source. Non-ASCII characters are passed through
    unchanged.

    Args:
        s: The text to escape.

    Returns:
        The escaped text, without surrounding quotes.
    """
    # A single translate() pass handles every character exactly once, so the
    # backslashes introduced by one escape are never escaped a second time.
    return s.translate(_CPP_ESCAPE_TABLE)

# Generate header lines: include guard, the Entry struct, then one
# initializer row per word.
lines = [
    "#ifndef DICTIONARY_DATA_H",
    "#define DICTIONARY_DATA_H\n",
    "struct Entry {",
    "    const char* word;",
    "    const char* meaning;",
    "};\n",
    "static const Entry dictionary_entries[] = {",
]

# NOTE(review): if word_meaning_map is empty the array initializer is `{}`,
# which a C++ compiler may reject for an array of unknown bound — confirm
# whether an empty dictionary needs to be supported.
for word, meanings in word_meaning_map.items():
    # Join with a REAL newline and let cpp_escape encode it as the C++ escape
    # \n. Joining with the two-character text backslash+n (as before) had its
    # backslash doubled by cpp_escape, so the C++ string contained a literal
    # backslash followed by 'n' instead of a line break between meanings.
    joined = "\n".join(meanings)
    lines.append(f'    {{"{cpp_escape(word)}", "{cpp_escape(joined)}"}},')

lines.append("};\n")
lines.append("static const int dictionary_size = sizeof(dictionary_entries) / sizeof(dictionary_entries[0]);\n")
lines.append("#endif // DICTIONARY_DATA_H")

# Write to file
with open("dictionary_data.h", "w", encoding="utf-8") as f:
    f.write("\n".join(lines))
    # End the header with a newline so the last line is properly terminated.
    f.write("\n")

print("✅ 'dictionary_data.h' with C-style literals generated.")

✅ 'dictionary_data.h' with C-style literals generated.
