In [10]:
import json
import re
import os
from symspellpy import SymSpell, Verbosity

def is_word(s: str) -> bool:
    """Return True if *s* is a Russian word token.

    A word token consists solely of Cyrillic letters and hyphens and
    contains at least one letter.

    Args:
        s: The string to test.

    Returns:
        True when the whole string matches, False otherwise (including
        for the empty string and for a string made only of hyphens).
    """
    # The ranges а-я / А-Я do not cover 'ё' / 'Ё' (they sit outside the
    # contiguous block in Unicode), so those letters are listed explicitly.
    # fullmatch is used instead of match + '$' because '$' also matches
    # just before a trailing newline.
    return bool(re.fullmatch(r'[а-яА-ЯёЁ\-]*[а-яА-ЯёЁ][а-яА-ЯёЁ\-]*', s))

def merge_words(words: list[str]) -> list[str]:
    """Return a new list in which a word ending with a hyphen is joined to the next word.

    An item is joined with its successor only when the item itself is a
    word (per ``is_word``), ends with ``'-'``, the successor is also a
    word, and the concatenation (with the trailing hyphen dropped) is a
    word. Every other item is copied through unchanged.

    Args:
        words: Input tokens; the list is not modified.

    Returns:
        A new list with the hyphenated pairs merged.
    """
    merged: list[str] = []
    total = len(words)
    pos = 0
    while pos < total:
        current = words[pos]
        pos += 1  # from here on, pos points at the following token

        # Only a word with a trailing hyphen that has a word after it
        # is a candidate for merging.
        if is_word(current) and current.endswith('-') and pos < total and is_word(words[pos]):
            candidate = current[:-1] + words[pos]
            if is_word(candidate):
                merged.append(candidate)
                pos += 1  # the following token has been consumed as well
                continue

        merged.append(current)
    return merged


def read_json(path):
    """Load the text from a JSON file and return its list of word tokens.

    The file is read as UTF-8 JSON and the string at
    ``data["data"]["text"]`` is split on whitespace. Each piece has
    trailing whitespace and leading/trailing dots removed; pieces that are
    not words (per ``is_word``) are dropped, and the remaining ones are
    passed through ``merge_words``.

    Args:
        path: Path to the JSON file.

    Returns:
        The list of word tokens produced by ``merge_words``.
    """
    with open(path, "r", encoding="utf-8") as source:
        payload = json.load(source)

    raw_text = payload["data"]["text"]
    cleaned = (piece.rstrip().strip('.') for piece in raw_text.split())
    only_words = [token for token in cleaned if is_word(token)]
    return merge_words(only_words)

def finder_symspell(word: str, sym_spell: SymSpell, max_edit_distance: int = 3) -> str:
    """Return the spelling suggestion for *word* from *sym_spell*.

    The lookup is done on the lower-cased word.

    Args:
        word: The word to check.
        sym_spell: The SymSpell instance to query.
        max_edit_distance: Edit distance passed to the second (fuzzy) lookup.

    Returns:
        * *word* unchanged (original case) if the lower-cased form is found
          with edit distance 0;
        * otherwise the term of the first suggestion found within
          *max_edit_distance*;
        * otherwise the lower-cased word.
    """
    lowered = word.lower()

    # Exact hit: keep the word exactly as it was written.
    exact_hits = sym_spell.lookup(lowered, Verbosity.TOP, max_edit_distance=0)
    if exact_hits:
        return word

    fuzzy_hits = sym_spell.lookup(lowered, Verbosity.TOP, max_edit_distance)
    return fuzzy_hits[0].term if fuzzy_hits else lowered

def init_symspell(path: str, max_edit_distance: int = 3) -> SymSpell:
    """Create a SymSpell instance populated from a word-list file.

    The file is read as UTF-8, one entry per line. Trailing whitespace is
    stripped from each line, and lines that are not words (per ``is_word``)
    are skipped. Every accepted word is added with a count of 1.

    Args:
        path: Path to the word-list file.
        max_edit_distance: Passed to SymSpell as ``max_dictionary_edit_distance``.

    Returns:
        The populated SymSpell instance.
    """
    speller = SymSpell(max_dictionary_edit_distance=max_edit_distance)
    with open(path, 'r', encoding='utf-8') as dictionary_file:
        stripped_lines = (line.rstrip() for line in dictionary_file)
        for entry in filter(is_word, stripped_lines):
            speller.create_dictionary_entry(entry, 1)
    return speller

def func(json_path, sym_spell):
    """Spell-correct the text of a JSON file and write it to ``<json_path>.txt``.

    The words of the text are obtained with ``read_json`` and each one is
    checked with ``finder_symspell``. Every word whose suggestion differs
    from it is replaced in the text found at ``data["data"]["text"]``, and
    the resulting text is written as UTF-8 to ``json_path + ".txt"``.

    Args:
        json_path: Path to the input JSON file.
        sym_spell: SymSpell instance used for the lookups.
    """
    corrections = {}
    for word in read_json(json_path):
        suggestion = finder_symspell(word, sym_spell)
        if suggestion != word:
            corrections[word] = suggestion

    with open(json_path, "r", encoding="utf-8") as f:
        main_text = json.load(f)["data"]["text"]

    if corrections:
        # Replace whole tokens only, in a single pass. Chained str.replace
        # calls would also rewrite the misspelled token wherever it occurs
        # inside a longer word, and a later replacement could alter the
        # result of an earlier one.
        # Longest keys first so the alternation prefers the longest token.
        alternatives = "|".join(
            re.escape(key) for key in sorted(corrections, key=len, reverse=True)
        )
        # Letters, digits, '_' and '-' on either side mean the match sits
        # inside a larger token, so it is skipped.
        token_pattern = re.compile(r"(?<![\w-])(?:" + alternatives + r")(?![\w-])")
        main_text = token_pattern.sub(
            lambda match: corrections[match.group(0)], main_text
        )

    # Explicit encoding: the input is read as UTF-8, and the platform
    # default encoding may be unable to represent Cyrillic text.
    with open(json_path + ".txt", "w", encoding="utf-8") as f:
        f.write(main_text)


# Build the SymSpell dictionary from the word list in 'russian.txt'.
sym_spell = init_symspell('russian.txt')

In [None]:
# Spell-correct the text stored in "res.json"; the output goes to "res.json.txt".
func("res.json", sym_spell)