# In [1]:
import re
import slists
import Utils
from SpellChecker.SimpleSpellChecker import SimpleSpellChecker
from Corpus.Sentence import Sentence
from Dictionary.Word import Word
from MorphologicalAnalysis.FsmMorphologicalAnalyzer import FsmMorphologicalAnalyzer
from NGram.NGram import NGram
from SpellChecker.NGramSpellChecker import NGramSpellChecker
from NGram.NoSmoothing import NoSmoothing

class Duzelt ():
    def __init__(self, text=""):
        """Load the language resources and run the initial passes over ``text``.

        Builds the morphological analyzer and the simple spell checker, opens
        the two word-list files that the lookup methods scan, and runs the
        paragraph, word and sentence passes so their counters are populated.

        Args:
            text: Raw input text; paragraphs are separated by newlines.
        """
        dictionary_path = "data/turkish_dictionary.txt"
        misspellings_path = "data/turkish_misspellings.txt"
        fsm_path = "data/turkish_finite_state_machine.xml"
        self.fsm = FsmMorphologicalAnalyzer(dictionary_path, misspellings_path, fsm_path)

        split_text = self.paragraphSeperation(text)
        self.wordCounter(text)

        # The two files stay open for the lifetime of the object: the lookup
        # methods iterate over them and rewind with seek(0).
        self.searchfile = open(
            "data/VERB_TS_Corpus_Frequency_List.txt", "r", encoding="utf8"
        )
        self.simpleSpellChecker = SimpleSpellChecker(self.fsm)
        self.findSentences(split_text)
        self.turkishWords = open("data/kelimeler.txt", "r", encoding="utf8")

    def abbreviationCleaning(self, text):
        """Append a ``*`` marker after every known abbreviation in ``text``.

        The entries of ``slists.abbreviations`` are applied in list order and
        every occurrence of each entry is tagged.

        Returns:
            The tagged copy of ``text``.
        """
        marked = text
        for known in slists.abbreviations:
            marked = marked.replace(known, known + "*")
        return marked
  
    def punctuationCleaning(self, w):
        regex = r"(?<!\d)[.,;:?)(](?!\d)"
        result = re.sub(regex, "", w, 0)
        return result

    def paragraphSeperation(self, text):
        paragraphs = text.split("\n")
        self.numberOfParagraph = len(paragraphs)
        return paragraphs

    def findSentences(self, paragraphs):
        sent2 = []
        for counter, parag in enumerate(paragraphs):
            if parag != "":
                parag = self.abbreviationCleaning(parag)
                senten = re.compile( """(?<=['""a-zıüöşğç\""\]\)][\!\?\:\.\…\n\r\n\t])\s+(?=[""A-ZİÜÖŞĞÇ0-9\(\-\(\''\‘\““\""\[\+])""")
                sent = re.split(senten, parag)
                sent2.extend(sent)
                sent2.extend("\n")
        a = 0
        for i in sent2:
            i = i.replace ("*", '')
            sent2[a] = i
            a += 1
        self.sentens = sent2
        self.numberOfSentences = len (self.sentens)
        return self.sentens

    def wordCounter(self, text):
        """Record the word and character counts of ``text``.

        The text is normalised first: newlines become spaces, double spaces
        are collapsed once, ``I`` is mapped to the dotless ``ı``, and the
        result is passed through ``Utils.utils.toLowercase`` and
        ``Utils.utils.removePunction``.

        Sets:
            self.numberOfWord: Number of non-empty whitespace-separated
                tokens in the normalised text (0 for empty text).
            self.numberOfCharacter: Length of the normalised text.

        Returns:
            None.
        """
        normalized = text.replace("\n", "  ").replace("  ", " ")
        # Map capital I explicitly so it lowercases to the dotless form.
        normalized = normalized.replace("I", "ı")
        normalized = Utils.utils.toLowercase(normalized)
        normalized = Utils.utils.removePunction(normalized)

        # split() without an argument ignores runs of whitespace, so blank
        # lines, repeated spaces and empty input no longer add phantom words
        # (split(" ") returned [""] for empty text and counted it as a word).
        self.numberOfWord = len(normalized.split())
        self.numberOfCharacter = len(normalized)
        return None

    def sentenceSplit(self, text):
        ws = text.split (" ")
        if len(ws)>10:
            for conjunction in slists.conjunctions:
                if conjunction in ws:
                    index = ws.index (conjunction)
                    preW = Utils.utils.removePunction (ws[index - 1]).lower ()
                    for line in self.searchfile:
                        result = re.findall ('\\b' + preW + '\\b', str (line.split (" ")[1]))
                        if len(result)>0:
                            return 1
                    self.searchfile.seek(0)
        return 0
    
    def spellCheck(self, sentence):
        """Return ``sentence`` after correction by the shared spell checker.

        Uses ``self.simpleSpellChecker`` created in ``__init__``; the
        per-call ``SimpleSpellChecker`` that was built here and never used
        has been removed.

        Args:
            sentence: The sentence text to correct.

        Returns:
            The ``toString()`` form of the corrected sentence.
        """
        corrected = self.simpleSpellChecker.spellCheck(Sentence(sentence))
        return corrected.toString()

    def isCorrect(self,w: str) -> bool: 
        fsmParses = self.fsm.morphologicalAnalysis (w)
        return fsmParses.size () != 0

    def checkSpell(self, text):    
        ws = []
        res = []
        for paragraph in text.split ("\n"):
            num = len(paragraph.split (" "))
            for counter, w in enumerate(paragraph.split (" ")):
                if counter == num-1:
                    if len(w)>0:
                        if w[-1]!=".":
                            variable =3
                elif counter == 0:
                    variable = self.spellChecker (w, "")    
                else:
                    variable = self.spellChecker (w,paragraph.split (" ")[counter-1])
                res.append (variable)    
        return res  
                              
    def spellChecker(self, w, preW=""):
        """Classify a single token and return its status code.

        The checks run in this order and the first one that applies wins:

        * 0 - ``w`` is listed in ``slists.abbreviations``
        * 1 - ``w`` is a single punctuation character
        * 5 - ``preW`` ends with ``. ? ! :`` and ``w`` is not title-cased
        * 4 - ``w`` (after ``Utils.utils.removePunctionEnd``) contains a
          punctuation mark whose preceding character is not numeric
        * 6 - ``w`` has a morphological parse and ``turkishWordSuggestion``
          returns at least one entry
        * 0 - ``w`` has a morphological parse and no such entry
        * 2 - ``w`` has no morphological parse

        Args:
            w: The token to classify.
            preW: The token that precedes ``w``; ``""`` when there is none.
        """
        if w in slists.abbreviations:
            return 0

        punctuation = [".", ",", "?", "!", "...", ":", "(", ")"]
        if len(w) == 1 and w in punctuation:
            return 1

        sentence_enders = [".", "?", "!", "...", ":"]
        if len(preW) >= 1 and preW[-1] in sentence_enders and not w.istitle():
            return 5

        w = Utils.utils.removePunctionEnd(w)
        # NOTE(review): when a mark is the first character, index - 1 is -1,
        # so the LAST character of the token is inspected instead.
        if any(
            mark in w and not w[w.index(mark) - 1].isnumeric()
            for mark in punctuation
        ):
            return 4

        if not self.isCorrect(w):
            return 2
        return 6 if len(self.turkishWordSuggestion(w)) > 0 else 0

    def wordSuggestion(self, w):
        """Return the spell checker's candidate list for the word ``w``."""
        target = Word(w)
        return self.simpleSpellChecker.candidateList(target)
    
    def turkishWordSuggestion(self, w):
        """Look up ``w`` in the ``self.turkishWords`` list.

        ``w`` is lowercased and stripped of punctuation with the ``Utils``
        helpers, then searched as a whole word in the part of each line that
        follows the first ``":"`` (with spaces removed). For every matching
        line the first space-separated field, cleaned with
        ``Utils.utils.removePunction``, is collected.

        Args:
            w: The word to look up.

        Returns:
            List of collected first fields; empty when nothing matches or
            when ``w`` is empty after cleaning.
        """
        needle = Utils.utils.removePunction(Utils.utils.toLowercase(w))
        if not needle:
            # An empty word would turn the pattern into r"\b\b", which
            # matches almost every line of the list.
            return []

        # Escape the word so regex metacharacters in it are matched literally
        # instead of raising re.error or matching unrelated text.
        whole_word = re.compile(r"\b" + re.escape(needle) + r"\b")
        matches = []
        try:
            for line in self.turkishWords:
                parts = line.split(":")
                if len(parts) < 2:
                    # Blank or malformed line without a ":" separator.
                    continue
                if whole_word.search(parts[1].replace(" ", "")):
                    matches.append(
                        Utils.utils.removePunction(line.split(" ")[0])
                    )
        finally:
            # Always rewind so the next lookup scans from the first line.
            self.turkishWords.seek(0)
        return matches