In [None]:
import re
import sys
import math
import pandas as pd
from urllib import request
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize, sent_tokenize

class Node:
    """One node of a trie: a value plus the child nodes reachable from it.

    Children are kept in a dict keyed by whatever key the caller uses with
    ``node[key] = child``.  ``end`` flags a node at which a stored word stops.
    """

    def __init__(self, value):
        self.end = False  # True when this node is an exit (end-of-word) node
        self.children = {}
        self.value = value

    def __contains__(self, value):
        """Return True when a child is registered under ``value``."""
        return value in self.children

    def __getitem__(self, key):
        """Return the child stored under ``key``, or ``None`` if there is none."""
        return self.children.get(key)

    def __setitem__(self, key, value):
        """Register ``value`` as the child reachable through ``key``."""
        self.children[key] = value

    def __str__(self):
        """Return the string form of this node's value."""
        return str(self.value)

class Trie:
    """Prefix tree of words built from ``Node`` objects, one per character."""

    def __init__(self):
        # The root carries an empty value; real characters start one level down.
        self.root = Node('')

    def add(self, word):
        """Insert ``word`` into the trie after stripping surrounding whitespace."""
        current = self.root
        for letter in word.strip():
            child = current[letter]
            if child is None:
                # First time this prefix is seen: grow a new branch.
                child = Node(letter)
                current[letter] = child
            current = child
        # Mark the node reached by the last character as a complete word.
        current.end = True

    def __contains__(self, word):
        """Return True only if ``word`` was added as a whole word.

        A prefix of a stored word does not count; the walk must finish on a
        node flagged as an end node.
        """
        current = self.root
        for letter in word:
            if letter not in current:
                return False
            current = current[letter]
        return current.end == True

class SpellCheck:
    """Suggest a known word for possibly misspelled input.

    Known words are held in a ``Trie`` loaded from plain-text word lists
    (one word per line).  ``spellcheck`` tolerates wrong letter case,
    repeated letters and wrong vowels.
    """

    def __init__(self, word_files=('words1.txt', 'words2.txt')):
        """Load every word from ``word_files`` into the trie.

        Args:
            word_files: Paths of the word lists to read, one word per line.
                Defaults to the two files this class has always read.

        Raises:
            OSError: If a word list cannot be opened.
        """
        self.words = Trie()
        for path in word_files:
            # Read as UTF-8 rather than the platform default so non-ASCII
            # words are decoded the same way on every system.
            with open(path, 'r', encoding='utf-8') as f:
                for line in f:
                    # A blank line would register the empty string as a word.
                    if line.strip():
                        self.words.add(line)

    def spellcheck(self, word):
        """Return a known word matching ``word``, or ``'NO SUGGESTION'``.

        The word is returned unchanged when it is known as typed or after
        lower-casing.  Otherwise the trie is searched depth-first, allowing
        a repeated letter to be dropped and a vowel to be swapped for any
        other vowel; the first complete word found is returned.

        Args:
            word: The text to check.

        Returns:
            The matching word, or the string ``'NO SUGGESTION'``.
        """
        if word in self.words:
            return word

        word = word.lower()
        if word in self.words:
            return word

        vowels = 'aeiou'

        def recurse(path, word, node):
            """Return a word spelled from ``node`` that matches ``word``.

            ``path`` is the corrected prefix built so far.  Returns ``None``
            when no correction leads to a complete word.
            """
            if node is None:
                return None
            if word == '':
                # Input exhausted: only a success if a word ends here.
                return path if node.end else None

            ltr = word[0]
            if ltr in node:
                result = recurse(path + ltr, word[1:], node[ltr])
                if result:
                    return result

            # Retry with the lower-cased letter only when that is a different
            # letter.  The word is already lower-cased before the search, so
            # an unconditional retry would repeat the branch above and make
            # failed lookups take exponential time.
            lowered = ltr.lower()
            if lowered != ltr and lowered in node:
                result = recurse(path + lowered, word[1:], node[lowered])
                if result:
                    return result
            ltr = lowered

            # Try dropping one letter of a repeated pair ("weeb" -> "web").
            if len(word) > 1 and ltr == word[1]:
                result = recurse(path, word[1:], node)
                if result:
                    return result

            # Try replacing a vowel with each of the other vowels.
            if ltr in vowels:
                for v in vowels:
                    if v != ltr and v in node:
                        result = recurse(path + v, word[1:], node[v])
                        if result:
                            return result

            return None

        result = recurse('', word, self.words.root)
        if result:
            return result
        return 'NO SUGGESTION'


# Punctuation tokens dropped together with NLTK's English stop words.
_EXTRA_STOP_TOKENS = (',', '.', '-', '(', ')', '[', ']', ':', ';', '\'', '&', '_')

# (article URL, word-list file) pairs.  SpellCheck() reads these files back.
_SOURCES = (
    ("https://en.wikipedia.org/wiki/Web_mining", 'words1.txt'),
    ("https://en.wikipedia.org/wiki/Data_mining", 'words2.txt'),
)

# Tags whose text is harvested from each page.
_TEXT_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p']


def _fetch_terms(url):
    """Download ``url`` and return the tokens of its headings and paragraphs."""
    # The context manager closes the HTTP response once the body is read.
    with request.urlopen(url) as response:
        html = response.read().decode('utf8')
    soup = BeautifulSoup(html, 'html.parser')
    terms = []
    for tag in soup.find_all(_TEXT_TAGS):
        terms.extend(word_tokenize(tag.text))
    return terms


def _normalise(tokens, stop_words, lemmatizer):
    """Lower-case ``tokens``, drop stop words and lemmatize what is left."""
    # Lower-case before the stop-word test so capitalised stop words
    # ("The", "And") are filtered as well.
    lowered = (token.lower() for token in tokens)
    return [lemmatizer.lemmatize(token) for token in lowered
            if token not in stop_words]


def _write_words(path, words):
    """Write ``words`` to ``path`` as UTF-8, one word per line."""
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(word + '\n' for word in words)


def _self_test():
    """Check the trie against the system dictionary, run doctests, then exit."""
    t = Trie()
    with open('/usr/share/dict/words', 'r') as f:
        for line in f:
            word = line.strip()
            t.add(word)
            # Explicit check instead of ``assert`` so it still runs under -O.
            if word not in t:
                print(word, "not in trie")
                sys.exit(1)

    # Run doctests
    s = SpellCheck()
    import doctest
    doctest.testmod(extraglobs={'s': s})
    sys.exit(0)


def main():
    """Build the word lists from Wikipedia, then spell-check words interactively.

    With ``-t`` as the first command-line argument the self-test runs
    instead of the interactive prompt.
    """
    raw_terms = [_fetch_terms(url) for url, _ in _SOURCES]

    # A set gives constant-time membership tests for every token.
    stop_words = set(stopwords.words('english'))
    stop_words.update(_EXTRA_STOP_TOKENS)

    lemmatizer = WordNetLemmatizer()
    final_term = [_normalise(tokens, stop_words, lemmatizer)
                  for tokens in raw_terms]

    for (_, path), words in zip(_SOURCES, final_term):
        _write_words(path, words)

    if len(sys.argv) > 1 and sys.argv[1] == '-t':
        _self_test()

    s = SpellCheck()
    while True:
        word = input('Enter a word: ')
        print(s.spellcheck(word))
        answer = input('Would you like to check another word? yes/no: ')
        # Keep going only on an explicit "yes"; anything else ends the session.
        if answer.strip().lower() != 'yes':
            break


if __name__ == '__main__':
    main()


> web
web
> weeb
web
> mm
NO SUGGESTION
> miin
NO SUGGESTION
