In [1]:

class LinkedList:
    class Node:
        """
        Lightweight, nonpublic class for storing a doubly linked node.
        """
        __slots__ = 'element', 'prev', 'after', 'upside_arrow', 'downside_arrow'  # streamline memory

        def __init__(self, element, prev, after):  # initialize node's fields
            self.element = element  # user's element
            self.prev = prev  # previous node reference
            self.after = after

    def __init__(self):
        """
        Create an empty list.
        """
        self.header = self.Node(None, None, None)
        self.trailer = self.Node(None, None, None)
        self.header.after = self.trailer  # trailer is after header
        self.trailer.prev = self.header  # header is before trailer
        self.size = 0

    def __len__(self):
        return self.size

    def __iter__(self):
        if self.is_empty():
            yield self.Node(None, None, None)
        current = self.header
        while current is not None:
            yield current
            current = current.after

    def is_empty(self) -> bool:
        return self.size == 0

    def insert_between(self, element, predecessor: Node, successor: Node) -> Node:
        newest = self.Node(element, predecessor, successor)  # linked to neighbors
        predecessor.after = newest
        successor.prev = newest
        self.size += 1
        return newest

    def delete_node(self, node: Node) -> type(Node.element):
        predecessor = node.prev
        successor = node.after
        predecessor.after = successor
        successor.prev = predecessor
        self.size -= 1
        element = node.element  # record deleted element
        node.prev = node.after = node.element = None  # deprecate node
        return element

    # -----------------------------------------------------------------------------------------------------------------

    def add_first(self, element: type(Node.element)) -> Node:
        return self.insert_between(element, self.header, self.header.after)

    def add_last(self, element: type(Node.element)) -> Node:
        return self.insert_between(element, self.trailer.prev, self.trailer)

    def add_before(self, prevElement: type(Node.element), element: type(Node.element)) -> Node:
        original = self.search(prevElement)
        return self.insert_between(element, original.prev, original)

    def add_after(self, nextElement: type(Node.element), element: type(Node.element)) -> Node:
        original = self.search(nextElement)
        return self.insert_between(element, original, original.after)

    # ------------------------------------------------------------------------------------------------------------------

    def delete(self, undesireElement) -> type(Node.element):
        original = self.search(undesireElement)
        if original is None:
            return
        return self.delete_node(original)

    def search(self, desireElement: type(Node.element)) -> Node:
        head = self.header
        while head.after is not None:
            head = head.after
            if head.element == desireElement:
                return head
        return None


In [2]:
"""
ll = LinkedList()
ll.add_last('h')
ll.add_last('o')
ll.add_last('s')
ll.add_last('s')
ll.add_last('i')
ll.add_last('n')
for l in ll:
    print(l.element)

ll.delete('h')
ll.delete('s')
ll.delete('m')
for l in ll:
    print(l.element)"""

"\nll = LinkedList()\nll.add_last('h')\nll.add_last('o')\nll.add_last('s')\nll.add_last('s')\nll.add_last('i')\nll.add_last('n')\nfor l in ll:\n    print(l.element)\n\nll.delete('h')\nll.delete('s')\nll.delete('m')\nfor l in ll:\n    print(l.element)"

In [3]:

class LinkedStack():

    # ---------------------------------------------- Nested Class --------------------------------------------------
    class Node:

        __slots__ = 'element', 'next'  # streamline memory usage

        def __init__(self, element, next):  # initialize node field
            self.element = element  # reference to current element
            self.next = next  # reference to the next node

    # --------------------------------------------- Stack Methods --------------------------------------------------
    def __init__(self):
        self.head = self.Node(None, None)  # head is a kind of node
        self.size = 0

    def __len__(self) -> int:
        return self.size

    def __iter__(self):
        if self.is_empty():
            yield
        current = self.head
        while current is not None:
            yield current
            current = current.next

    def is_empty(self) -> bool:
        return self.size == 0

    def push(self, element: type(Node.element)):
        self.head = self.Node(element, self.head)  # created and linked a new node
        self.size += 1  # size is incremented

    def top(self) -> type(Node.element):
        if self.is_empty():
            raise Exception("stack is empty")
        return self.head.element

    def pop(self) -> Node:
        if self.is_empty():
            raise Exception("stack is empty")
        answer = self.head.element
        self.head = self.head.next  # bypass the former node :)
        self.size -= 1  # size decremented
        return answer

    def querry(self):
        """
        for TTD and debuging.
        """
        for k in self:
            print(k.element)


In [4]:
"""
stack = LinkedStack()
for s in "saint_helen_mountain":
    stack.push(s)

stack.querry()"""

'\nstack = LinkedStack()\nfor s in "saint_helen_mountain":\n    stack.push(s)\n\nstack.querry()'

In [5]:

class LinkedQueue:
    """
    FIFO implementation of queue with using linked list as internal storage
    """

    # ---------------------------------------------- Nested Class --------------------------------------------------
    class Node:
        "light weight class for storing liked node"
        __slots__ = 'element', 'next'                                              # streamline memory usage

        def __init__(self, element, next):                                         # initialize node field
            self.element = element                                                 # reference to current element
            self.next = next                                                       # reference to the next node

    # --------------------------------------------- Stack Methods --------------------------------------------------
    def __init__(self):
        self._head = self.Node(None, None)
        self._tail = self.Node(None, None)
        self._size = 0

    def __len__(self) -> int:
        """
        return the number of elements in the linked list
        :return: integer
        """
        return self._size

    def __iter__(self):
        """
        iterate thorough the linked list
        """
        if self.is_empty():
            yield
        current = self._head
        while current is not None:
            yield current
            current = current.next

    def is_empty(self) -> bool:
        """

        :return: bool True if list is empty and False otherwise
        """
        return self._size == 0

    def first(self):
        """
        just Return the first element in the queue
        """
        if self.is_empty():
            raise Exception("empty Error")
        return self._head.element

    def dequeue(self):
        """
        remove and return the first element
        """
        if self.is_empty():
            raise Exception("empty Error")
        var = self._head.element
        self._head = self._head.next
        self._size -= 1
        if self.is_empty():
            self._tail = None
        return var

    def enqueue(self, element):
        newest = self.Node(element, None)
        if self.is_empty():
            self._head = newest
        else:
            self._tail.next = newest
        self._tail = newest
        self._size += 1

    def querry(self):
        """
        for TTD and debuging.
        """
        for k in self:
            print(k.element)


In [6]:
"""
if __name__ == '__main__':
    q = LinkedQueue()
    for i in range(12):
        q.enqueue(i)

    for j in range(4):
        q.dequeue()

    for i in range(10):
        q.enqueue(i)

    q.querry()

    print()
    print("\/\/\/\/\/\/\//new test with adding capacity//\/\/\/\/\/\/\/")
    print()

    for i in "kilimanjaro":
        q.enqueue(i)

    q.querry()

    print()
    print("\/\/\/\/\/\/\//new test with adding capacity//\/\/\/\/\/\/\/")
    print()

    for i in range(25):
        q.dequeue()

    q.querry()"""

'\nif __name__ == \'__main__\':\n    q = LinkedQueue()\n    for i in range(12):\n        q.enqueue(i)\n\n    for j in range(4):\n        q.dequeue()\n\n    for i in range(10):\n        q.enqueue(i)\n\n    q.querry()\n\n    print()\n    print("\\/\\/\\/\\/\\/\\/\\//new test with adding capacity//\\/\\/\\/\\/\\/\\/\\/")\n    print()\n\n    for i in "kilimanjaro":\n        q.enqueue(i)\n\n    q.querry()\n\n    print()\n    print("\\/\\/\\/\\/\\/\\/\\//new test with adding capacity//\\/\\/\\/\\/\\/\\/\\/")\n    print()\n\n    for i in range(25):\n        q.dequeue()\n\n    q.querry()'

In [7]:
from abc import ABCMeta, abstractmethod


class ABCNode(metaclass=ABCMeta):
    @abstractmethod
    def insert(self, key, NodeType):
        pass

    @abstractmethod
    def find(self, key):
        pass

    @abstractmethod
    def minimum(self):
        pass

    @abstractmethod
    def successor(self):
        pass

    @abstractmethod
    def delete(self):
        pass


class BST(object):
    class Node(ABCNode):
        __slots__ = "key", "parent", "left", "right", "size"

        # --------------------------------------------------------------------------------------------------------------

        def __init__(self, parent, key):
            """Create a new leaf with key t."""
            self.key = key
            self.parent = parent
            self.left = None
            self.right = None
            self.size = 1

        # --------------------------------------------------------------------------------------------------------------

        def update_stats(self):
            """Updates this node's size based on its children's sizes."""
            self.size = (0 if self.left is None else self.left.size) + (0 if self.right is None else self.right.size)

        # --------------------------------------------------------------------------------------------------------------

        def insert(self, key, NodeType) -> ABCNode:
            self.size += 1
            if key < self.key:
                if self.left is None:
                    self.left = NodeType(self, key)
                    return self.left
                else:
                    return self.left.insert(key, NodeType)
            else:
                if self.right is None:
                    self.right = NodeType(self, key)
                    return self.right
                else:
                    return self.right.insert(key, NodeType)

        # --------------------------------------------------------------------------------------------------------------

        def find(self, key) -> ABCNode:
            """Return the node for key if it is in this tree, or None otherwise."""
            if key == self.key:
                return self
            elif key < self.key:
                if self.left is None:
                    return None
                else:
                    return self.left.find(key)
            else:
                if self.right is None:
                    return None
                else:
                    return self.right.find(key)

        # --------------------------------------------------------------------------------------------------------------

        def rank(self, key) -> int:
            """Return the number of keys <= key in the subtree rooted at this node."""
            left_size = 0 if self.left is None else self.left.size
            if key == self.key:
                return left_size + 1
            elif key < self.key:
                if self.left is None:
                    return 0
                else:
                    return self.left.rank(key)
            else:
                if self.right is None:
                    return left_size + 1
                else:
                    return self.right.rank(key) + left_size + 1

        def minimum(self) -> ABCNode:
            """Returns the node with the smallest key in the subtree rooted by this node."""
            current = self
            while current.left is not None:
                current = current.left
            return current

        def successor(self) -> ABCNode:
            """
            Returns the node with the smallest key larger than this node's key,
            or None if this has the largest key in the tree.
            """
            if self.right is not None:
                return self.right.minimum()
            current = self
            while current.parent is not None and current.parent.right is current:
                current = current.parent
            return current.parent

        # --------------------------------------------------------------------------------------------------------------

        def delete(self) -> ABCNode:
            """"Delete this node from the tree."""
            if self.left is None or self.right is None:
                if self is self.parent.left:
                    self.parent.left = self.left or self.right
                    if self.parent.left is not None:
                        self.parent.left.parent = self.parent
                else:
                    self.parent.right = self.left or self.right
                    if self.parent.right is not None:
                        self.parent.right.parent = self.parent
                current = self.parent
                while current.key is not None:
                    current.update_stats()
                    current = current.parent
                return self
            else:
                s = self.successor()
                self.key, s.key = s.key, self.key
                return s.delete()

        # ---------------------------------------------check for error--------------------------------------------------

        def check(self, lower_key, higher_key):
            """Checks that the subtree rooted at key is a valid BST and all keys are between (lower_key, higher_key)."""
            if lower_key is not None and self.key <= lower_key:
                raise Exception("BST RI violation")
            if higher_key is not None and self.key >= higher_key:
                raise Exception("BST RI violation")
            if self.left is not None:
                if self.left.parent is not self:
                    raise Exception("BST RI violation")
                self.left.check(lower_key, self.key)
            if self.right is not None:
                if self.right.parent is not self:
                    raise Exception("BST RI violation")
                self.right.check(self.key, higher_key)
            if self.size != 1 + (0 if self.left is None else self.left.size) + (
                    0 if self.right is None else self.right.size):
                raise Exception("BST RI violation")

        def __repr__(self) -> str:
            return "<BST Node, key:" + str(self.key) + ">"

    def __init__(self, NodeType=Node):
        self.root = None
        self.NodeType = NodeType
        self.psroot = self.NodeType(None, None)

    # ------------------------------------------------------------------------------------------------------------------

    def reroot(self):
        self.root = self.psroot.left

    def insert(self, key) -> Node:
        """Insert key into this BST, modifying it in-place."""
        if self.root is None:
            self.psroot.left = self.NodeType(self.psroot, key)
            self.reroot()
            return self.root
        else:
            return self.root.insert(key, self.NodeType)

    # ------------------------------------------------------------------------------------------------------------------

    def find(self, key) -> Node:
        """Return the node for key if is in the tree, or None otherwise."""
        if self.root is None:
            return None
        else:
            return self.root.find(key)

    def rank(self, key) -> int:
        """The number of keys <= key in the tree."""
        if self.root is None:
            return 0
        else:
            return self.root.rank(key)

    def traverse(self, node: Node = None) -> type(Node.key):
        if node is None:
            node = self.root

        print(node.key)
        if node.left is not None:
            self.traverse(node=node.left)
        if node.right is not None:
            self.traverse(node=node.right)

    def delete(self, key) -> Node:
        node = self.find(key)
        if node is None:
            raise Exception("nadari in klid ro")
        deleted = node.delete()
        self.reroot()
        return deleted

    # ------------------------------------------------------------------------------------------------------------------

    def check(self):
        if self.root is not None:
            self.root.check(None, None)  # check in the Node class

    def __str__(self) -> str:
        if self.root is None:
            return '<empty tree>'

        def recurse(node):
            if node is None: return [], 0, 0
            label = str(node.key)
            left_lines, left_pos, left_width = recurse(node.left)
            right_lines, right_pos, right_width = recurse(node.right)
            middle = max(right_pos + left_width - left_pos + 1, len(label), 2)
            pos = left_pos + middle // 2
            width = left_pos + middle + right_width - right_pos
            while len(left_lines) < len(right_lines):
                left_lines.append(' ' * left_width)
            while len(right_lines) < len(left_lines):
                right_lines.append(' ' * right_width)
            if (middle - len(label)) % 2 == 1 and node.parent is not None and \
                            node is node.parent.left and len(label) < middle:
                label += '.'
            label = label.center(middle, '.')
            if label[0] == '.': label = ' ' + label[1:]
            if label[-1] == '.': label = label[:-1] + ' '
            lines = [' ' * left_pos + label + ' ' * (right_width - right_pos),
                     ' ' * left_pos + '/' + ' ' * (middle - 2) +
                     '\\' + ' ' * (right_width - right_pos)] + \
                    [left_line + ' ' * (width - left_width - right_width) + right_line

                     for left_line, right_line in zip(left_lines, right_lines)]
            return lines, pos, width

        return '\n'.join(recurse(self.root)[0])


In [8]:

if __name__ == '__main__':
    test1 = range(0, 100, 10)
    test2 = [31, 41, 59, 26, 53, 58, 97, 93, 23]
    test3 = ["how", "perfect", "I", "am", "hey"]
    list_word = [0, 4, -1, -6, 10, 19, 27, -31]

    bst = BST()
    fp = open("StopWords.txt", '+r')
    for line in fp.readlines():
        key = (line.rstrip('\n'))
        bst.insert(str(key))
    fp.close()
    bst.traverse()
    print("---------------------------------------------------------------------")
    bst2 = BST()
    for w in test3:
        bst2.insert(w)
    print(str(bst2))

about
above
according
across
after
afterwards
again
against
albeit
all
almost
alone
along
already
also
although
always
am
among
amongst
an
and
another
any
anybody
anyhow
anyone
anything
anyway
anywhere
apart
are
around
as
at
av
be
became
because
become
becomes
becoming
been
before
beforehand
behind
being
below
beside
besides
between
beyond
both
but
by
can
cannot
canst
certain
cf
choose
contrariwise
cos
could
cu
day
do
does
doesn't
doing
dost
doth
double
down
dual
during
each
either
else
elsewhere
enough
et
etc
even
ever
every
everybody
everyone
everything
everywhere
except
excepted
excepting
exception
exclude
excluding
exclusive
far
farther
farthest
few
ff
first
for
formerly
forth
forward
from
front
further
furthermore
furthest
get
go
had
halves
hardly
has
hast
hath
have
he
hence
henceforth
her
here
hereabouts
hereafter
hereby
herein
hereto
hereupon
hers
herself
him
himself
hindmost
his
hither
hitherto
how
however
howsoever
i
ie
if
in
inasmuch
inc
include
included
including
indeed
indo

In [9]:


class TST:
    __slots__ = 'size', 'root', '_valid_words'

    # ------------------------------------------------ inner class -----------------------------------------------------

    class Node:
        __slots__ = 'key_char', 'left', 'mid', 'right', 'value'

        def __init__(self, key_char: str):
            self.key_char = key_char
            self.left = None
            self.mid = None
            self.right = None
            self.value = str()

    # -------------------------------------------- end of inner class --------------------------------------------------

    def __init__(self):
        self.root = TST.Node(" ")
        self.size = 0
        self._valid_words = LinkedQueue()

    def __sizeof__(self) -> int:
        return self.size

    def __contains__(self, item: str) -> bool:
        if item is None:
            raise Exception("nothing to be contained!!!")
        return self.__getitem__(item) is not None

    def __getitem__(self, item: str) -> str:
        if item is None:
            raise Exception("call __getitem__ with None argument")
        if len(item) == 0:
            raise Exception("item must have length >= 1")
        x = self.get(self.root, item, 0)
        if x is None:
            return None
        return x.value

    def intable(self, stream: object) -> tuple:
        try:
            integer = int(stream)
            return True, integer
        except Exception as err:
            return False, err

    def get(self, x: Node, item: str, d: int) -> Node:
        """
        return sub-trie corresponding to given key
        """
        if x is None:
            return None
        if len(item) == 0:
            raise Exception("item must have length >= 1")
        char = item[d]
        if char < x.key_char:
            return self.get(x.left, item, d)
        elif char > x.key_char:
            return self.get(x.right, item, d)
        elif d < len(item) - 1:
            return self.get(x.mid, item, d + 1)
        else:
            return x

    def put(self, item: str, value: int) -> None:
        """
        Inserts the key-value pair into the symbol table
        """
        if item is None:
            raise Exception("call __setitem__ with None argument")
        if item not in self:
            self.size += 1
        self.root = self.set(self.root, item, value, 0)

    def set(self, x: Node, item: str, value: int, d: int) -> Node:
        char = item[d]
        if x is None:
            x = TST.Node(char)
        if char < x.key_char:
            x.left = self.set(x.left, item, value, d)
        elif char > x.key_char:
            x.right = self.set(x.right, item, value, d)
        elif d < len(item) - 1:
            x.mid = self.set(x.mid, item, value, d + 1)
        else:
            x.value = value
        return x

    def longestPrefixOf(self, query: str) -> str:
        if query is None:
            raise Exception("call longestPrefixOf() with None argument")
        if len(query) == 0:
            return None
        length = int(0)
        x = self.root
        i = 0
        while x is not None and i < len(query):
            char = query[i]
            if char < x.key_char:
                x = x.left
            elif char > x.key_char:
                x = x.right
            else:
                i += 1
                if x.value is not None:
                    length = i
                x = x.mid

        return query[0:length]          # testing required

    def keys(self) -> LinkedQueue:
        queue = LinkedQueue()
        self.collect(self.root, str(), queue)
        return queue                    # queue is iterable?

    def keysWithPrefix(self, prefix: str) -> LinkedQueue:

        if prefix is None:
            raise Exception("call keysWithPrefix() with None argument")
        queue = LinkedQueue()
        x = self.root
        x = self.get(x, prefix, 0)
        if x is None:
            return queue
        if x.value is not None:
            queue.enqueue(prefix)
        self.collect(x.mid, str(prefix), queue)
        return queue

    def collect(self, x: Node, prefix: str, queue: LinkedQueue) -> None:
        if x is None:
            return None
        self.collect(x.left, prefix, queue)
        if x.value is not None:
            queue.enqueue(str(prefix) + x.key_char)
        self.collect(x.mid, str(prefix) + str(x.key_char), queue)
        prefix = prefix[:-1]
        self.collect(x.right, prefix, queue)

    def keysThatMatch(self, pattern: str) -> LinkedQueue:
        queue = LinkedQueue()
        self.patternMatching(self.root, str(), 0, pattern, queue)
        return queue

    def patternMatching(self, x: Node, prefix: str, i: int, pattern: str, queue: LinkedQueue):
        """
        some kind of collector
        """
        if x is None:
            return
        char = pattern[i]
        if char == '.' or char < x.key_char:
            self.patternMatching(x.left, prefix, i, pattern, queue)
        if char == '.' or char == x.key_char:
            if i == len(pattern) - 1 and x.value is not None:
                queue.enqueue(str(prefix) + str(x.key_char))
            if i < len(pattern) - 1:
                self.patternMatching(x.mid, str(prefix) + str(x.key_char), i + 1, pattern, queue)
                prefix = prefix[:-1]

        if char == '.' or char > x.key_char:
            self.patternMatching(x.right, prefix, i, pattern, queue)
    
    def traverse(self):
        if self.size == 0:
            raise Exception("empty tst can't be traversed")
        try:
            for q in self._valid_words:
                yield q.element
        except Exception as err:
            print(err)

    def validation(self, word_queue: LinkedQueue):
        # iterate through the tst and find the keysthatmatch with stopwords each word
        for wq in word_queue:
            try:
                correct_word = self.keysThatMatch(wq.element)
                for cw in correct_word:
                    if self.intable(self[str(cw.element)])[0]:
                        self._valid_words.enqueue(cw.element)

            except Exception:
                raise

    def get_valid_words(self, word_queue: LinkedQueue) -> LinkedQueue:
        if self.size == 0:
            raise Exception
        self.validation(word_queue)
        return self._valid_words



In [10]:
"""

if __name__ == '__main__':
    tst = TrieST()
    tstStp = TST()

    fileQueue = LinkedQueue()
    fp = open("StopWords.txt", '+r')
    for line in fp.readlines():
        key = (line.rstrip('\n'))
        fileQueue.enqueue(key)
    fp.close()

    i = 0
    for q in fileQueue:
        tstStp.put(str(q.element), i)
        i += 1
    
    for subdir, dirs, files in os.walk("/home/maometto/Documents//Untitled Folder/docs/"):
        for file in files:
            if file.endswith('.txt'):
                fp = open(os.path.join(subdir, file), 'r+', errors='ignore')
                i = 0
                DATA = fp.read().replace('\n', ' ')
                for key in re.findall(r"[\w']+", DATA):   
                    for j in tstStp.get_valid_words(fileQueue):
                        if j == str(key):
                            continue
                    tst.put(str(key), i)
                    i += 1
                    if i > 1000:
                        raise
                fp.close()    

    # print("-------------------------Test keys")
    # for q in tst.keys():
    #     print(str(q.element) + " " + str(tst[q.element]))

    # print("-------------------------Test keysWithPrefix")
    # for q in tst.keysWithPrefix("afte"):
    #     if tst[q.element] is not None:
    #         print(q.element + "\t" + str(tst[q.element]))

    # print("-------------------------Test mid.key_char")
    # print(tst.root.right.right.right.right.right.mid.mid.key_char)

    # print("------------------------- Test keysThatMatch(\"our\"):")
    # q = LinkedQueue()
    # try:
    #     q = tst.keysThatMatch("asb")
    #     for s in q:
    #         print(s.element)
    # except:
    #     print("No such thing")
    
    print("--------------------------Test traverse and correct words")
    tst.get_valid_words(fileQueue)
    for i in tst.traverse():
        print(i)

    
"""

'\n\nif __name__ == \'__main__\':\n    tst = TrieST()\n    tstStp = TST()\n\n    fileQueue = LinkedQueue()\n    fp = open("StopWords.txt", \'+r\')\n    for line in fp.readlines():\n        key = (line.rstrip(\'\n\'))\n        fileQueue.enqueue(key)\n    fp.close()\n\n    i = 0\n    for q in fileQueue:\n        tstStp.put(str(q.element), i)\n        i += 1\n    \n    for subdir, dirs, files in os.walk("/home/maometto/Documents//Untitled Folder/docs/"):\n        for file in files:\n            if file.endswith(\'.txt\'):\n                fp = open(os.path.join(subdir, file), \'r+\', errors=\'ignore\')\n                i = 0\n                DATA = fp.read().replace(\'\n\', \' \')\n                for key in re.findall(r"[\\w\']+", DATA):   \n                    for j in tstStp.get_valid_words(fileQueue):\n                        if j == str(key):\n                            continue\n                    tst.put(str(key), i)\n                    i += 1\n                    if i > 1000:\n

In [26]:


class TrieST:
    R = 256
    __slots__ = 'root', 'number_of_keys', 'valid_words'

    # ------------------------------------------------------------------------------------------------------------------

    class Node:
        __slots__ = 'value', 'next'

        def __init__(self):
            self.value = str()
            self.next = [None] * TrieST.R

    def __init__(self):
        self.root = self.Node()
        self.number_of_keys = 0
        self.valid_words = LinkedQueue()
        

    def __sizeof__(self) -> int:
        return self.number_of_keys

    def __len__(self) -> int:
        return self.__sizeof__()

    def is_empty(self) -> bool:
        return self.__sizeof__() == 0

    def __getitem__(self, key: str) -> str:
        x = self.get(self.root, key, 0)
        if x is None:
            return None
        return str(x.value)

    def __contains__(self, key: str) -> bool:
        return self.__getitem__(key) is not None

    def get(self, x: Node, key: str, d: int) -> Node:
        if x is None:
            return None
        if d == len(key):
            return x
        char = key[d]
        return self.get(x.next[int(ord(char))], key, d + 1)

    def put(self, key: str, value: int):
        if value is None:
            del key
        else:
            self.root = self.set(self.root, key, value, 0)

    def set(self, x: Node, key: str, value: int, d: int) -> Node:
        if x is None:
            x = self.Node()
        if d == len(key):
            if x.value is None:
                self.number_of_keys += 1
            x.value = value
            return x
        char = key[d]
        x.next[int(ord(char))] = self.set(x.next[int(ord(char))], key, value, d + 1)
        return x

    def keys(self) -> LinkedQueue:
        return self.keysWithPrefix("")

    def keysWithPrefix(self, prefix: str) -> LinkedQueue:
        result = LinkedQueue()
        x = self.get(self.root, prefix, 0)
        self.collect(x, str(prefix), result)
        return result

    def collect(self, x: Node, prefix: str, result: LinkedQueue) -> None:
        if x is None:
            return
        if x.value is not None:
            result.enqueue(str(prefix))
        for i in range(self.R):
            prefix += chr(i)
            self.collect(x.next[i], prefix, result)
            prefix = prefix[:-1]

    def keysThatMatch(self, pattern: str) -> LinkedQueue:
        result = LinkedQueue()
        self.patternMatching(self.root, str(), pattern, result)
        return result

    def patternMatching(self, x: Node, prefix: str, pattern: str, result: LinkedQueue) -> None:
        if x is None:
            return
        d = len(prefix)
        if d == len(pattern) and x.value is not None:
            result.enqueue(str(prefix))
        if d == len(pattern):
            return

        char = pattern[d]
        if char == '.':
            for i in range(self.R):
                prefix += str(i)
                self.patternMatching(x.next[i], prefix, pattern, result)
                prefix = prefix[:-1]
        else:
            prefix += str(char)
            self.patternMatching(x.next[int(ord(char))], prefix, pattern, result)
            prefix = prefix[:-1]

    def longestPrefix(self, query: str) -> str:
        length = self.longestPrefixOf(self.root, query, 0, -1)
        if length == -1:
            return None
        else:
            return query[:length]

    def longestPrefixOf(self, x: Node, query: str, d: int, length: int) -> int:
        if x is None:
            return length
        if x.value is not None:
            length = d
        if d == len(query):
            return length
        char = query[d]
        return self.longestPrefixOf(x.next[int(ord(char))], query, d + 1, length)
    
    def traverse(self):
        query = self.keys()
        for q in query:
            if self[q.element] is not "":
                yield(q.element)
    
    def validation(self):
        for v in self.traverse():
            self.valid_words.enqueue(v)
    
    def delete(self, key: str, x: Node=None, d: int=None) -> Node:
        if x is None and d is None:
            self.root = self.delete(key, x=self.root, d=0)
        if x is None:
            return None
        if d == len(key):
            if x.value is not None:
                self.number_of_keys -= 1
            x.value = None
        else:
            char = key[d]
            x.next[int(ord(char))] = self.delete(x.next[int(ord(char))], key, d + 1)

        # remove subtrie rooted at x if it is completely empty
        if x.value is not None:
            return x
        for i in range(self.R):
            if x.next[i] is not None:
                return x
        return None


In [53]:
import os
import re


if __name__ == '__main__':
    trie = TrieST()
    trieStp = TrieST()
    
    fileQueue = LinkedQueue()
    fp = open("StopWords.txt", '+r')
    for line in fp.readlines():
        key = (line.rstrip('\n'))
        fileQueue.enqueue(key)
    fp.close()
    i = 0
    for q in fileQueue:
        trieStp.put(str(q.element), i)
        i += 1
    trieStp.validation()
    
    counter = 0
    for subdir, dirs, files in os.walk("/home/maometto/Documents/Untitled Folder/docs/"):
        for file in files:
            if file.endswith('.txt'):
                fp = open(os.path.join(subdir, file), 'r+', errors='ignore')
                DATA = fp.read().replace('\n', ' ')
                for key in re.findall(r"[\w']+", DATA):   
                    if len(trieStp.keysWithPrefix(key)) == 0:
                        trie.put(str(key), counter)
                        counter += 1
                fp.close()
    trie.validation()
    print(counter)
    

    print("-------------------------Test keys")
    for q in trie.keys():
        print(str(q.element) + " " + str(trieStp[q.element]))

    print("-------------------------Test keysWithPrefix")
    i = (trieStp.keysWithPrefix("rojin"))
    print(len(i))
    for q in trie.keysWithPrefix("year"):
        print(q.element)

    print("-------------------------Test longestPrefixOf")
    print(trie.longestPrefix("moreover"))

    print("-------------------------Test keysThatMatch(\"are\"):")
    try :
        for s in trie.keysThatMatch("afterward"):
            print(s.element)
    except Exception as err:
        print(err)
    
    print("-------------------------Test traverse and correct words")
    for t in trie.traverse():
        print(t)
    print("-------------------------Number of valid words")
    print(len(trie.valid_words))
    
    print("-------------------------stopwords traverse")
    for t in trieStp.traverse():
        print(t)

248982
-------------------------Test keys
 
' None
's None
A None
AB None
ABO None
ABOU None
ABOUT None
AF None
AFT None
AFTE None
AFTER None
AG None
AGA None
AGAI None
AGAIN None
AGAINS None
AGAINST None
AL None
ALL None
ALS None
ALSO None
AM None
AME None
AMER None
AMERI None
AMERIC None
AMERICA None
AMERICAN None
AN None
AND None
ANO None
ANOT None
ANOTH None
ANOTHE None
ANOTHER None
ANY None
AR None
ARE None
ARO None
AROU None
AROUN None
AROUND None
AS None
AT None
Ab None
Abo None
Abou None
About None
Af None
Aft None
Afte None
After None
Ag None
Aga None
Agai None
Again None
Agains None
Against None
Al None
All None
Als None
Also None
Am None
Ame None
Amer None
Ameri None
Americ None
America None
American None
Amo None
Amon None
Among None
An None
And None
Ano None
Anot None
Anoth None
Anothe None
Another None
Any None
Ar None
Are None
Aro None
Arou None
Aroun None
Around None
As None
At None
B None
BA None
BAC None
BACK None
BE None
BEE None
BEEN None
BEI None
BEIN None
BEING No

In [None]:
import tkinter
import os
import re
import tkinter.filedialog

Stack = LinkedStack

global resultText
global files_list
global stopwordsTST
global stopwordsBST
global stopwordsTrie
global words_tree

main_command_line = tkinter.Entry
files_list = list()
tree_type_global = None
main_stack = Stack()
secondary_stack = Stack()
words_tree = None
directory_text_field_global = None
resultText = tkinter.Text()
directory_var = ""

stopwordsTST = TST()
stopwordsBST = BST()
stopwordsTrie = TrieST()
directory_var = ""


def ask_directory(text_label):
    folder = tkinter.filedialog.askdirectory(initialdir='/')
    text_label.delete(0, tkinter.END)
    text_label.insert(0, str(folder))
    return folder


def tab(arg):
    print("tab pressed")
    return 'break'


def write_result(inputStr):
    resultText.config(state=tkinter.NORMAL)
    resultText.insert(tkinter.INSERT, inputStr)
    resultText.config(state=tkinter.DISABLED)


def enter(arg):
    while not secondary_stack.is_empty():
        main_stack.push(secondary_stack.pop())
    command_line_content = main_command_line.get()
    main_stack.push(command_line_content)
    print(command_line_content)
    main_command_line.delete(0, tkinter.END)

    sytax_of_command_line(command_line_content)

    return 'break'


def reset():
    words_tree = None
    resultText.config(state=tkinter.NORMAL)
    resultText.delete('1.0', tkinter.END)
    resultText.config(state=tkinter.DISABLED)
    return 'break'


def callback(sv: tkinter.StringVar, event):
    if len(sv.get()) > 0 and sv.get()[-1] == '':
        if not main_stack.is_empty():
            event.delete(0, tkinter.END)
            secondary_stack.push(main_stack.pop())
            event.insert(0, secondary_stack.top())
        elif not secondary_stack.is_empty():
            event.delete(0, tkinter.END)
            event.insert(0, secondary_stack.top())
        else:
            event.delete(0, tkinter.END)
            event.insert(0, '')
    elif len(sv.get()) > 0 and sv.get()[-1] == '':
        if not secondary_stack.is_empty():
            event.delete(0, tkinter.END)
            main_stack.push(secondary_stack.pop())
            if secondary_stack.is_empty():
                event.insert(0, '')
            else:
                event.insert(0, secondary_stack.top())
        else:
            event.delete(0, tkinter.END)
            event.insert(0, '')


def stopwords_init():
    fileQueue = LinkedQueue()
    fp = open("StopWords.txt", '+r')
    for line in fp.readlines():
        key = (line.rstrip('\n'))
        fileQueue.enqueue(key)
    fp.close()

    # listing stop-words in TST
    # stopwordsTST
    i = 0
    for q in fileQueue:
        stopwordsTST.put(str(q.element), i)
        i += 1

    # listing stop-words in BST
    # stopwordsBST
    for q in fileQueue:
        stopwordsBST.insert(str(q.element))

    # listing stop-words in TrieST
    # stopwordsTrie
    i = 0
    for q in fileQueue:
        stopwordsTrie.put(str(q.element), i)
        i += 1


def build(directory_entered: tkinter.Entry, tree_type):
    global tree_type_global
    tree_type_global = tree_type
    if os.path.isdir(directory_entered.get()):
        if tree_type.get() == 1:
            # TST Tree
            words_tree = TST()
            i = 0
            for subdir, dirs, files in os.walk(directory_entered.get()):
                for file in files:
                    if file.endswith('.txt'):
                        fileQueue = LinkedQueue()
                        fp = open(os.path.join(subdir, file), 'r+', errors='ignore')
                        for line in fp.readlines():
                            key = (line.rstrip('\n'))
                        fileQueue.enqueue(key)
                        fp.close()
                        words_tree.get_valid_words(fileQueue)
                        files_list.append(fileQueue)

                        for q in fileQueue:
                            if stopwordsTST.get(q.element) == None:
                                words_tree.put(str(q.element), i)
                                i += 1

        elif tree_type.get() == 2:
            # BST Search
            words_tree = BST()
            for subdir, dirs, files in os.walk(directory_entered.get()):
                for file in files:
                    if file.endswith('.txt'):
                        fileQueue = LinkedQueue()
                        fp = open(os.path.join(subdir, file), 'r+', errors='ignore')
                        fileQueue = LinkedQueue()
                        for line in fp.readlines():
                            key = (line.rstrip('\n'))
                        fileQueue.enqueue(key)
                        fp.close()

                        files_list.append(fileQueue)
                        for q in fileQueue:
                            if stopwordsTST.get(q.element) == None:
                                words_tree.insert(str(q.element))

        elif tree_type.get() == 3:
            # Trie Search
            i = 0
            words_tree = TrieST()
            for subdir, dirs, files in os.walk(directory_entered.get()):
                for file in files:
                    if file.endswith('.txt'):
                        fileQueue = LinkedQueue()
                        fp = open(os.path.join(subdir, file), 'r+', errors='ignore')
                        fileQueue = LinkedQueue()
                        for line in fp.readlines():
                            key = (line.rstrip('\n'))
                        fileQueue.enqueue(key)
                        fp.close()

                        files_list.append(fileQueue)
                        for q in fileQueue:
                            if stopwordsTST.get(q.element) == None:
                                words_tree.put(str(q.element), i)
                                i += 1

    else:
        tkinter.messagebox.showinfo("Directory", "The Directory Entered doesn't Exist")


def sytax_of_command_line(command):
    # <-- It checks the sytax of input command by automata -->
    command_words = command.split()
    current_state = 0
    max_non_error_state = 15
    i = 0
    while i < len(command_words) and current_state < max_non_error_state:
        if current_state == 0:
            if command_words[0].lower() == 'add':
                current_state = 1
            elif command_words[0].lower() == 'del':
                current_state = 2
            elif command_words[0].lower() == 'update':
                current_state = 3
            elif command_words[0].lower() == 'list':
                current_state = 4
            elif command_words[0].lower() == 'search':
                current_state = 5
            else:
                current_state = 14
        elif current_state == 1:
            first_quote = re.match(r'^"(.*)', command_words[1])
            second_quote = re.match(r'(.*)"$', command_words[-1])
            if first_quote and second_quote:
                if command_words[1] == '\"':
                    del command_words[1]
                else:
                    command_words[1] = command_words[1].replace('\"', '')
                if command_words[-1] == '\"':
                    del command_words[-1]
                else:
                    command_words[-1] = command_words[-1].replace('\"', '')
                name_of_file = ''
                for separate_word in command_words[1:]:
                    name_of_file = name_of_file + separate_word
                file_exist = False
                for subdir, dirs, files in os.walk(directory_text_field_global.get()):
                    for file in files:
                        if name_of_file in file:
                            file_exist = True
                if [name_of_file_added for name_of_file_added in files_list if
                    name_of_file_added.documentName == name_of_file].__len__() > 0:
                    resultText.config(state=tkinter.NORMAL)
                    resultText.insert(tkinter.INSERT, 'Error : Document already Exists!!!\n---------------\n')
                    resultText.config(state=tkinter.DISABLED)
                elif not file_exist:
                    resultText.config(state=tkinter.NORMAL)
                    resultText.insert(tkinter.INSERT, 'Error : Document not Found!!!\n---------------\n')
                    resultText.config(state=tkinter.DISABLED)
                else:
                    build(directory_text_field_global, tree_type_global)

                    resultText.config(state=tkinter.NORMAL)
                    resultText.insert(tkinter.INSERT, 'File' + name_of_file + 'Added\n---------------\n')
                    resultText.config(state=tkinter.DISABLED)
            else:
                resultText.config(state=tkinter.NORMAL)
                resultText.insert(tkinter.INSERT, 'Error Happend')
                resultText.config(state=tkinter.DISABLED)
            return True
        elif current_state == 2:
            first_quote = re.match(r'^"(.*)', command_words[1])
            second_quote = re.match(r'(.*)"$', command_words[-1])
            if first_quote and second_quote:
                if command_words[1] == '\"':
                    del command_words[1]
                else:
                    command_words[1] = command_words[1].replace('\"', '')
                if command_words[-1] == '\"':
                    del command_words[-1]
                else:
                    command_words[-1] = command_words[-1].replace('\"', '')
                name_of_file = ''
                for separate_word in command_words[1:]:
                    name_of_file = name_of_file + separate_word
                file_name_found = False
                files_to_delete = [name_of_file_added for name_of_file_added in files_list if
                                   name_of_file_added.documentName == name_of_file]
                for file_going_to_delete in files_to_delete:
                    file_name_found = True
                    file_going_to_delete.removeAll()
                    files_list.remove(file_going_to_delete)
                    del file_going_to_delete
                    resultText.config(state=tkinter.NORMAL)
                    resultText.insert(tkinter.INSERT, 'File ' + name_of_file + ' Deleted\n---------------\n')
                    resultText.config(state=tkinter.DISABLED)
                if not file_name_found:
                    write_result(resultText.insert(tkinter.INSERT, 'Error : Document not Found!!!\n---------------\n'))
            else:
                resultText.config(state=tkinter.NORMAL)
                resultText.insert(tkinter.INSERT, 'Error Happend\n---------------\n')
                resultText.config(state=tkinter.DISABLED)

            return True
        elif current_state == 3:
            first_quote = re.match(r'^"(.*)', command_words[1])
            second_quote = re.match(r'(.*)"$', command_words[-1])
            if first_quote and second_quote:
                if not first_quote.group(1) == '\"':
                    command_words[1] = command_words[1].replace('\"', '')
                if not second_quote.group(1) == '\"':
                    command_words[-1] = command_words[-1].replace('\"', '')
            else:
                resultText.config(state=tkinter.NORMAL)
                resultText.insert(tkinter.INSERT, 'Error Happend\n---------------\n')
                resultText.config(state=tkinter.DISABLED)
            return True
        elif current_state == 4:
            if command_words[1] == '-w':
                current_state = 9
            elif command_words[1] == '-l':
                current_state = 10
            elif command_words[1] == '-f':
                current_state = 11
        elif current_state == 5:
            if command_words[1] == '-s':
                current_state = 12
            elif command_words[1] == '-w':
                current_state = 13
        elif current_state == 6:
            return True
        elif current_state == 7:
            return True
        elif current_state == 8:
            return True
        elif current_state == 9:
            write_result(words_tree.traverse_words_documents())
            return True
        elif current_state == 10:
            for file in files_list:
                write_result(file.documentName + ' ')
            write_result('\nNumber of listed Docs = ' + files_list.__len__().__str__() + '\n---------------\n')
            return True
        elif current_state == 11:
            number_of_files = 0
            for subdir, dirs, files in os.walk(directory_text_field_global.get()):
                for file in files:
                    if file.endswith('.txt'):
                        write_result(file[:-4] + ' ')
                        number_of_files += 1
            write_result('\nNumber of all Docs = ' + number_of_files.__str__() + '\n---------------\n')
            return True

        elif current_state == 12:
            first_quote = re.match(r'^"(.*)', command_words[2])
            print('1')
            second_quote = re.match(r'(.*)"$', command_words[-1])
            print('2')
            if first_quote and second_quote:
                print('3')
                if not first_quote.group(1) == '\"':
                    command_words[2] = command_words[2].replace('\"', '')
                print('4')
                if not second_quote.group(1) == '\"':
                    command_words[-1] = command_words[-1].replace('\"', '')
                print('5')
            else:
                resultText.config(state=tkinter.NORMAL)
                resultText.insert(tkinter.INSERT, 'Error Happend\n---------------\n')
                resultText.config(state=tkinter.DISABLED)
            if command_words[-1] is command_words[2]:
                if not words_tree.get(command_words[-1]):
                    write_result('\nAny word found !!!\n---------------\n')
                else:
                    write_result(words_tree.get(command_words[-1]).refrence.getAll())
            current_state = 20  # <-- This live has to change -->
            return True
        elif current_state == 13:
            first_quote = re.match(r'^"(.*)', command_words[2])
            second_quote = re.match(r'(.*)"$', command_words[-1])
            if first_quote and second_quote:
                if not first_quote.group(1) == '\"':
                    command_words[2] = command_words[2].replace('\"', '')
                if not second_quote.group(1) == '\"':
                    command_words[-1] = command_words[-1].replace('\"', '')
            else:
                resultText.config(state=tkinter.NORMAL)
                resultText.insert(tkinter.INSERT, 'Error Happend')
                resultText.config(state=tkinter.DISABLED)
            if command_words[-1] is command_words[2]:
                if not words_tree.get(command_words[-1]):
                    write_result('\nAny word found !!!\n')
                else:
                    write_result(words_tree.get(command_words[-1]).refrence.getAll())
            current_state = 20  # <-- This live has to change -->
            return True
        else:
            resultText.config(state=tkinter.NORMAL)
            resultText.insert(tkinter.INSERT, 'Error : Unkown Command\n')
            resultText.config(state=tkinter.DISABLED)
            return True
    return True


if __name__ == '__main__':

    directory_var = ""
    root = tkinter.Tk()
    frame = tkinter.Frame(root)
    root.resizable(width=False, height=False)
    root.geometry('1200x1500')
    frame.pack()

    directory_frame = tkinter.Frame(root, height=1200, width=1500)
    directory_frame.pack(side=tkinter.TOP)

    directory_label = tkinter.Label(directory_frame, font=("Courier", 20), text="Please Enter the Directory: ")
    directory_label.pack(side=tkinter.TOP)

    directory_text_field = tkinter.Entry(directory_frame, font=("Courier", 20), width=50, textvariable=directory_var)
    directory_text_field.pack(side=tkinter.LEFT)
    directory_text_field_global = directory_text_field

    brows_button = tkinter.Button(directory_frame, font=("Courier", 20), text="Browse",
                                  command=lambda: ask_directory(directory_text_field))
    brows_button.pack(side=tkinter.RIGHT)

    result_frame = tkinter.LabelFrame(root, font=("Courier", 20), text='Result:')
    result_frame.pack()

    text = tkinter.Text(result_frame, font=("Courier", 20), height=40, width=70)
    scroll = tkinter.Scrollbar(result_frame, command=text.yview)
    text.pack()

    text.configure(yscrollcommand=scroll.set)
    text.config(state=tkinter.DISABLED)
    resultText = text

    search_options = tkinter.LabelFrame(root, font=("Courier", 20), text='choose Data Structure:')
    search_options.pack()

    var = tkinter.IntVar()

    rad1 = tkinter.Radiobutton(search_options, font=("Courier", 20), text="TST", variable=var, value=1)
    rad1.pack(side=tkinter.LEFT)
    rad1.invoke()

    rad2 = tkinter.Radiobutton(search_options, font=("Courier", 20), text="BST", variable=var, value=2)
    rad2.pack(side=tkinter.LEFT)

    rad3 = tkinter.Radiobutton(search_options, font=("Courier", 20), text="Trie", variable=var, value=3)
    rad3.pack(side=tkinter.LEFT)

    command_line_frame = tkinter.LabelFrame(root, font=("Courier", 20), text='Command Line : ')
    command_line_frame.pack()

    sv = tkinter.StringVar()
    command_line = tkinter.Entry(command_line_frame, font=("Courier", 20), width=40, textvariable=sv)
    sv.trace("w", lambda name, index, mode, sv=sv: callback(sv, command_line))
    command_line.bind('<Tab>', tab)
    command_line.bind('<Return>', enter)
    command_line.pack(side=tkinter.RIGHT)
    main_command_line = command_line

    buttons_frame = tkinter.Frame(root)
    buttons_frame.pack()

    build_button = tkinter.Button(buttons_frame, font=("Courier", 20), text="Build",
                                  command=lambda: build(directory_text_field, var))
    build_button.pack(side=tkinter.LEFT)

    build_button = tkinter.Button(buttons_frame, font=("Courier", 20), text="Reset", command=lambda: reset())
    build_button.pack(side=tkinter.LEFT)

    build_button = tkinter.Button(buttons_frame, font=("Courier", 20), text="Exit", command=lambda: root.destroy())
    build_button.pack(side=tkinter.RIGHT)

    build_button = tkinter.Button(buttons_frame, font=("Courier", 20), text="Help",
                                  command=lambda: ask_directory(directory_text_field))
    build_button.pack(side=tkinter.RIGHT)

    copy_right_frame = tkinter.Frame(root)
    copy_right_frame.pack(side=tkinter.BOTTOM)

    copy_right_lable1 = tkinter.Label(copy_right_frame, font=("Courier", 20), text="powerd by ")
    copy_right_lable2 = tkinter.Label(copy_right_frame, font=("Courier", 30), text="Mohammad Hossein Forouhesh Tehrani")

    copy_right_lable1.pack(side=tkinter.TOP)
    copy_right_lable2.pack(side=tkinter.BOTTOM)

    stopwordsTST = TrieST()
    fileQueue = LinkedQueue()
    fp = open("StopWords.txt", '+r')
    for line in fp.readlines():
        key = (line.rstrip('\n'))
        fileQueue.enqueue(key)
    fp.close()
    i = 0
    for q in fileQueue:
        stopwordsTrie.put(str(q.element), i)
        i += 1
    # s = stopwordsTST.get('about').refrence.getAll()
    # write_result(s)
    # write_result(stopwordsTST.traverse())

    root.mainloop()
