In [18]:
""" XML to Tree """

class XMLElementType:
    """Integer tags identifying the kind of token stored in an XMLElement."""

    # Values are consecutive integers starting at 0, in the order listed.
    (
        ELEMENT_UNKNOWN,      # token kind has not been determined yet
        ELEMENT_TEXT,         # character data found between tags
        ELEMENT_OPENING_TAG,  # "<name>"
        ELEMENT_CLOSING_TAG,  # "</name>"
    ) = range(4)

class XMLElement:
    """Mutable record describing one token: its name/text and its kind."""

    def __init__(self):
        # A fresh element has an empty name and the "unknown" kind until
        # something fills it in.
        self.element_node_name, self.element_type = (
            "",
            XMLElementType.ELEMENT_UNKNOWN,
        )

class Node:
    """Tree node holding a label and an ordered list of child nodes."""

    def __init__(self, val):
        # Every node gets its own, initially empty, list of children.
        self.node_name, self.children = val, list()

class XMLTokenizer:
    """Splits a simple XML-like string into text, opening-tag and closing-tag tokens.

    Everything between "<" and ">" is taken verbatim as the tag name (a
    leading "/" marks a closing tag).  Text that is not followed by another
    "<" is never reported.
    """

    def __init__(self, xml_string):
        self.xml = xml_string
        # Offset of the first character that has not been tokenized yet.
        self.index = 0

    def get_next_element(self, element):
        """Read the next token into ``element``.

        Args:
            element: object whose ``element_node_name`` and ``element_type``
                attributes are overwritten with the token text / tag name and
                the matching XMLElementType value.

        Returns:
            True if a token was stored in ``element``.  False when no further
            "<" exists in the input, or when the next "<" is never closed by
            a ">" (``element`` is left untouched in both cases).
        """
        tag_start = self.xml.find("<", self.index)
        if tag_start == -1:
            return False

        # Non-blank characters before the next tag form a text token; the tag
        # itself is left for the following call.
        text = self.xml[self.index:tag_start].strip()
        if text:
            element.element_node_name = text
            element.element_type = XMLElementType.ELEMENT_TEXT
            self.index = tag_start
            return True

        tag_end = self.xml.find(">", tag_start)
        if tag_end == -1:
            # Unterminated tag: previously find()'s -1 was used as a slice
            # bound and reset self.index to 0, so callers looped forever (or
            # hit IndexError for a trailing "<").  Treat it as end of input.
            self.index = len(self.xml)
            return False

        if self.xml.startswith("/", tag_start + 1):
            element.element_node_name = self.xml[tag_start + 2:tag_end]
            element.element_type = XMLElementType.ELEMENT_CLOSING_TAG
        else:
            element.element_node_name = self.xml[tag_start + 1:tag_end]
            element.element_type = XMLElementType.ELEMENT_OPENING_TAG

        self.index = tag_end + 1
        return True

def create_xml_tree(xml_string):
    """Build a tree of Node objects from a simple XML-like string.

    The first token produced by XMLTokenizer becomes the root.  Each later
    opening tag or text token becomes a child of the innermost element that
    is still open; a closing tag closes the innermost open element (its name
    is not compared with the opening tag).

    Parsing stops as soon as the root element has been closed, so anything
    that follows it is ignored instead of raising IndexError on an empty
    stack.

    Args:
        xml_string: the markup to parse.

    Returns:
        The root Node, or None if the tokenizer finds no token at all.
    """
    tok = XMLTokenizer(xml_string)
    element = XMLElement()

    if not tok.get_next_element(element):
        return None

    root = Node(element.element_node_name)
    # Stack of currently open elements; the top is the parent of new nodes.
    stack = [root]

    # `stack` is checked first so that no further token is consumed once the
    # root has been closed.
    while stack and tok.get_next_element(element):
        kind = element.element_type

        if kind == XMLElementType.ELEMENT_CLOSING_TAG:
            stack.pop()
        elif kind in (XMLElementType.ELEMENT_OPENING_TAG,
                      XMLElementType.ELEMENT_TEXT):
            node = Node(element.element_node_name)
            stack[-1].children.append(node)
            # Only tags can contain further nodes; text tokens are leaves.
            if kind == XMLElementType.ELEMENT_OPENING_TAG:
                stack.append(node)

    return root

def print_tree(root, depth):
    """Print the subtree under ``root``, one node per line, pre-order.

    Each line is the node's ``node_name`` preceded by ``depth`` tab
    characters; children are printed one level deeper.  A ``root`` of None
    prints nothing.
    """
    if root is None:
        return

    # Indentation and label go out as a single line.
    print("\t" * depth, root.node_name, sep="")

    for child in root.children:
        print_tree(child, depth + 1)

if __name__ == "__main__":
    # Demo document.  Adjacent string literals are joined at compile time;
    # the indentation below only mirrors the nesting of the markup.
    xml_string = (
        "<html>"
          "<body>"
            "<div>"
              "<h1>CodeRust</h1>"
              "<a>http://coderust.com</a> "
            "</div>"
            "<div>"
              "<h2>Chapter 1</h2>"
            "</div>"
            "<div>"
              "<h3>Chapter 2</h3>"
              "<h4>Chapter 2.1</h4>"
            "</div>"
          "</body>"
        "</html>"
    )

    # Parse the document and dump the resulting tree, tab-indented.
    head = create_xml_tree(xml_string)
    print_tree(head, 0)
    print()


html
	body
		div
			h1
				CodeRust
			a
				http://coderust.com
		div
			h2
				Chapter 1
		div
			h3
				Chapter 2
			h4
				Chapter 2.1



In [19]:
import re

def regx_match_rec(text, pattern):
  """Return True if ``pattern`` matches the whole of ``text``.

  Supported pattern syntax: a literal character matches itself, ``.``
  matches any single character, and ``x*`` (a character or ``.`` followed
  by ``*``) matches zero or more repetitions of ``x``.

  Matching works on offsets into the two strings rather than on slices, and
  the answer for every (text offset, pattern offset) pair is memoized, so
  patterns with several stars no longer revisit the same suffixes an
  exponential number of times.  ``None`` is treated as an empty string.

  Args:
    text: string to test.
    pattern: pattern string in the syntax above.

  Returns:
    bool: True only if the entire text is consumed by the entire pattern.
  """
  text = text or ""
  pattern = pattern or ""
  text_len = len(text)
  pattern_len = len(pattern)

  # (text offset, pattern offset) -> does text[t:] match pattern[p:]?
  memo = {}

  def match_from(t, p):
    key = (t, p)
    if key in memo:
      return memo[key]

    if p == pattern_len:
      # Pattern exhausted: success only if the text is exhausted as well.
      result = t == text_len
    elif p + 1 < pattern_len and pattern[p + 1] == '*':
      # "x*": try the rest of the pattern after consuming 0, 1, 2, ...
      # characters that match x, stopping at the first one that does not.
      result = False
      for k in range(t, text_len + 1):
        if match_from(k, p + 2):
          result = True
          break
        if k == text_len or pattern[p] not in ('.', text[k]):
          break
    else:
      # Plain character or '.': must match exactly one text character.
      result = (t < text_len
                and pattern[p] in ('.', text[t])
                and match_from(t + 1, p + 1))

    memo[key] = result
    return result

  return match_from(0, 0)

def regx_match(text, pattern):
  """Report whether ``pattern`` matches ``text``; delegates to regx_match_rec."""
  is_match = regx_match_rec(text, pattern)
  return is_match

def test(s, p):
  """Run regx_match on (s, p), print the outcome and check it against ``re``.

  Prints ``s p <ours> <reference>`` followed by "Matched!" or
  "Did not match!" (according to our result), then asserts that our result
  equals the reference result.
  """
  output = regx_match(s, p)

  # Reference answer: the compiled pattern must match at the start of s and
  # the match must end exactly at the end of s.
  m = re.compile(p).match(s)
  output3 = m is not None and m.end() == len(s)

  verdict = "Matched!" if output == True else "Did not match!"
  print(s, p, output, output3, end = " ")
  print(verdict)

  assert output == output3

def main():
  """Check regx_match against ``re`` on a fixed list of (text, pattern) pairs."""
  cases = (
    ("abb", "abb"),
    ("abbc", "ab*c"),
    ("abb", "ab*"),
    ("", "b*c"),
    ("a", "ab*"),
    ("aaabbbbbcccd", "a*bbb*c*d"),
    ("a", "ab*"),
    ("aaabbbbbcccd", "a*bbb*.*d"),
    ("aaabbbbbcccde", "a*bbb*.*d"),
    ("b", "b*"),
    ("aabcd", "aa*d"),
    ("", "aa*d"),
    ("", "a*"),
    ("b", "b*c"),
  )
  # Cases run in the order listed; test() asserts on the first disagreement.
  for text, pattern in cases:
    test(text, pattern)

# Run the regex checks defined above when this cell executes.
main()

abb abb True True Matched!
abbc ab*c True True Matched!
abb ab* True True Matched!
 b*c False False Did not match!
a ab* True True Matched!
aaabbbbbcccd a*bbb*c*d True True Matched!
a ab* True True Matched!
aaabbbbbcccd a*bbb*.*d True True Matched!
aaabbbbbcccde a*bbb*.*d False False Did not match!
b b* True True Matched!
aabcd aa*d False False Did not match!
 aa*d False False Did not match!
 a* True True Matched!
b b*c False False Did not match!


In [29]:
import re
# Scratch check of the `re` API: print the end offset of the match of
# pattern "c" against the string "c", or "None" if there is no match.
m = re.match("c", "c")
if m:
    print(m.end())
else:
    print("None")

1
