# Compiler Construction Project

# Scanner Program Code

# In Scanner we are taking user input from read.py file

In [2]:
import re
import pandas as pd
from IPython.display import display, HTML

class Token:
    def __init__(self, type, value, position):
        self.type = type
        self.value = value
        self.position = position

    def __repr__(self):
        return f'({self.type}, {self.value})'

class TokenType:
    Identifier = 'Identifier'
    Number = 'Number'
    Operator = 'Operator'
    Separator = 'Separator'
    Keyword = 'Keyword'
    Comment = 'Comment'

class Scanner:
    identifier_regex = re.compile(r'^[a-zA-Z_]\w*$')
    number_regex = re.compile(r'^\d+$')
    operators = ['==', '<=', '>=', '!=', '=', '<', '>']
    separators = ['()', '();', '(', ')']
    keywords = ['if', 'else', 'while', 'int', 'string']

    def __init__(self):
        self.tokens = []
        self.code_buffer = ''

    def display_buffer(self):
        html_buffer = '<table style="width:100%; border: 1px solid black;">'
        html_buffer += '<tr><th style="border: 1px solid black; text-align: left;">Token Type</th>'
        html_buffer += '<th style="border: 1px solid black; text-align: left;">Value</th>'
        html_buffer += '<th style="border: 1px solid black; text-align: left;">Memory Address</th></tr>'

        for token in self.tokens:
            type_style = 'color: green;' if token.type == TokenType.Identifier else 'color: red;'
            value_style = 'color: blue;' if token.type == TokenType.Number else 'color: black;'
            address_style = 'color: purple;'

            html_buffer += f'<tr><td style="border: 1px solid black; {type_style}; text-align: left;">{token.type}</td>'
            html_buffer += f'<td style="border: 1px solid black; {value_style}; text-align: left;">{token.value}</td>'
            html_buffer += f'<td style="border: 1px solid black; {address_style}; text-align: left;">{hex(id(token))}</td></tr>'

        html_buffer += '</table>'
        return HTML(html_buffer)

    def tokenize(self, line):
        self.code_buffer += line
        matches = re.finditer(r'([a-zA-Z_]\w*|\d+|==|<=|>=|!=|[<>]=|[();]+|\/\/.*)', line)
        current_position = 0

        for match in matches:
            word = match.group()
            if word.startswith('//'):
                self.tokens.append(Token(TokenType.Comment, word[2:], current_position))
            else:
                self.tokens.append(self.get_token(word, current_position))
            current_position += len(word)

    def get_token(self, word, position):
        if self.identifier_regex.match(word):
            return Token(TokenType.Identifier, word, position)
        elif self.number_regex.match(word):
            return Token(TokenType.Number, word, position)
        elif word in self.operators:
            return Token(TokenType.Operator, word, position)
        elif word in self.separators:
            return Token(TokenType.Separator, word, position)
        elif word in self.keywords:
            return Token(TokenType.Keyword, word, position)
        else:
            print(f'Warning: Unknown token - {word}')
            return Token(TokenType.Comment, f'Unknown token - {word}', position)

def display_token_table(tokens):
    data = {'Index': range(len(tokens)),
            'Token Type': [token.type for token in tokens],
            'Value': [token.value if hasattr(token, "value") else "" for token in tokens]}
    df = pd.DataFrame(data)
    df_html = df.style.set_table_styles([
        {'selector': 'th', 'props': [('background-color', 'lightgrey'), ('border', '1px solid black'), ('text-align', 'left')]},
        {'selector': 'td', 'props': [('border', '1px solid black'), ('text-align', 'left')]},
    ]).render()
    return HTML(df_html)

if __name__ == '__main__':
    file_path = 'read.py'  # Modify the file path accordingly

    with open(file_path, 'r') as run_file:
        print("\nCode In read.py:")
        code_in_run_html = f'<table style="width:100%; border: 1px solid black;"><tr><th style="border: 1px solid black; text-align: left;">Code In read.py</th></tr><tr><td style="border: 1px solid black; white-space: pre-wrap; text-align: left;">{run_file.read()}</td></tr></table>'
        display(HTML(code_in_run_html))

    scanner = Scanner()

    with open(file_path, 'r') as file:
        for line in file:
            scanner.tokenize(line)

    display(HTML("\nBuffer:"))
    display(scanner.display_buffer())

    display(HTML("\nToken Table:"))
    display(display_token_table(scanner.tokens))


Code In read.py:


Code In read.py
"def calculate_expression(expression):  try:  result = eval(expression)  return result  except Exception as e:  print(f""Error: {e}"")  return None if __name__ == ""__main__"":  print(""Simple Calculator using BODMAS rule"")  while True:  expression = input(""Enter a mathematical expression (or 'exit' to quit): "")  if expression.lower() == 'exit':  break  result = calculate_expression(expression)  if result is not None:  print(f""Result: {result}"")  else:  print(""Invalid expression. Please enter a valid mathematical expression."")"


Token Type,Value,Memory Address
Identifier,def,0x1fa0ed49e20
Identifier,calculate_expression,0x1fa0ed49e50
Separator,(,0x1fa0ed49eb0
Identifier,expression,0x1fa0ed49dc0
Separator,),0x1fa0ed49f40
Identifier,try,0x1fa0ed49fa0
Identifier,result,0x1fa0ed49fd0
Identifier,eval,0x1fa0ed41790
Separator,(,0x1fa0ed76370
Identifier,expression,0x1fa0ed76460


Unnamed: 0,Index,Token Type,Value
0,0,Identifier,def
1,1,Identifier,calculate_expression
2,2,Separator,(
3,3,Identifier,expression
4,4,Separator,)
5,5,Identifier,try
6,6,Identifier,result
7,7,Identifier,eval
8,8,Separator,(
9,9,Identifier,expression



# Parser Program Code

# 1): Top down Parser

# a): Recursive Parser (Top down Parser)

# Equation For Input i+(i+i)*i

In [7]:
import pandas as pd

grammar = ""
pt = 0
data = []

def E():
    global pt
    data.append([grammar[pt:], "E -> T E'"])
    if T():
        if Edash():
            return True
        else:
            return False
    else:
        return False

def Edash():
    global pt
    if pt < len(grammar) and grammar[pt] in {'+', '-'}:
        data.append([grammar[pt:], f"E' -> {grammar[pt]} T E'"])
        pt += 1
        if T():
            if Edash():
                return True
            else:
                return False
        else:
            return False
    else:
        data.append([grammar[pt:], "E' -> $"])
        return True

def T():
    global pt
    data.append([grammar[pt:], "T -> F T'"])
    if F():
        if Tdash():
            return True
        else:
            return False
    else:
        return False

def Tdash():
    global pt
    if pt < len(grammar) and grammar[pt] in {'*', '/'}:
        data.append([grammar[pt:], f"T' -> {grammar[pt]} F T'"])
        pt += 1
        if F():
            if Tdash():
                return True
            else:
                return False
        else:
            return False
    else:
        data.append([grammar[pt:], "T' -> $"])
        return True

def F():
    global pt
    if pt < len(grammar) and (grammar[pt] == '(' or grammar[pt].isalnum()):
        if grammar[pt] == '(':
            data.append([grammar[pt:], "F -> (E)"])
            pt += 1
            if E():
                if pt < len(grammar) and grammar[pt] == ')':
                    pt += 1
                    return True
                else:
                    return False
            else:
                return False
        elif grammar[pt].isalpha():
            pt += 1
            data.append([grammar[pt-1:], f"F -> {grammar[pt-1]}"])
            return True
        else:  # Handling digits
            pt += 1
            data.append([grammar[pt-1:], f"F -> {grammar[pt-1]}"])
            return True
    else:
        return False

def main():
    global pt, grammar, data
    grammar = input("Enter an arithmetic expression: ")
    pt = 0
    data = []
    print("\nInput\t\tAction")
    if E() and pt == len(grammar):
        df = pd.DataFrame(data, columns=["Input", "Action"])
        display(df)
        print('\nString is successfully parsed')
        return 0
    else:
        print("Error in parsing String")
        return 1

if __name__ == "__main__":
    main()

Enter an arithmetic expression: i+(i+i)*i

Input		Action


Unnamed: 0,Input,Action
0,i+(i+i)*i,E -> T E'
1,i+(i+i)*i,T -> F T'
2,i+(i+i)*i,F -> i
3,+(i+i)*i,T' -> $
4,+(i+i)*i,E' -> + T E'
5,(i+i)*i,T -> F T'
6,(i+i)*i,F -> (E)
7,i+i)*i,E -> T E'
8,i+i)*i,T -> F T'
9,i+i)*i,F -> i



String is successfully parsed


# b): Non-Recursive Parser (Top down Parser)

# Grammer Input S -> AB, A -> aA | ε, B -> bB | ε

In [10]:
class LL1Parser:
    def __init__(self, grammar):
        self.grammar = grammar
        self.first_sets = self.calculate_first_sets()
        self.follow_sets = self.calculate_follow_sets()
        self.parsing_table = self.build_parsing_table()

    def calculate_first_sets(self):
        first_sets = {}
        for non_terminal in self.grammar:
            first_sets[non_terminal] = set()

        # Calculate First sets
        for non_terminal in self.grammar:
            self.calculate_first_set(non_terminal, first_sets)

        return first_sets

    def calculate_first_set(self, symbol, first_sets):
        if symbol in self.grammar:
            for production in self.grammar[symbol]:
                if production:  # Check if the production is not an empty string
                    first_symbol = production[0]
                    if first_symbol.isalpha() and first_symbol.isupper():
                        self.calculate_first_set(first_symbol, first_sets)
                        first_sets[symbol] |= first_sets[first_symbol]
                    else:
                        first_sets[symbol].add(first_symbol)

    def calculate_follow_sets(self):
        follow_sets = {non_terminal: set() for non_terminal in self.grammar}
        start_symbol = list(self.grammar.keys())[0]
        follow_sets[start_symbol].add('$')

        # Calculate Follow sets
        for non_terminal in self.grammar:
            self.calculate_follow_set(non_terminal, follow_sets)

        return follow_sets

    def calculate_follow_set(self, symbol, follow_sets):
        for non_terminal in self.grammar:
            for production in self.grammar[non_terminal]:
                if symbol in production:
                    index = production.index(symbol)
                    if index < len(production) - 1:
                        follow_symbol = production[index + 1]
                        if follow_symbol.isalpha() and follow_symbol.isupper():
                            follow_sets[symbol] |= self.first_sets[follow_symbol]
                            if '' in self.first_sets[follow_symbol]:
                                follow_sets[symbol] -= {''}
                                follow_sets[symbol] |= follow_sets[non_terminal]
                        else:
                            follow_sets[symbol].add(follow_symbol)
                    elif non_terminal != symbol:
                        follow_sets[symbol] |= follow_sets[non_terminal]

    def build_parsing_table(self):
        parsing_table = {}

        for non_terminal in self.grammar:
            for production in self.grammar[non_terminal]:
                first_set = self.calculate_production_first_set(production)
                for terminal in first_set:
                    if terminal != '':
                        parsing_table[(non_terminal, terminal)] = production

                if '' in first_set or (len(first_set) == 0 and production == ['']):
                    for follow_terminal in self.follow_sets[non_terminal]:
                        parsing_table[(non_terminal, follow_terminal)] = production

        return parsing_table

    def calculate_production_first_set(self, production):
        first_set = set()
        for symbol in production:
            if symbol.isalpha() and symbol.isupper():
                first_set |= self.first_sets[symbol]
                if '' not in self.first_sets[symbol]:
                    break
            else:
                first_set.add(symbol)
                break
        return first_set

    def parse(self, input_string):
        stack = ['$']
        input_string += '$'
        input_index = 0
        error = False

        while stack[-1] != '$':
            top_of_stack = stack[-1]

            if top_of_stack in self.grammar:
                if (top_of_stack, input_string[input_index]) in self.parsing_table:
                    production = self.parsing_table[(top_of_stack, input_string[input_index])]
                    stack.pop()
                    if production != ['']:
                        stack.extend(reversed(production))
                else:
                    error = True
                    break
            elif top_of_stack == input_string[input_index]:
                stack.pop()
                input_index += 1
            else:
                error = True
                break

        return not error and input_index == len(input_string) - 1


if __name__ == "__main__":
    # Input Grammar
    grammar = {}
    while True:
        non_terminal = input("Enter a non-terminal (or 'exit' to finish): ").strip()
        if non_terminal.lower() == 'exit':
            break
        productions = input(f"Enter productions for {non_terminal} separated by '|': ").split('|')
        grammar[non_terminal] = [prod.strip() for prod in productions]

    # Input String
    input_string = input("Enter the input string to test: ").strip()

    # LL(1) Parser
    ll1_parser = LL1Parser(grammar)
    result = ll1_parser.parse(input_string)

    # Output Result
    if result:
        print(f'The input string "{input_string}" is accepted by the LL(1) grammar.')
    else:
        print(f'The input string "{input_string}" is not accepted by the LL(1) grammar.')

Enter a non-terminal (or 'exit' to finish): s
Enter productions for s separated by '|': a
Enter a non-terminal (or 'exit' to finish): a
Enter productions for a separated by '|': exit
Enter a non-terminal (or 'exit' to finish): s
Enter productions for s separated by '|': exit
Enter a non-terminal (or 'exit' to finish): exit
Enter the input string to test: s
The input string "s" is not accepted by the LL(1) grammar.
