In [3]:
import re

class LexicalAnalyzer:
    def __init__(self):
        self.tokens = []
        self.lines = []
        self.syntax_errors = []
        # Extended token patterns to include operators and punctuation
        self.token_pattern = r'(\bif\b|\belse\b|\bfor\b|\bwhile\b|\bint\b|\bfloat\b)|(\b\w+\b)|([+\-*\/=<>!]+)|(==|!=|<=|>=|:=)|([{}();,])'

    def analyze_text(self, text):
        self.lines = text.splitlines()
        for line_number, line in enumerate(self.lines, start=1):
            self.analyze_line(line, line_number)
        self.check_syntax()

    def analyze_line(self, line, line_number):
        matches = re.findall(self.token_pattern, line)
        for match in matches:
            token = next(t for t in match if t)
            self.tokens.append((token, line_number))

    def classify_token(self, token):
        if re.match(r'^\w+$', token):
            if token.isdigit():
                return f'<num, {token} >'
            elif token in {'int', 'float', 'if', 'else', 'for', 'while'}:
                return f'<keyword, "{token}" >'
            else:
                return f'<id, "{token}" >'
        elif re.match(r'^[+\-*\/=<>!]+$', token) or token in {'==', '!=', '<=', '>=', ':='}:
            return f'<operator, "{token}" >'
        elif token in {',', ';', '(', ')', '{', '}'}:
            return f'<punctuation, "{token}" >'
        else:
            return f'<unknown, "{token}" >'

    def check_syntax(self):
        # Improved syntax check for control structures and semicolon usage
        for i, (token, line_number) in enumerate(self.tokens):
            if token in {'if', 'for', 'while'}:
                # Check next token for proper structure
                if i + 1 < len(self.tokens) and not (self.tokens[i + 1][0] == '('):
                    self.syntax_errors.append(f"Syntax error: Expected '(' after '{token}' at line {line_number}")
            if token == ';':
                # Check semicolon placement
                if i > 0 and self.tokens[i - 1][0] in {'{', ';', '}', 'if', 'for', 'while'}:
                    self.syntax_errors.append(f"Syntax error: Misplaced ';' at line {line_number}")

    def print_tokens(self):
        return ' '.join([self.classify_token(token) for token, _ in self.tokens])

    def print_errors(self):
        if self.syntax_errors:
            for error in self.syntax_errors:
                print(error)
        else:
            print("No syntax errors detected.")

    def lookup_common_lexemes(self):
        common_lexemes = {}
        for token, line in self.tokens:
            lexeme = token
            if lexeme not in common_lexemes:
                common_lexemes[lexeme] = [line]
            else:
                common_lexemes[lexeme].append(line)
        return {lex: lines for lex, lines in common_lexemes.items() if len(lines) > 1}

def main():
    analyzer = LexicalAnalyzer()
    print("Please enter the code:")
    user_input = input()
    analyzer.analyze_text(user_input)
    print("Tokens: ", analyzer.print_tokens())
    analyzer.print_errors()
    common_lexemes = analyzer.lookup_common_lexemes()
    print("Common Lexemes and their indices: ", common_lexemes)

if __name__ == "__main__":
    main()


Please enter the code:
y : = 31 + 28*x
Tokens:  <id, "y" > <operator, "=" > <num, 31 > <operator, "+" > <num, 28 > <operator, "*" > <id, "x" >
No syntax errors detected.
Common Lexemes and their indices:  {}
