<a href="https://colab.research.google.com/github/Ahmad7862002/LISP-Compiler/blob/main/ScannerV2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [228]:
import re
from enum import Enum


class TokenType(Enum):
    """Categories a scanned lexeme can be assigned.

    Members are referenced from the RESERVED_WORDS and OPERATORS lookup
    tables and from the pattern checks in find_token().
    """

    # Reserved words (looked up through RESERVED_WORDS).
    DO_TIMES = 1
    IF_STATEMENT = 2
    WHEN = 3
    READ = 4
    WRITE = 5
    T = 6
    NIL = 7
    LET = 8
    SETQ = 9
    SETF = 10
    LIST = 11
    ATOM = 12
    # Arithmetic operators (looked up through OPERATORS).
    ADD = 13
    SUB = 14
    MULT = 15
    DIV = 16
    # Comparison operators (looked up through OPERATORS).
    GREATER_THAN_EQUAL = 17
    LESS_THAN_EQUAL = 18
    EQUAL = 19
    GREATER_THAN = 20
    LESS_THAN = 21
    # Lexeme classes decided by the regular-expression checks in find_token().
    IDENTIFIER = 22
    NUMERIC_LITERAL = 23
    CHARACTER_LITERAL = 24
    STRING_LITERAL = 25
    # Punctuation.
    COMMA = 26
    # NOTE(review): SEMICOLON is not referenced by the visible tables or by
    # find_token(), which treats a lone ";" as the start of a comment.
    SEMICOLON = 27
    LEFT_PAREN = 28
    RIGHT_PAREN = 29
    # Word-spelled operators (looked up through OPERATORS).
    MOD = 30
    REM = 31
    INCF = 32
    DECF = 33
    DOT = 34
    # Further reserved words (looked up through RESERVED_WORDS).
    DEFUN = 35
    PRINT = 36
    # Assigned by find_token() to any lexeme that matches no other rule.
    ERROR = 37


class Token:
    """A scanned lexeme together with the category assigned to it."""

    def __init__(self, lex, token_type):
        """Store the lexeme text and its token category."""
        self.lex, self.token_type = lex, token_type

    def to_dict(self):
        """Return the token as a mapping with keys 'Lex' and 'token_type'."""
        return dict(Lex=self.lex, token_type=self.token_type)


# Reserved-word spellings mapped to their token types.
# Keys are upper-case: find_token() upper-cases every lexeme before looking it
# up here, so reserved words are recognised regardless of the case typed.
# This table is consulted before OPERATORS and before any pattern check.
RESERVED_WORDS = {
    "DOTIMES": TokenType.DO_TIMES,
    "IF": TokenType.IF_STATEMENT,
    "WHEN": TokenType.WHEN,
    "READ": TokenType.READ,
    "WRITE": TokenType.WRITE,
    "T": TokenType.T,
    "NIL": TokenType.NIL,
    "LET": TokenType.LET,
    "LIST": TokenType.LIST,
    "ATOM": TokenType.ATOM,
    "SETQ": TokenType.SETQ,
    "SETF": TokenType.SETF,
    "DEFUN": TokenType.DEFUN,
    "PRINT": TokenType.PRINT
}


# Operator and punctuation lexemes mapped to their token types.
# find_token() consults this table after RESERVED_WORDS. Lookups are by whole
# lexeme (the input is split on single spaces), so two-character entries such
# as "<=" never collide with their one-character prefixes.
OPERATORS = {
    # Arithmetic.
    "+": TokenType.ADD,
    "-": TokenType.SUB,
    "*": TokenType.MULT,
    "/": TokenType.DIV,
    # Comparison.
    "<=": TokenType.LESS_THAN_EQUAL,
    ">=": TokenType.GREATER_THAN_EQUAL,
    "=": TokenType.EQUAL,
    "<": TokenType.LESS_THAN,
    ">": TokenType.GREATER_THAN,
    # Punctuation.
    ",": TokenType.COMMA,
    "(": TokenType.LEFT_PAREN,
    ")": TokenType.RIGHT_PAREN,
    # Word-spelled operators; upper-case because lexemes are upper-cased
    # before lookup.
    "MOD": TokenType.MOD,
    "REM": TokenType.REM,
    "INCF": TokenType.INCF,
    "DECF": TokenType.DECF,
    ".": TokenType.DOT,
}


# Lexeme patterns, compiled once instead of being re-specified on every
# loop iteration.
_BLANK_RE = re.compile(r'^\s*$')
_NUMERIC_RE = re.compile(r'^[-+]?[0-9]*\.?[0-9]+$')
_CHARACTER_RE = re.compile(r"^#\S")
_STRING_RE = re.compile(r'^"[^"\\]*(?:\\.[^"\\]*)*"$')
_IDENTIFIER_RE = re.compile(
    r'^[^ ;():\'",`.\n]*(\\[;():\'",`][^ ;():\'",`.\n]*)*$'
)

# Line breaks arrive as the two typed characters backslash + "n" (the text
# comes from a single-line prompt), not as a real newline character.
_LINE_BREAK = "\\n"


def find_token(text):
    """Split *text* on single spaces and classify each piece as a Token.

    Classification order for each upper-cased lexeme: reserved word,
    operator/punctuation, numeric literal, character literal, string
    literal, identifier.  A lone ";" starts a comment that runs until the
    next standalone line-break marker (the two characters backslash + "n");
    everything in between is discarded.  Anything unmatched becomes an
    ERROR token.

    NOTE: every lexeme is upper-cased before classification, so the text
    stored for string and character literals is upper-cased as well.

    Args:
        text: The source string; lexemes must be separated by spaces.

    Returns:
        The list of Token objects in source order (empty when *text*
        contains no lexemes).

    Side effects:
        If a module-level list named ``Tokens`` exists, the tokens are also
        appended to it.  Earlier versions of this function reported results
        only through that global (and raised NameError when it was
        missing); the mirroring keeps such callers working.
    """
    tokens = []
    in_comment = False
    for raw in text.split(" "):
        if in_comment:
            # Skip everything up to and including the end-of-line marker.
            if raw == _LINE_BREAK:
                in_comment = False
            continue
        if raw == _LINE_BREAK:
            continue
        lexeme = raw.upper()
        if _BLANK_RE.match(lexeme):
            # Consecutive spaces in the input yield empty pieces.
            continue
        if lexeme in RESERVED_WORDS:
            token_type = RESERVED_WORDS[lexeme]
        elif lexeme in OPERATORS:
            token_type = OPERATORS[lexeme]
        elif _NUMERIC_RE.match(lexeme):
            token_type = TokenType.NUMERIC_LITERAL
        elif _CHARACTER_RE.match(lexeme):
            token_type = TokenType.CHARACTER_LITERAL
        elif _STRING_RE.match(lexeme):
            token_type = TokenType.STRING_LITERAL
        elif _IDENTIFIER_RE.match(lexeme):
            token_type = TokenType.IDENTIFIER
        elif lexeme == ';':
            in_comment = True
            continue
        else:
            token_type = TokenType.ERROR
        tokens.append(Token(lexeme, token_type))

    # Backward compatibility: mirror the results into a caller-provided
    # module-level ``Tokens`` list when one has been defined.
    legacy_sink = globals().get("Tokens")
    if isinstance(legacy_sink, list):
        legacy_sink.extend(tokens)
    return tokens
        

In [225]:
# Read one line of source text, scan it, and echo every token as
# "<lexeme> <TOKEN_TYPE_NAME>".
user_input = input("Enter a string: ")
Tokens = []  # find_token() deposits its results in this module-level list
find_token(user_input)
for token in Tokens:
    print(f"{token.lex} {token.token_type.name}")


Enter a string: ( let ( ( (str ";joj" ) ) ; comment one  \n    ; comment two  \n ( print str ) ) \n
( LEFT_PAREN
LET LET
( LEFT_PAREN
( LEFT_PAREN
(STR IDENTIFIER
";JOJ" STRING_LITERAL
) RIGHT_PAREN
) RIGHT_PAREN
( LEFT_PAREN
PRINT PRINT
STR IDENTIFIER
) RIGHT_PAREN
) RIGHT_PAREN
