In [None]:
def tokenize(source_code):
    """Split whitespace-separated source text into ``(kind, value)`` tokens.

    Args:
        source_code: Source text whose tokens are separated by whitespace.

    Returns:
        A list of 2-tuples, one per token, in input order:

        * ``('OPERATOR', str)`` for ``+``, ``-``, ``*``, ``/`` and ``=``
        * ``('CONSTANT', int)`` for tokens made only of decimal digits
        * ``('VARIABLE', str)`` for anything else
    """
    # '=' is listed so assignment is reported as an operator instead of
    # falling through to the VARIABLE catch-all.
    operators = {'+', '-', '*', '/', '='}

    token_list = []
    for token in source_code.split():
        if token in operators:
            token_list.append(('OPERATOR', token))
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits, which int() refuses to parse.
        elif token.isdecimal():
            token_list.append(('CONSTANT', int(token)))
        else:
            token_list.append(('VARIABLE', token))

    return token_list

# Test the tokenize function on a small assignment expression

print(tokenize("x = 5 + 3"))
# Prints one (kind, value) tuple per whitespace-separated token. Integer
# literals are tagged 'CONSTANT' (not 'NUMBER') and carry int values.


[('VARIABLE', 'x'), ('VARIABLE', '='), ('CONSTANT', 5), ('OPERATOR', '+'), ('CONSTANT', 3)]


In [None]:
def tokenize(source_code=None):
    """Tokenize whitespace-separated source text into ``(kind, value)`` pairs.

    Args:
        source_code: Text to tokenize. When ``None`` (the default) the text
            is read interactively with ``input()``.

    Returns:
        A list of 2-tuples in input order. The kinds are:

        * ``('OPERATOR', str)`` for ``+``, ``-``, ``*``, ``/`` and ``=``
        * ``('NUMBER', int)`` for decimal integers such as ``42``
        * ``('NUMBER', float)`` for simple decimals such as ``3.14``
        * ``('KEYWORD', str)`` for ``print``, ``if``, ``else``, ``switch``, ``def``
        * ``('CONSTANT', str)`` for a double-quoted token, quotes stripped
        * ``('FUNCTION', str)`` for the token that follows ``call``
        * ``('VARIABLE', str)`` for anything else

    Because tokens are split on whitespace, a quoted string containing
    spaces is not recognised as a single constant.
    """
    operators = {'+', '-', '*', '/', '='}
    keywords = {'print', 'if', 'else', 'switch', 'def'}

    if source_code is None:
        source_code = input("Enter your source code: ")

    tokens = source_code.split()
    token_list = []

    # A manual index is kept because 'call' consumes the following token too.
    i = 0
    while i < len(tokens):
        token = tokens[i]
        if token in operators:
            token_list.append(('OPERATOR', token))
        elif token.isdecimal():
            token_list.append(('NUMBER', int(token)))
        # Exactly one '.' with only decimal digits around it, e.g. "3.14".
        elif token.count('.') == 1 and token.replace('.', '', 1).isdecimal():
            token_list.append(('NUMBER', float(token)))
        elif token in keywords:
            token_list.append(('KEYWORD', token))
        # Require two characters so a lone '"' is not treated as an empty string.
        elif len(token) >= 2 and token[0] == '"' and token[-1] == '"':
            token_list.append(('CONSTANT', token[1:-1]))
        # Only a 'call' that is followed by a name is a function call; a
        # trailing 'call' falls through and is reported as a variable.
        elif token == 'call' and i + 1 < len(tokens):
            token_list.append(('FUNCTION', tokens[i + 1]))
            i += 1
        else:
            token_list.append(('VARIABLE', token))
        i += 1

    return token_list

# Test the tokenize function
tokens = tokenize()

# Print each token with a label chosen by its kind (token[0]); token[1] is
# the token's value.
for token in tokens:
    if token[0] == 'OPERATOR':
        print(f"Operator: {token[1]}")
    elif token[0] == 'VARIABLE':
        print(f"Variable: {token[1]}")
    elif token[0] == 'NUMBER':
        # Numeric constants report their Python type (int or float).
        print(f"Constant: {token[1]} (Type: {type(token[1]).__name__})")
    elif token[0] == 'CONSTANT':
        print(f"Constant: {token[1]} (Type: string)")
    elif token[0] == 'FUNCTION':
        print(f"Function call: {token[1]}")
    else:
        # Report any other kind generically rather than dropping it silently.
        print(f"{token[0].capitalize()}: {token[1]}")


Enter your source code: a +  b = 5
Variable: a
Operator: +
Variable: b
Variable: =
Constant: 5 (Type: int)


In [None]:
import re
from pprint import pprint

class SymbolTable:
    """Dictionary-backed table mapping a symbol name to its attributes.

    Each entry is a dict with the keys ``'type'``, ``'scope'``,
    ``'data_type'`` and ``'address'``.
    """

    def __init__(self):
        # symbol name -> attribute dict
        self.table = {}

    def add_symbol(self, symbol, symbol_type, symbol_scope, data_type,
                   address=None):
        """Insert *symbol*, replacing any existing entry of the same name.

        ``address`` is optional and stored as ``None`` when omitted, so
        callers that have no address to record can pass four arguments.
        """
        self.table[symbol] = {
            'type': symbol_type,
            'scope': symbol_scope,
            'data_type': data_type,
            'address': address,
        }

    def update_symbol(self, symbol, symbol_type, symbol_scope, data_type, address):
        """Overwrite every attribute of an existing *symbol*.

        Raises:
            KeyError: If *symbol* has not been added.
        """
        self.table[symbol].update({
            'type': symbol_type,
            'scope': symbol_scope,
            'data_type': data_type,
            'address': address,
        })

    def get_symbol(self, symbol):
        """Return the attribute dict stored for *symbol*.

        Raises:
            KeyError: If *symbol* has not been added.
        """
        return self.table[symbol]

    def symbol_exists(self, symbol):
        """Return ``True`` if *symbol* is present in the table."""
        return symbol in self.table

def tokenize(input_string):
    """Classify the whitespace-separated tokens of *input_string*.

    Every token is registered in a new ``SymbolTable`` with scope
    ``'global'`` and no address:

    * alphabetic, first letter uppercase -> ``'variable'``; its data type
      is the preceding token when that token is ``int``, ``float`` or
      ``string``, otherwise ``None``
    * alphabetic, first letter lowercase -> ``'function'`` (data type
      ``'string'``)
    * digits only -> ``'constant'`` (data type ``'int'``)
    * anything else -> ``'operator'`` (data type ``'opr'``)

    A token that occurs more than once keeps the entry from its last
    occurrence.

    Args:
        input_string: The expression to tokenize.

    Returns:
        The populated ``SymbolTable``.
    """
    type_keywords = ('int', 'float', 'string')

    # str.split() with no argument drops leading/trailing whitespace, so no
    # empty-string token is produced and registered as an operator.
    tokens = input_string.split()

    symbol_table = SymbolTable()

    for i, token in enumerate(tokens):
        if token.isalpha() and token[0].isupper():
            # A declared type is the token immediately before the variable.
            previous = tokens[i - 1] if i > 0 else None
            data_type = previous if previous in type_keywords else None
            symbol_table.add_symbol(token, 'variable', 'global', data_type,
                                    address=None)
        elif token.isalpha() and token[0].islower():
            # NOTE(review): type keywords such as 'int' are lowercase and so
            # are also registered as functions - confirm this is intended.
            symbol_table.add_symbol(token, 'function', 'global', 'string',
                                    address=None)
        elif token.isdigit():
            symbol_table.add_symbol(token, 'constant', 'global', 'int',
                                    address=None)
        else:
            symbol_table.add_symbol(token, 'operator', 'global', 'opr',
                                    address=None)

    return symbol_table


# read the expression to analyse from standard input
input_string = input('Enter an expression: ')

# classify every token of the expression into a SymbolTable
symbol_table = tokenize(input_string)

# pretty-print the table's underlying dict (symbol -> attribute dict)
print('Symbol Table:')
pprint(symbol_table.table)

Enter an expression: a * b = 6
Symbol Table:
{'*': {'data_type': 'opr', 'scope': 'global', 'type': 'operator'},
 '6': {'data_type': 'int', 'scope': 'global', 'type': 'constant'},
 '=': {'data_type': 'opr', 'scope': 'global', 'type': 'operator'},
 'a': {'data_type': 'string', 'scope': 'global', 'type': 'function'},
 'b': {'data_type': 'string', 'scope': 'global', 'type': 'function'}}
