In [1]:
import re

# Lookup tables for the lexical categories recognised by this small C lexer.
keywords = {"void", "main", "int", "if", "else", "char", "float"}
arith_ops = {'+', '-', '*', '/', '='}
logical_ops = {'>', '>=', '<', '<=', '==', '!='}
punctuations = {',', ';', ':'}
parentheses = {'(', ')', '{', '}', '[', ']'}

# Block and line comments are matched in ONE pass: whichever comment opener
# appears first wins, so a "/*" that sits inside a "//" comment can no longer
# start a bogus block comment that swallows the following lines of real code.
# "[^\n]*" is used for line comments because DOTALL makes "." match newlines.
COMMENT_RE = re.compile(r'/\*.*?\*/|//[^\n]*', re.DOTALL)

# Two-character operators are listed before the single-character class so
# that, for example, ">=" is not split into ">" and "=".
TOKEN_RE = re.compile(r"[a-zA-Z_]\w*|\d+|==|!=|>=|<=|[(){}[\],;:+\-*/=<>]")
CONSTANT_RE = re.compile(r"\d+")
IDENTIFIER_RE = re.compile(r"[a-zA-Z_]\w*")


def strip_comments(source: str) -> str:
    """Return ``source`` with every ``/* ... */`` and ``// ...`` comment removed.

    Each comment is replaced by a single space rather than by nothing, so the
    tokens on either side of a comment stay separate (``int/**/a`` yields
    ``int a``, not ``inta``).
    """
    return COMMENT_RE.sub(' ', source)


def tokenize(source: str) -> list:
    """Split comment-free ``source`` into a list of token strings.

    Recognised tokens are identifiers/keywords, integer constants, the
    operators and the punctuation/bracket characters listed in the tables
    above. Characters that match none of these (whitespace included) are
    skipped.
    """
    return TOKEN_RE.findall(source)


def classify(tokens) -> dict:
    """Group ``tokens`` by lexical category.

    Returns a dict mapping each category name to the set of distinct tokens
    found for it. Table lookups are tried first, in the order keyword,
    arithmetic operator, logical operator, punctuation, parenthesis; only
    then is the token tested as a constant and finally as an identifier.
    Tokens that fit no category are ignored.
    """
    categories = {
        "Keyword": set(),
        "Identifier": set(),
        "Constant": set(),
        "Arithmetic Operator": set(),
        "Logical Operator": set(),
        "Punctuation": set(),
        "Parenthesis": set()
    }
    table_lookup = (
        (keywords, "Keyword"),
        (arith_ops, "Arithmetic Operator"),
        (logical_ops, "Logical Operator"),
        (punctuations, "Punctuation"),
        (parentheses, "Parenthesis"),
    )

    for token in tokens:
        for table, category in table_lookup:
            if token in table:
                categories[category].add(token)
                break
        else:
            # fullmatch: the WHOLE token must have the right shape.
            if CONSTANT_RE.fullmatch(token):
                categories["Constant"].add(token)
            elif IDENTIFIER_RE.fullmatch(token):
                categories["Identifier"].add(token)
    return categories


code = '''
/*Multi line comment
2nd line
*/

void main()
{
int a, b, c;
//comment
int a = b*c + 10;
if(a!=2)
a = 0;
}
'''
code = strip_comments(code)

tokens = tokenize(code)

result = classify(tokens)

for category, items in result.items():
    print(f"{category} ({len(items)}): {', '.join(sorted(items))}")


Keyword (4): if, int, main, void
Identifier (3): a, b, c
Constant (3): 0, 10, 2
Arithmetic Operator (3): *, +, =
Logical Operator (1): !=
Punctuation (2): ,, ;
Parenthesis (4): (, ), {, }


In [2]:
import re

# Lookup tables for the lexical categories recognised by this small C lexer.
# This cell declares everything it needs so it can be run on its own.
keywords = {"void", "main", "int", "if", "else", "char", "float"}
arith_ops = {'+', '-', '*', '/', '='}
logical_ops = {'>', '>=', '<', '<=', '==', '!='}
punctuations = {',', ';', ':'}
parentheses = {'(', ')', '{', '}', '[', ']'}

# Block and line comments are matched in ONE pass: whichever comment opener
# appears first wins, so a "/*" that sits inside a "//" comment can no longer
# start a bogus block comment that swallows the following lines of real code.
# "[^\n]*" is used for line comments because DOTALL makes "." match newlines.
COMMENT_RE = re.compile(r'/\*.*?\*/|//[^\n]*', re.DOTALL)

# A character literal is a quote, then either a backslash escape (two
# characters, e.g. \n or \') or one ordinary character, then a quote.
CHAR_LITERAL = r"'(?:\\.|[^'\\])'"

# Order matters inside the alternation: floats before plain integers (so
# "3.14" is one token, not "3" and "14"), and two-character operators before
# the single-character class (so ">=" is not split into ">" and "=").
TOKEN_RE = re.compile(
    r"[a-zA-Z_]\w*|\d+\.\d+|\d+|" + CHAR_LITERAL + r"|==|!=|>=|<=|[(){}[\],;:+\-*/=<>]"
)
CONSTANT_RE = re.compile(r"\d+(?:\.\d+)?|" + CHAR_LITERAL)
IDENTIFIER_RE = re.compile(r"[a-zA-Z_]\w*")


def strip_comments(source: str) -> str:
    """Return ``source`` with every ``/* ... */`` and ``// ...`` comment removed.

    Each comment is replaced by a single space rather than by nothing, so the
    tokens on either side of a comment stay separate (``int/**/a`` yields
    ``int a``, not ``inta``).
    """
    return COMMENT_RE.sub(' ', source)


def tokenize(source: str) -> list:
    """Split comment-free ``source`` into a list of token strings.

    Recognised tokens are identifiers/keywords, integer and decimal
    constants, single-character literals (including backslash escapes such
    as ``'\\n'``), the operators and the punctuation/bracket characters listed
    in the tables above. Characters that match none of these (whitespace
    included) are skipped.
    """
    return TOKEN_RE.findall(source)


def classify(tokens) -> dict:
    """Group ``tokens`` by lexical category.

    Returns a dict mapping each category name to the set of distinct tokens
    found for it. Table lookups are tried first, in the order keyword,
    arithmetic operator, logical operator, punctuation, parenthesis; only
    then is the token tested as a constant (number or character literal) and
    finally as an identifier. Tokens that fit no category are ignored.
    """
    categories = {
        "Keyword": set(),
        "Identifier": set(),
        "Constant": set(),
        "Arithmetic Operator": set(),
        "Logical Operator": set(),
        "Punctuation": set(),
        "Parenthesis": set()
    }
    table_lookup = (
        (keywords, "Keyword"),
        (arith_ops, "Arithmetic Operator"),
        (logical_ops, "Logical Operator"),
        (punctuations, "Punctuation"),
        (parentheses, "Parenthesis"),
    )

    for token in tokens:
        for table, category in table_lookup:
            if token in table:
                categories[category].add(token)
                break
        else:
            # fullmatch: the WHOLE token must have the right shape.
            if CONSTANT_RE.fullmatch(token):
                categories["Constant"].add(token)
            elif IDENTIFIER_RE.fullmatch(token):
                categories["Identifier"].add(token)
    return categories


code = '''
void main()
{
int a, b, c;
float d = 3.14;
char ch = 'a';
//comment
int a = b*c + 10;
}
'''

code = strip_comments(code)

tokens = tokenize(code)

result = classify(tokens)

for category, items in result.items():
    print(f"{category} ({len(items)}): {', '.join(sorted(items))}")


Keyword (5): char, float, int, main, void
Identifier (5): a, b, c, ch, d
Constant (3): 'a', 10, 3.14
Arithmetic Operator (3): *, +, =
Logical Operator (0): 
Punctuation (2): ,, ;
Parenthesis (4): (, ), {, }
