diff --git a/tokenizer/java/README.md b/tokenizer/java/README.md new file mode 100644 index 0000000..0ca96d9 --- /dev/null +++ b/tokenizer/java/README.md @@ -0,0 +1,9 @@ +Installation Instruction:- + +pip3 install javac_parser + +Usage:- + +To produce output in stdout consisting of tokens of the input file, just run- + +python3 java_tokenizer.py path/to/inputfile \ No newline at end of file diff --git a/tokenizer/java/expected_output/output.json b/tokenizer/java/expected_output/output.json new file mode 100644 index 0000000..c7c56d0 --- /dev/null +++ b/tokenizer/java/expected_output/output.json @@ -0,0 +1,482 @@ +[ + { + "char": 1, + "line": 1, + "type": "PUBLIC", + "value": "public" + }, + { + "char": 8, + "line": 1, + "type": "CLASS", + "value": "class" + }, + { + "char": 14, + "line": 1, + "type": "IDENTIFIER", + "value": "FunctionCall" + }, + { + "char": 27, + "line": 1, + "type": "LBRACE", + "value": "{" + }, + { + "char": 5, + "line": 3, + "type": "PUBLIC", + "value": "public" + }, + { + "char": 12, + "line": 3, + "type": "STATIC", + "value": "static" + }, + { + "char": 19, + "line": 3, + "type": "VOID", + "value": "void" + }, + { + "char": 24, + "line": 3, + "type": "IDENTIFIER", + "value": "funct1" + }, + { + "char": 31, + "line": 3, + "type": "LPAREN", + "value": "(" + }, + { + "char": 32, + "line": 3, + "type": "RPAREN", + "value": ")" + }, + { + "char": 34, + "line": 3, + "type": "LBRACE", + "value": "{" + }, + { + "char": 2, + "line": 4, + "type": "IDENTIFIER", + "value": "System" + }, + { + "char": 8, + "line": 4, + "type": "DOT", + "value": "." + }, + { + "char": 9, + "line": 4, + "type": "IDENTIFIER", + "value": "out" + }, + { + "char": 12, + "line": 4, + "type": "DOT", + "value": "." 
+ }, + { + "char": 13, + "line": 4, + "type": "IDENTIFIER", + "value": "println" + }, + { + "char": 21, + "line": 4, + "type": "LPAREN", + "value": "(" + }, + { + "char": 22, + "line": 4, + "type": "STRINGLITERAL", + "value": "\"Inside funct1\"" + }, + { + "char": 37, + "line": 4, + "type": "RPAREN", + "value": ")" + }, + { + "char": 38, + "line": 4, + "type": "SEMI", + "value": ";" + }, + { + "char": 5, + "line": 5, + "type": "RBRACE", + "value": "}" + }, + { + "char": 5, + "line": 7, + "type": "PUBLIC", + "value": "public" + }, + { + "char": 12, + "line": 7, + "type": "STATIC", + "value": "static" + }, + { + "char": 19, + "line": 7, + "type": "VOID", + "value": "void" + }, + { + "char": 24, + "line": 7, + "type": "IDENTIFIER", + "value": "main" + }, + { + "char": 29, + "line": 7, + "type": "LPAREN", + "value": "(" + }, + { + "char": 30, + "line": 7, + "type": "IDENTIFIER", + "value": "String" + }, + { + "char": 36, + "line": 7, + "type": "LBRACKET", + "value": "[" + }, + { + "char": 37, + "line": 7, + "type": "RBRACKET", + "value": "]" + }, + { + "char": 39, + "line": 7, + "type": "IDENTIFIER", + "value": "args" + }, + { + "char": 44, + "line": 7, + "type": "LBRACE", + "value": "{" + }, + { + "char": 2, + "line": 8, + "type": "INT", + "value": "int" + }, + { + "char": 6, + "line": 8, + "type": "IDENTIFIER", + "value": "val" + }, + { + "char": 9, + "line": 8, + "type": "SEMI", + "value": ";" + }, + { + "char": 2, + "line": 9, + "type": "IDENTIFIER", + "value": "System" + }, + { + "char": 8, + "line": 9, + "type": "DOT", + "value": "." + }, + { + "char": 9, + "line": 9, + "type": "IDENTIFIER", + "value": "out" + }, + { + "char": 12, + "line": 9, + "type": "DOT", + "value": "." 
+ }, + { + "char": 13, + "line": 9, + "type": "IDENTIFIER", + "value": "println" + }, + { + "char": 21, + "line": 9, + "type": "LPAREN", + "value": "(" + }, + { + "char": 22, + "line": 9, + "type": "STRINGLITERAL", + "value": "\"funct2 returned a value of \"" + }, + { + "char": 52, + "line": 9, + "type": "PLUS", + "value": "+" + }, + { + "char": 54, + "line": 9, + "type": "IDENTIFIER", + "value": "val" + }, + { + "char": 57, + "line": 9, + "type": "RPAREN", + "value": ")" + }, + { + "char": 58, + "line": 9, + "type": "SEMI", + "value": ";" + }, + { + "char": 2, + "line": 11, + "type": "IDENTIFIER", + "value": "val" + }, + { + "char": 6, + "line": 11, + "type": "EQ", + "value": "=" + }, + { + "char": 8, + "line": 11, + "type": "IDENTIFIER", + "value": "funct2" + }, + { + "char": 14, + "line": 11, + "type": "LPAREN", + "value": "(" + }, + { + "char": 15, + "line": 11, + "type": "SUB", + "value": "-" + }, + { + "char": 16, + "line": 11, + "type": "INTLITERAL", + "value": "3" + }, + { + "char": 17, + "line": 11, + "type": "RPAREN", + "value": ")" + }, + { + "char": 18, + "line": 11, + "type": "SEMI", + "value": ";" + }, + { + "char": 5, + "line": 12, + "type": "PUBLIC", + "value": "public" + }, + { + "char": 12, + "line": 12, + "type": "STATIC", + "value": "static" + }, + { + "char": 19, + "line": 12, + "type": "INT", + "value": "int" + }, + { + "char": 23, + "line": 12, + "type": "IDENTIFIER", + "value": "funct2" + }, + { + "char": 30, + "line": 12, + "type": "LPAREN", + "value": "(" + }, + { + "char": 31, + "line": 12, + "type": "INT", + "value": "int" + }, + { + "char": 35, + "line": 12, + "type": "IDENTIFIER", + "value": "param" + }, + { + "char": 40, + "line": 12, + "type": "RPAREN", + "value": ")" + }, + { + "char": 42, + "line": 12, + "type": "LBRACE", + "value": "{" + }, + { + "char": 2, + "line": 13, + "type": "IDENTIFIER", + "value": "System" + }, + { + "char": 8, + "line": 13, + "type": "DOT", + "value": "." 
+ }, + { + "char": 9, + "line": 13, + "type": "IDENTIFIER", + "value": "out" + }, + { + "char": 12, + "line": 13, + "type": "DOT", + "value": "." + }, + { + "char": 13, + "line": 13, + "type": "IDENTIFIER", + "value": "println" + }, + { + "char": 21, + "line": 13, + "type": "LPAREN", + "value": "(" + }, + { + "char": 22, + "line": 13, + "type": "STRINGLITERAL", + "value": "\"Inside funct2 with param \"" + }, + { + "char": 50, + "line": 13, + "type": "PLUS", + "value": "+" + }, + { + "char": 52, + "line": 13, + "type": "IDENTIFIER", + "value": "param" + }, + { + "char": 57, + "line": 13, + "type": "RPAREN", + "value": ")" + }, + { + "char": 58, + "line": 13, + "type": "SEMI", + "value": ";" + }, + { + "char": 2, + "line": 14, + "type": "RETURN", + "value": "return" + }, + { + "char": 9, + "line": 14, + "type": "IDENTIFIER", + "value": "param" + }, + { + "char": 15, + "line": 14, + "type": "STAR", + "value": "*" + }, + { + "char": 17, + "line": 14, + "type": "INTLITERAL", + "value": "2" + }, + { + "char": 18, + "line": 14, + "type": "SEMI", + "value": ";" + }, + { + "char": 5, + "line": 15, + "type": "RBRACE", + "value": "}" + }, + { + "char": 1, + "line": 16, + "type": "RBRACE", + "value": "}" + } +] diff --git a/tokenizer/java/input_with_error.java b/tokenizer/java/input_with_error.java new file mode 100644 index 0000000..d130f46 --- /dev/null +++ b/tokenizer/java/input_with_error.java @@ -0,0 +1,16 @@ +public class FunctionCall { + // this is a comment + public static void funct1 () { + System.out.println ("Inside funct1"); + } + + public static void main (String[] args { + int val; + System.out.println ("funct2 returned a value of " + val); + + val = funct2(-3); + public static int funct2 (int param) { + System.out.println ("Inside funct2 with param " + param); + return param * 2; + } +} \ No newline at end of file diff --git a/tokenizer/java/java_tokenizer.py b/tokenizer/java/java_tokenizer.py new file mode 100644 index 0000000..1b9067f --- /dev/null +++ 
"""Print the lexer tokens of a Java source file as a JSON array on stdout.

Usage:
    python3 java_tokenizer.py path/to/inputfile
"""
import javac_parser
import json
import sys


def tokenize(source):
    """Lex Java source text into a list of JSON-serialisable token records.

    Args:
        source: The full text of a Java source file.

    Returns:
        A list of dicts, one per token, with the keys ``line``, ``char``,
        ``type`` and ``value``. The last entry produced by the lexer is
        dropped, as the original script did.
    """
    java = javac_parser.Java()

    tokens = []
    for token in java.lex(source):
        # token[0] is the token type, token[1] its text and token[2] a
        # (line, column) pair. The column is shifted by one so that the first
        # column is reported as 1 (presumably javac_parser columns are
        # 0-based -- confirm against the javac_parser documentation).
        tokens.append({
            "line": token[2][0],
            "char": token[2][1] + 1,
            "type": str(token[0]),
            "value": str(token[1]),
        })

    # The final lexer entry is not part of the reported output (presumably it
    # is the end-of-file marker -- confirm). Guard the pop so that an empty
    # token stream yields an empty list instead of raising IndexError.
    if tokens:
        tokens.pop()
    return tokens


def main(argv):
    """Tokenize the file named by ``argv[1]`` and print the result as JSON.

    Args:
        argv: Command-line arguments, including the program name at index 0.
            Arguments after the input path are ignored.

    Returns:
        The process exit status: 0 on success, 2 when the input path is
        missing.
    """
    if len(argv) < 2:
        print("usage: python3 java_tokenizer.py path/to/inputfile", file=sys.stderr)
        return 2

    # The context manager closes the file even if reading fails.
    with open(argv[1], 'r') as source_file:
        file_content = source_file.read()

    print(json.dumps(tokenize(file_content), indent=4, sort_keys=True))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))