From 72c8a3b71e465f60cdaa27c035644030ecec4f51 Mon Sep 17 00:00:00 2001
From: tushargr
Date: Tue, 3 Jul 2018 04:45:23 +0530
Subject: [PATCH 1/3] fix python_tokenizer tok_name error

---
 tokenizer/python/python_tokenizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tokenizer/python/python_tokenizer.py b/tokenizer/python/python_tokenizer.py
index ce4c586..10d813d 100644
--- a/tokenizer/python/python_tokenizer.py
+++ b/tokenizer/python/python_tokenizer.py
@@ -1,6 +1,6 @@
 import parso
 from parso.python.tokenize import tokenize
-from parso.python.token import tok_name
+from parso.python import token
 import json
 import sys
 
@@ -13,7 +13,7 @@
     tmp = dict()
     tmp["line"]=(token.start_pos)[0];
     tmp["char"]=((token.start_pos)[1])+1;
-    tmp["type"]=str(tok_name[token.type])
+    tmp["type"]=((str(token.type))[10:]).strip(")")
     tmp["value"]=str(token.string)
     tokens.append(tmp)
 

From c1d5672f2a4f6fff41390d03cc20191732693311 Mon Sep 17 00:00:00 2001
From: tushargr
Date: Tue, 3 Jul 2018 04:54:52 +0530
Subject: [PATCH 2/3] remove import which is not required in python_tokenizer.py

---
 tokenizer/python/python_tokenizer.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tokenizer/python/python_tokenizer.py b/tokenizer/python/python_tokenizer.py
index 10d813d..5f2a4a4 100644
--- a/tokenizer/python/python_tokenizer.py
+++ b/tokenizer/python/python_tokenizer.py
@@ -1,6 +1,5 @@
 import parso
-from parso.python.tokenize import tokenize
-from parso.python import token
+from parso.python.tokenize import tokenize 
 import json
 import sys
 

From 95b52ffbab3093d3323195eed42508f1b321b88d Mon Sep 17 00:00:00 2001
From: tushargr
Date: Tue, 3 Jul 2018 05:15:14 +0530
Subject: [PATCH 3/3] remove empty value tokens

---
 tokenizer/python/python_tokenizer.py | 18 +++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tokenizer/python/python_tokenizer.py b/tokenizer/python/python_tokenizer.py
index 5f2a4a4..b1211dc 100644
--- a/tokenizer/python/python_tokenizer.py
+++ b/tokenizer/python/python_tokenizer.py
@@ -1,7 +1,7 @@
 import parso
-from parso.python.tokenize import tokenize 
+from parso.python.tokenize import tokenize
 import json
-import sys 
+import sys
 
 file = open(sys.argv[1],'r')
 file_content = file.read()
@@ -9,12 +9,12 @@
 
 tokens = []
 for token in tokenize(file_content, version_info=(3,6)):
-    tmp = dict()
-    tmp["line"]=(token.start_pos)[0];
-    tmp["char"]=((token.start_pos)[1])+1;
-    tmp["type"]=((str(token.type))[10:]).strip(")")
-    tmp["value"]=str(token.string)
-    tokens.append(tmp)
+    if (str(token.string) != ""):
+        tmp = dict()
+        tmp["line"]=(token.start_pos)[0];
+        tmp["char"]=((token.start_pos)[1])+1;
+        tmp["type"]=((str(token.type))[10:]).strip(")")
+        tmp["value"]=str(token.string)
+        tokens.append(tmp)
 
 print ( json.dumps(tokens, indent=4, sort_keys=True) )
-
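
Note: the `tmp["type"]=((str(token.type))[10:]).strip(")")` expression introduced in PATCH 1/3 recovers the token type's name by slicing its repr, which prints as `TokenType(NAME)` ("TokenType(" is 10 characters, and `.strip(")")` drops the closing parenthesis). A less repr-dependent variant of the final script is sketched below. It assumes the installed parso exposes a `name` attribute on the token type object (the value its repr appears to be built from); `token.type.name` is that assumption, not something the patches themselves use.

import json
import sys

from parso.python.tokenize import tokenize

# Sketch of the script as it stands after PATCH 3/3, with one substitution:
# the token type is read via token.type.name instead of slicing
# str(token.type). ASSUMPTION: the installed parso version exposes a .name
# attribute on token types; if it does not, keep the repr-slicing used in
# the patches above.
tokens = []
with open(sys.argv[1], "r") as source:
    for token in tokenize(source.read(), version_info=(3, 6)):
        if token.string == "":  # skip empty-value tokens, as in PATCH 3/3
            continue
        tokens.append({
            "line": token.start_pos[0],      # 1-based line number
            "char": token.start_pos[1] + 1,  # 0-based column shifted to 1-based
            "type": token.type.name,         # e.g. "NAME", "OP", "NEWLINE"
            "value": token.string,
        })

print(json.dumps(tokens, indent=4, sort_keys=True))

Under that assumption, both variants should emit the same JSON; reading the attribute directly just avoids depending on the exact repr format.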