From 2adfe6453734585a8f9b5f5e1c2fd142c8440f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=AD=E5=B0=BE=E5=90=8F=E5=BF=97?= Date: Mon, 6 Jul 2020 22:22:14 +0900 Subject: [PATCH 1/4] add file(textfile_analysis.py) in projects/Textfile analysis folder --- projects/Textfile analysis/textfile_analysis.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 projects/Textfile analysis/textfile_analysis.py diff --git a/projects/Textfile analysis/textfile_analysis.py b/projects/Textfile analysis/textfile_analysis.py new file mode 100644 index 00000000..e69de29b From 24dc5b162ecc6553269e913776c89626d7b7f8ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=AD=E5=B0=BE=E5=90=8F=E5=BF=97?= Date: Tue, 7 Jul 2020 00:15:32 +0900 Subject: [PATCH 2/4] Add some return value --- .../Textfile analysis/textfile_analysis.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/projects/Textfile analysis/textfile_analysis.py b/projects/Textfile analysis/textfile_analysis.py index e69de29b..67900d4b 100644 --- a/projects/Textfile analysis/textfile_analysis.py +++ b/projects/Textfile analysis/textfile_analysis.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +import os +import sys +import collections + +script_name = sys.argv[0] + +res = { + "total_lines":"", + "total_characters":"", + "total_words":"", + "unique_words":"", + "special_characters":"" +} + +try: + textfile = sys.argv[1] + with open(textfile) as f: + data = f.read() + + res["total_lines"] = data.count(os.linesep) + res["total_characters"] = len(data) + counter = collections.Counter(data.split()) + d = counter.most_common() + res["total_words"] = sum([i[1] for i in d]) + res["unique_words"] = len([i[0] for i in d]) + +except IndexError: + print('Usage: %s TEXTFILE' % script_name) +except IOError: + print('"%s" cannot be opened.' 
% textfile) + +print(res) \ No newline at end of file From 8facac4be1c102fd9917a8d85d1d5a7ab34ea14d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=AD=E5=B0=BE=E5=90=8F=E5=BF=97?= Date: Tue, 7 Jul 2020 22:50:45 +0900 Subject: [PATCH 3/4] add some return value --- projects/Textfile analysis/textfile_analysis.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/projects/Textfile analysis/textfile_analysis.py b/projects/Textfile analysis/textfile_analysis.py index 67900d4b..d606846b 100644 --- a/projects/Textfile analysis/textfile_analysis.py +++ b/projects/Textfile analysis/textfile_analysis.py @@ -2,6 +2,8 @@ import os import sys import collections +import string +import re script_name = sys.argv[0] @@ -15,15 +17,17 @@ try: textfile = sys.argv[1] - with open(textfile) as f: - data = f.read() + with open(textfile, "r", encoding = "utf_8") as f: + data = f.read() res["total_lines"] = data.count(os.linesep) - res["total_characters"] = len(data) + res["total_characters"] = len(data.replace(" ","")) - res["total_lines"] counter = collections.Counter(data.split()) d = counter.most_common() res["total_words"] = sum([i[1] for i in d]) res["unique_words"] = len([i[0] for i in d]) + special_chars = string.punctuation + res["special_characters"] = sum(v for k, v in collections.Counter(data).items() if k in special_chars) except IndexError: print('Usage: %s TEXTFILE' % script_name) From 99f2d14f75f4773bea1f8ba9fbf3117b39a1faa2 Mon Sep 17 00:00:00 2001 From: m044de <46442823+m044de@users.noreply.github.com> Date: Tue, 7 Jul 2020 23:00:12 +0900 Subject: [PATCH 4/4] Create README.md --- projects/Textfile analysis/README.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 projects/Textfile analysis/README.md diff --git a/projects/Textfile analysis/README.md b/projects/Textfile analysis/README.md new file mode 100644 index 00000000..a4b07a6e --- /dev/null +++ b/projects/Textfile analysis/README.md @@ -0,0 +1,3 @@ +# Textfile analysis +##### 
Execute +`python textfile_analysis.py TEXTFILE`