# -*- coding: utf-8 -*-
"""Print basic statistics about a UTF-8 text file.

Usage::

    python textfile_analysis.py TEXTFILE

On success a dict with the following integer entries is printed:

* ``total_lines``        - number of lines (a final line without a trailing
  newline is counted too)
* ``total_characters``   - characters excluding spaces and newlines
* ``total_words``        - whitespace-separated tokens
* ``unique_words``       - distinct whitespace-separated tokens
* ``special_characters`` - characters found in ``string.punctuation``
"""
import collections
import os
import re
import string
import sys

# Built once so each membership test is O(1) rather than a scan of the string.
_PUNCTUATION = frozenset(string.punctuation)


def analyze_text(data):
    """Return line, character, word and punctuation counts for *data*.

    Args:
        data: The text to analyse, with line endings already normalised
            to ``"\\n"`` (which is what text-mode ``open()`` delivers).

    Returns:
        A dict with the integer keys ``total_lines``, ``total_characters``,
        ``total_words``, ``unique_words`` and ``special_characters``.
    """
    # Count "\n" rather than os.linesep: text-mode reads translate every
    # platform line ending to "\n", so os.linesep ("\r\n" on Windows) would
    # never match and the line count would always be zero there.
    newline_count = data.count("\n")

    total_lines = newline_count
    if data and not data.endswith("\n"):
        # The last line has no terminator but is still a line.
        total_lines += 1

    word_counts = collections.Counter(data.split())

    return {
        "total_lines": total_lines,
        # Only spaces and newlines are excluded; tabs and other whitespace
        # still count as characters.
        "total_characters": len(data) - data.count(" ") - newline_count,
        "total_words": sum(word_counts.values()),
        "unique_words": len(word_counts),
        "special_characters": sum(1 for ch in data if ch in _PUNCTUATION),
    }


def main(argv=None):
    """Analyse the file named on the command line and print the result.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        ``0`` on success, ``1`` when the argument is missing or the file
        cannot be read or decoded. Diagnostics go to standard error, and no
        statistics are printed on failure.
    """
    if argv is None:
        argv = sys.argv
    script_name = argv[0] if argv else "textfile_analysis.py"

    if len(argv) < 2:
        print("Usage: %s TEXTFILE" % script_name, file=sys.stderr)
        return 1

    textfile = argv[1]
    try:
        with open(textfile, "r", encoding="utf-8") as f:
            data = f.read()
    except OSError:
        print('"%s" cannot be opened.' % textfile, file=sys.stderr)
        return 1
    except UnicodeDecodeError:
        print('"%s" is not valid UTF-8 text.' % textfile, file=sys.stderr)
        return 1

    print(analyze_text(data))
    return 0


if __name__ == "__main__":
    sys.exit(main())