# Word Count - Standard

Count how often each word occurs in a text file, using only the Python standard library.

## Import Libraries

In [70]:
import re
from typing import List

## Define the text cleaning operations

In [123]:
def remove_punctuation(value: str) -> str:
    """Return ``value`` with a fixed set of punctuation characters deleted.

    The characters removed are ``% ! # ? : , * . ] [ ( ) { } ; _ $``, the
    straight and curly double quotes, the straight single quote, the right
    curly single quote, and the byte-order mark (U+FEFF). Any character not
    in this set (for example the hyphen) is left untouched.
    """
    # Raw string: backslashes reach the regex engine unchanged, which avoids
    # the invalid-escape warning that '\:' triggers in a normal string
    # literal. The regex engine itself resolves '\ufeff' to the BOM character.
    return re.sub(r'[%!#?:,*.\]\[(){};“”\'"’_$\ufeff]', '', value)

def remove_spaces(value: str) -> str:
    """Collapse every run of whitespace in ``value`` into a single space.

    Despite the name, whitespace is not removed outright: tabs, newlines and
    repeated spaces are each replaced by one ``' '``. Leading and trailing
    whitespace is therefore reduced to a single space, not stripped.
    """
    # Raw string so that '\s' is passed to the regex engine as written instead
    # of being an invalid escape sequence in a normal string literal.
    return re.sub(r'\s+', ' ', value)

def remove_digits(value: str) -> str:
    """Return ``value`` with every decimal digit character deleted.

    Surrounding characters are kept as they are, so ``"a1 b"`` becomes
    ``"a b"`` and ``"1 a"`` becomes ``" a"``.
    """
    # Raw string so that '\d' is passed to the regex engine as written instead
    # of being an invalid escape sequence in a normal string literal.
    return re.sub(r'\d', '', value)

# Cleaning steps, listed in the order they run when this list is passed to
# clean_string. str.strip comes first, so any whitespace left at the edges by
# the later digit/punctuation removals is not trimmed again.
clean_ops = [
    str.strip,
    remove_digits,
    str.lower,
    remove_punctuation,
    remove_spaces,
]

def clean_string(value: str, ops: List) -> str:
    """Run ``value`` through each callable in ``ops`` and return the result.

    The callables are applied in list order; each one receives the output of
    the previous one. With an empty ``ops`` the input is returned unchanged.
    """
    cleaned = value
    for apply_op in ops:
        cleaned = apply_op(cleaned)
    return cleaned

#text = "Release Brown Date: January’, 1991 * [eBook\". #11] the (brown) 'fox' {inside}\n"
#clean_string(text, clean_ops).split(' ')

## Define Word Count Functions

In [100]:
def count_words(value: str, word_list: dict):
    """Add the words found in ``value`` to the running tally ``word_list``.

    ``value`` is cleaned with ``clean_string`` using ``clean_ops`` and then
    split on single spaces. ``word_list`` is updated in place (word -> count)
    and nothing is returned. Because the split is on ``' '``, an empty or
    space-edged cleaned string also contributes an empty-string key.
    """
    tokens = clean_string(value, clean_ops).split(' ')
    for token in tokens:
        # Unseen tokens start from zero.
        word_list[token] = word_list.get(token, 0) + 1

## Count the word frequencies

In [124]:
# Running tally of word -> number of occurrences, filled in by count_words.
word_list = {}
file_path = "alice.txt"

# Open the input file for reading and tally it one line at a time.
with open(file_path, "r", encoding="utf-8") as file:
    for line in file:
        count_words(line, word_list)

# Discard the empty-string "word" that blank lines and edge spaces produce.
# The None default keeps this from raising KeyError when the input happened
# to yield no empty tokens at all.
word_list.pop("", None)
# Show the 20 most frequent words, highest count first.
sorted(word_list.items(), key=lambda x : x[1], reverse=True)[:20]

[('the', 1824),
 ('and', 912),
 ('to', 802),
 ('a', 689),
 ('of', 632),
 ('it', 537),
 ('she', 537),
 ('said', 458),
 ('you', 433),
 ('in', 432),
 ('i', 392),
 ('alice', 385),
 ('was', 359),
 ('that', 290),
 ('as', 271),
 ('her', 248),
 ('with', 227),
 ('at', 222),
 ('on', 204),
 ('all', 197),
 ('this', 179),
 ('for', 178),
 ('had', 178),
 ('not', 172),
 ('but', 167),
 ('be', 165),
 ('or', 153),
 ('so', 150),
 ('very', 145),
 ('what', 135),
 ('little', 128),
 ('they', 128),
 ('is', 123),
 ('he', 122),
 ('if', 116),
 ('out', 114),
 ('its', 114),
 ('down', 102),
 ('about', 102),
 ('one', 102),
 ('up', 100),
 ('no', 97),
 ('do', 96),
 ('his', 96),
 ('then', 90),
 ('project', 88),
 ('have', 87),
 ('were', 87),
 ('like', 85),
 ('by', 84),
 ('would', 83),
 ('went', 83),
 ('herself', 83),
 ('them', 83),
 ('know', 82),
 ('when', 80),
 ('again', 80),
 ('could', 78),
 ('there', 77),
 ('are', 76),
 ('any', 76),
 ('thought', 74),
 ('off', 72),
 ('your', 71),
 ('see', 69),
 ('time', 68),
 ('me', 68