# A simple functional Word Count Example

In [1]:
from itertools import groupby
import re 

In [2]:
# Split a line into words by separating on whitespace
# Input : "This is a string"
# Output: ["This", "is", "a", "string"]
def split_line(lines):
    """Split text into a list of words.

    Words are separated by any run of whitespace (spaces, tabs, newlines),
    so multi-line text is tokenized correctly and two words separated only
    by a line break are not glued together into one token.

    Args:
        lines: The text to split.

    Returns:
        A list of the whitespace-delimited words. Leading, trailing and
        repeated whitespace never produce empty strings; an empty or
        all-whitespace input yields an empty list.
    """
    # str.split() with no separator splits on runs of any whitespace,
    # unlike split(' ') which only recognises the single space character.
    return lines.split()

# Convert the list of words to list of lower case words
# Input : ["Hello", "World", "YOLO"]
# Output: ["hello", "world", "yolo"]
def convert_lower(words_list):
    """Return a new list containing each word converted to lower case.

    Args:
        words_list: An iterable of strings.

    Returns:
        A list with ``.lower()`` applied to every element, in the same order.
    """
    return [word.lower() for word in words_list]

In [3]:
# Remove leading and trailing whitespace from each word
# Input: ["   This ", " is ", "a", "              string"]
# Output: ["This", "is", "a", "string"]
def trim_lines(lines_list):
    """Return a new list with surrounding whitespace stripped from each item.

    Args:
        lines_list: An iterable of strings.

    Returns:
        A list with ``.strip()`` applied to every element, in the same order.
        Elements that consist only of whitespace become empty strings.
    """
    return [entry.strip() for entry in lines_list]

# Remove empty strings
# Input: ["", "This", "", "is", "a", "string"]
# Output: ["This", "is", "a", "string"]
def remove_empty(words_list):
    """Return a new list without the empty-string entries.

    Args:
        words_list: An iterable of strings.

    Returns:
        A list of the elements that are not equal to ``''``, in their
        original order.
    """
    return [word for word in words_list if word != '']

# Remove Punctuation from word/string
# Input : "Yolo!!! Hello,.#world?"
# Output: "Yolo Helloworld"
def remove_punctuation(my_string):
    """Return ``my_string`` with punctuation characters removed.

    Every character that is neither a word character (``\\w``) nor
    whitespace (``\\s``) is deleted. Note that ``\\w`` includes the
    underscore, so underscores are kept.

    Args:
        my_string: The text to clean.

    Returns:
        The text with all non-word, non-whitespace characters removed.
    """
    not_word_or_space = re.compile(r'[^\w\s]')
    return not_word_or_space.sub('', my_string)

In [4]:
# Group words together
# Input : ["hello", "world", "hello", "yolo", "world"]
# Output: An itertools.groupby object 
# Code : 
# for k, v in groupby(sorted(["hello", "world", "hello", "yolo", "world"])):
#    print(k, list(v))
# Output for above code
# hello ['hello', 'hello']
# world ['world', 'world']
# yolo ['yolo']
def group_words(words_list):
    """Group identical words together.

    The words are sorted first because ``itertools.groupby`` only groups
    consecutive equal items.

    Args:
        words_list: An iterable of words.

    Returns:
        An ``itertools.groupby`` iterator yielding ``(word, occurrences)``
        pairs in sorted order, where ``occurrences`` is an iterator over the
        equal words. The result is lazy and can be consumed only once.
    """
    ordered_words = list(words_list)
    ordered_words.sort()
    return groupby(ordered_words)

In [13]:
# Returns count of each word
def get_word_count(grouped_words):
    """Count the occurrences in each group of words.

    Args:
        grouped_words: An iterable of ``(word, occurrences)`` pairs, such as
            the iterator returned by ``itertools.groupby``.

    Returns:
        A list of ``(word, count)`` tuples, one per group, in the order the
        groups were produced.
    """
    # Each group's iterator is fully consumed before moving to the next
    # pair, which is what a groupby iterator requires.
    return [(pair[0], len(list(pair[1]))) for pair in grouped_words]

In [14]:
# Sample input text: two lines containing mixed case, punctuation and
# indentation whitespace, so each cleaning step below has something to do.
my_str = """hello this is a string. This is a good String.
            hello world!!! """

In [15]:
# Step 1: strip punctuation from the raw text
remove_punct = remove_punctuation(my_str)

# Step 2: break the cleaned text into individual words
words_list = split_line(remove_punct)

# Step 3: normalise case, then strip surrounding whitespace from each word
lowered_words = convert_lower(words_list)
lower_and_trim_list = trim_lines(lowered_words)

# Step 4: drop the empty strings left behind by the earlier steps
filtered_words = remove_empty(lower_and_trim_list)

# Step 5: sort the words and group identical ones together
grouped_words_iterable = group_words(filtered_words)

# Step 6: turn the groupby iterable into a list of (word, count) tuples
word_count_dict = get_word_count(grouped_words_iterable)

In [16]:
# Display the result. Despite the `_dict` suffix, this is a list of
# (word, count) tuples in sorted word order.
word_count_dict

[('a', 2),
 ('good', 1),
 ('hello', 2),
 ('is', 2),
 ('string', 2),
 ('this', 2),
 ('world', 1)]

You can convert the result to whatever data type suits you; for example, `dict(word_count_dict)` gives a word-to-count mapping.

# Alternate ways you can try
1. Make use of itertools.chain
2. Use functional composition (discussed in the next section)