In [29]:
# Short sample sentence used to try out the word-counting functions.
test = 'This is my test text. We are keeping this text to keep this manageable.'

In [30]:
def count_words(text):
    """Count the number of times each word occurs in text.

    Words are separated by any run of whitespace (spaces, tabs, newlines).
    Case and punctuation are left untouched, so 'This' and 'this', or
    'text' and 'text.', are counted as different words.

    Args:
        text (str): The text to analyse.

    Returns:
        dict: Maps each unique word to the number of times it occurs.
        Empty when text contains no words.
    """
    word_counts = {}
    # split() without an argument splits on runs of whitespace and never
    # yields '' -- split(" ") would count an empty-string "word" for empty
    # input or repeated spaces, and would not split on newlines/tabs.
    for word in text.split():
        # unknown words start from 0, known words are incremented
        word_counts[word] = word_counts.get(word, 0) + 1
    return word_counts

In [31]:
# This version keeps case and punctuation, so 'This'/'this' and
# 'text'/'text.' show up as separate words in the result.
count_words(test)

{'This': 1,
 'We': 1,
 'are': 1,
 'is': 1,
 'keep': 1,
 'keeping': 1,
 'manageable.': 1,
 'my': 1,
 'test': 1,
 'text': 1,
 'text.': 1,
 'this': 2,
 'to': 1}

In [32]:
def count_words(text):
    """Count word occurrences in text, ignoring case and some punctuation.

    The text is lower-cased and the characters . : ; ' " are removed before
    counting. Words are separated by any run of whitespace (spaces, tabs,
    newlines), so repeated or surrounding whitespace never produces an
    empty-string "word".

    Args:
        text (str): The text to analyse.

    Returns:
        dict: Maps each unique cleaned word to the number of times it
        occurs. Empty when text contains no words.
    """
    # lower case letters so that 'This' and 'this' are the same word
    text = text.lower()

    # skip punctuation
    skips = ['.', ':', ';', "'", '"']
    for ch in skips:
        text = text.replace(ch, "")

    word_counts = {}
    # split() without an argument splits on runs of whitespace and never
    # yields ''. With split(" "), empty input or a standalone punctuation
    # mark (removed above, leaving two adjacent spaces) was counted as an
    # empty-string word, and newlines/tabs did not separate words.
    for word in text.split():
        # unknown words start from 0, known words are incremented
        word_counts[word] = word_counts.get(word, 0) + 1
    return word_counts

In [33]:
# After lower-casing and removing punctuation, 'this' is counted three
# times and 'text' twice.
count_words(test)

{'are': 1,
 'is': 1,
 'keep': 1,
 'keeping': 1,
 'manageable': 1,
 'my': 1,
 'test': 1,
 'text': 2,
 'this': 3,
 'to': 1,
 'we': 1}

In [34]:
from collections import Counter

def count_words_fast(text):
    """Count word occurrences in text using collections.Counter.

    The text is lower-cased and the characters . : ; ' " are removed before
    counting. Words are separated by any run of whitespace (spaces, tabs,
    newlines), so repeated or surrounding whitespace never produces an
    empty-string "word".

    Args:
        text (str): The text to analyse.

    Returns:
        collections.Counter: Maps each unique cleaned word to the number of
        times it occurs. Empty when text contains no words.
    """
    # lower case letters so that 'This' and 'this' are the same word
    text = text.lower()

    # skip punctuation
    skips = ['.', ':', ';', "'", '"']
    for ch in skips:
        text = text.replace(ch, "")

    # split() without an argument splits on runs of whitespace and never
    # yields ''. With split(' '), empty input or a standalone punctuation
    # mark (removed above) was counted as an empty-string word.
    word_counts = Counter(text.split())
    return word_counts

In [35]:
# Same word counts as count_words(test), returned as a Counter.
count_words_fast(test)

Counter({'are': 1,
         'is': 1,
         'keep': 1,
         'keeping': 1,
         'manageable': 1,
         'my': 1,
         'test': 1,
         'text': 2,
         'this': 3,
         'to': 1,
         'we': 1})

In [36]:
# == compares the contents of the two mappings, so matching word counts
# give True.
count_words_fast(test) == count_words(test)

True

In [37]:
# Number of distinct words: the sentence has 8 words, but 'check' and
# 'comprehension' each occur twice, leaving 6 unique keys.
len(count_words("This comprehension check is to check for comprehension."))

6

In [40]:
text = 'This comprehension check is to check for comprehension.'
# `is` tests object identity, not contents: each call builds a new object
# (a dict and a Counter), so they are never the same object.
count_words(text) is count_words_fast(text)

False

In [41]:
text = 'This comprehension check is to check for comprehension.'
# `==` compares contents: both functions produce the same word counts.
count_words(text) == count_words_fast(text)

True

#### Introduction to Language Processing: Question 1

What is Project Gutenberg?

- **An online repository of publicly available books in many languages.**
- An online repository of electronically-scanned microfiche copies of the original works of Martin Luther.
- An online translation service that can be used for any text file, including entire books.

#### Counting Words: Question 1

The function ```count_words``` is as defined in Video 3.2.2.

Consider the following code:

```len(count_words("This comprehension check is to check for comprehension."))```

What does this return?

- 5
- **6**
- 7
- 8

#### Counting Words: Question 2

The functions ```count_words``` and ```count_words_fast``` are as defined in Video 3.2.2. Consider the following code:

```count_words(text) is count_words_fast(text)```

What does this return?

- True
- **False**