# Quantifying Shakespeare
Analyzing the frequencies of different letters in Shakespeare's work.

(Data from: bit.ly/shakespeare-txt)

In this Notebook, we will:
1. Read all of Shakespeare's works into a list of strings
2. Count the frequencies of letters used
3. Visualize the frequencies with a bar graph

## Read the lines of the data

In [17]:
from io import TextIOWrapper

def read_lines(filename: str) -> list[str]:
    """Read a .txt file into a list of normalized lines.

    Each line has its leading and trailing whitespace removed (including the
    trailing newline) and is converted to lowercase. Blank lines are kept as
    empty strings, so the result has one entry per line of the file.

    Args:
        filename: Path of the text file to read.

    Returns:
        The normalized lines, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    # The context manager closes the file as soon as reading finishes, even
    # if an error occurs part-way through; previously the handle was never
    # closed explicitly. The platform default encoding is still used.
    with open(filename, "r") as file_handle:
        # "   A Dog    \n" -> "a dog"
        return [line.strip().lower() for line in file_handle]

# Load the whole text once; read_lines strips and lowercases every line.
shakes_lines: list[str] = read_lines("../data/t8.shakespeare.txt")
# Sanity check: report how many lines were read.
print(len(shakes_lines))

124456


## Counting letters

In [18]:
def tally(counts: dict[str, int], key: str) -> None:
    """Add one to the count recorded for key, modifying counts in place.

    A key that is not yet present is treated as having a count of 0, so its
    first tally stores 1.
    """
    counts[key] = counts.get(key, 0) + 1

In [19]:
# Exercise tally on a throwaway dict, printing the state after each call.
# Expected states, in order: {"a": 1}, {"a": 2}, {"a": 2, "b": 1}
d: dict[str, int] = {}
for key in ("a", "a", "b"):
    tally(d, key)
    print(d)

{'a': 1}
{'a': 2}
{'a': 2, 'b': 1}


In [20]:
def count_letters(lines: list[str]) -> dict[str, int]:
    """Count how often each alphabetic character occurs across all lines.

    Only characters for which str.isalpha() is true are tallied; digits,
    punctuation, and whitespace are skipped. Case is not folded here, so
    "A" and "a" are separate keys unless the caller lowercased the input.

    Args:
        lines: The strings to scan.

    Returns:
        A dict mapping each letter to its number of occurrences, with keys
        in order of first appearance.
    """
    letter_counts: dict[str, int] = {}
    for line in lines:
        # Walk the characters directly rather than indexing by position.
        for char in line:
            if char.isalpha():
                tally(letter_counts, char)
    return letter_counts

# Tally the letters across every line that was read in.
a: dict[str, int] = count_letters(shakes_lines)
# A bare expression on the last line of a notebook cell displays its value.
a

{'t': 329775,
 'h': 236868,
 'i': 253990,
 's': 248989,
 'e': 447204,
 'x': 5294,
 'f': 80516,
 'l': 170019,
 'p': 58464,
 'r': 237864,
 'n': 243262,
 'd': 149462,
 'b': 61956,
 'y': 94370,
 'o': 314600,
 'j': 4779,
 'c': 88185,
 'g': 68199,
 'u': 128947,
 'a': 289150,
 'w': 89390,
 'm': 111452,
 'k': 35408,
 'v': 37569,
 'z': 1631,
 'q': 3582}