# Chapter xx

*Data Structures and Information Retrieval in Python*

Copyright 2021 Allen Downey

License: [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International](https://creativecommons.org/licenses/by-nc-sa/4.0/)

Here's an exercise from [Chapter 12 of Think Python](https://greenteapress.com/thinkpython/html/thinkpython013.html#toc141)

> Write a program that reads a word list from a file and prints all the sets of words that are anagrams. Here is an example of what the output might look like:

```
['deltas', 'desalt', 'lasted', 'salted', 'slated', 'staled']
['retainers', 'ternaries']
['generating', 'greatening']
['resmelts', 'smelters', 'termless']
```

> Modify the previous program so that it prints the largest set of anagrams first, followed by the second largest set, and so on.

>In Scrabble a “bingo” is when you play all seven tiles in your rack, along with a letter on the board, to form an eight-letter word. What set of 8 letters forms the most possible bingos? Hint: there are seven.

    Solution: http://thinkpython.com/code/anagram_sets.py.

In [14]:
def signature(s):
    """Returns the signature of this string.

    Signature is a string that contains all of the letters in order.

    s: string
    """
    # TODO: rewrite using sorted()
    t = list(s)
    t.sort()
    t = ''.join(t)
    return t

In [15]:
signature('retainers')

'aeeinrrst'

In [16]:
signature('retainers') == signature('ternaries')

True

In [17]:
signature('retainers') == signature('canaries')

False

In [18]:
def all_anagrams(word_iterator):
    """Finds all anagrams in a list of words.

    filename: string filename of the word list

    Returns: a map from each word to a list of its anagrams.
    """
    d = {}
    for word in word_iterator:
        t = signature(word)

        # TODO: rewrite using defaultdict
        if t not in d:
            d[t] = [word]
        else:
            d[t].append(word)
    return d

In [19]:

from os.path import basename, exists

def download(url):
    filename = basename(url)
    if not exists(filename):
        from urllib.request import urlretrieve
        local, _ = urlretrieve(url, filename)
        print('Downloaded ' + local)
    
download('https://github.com/AllenDowney/DSIRP/raw/main/american-english')

In [20]:
def iterate_words(filename):
    """Read lines from a file and split them into words."""
    for line in open(filename):
        for word in line.split():
            yield word.strip()

In [21]:
anagram_dict = all_anagrams(iterate_words('american-english'))

In [22]:
def print_anagram_sets(d):
    """Prints the anagram sets in d.

    d: map from words to list of their anagrams
    """
    for v in d.values():
        if len(v) > 1:
            print(len(v), v)

In [23]:
print_anagram_sets(anagram_dict)

2 ['Abner', 'Arneb']
2 ["Abner's", "Arneb's"]
2 ['Aires', 'Aries']
2 ["Aires's", "Aries's"]
2 ['Alar', 'Aral']
2 ["Alar's", "Aral's"]
2 ['Alberio', 'Albireo']
2 ["Alberio's", "Albireo's"]
2 ['Andre', 'Arden']
2 ["Andre's", "Arden's"]
2 ['Annmarie', 'Armenian']
2 ["Annmarie's", "Armenian's"]
2 ['Arno', 'Aron']
2 ["Arno's", "Aron's"]
2 ['Atari', 'Atria']
2 ["Atari's", "Atria's"]
2 ['Banneker', 'Bernanke']
2 ["Banneker's", "Bernanke's"]
2 ['Belgian', 'Bengali']
2 ["Belgian's", "Bengali's"]
2 ['Bellow', 'Bowell']
2 ["Bellow's", "Bowell's"]
2 ['Bert', 'Bret']
2 ["Bert's", "Bret's"]
2 ['Biogen', 'Boeing']
2 ["Biogen's", "Boeing's"]
2 ['Booker', 'Brooke']
2 ["Booker's", "Brooke's"]
2 ['Born', 'Brno']
2 ["Born's", "Brno's"]
2 ['Brain', 'Brian']
2 ["Brain's", "Brian's"]
2 ['Branden', 'Brendan']
2 ["Branden's", "Brendan's"]
2 ['Breton', 'Bronte']
2 ["Breton's", "Bronte's"]
2 ['Brut', 'Burt']
2 ["Brut's", "Burt's"]
2 ['Bryon', 'Byron']
2 ["Bryon's", "Byron's"]
2 ['Callisto', 'Castillo']
2 ["Calli

2 ['garner', 'ranger']
2 ['garners', 'rangers']
2 ['garnets', 'strange']
2 ['garnish', 'sharing']
2 ['garotes', 'storage']
3 ['garret', 'garter', 'grater']
3 ["garret's", "garter's", "grater's"]
3 ['garrets', 'garters', 'graters']
2 ['gas', 'sag']
2 ["gas's", "sag's"]
2 ['gases', 'sages']
3 ['gash', 'hags', 'shag']
2 ["gash's", "shag's"]
2 ['gates', 'stage']
2 ['gateway', 'getaway']
2 ["gateway's", "getaway's"]
2 ['gateways', 'getaways']
2 ['gazer', 'graze']
2 ["gazer's", "graze's"]
2 ['gazers', 'grazes']
2 ['gazing', 'niggaz']
2 ['gear', 'rage']
2 ["gear's", "rage's"]
3 ['gears', 'rages', 'sager']
2 ['gel', 'leg']
2 ["gel's", "leg's"]
2 ['gelding', 'niggled']
2 ['geldings', 'sledging']
2 ['gelid', 'glide']
2 ['gels', 'legs']
2 ['gem', 'meg']
2 ['gems', 'megs']
2 ['generate', 'teenager']
2 ['generates', 'teenagers']
2 ['genesis', 'seeings']
2 ['genial', 'linage']
2 ['genies', 'seeing']
2 ['genitals', 'stealing']
2 ['genre', 'green']
2 ["genre's", "green's"]
2 ['genres', 'greens']
3 ['g

In [24]:
def print_anagram_sets_in_order(d):
    """Prints the anagram sets in d in decreasing order of size.

    d: map from words to list of their anagrams
    """
    # make a list of (length, word pairs)
    t = []
    for v in d.values():
        if len(v) > 1:
            t.append((len(v), v))

    # sort in ascending order of length
    t.sort()

    # print the sorted list
    for x in t:
        print(x)

In [25]:
print_anagram_sets_in_order(anagram_dict)

(2, ['Abner', 'Arneb'])
(2, ["Abner's", "Arneb's"])
(2, ['Aires', 'Aries'])
(2, ["Aires's", "Aries's"])
(2, ['Alar', 'Aral'])
(2, ["Alar's", "Aral's"])
(2, ['Alberio', 'Albireo'])
(2, ["Alberio's", "Albireo's"])
(2, ['Andre', 'Arden'])
(2, ["Andre's", "Arden's"])
(2, ['Annmarie', 'Armenian'])
(2, ["Annmarie's", "Armenian's"])
(2, ['Arno', 'Aron'])
(2, ["Arno's", "Aron's"])
(2, ['Atari', 'Atria'])
(2, ["Atari's", "Atria's"])
(2, ['Banneker', 'Bernanke'])
(2, ["Banneker's", "Bernanke's"])
(2, ['Belgian', 'Bengali'])
(2, ["Belgian's", "Bengali's"])
(2, ['Bellow', 'Bowell'])
(2, ["Bellow's", "Bowell's"])
(2, ['Bert', 'Bret'])
(2, ["Bert's", "Bret's"])
(2, ['Biogen', 'Boeing'])
(2, ["Biogen's", "Boeing's"])
(2, ['Booker', 'Brooke'])
(2, ["Booker's", "Brooke's"])
(2, ['Born', 'Brno'])
(2, ["Born's", "Brno's"])
(2, ['Brain', 'Brian'])
(2, ["Brain's", "Brian's"])
(2, ['Branden', 'Brendan'])
(2, ["Branden's", "Brendan's"])
(2, ['Breton', 'Bronte'])
(2, ["Breton's", "Bronte's"])
(2, ['Brut', 'Bu

In [27]:
def filter_length(d, n):
    """Select only the words in d that have n letters.

    d: map from word to list of anagrams
    n: integer number of letters

    returns: new map from word to list of anagrams
    """
    res = {}
    for word, anagrams in d.items():
        if len(word) == n:
            res[word] = anagrams
    return res

In [29]:
eight_letters = filter_length(anagram_dict, 8)
print_anagram_sets_in_order(eight_letters)
    

(2, ['Annmarie', 'Armenian'])
(2, ['Banneker', 'Bernanke'])
(2, ["Bellow's", "Bowell's"])
(2, ["Biogen's", "Boeing's"])
(2, ["Booker's", "Brooke's"])
(2, ["Breton's", "Bronte's"])
(2, ['Callisto', 'Castillo'])
(2, ["Cantor's", "Carnot's"])
(2, ['Caroline', 'Cornelia'])
(2, ["Carter's", "Crater's"])
(2, ["Castor's", "Castro's"])
(2, ["Ceylon's", "Conley's"])
(2, ["Dalian's", "Danial's"])
(2, ["Dannie's", "Dianne's"])
(2, ["Dionne's", "Donnie's"])
(2, ["Dunbar's", "Durban's"])
(2, ["Elanor's", "Elnora's"])
(2, ["Eliseo's", "Eloise's"])
(2, ["Fannie's", "Fenian's"])
(2, ["Forest's", "Foster's"])
(2, ["Freida's", "Frieda's"])
(2, ['Georgian', 'Georgina'])
(2, ["Janine's", "Jannie's"])
(2, ["Lenora's", "Lorena's"])
(2, ["Lenore's", "Lorene's"])
(2, ["Malian's", "Manila's"])
(2, ["Maoism's", "Mimosa's"])
(2, ["Marcie's", "Mercia's"])
(2, ["Marian's", "Marina's"])
(2, ["Marlon's", "Molnar's"])
(2, ["Maseru's", "Mauser's"])
(2, ["Monroe's", "Moreno's"])
(2, ["Roland's", "Ronald's"])
(2, ["Shar

In [78]:
!redis-server --daemonize yes

100317:C 08 Aug 2021 16:32:52.903 # oO0OoO0OoO0Oo Redis is starting oO0OoO0OoO0Oo
100317:C 08 Aug 2021 16:32:52.903 # Redis version=5.0.3, bits=64, commit=00000000, modified=0, pid=100317, just started
100317:C 08 Aug 2021 16:32:52.903 # Configuration loaded


Here's an example from the [Redis tutorial on Real Python](https://realpython.com/python-redis/).

In [45]:
import redis

r = redis.Redis()

In [47]:
r.mset({"Croatia": "Zagreb", "Bahamas": "Nassau"})

True

In [48]:
r.get("Bahamas")

b'Nassau'

In [64]:
for key in r.keys():
    r.delete(key)

In [65]:
def all_anagrams_redis(word_iterator, r):
    """Finds all anagrams in a list of words.

    filename: string filename of the word list

    Returns: a map from each word to a list of its anagrams.
    """
    for word in word_iterator:
        t = signature(word)
        r.lpush(t, word)

In [66]:
all_anagrams_redis(iterate_words('american-english'), r)

In [70]:
r.keys()

[b'giinrw',
 b"'Hasy",
 b"'Zagnosuvy",
 b"'acceinnnosv",
 b'Nhooprrt',
 b'ccegiiimnnosv',
 b'accehst',
 b'Maegirt',
 b'Jaaintu',
 b"'Daennos",
 b'bddfmnosuu',
 b"'aceimnoprst",
 b'achktw',
 b"'Rosss",
 b'eehiilmmnosst',
 b"'Keinrsst",
 b'Saahnn',
 b'addeegnr',
 b"'aeinssswx",
 b"'Sacehilmnns",
 b'bbeiknor',
 b'adiiooprsuv',
 b'adeeehprsst',
 b'ikrs',
 b'eimossst',
 b'delopsstu',
 b'aadoprt',
 b'ceginnovy',
 b'eiorssu',
 b'aeghinrrsstt',
 b'aeimotx',
 b'accefinost',
 b"'aaaeimnqrsu",
 b'Saeilnnov',
 b'afllosw',
 b'abgiiln',
 b'Faeklnru',
 b'diloy',
 b'acn',
 b'ddeloprs',
 b'adgiluy',
 b'deghhilorsstty',
 b'aaegpsss',
 b'eeept',
 b'bdeefiils',
 b'Eaeenrrst',
 b'denorsu',
 b'adeeeegnrrt',
 b'cdeeu',
 b'Aaadilnnsu',
 b'degiklo',
 b'Wiiklns',
 b'aachrswy',
 b"'Toost",
 b'aacdeeps',
 b"'aaeegmnoprst",
 b"'Lefis",
 b'eikooprs',
 b'Oainsss',
 b'acdimno',
 b'acinopt',
 b"'agorstu",
 b"'Jadimssu",
 b'deeglrs',
 b"'Eaddrsw",
 b'aceeekrrst',
 b"'aehhst",
 b'ailrsttu',
 b"'acdhhioprrss",
 b"'eiopst

In [79]:
r.lrange(b'opst', 0, -1)

[]

In [80]:
def print_anagram_sets_redis(r):
    """Prints the anagram sets in d.

    d: map from words to list of their anagrams
    """
    for key in r.keys():
        v = r.lrange(key, 0, -1)
        if len(v) > 1:
            print(len(v), v)

In [81]:
print_anagram_sets_redis(r)

In [77]:
!killall redis-server