<a href="https://colab.research.google.com/github/RussAbbott/Wordle/blob/main/Wordle.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# !pip install "rich[jupyter]"

from collections import Counter, defaultdict
import contextlib
from functools import reduce
from itertools import product
from math import ceil, floor, log2
from random import choice
from re import sub
from rich import print as rprint
from rich.console import Console, Group
from rich.padding import Padding
from rich.panel import Panel
from rich.theme import Theme

# fmean (float mean) is faster than mean
from statistics import fmean, median
from string import ascii_letters, ascii_lowercase, ascii_uppercase
from time import perf_counter
from typing import Any, Callable, Dict, ForwardRef, Iterable, List, Literal, Optional, Set, Sized, Tuple, TypeVar, Union

# Guess = ForwardRef('Guess')
G = TypeVar("G", bound="Guess")

from typing_extensions import Self
from warnings import warn


!python --version

Python 3.10.12


In [2]:
# CONSTANTS

# SCORE_RANKING maps each score character to an integer rank so that scores can be
# compared with '>'. The ranks follow the order of the characters in '_~yg':
# '_' -> 0, '~' -> 1, 'y' -> 2, 'g' -> 3.
SCORE_RANKING: Dict[str, int] = {score: rank for rank, score in enumerate('_~yg')}

# Run-mode flags.
# NOTE(review): neither flag is read in this part of the file; presumably they select
# interactive play and extra diagnostic output -- confirm against their uses.
INTERACTIVE = True
DEBUG: bool = False

# Panel colors. These are the default arguments of panel().
# The trailing comments list alternative color values that are not currently in use.
PANEL_FOREGROUND_COLOR ='#eeeeee' # '#ffffff',
PANEL_BACKGROUND_COLOR ='#002025'   #  '#5f87af',    '#202525',  '#262626',  '#1c1c1c',   '#000000', '#333333',
PANEL_BORDER_COLOR ="#00cc00"
# Alternative title style (not in use): 'bold black on #00bbdd'
PANEL_TITLE_STYLE ='#00cc00 on #002025'
# Alternative title style (not in use): 'bold "#eeeeee" on #00bbdd'

# Number of letters in a word. guess_a_word() rejects guesses of any other length.
WORD_LENGTH = 5

# Named styles for the rich Console created below. guess_a_word() prints its
# validation messages with the 'warning' style.
WORDLE_THEME = Theme({'correct':   'bold black on #ddffdd',
                      'incorrect': 'bold black on #ffdddd',
                      'warning':   'bold red on #eeffee',
                      })


# This is probably more trouble than it's worth, but I wanted to fool around with defaultdict.
# (defaultdict is more like a generalization of Counter than simply a dictionary that returns
# a default value when presented with a non-existent key.)
def build_styles_defaultdict() -> defaultdict:
    """
    Build the lookup table from score characters to rich style strings.

    The result is a defaultdict holding one style per score character
    ('g', 'y', '~', '_'), in that insertion order. Looking up any other key
    yields None (and, as with every defaultdict, stores that key with the
    value None).
    """
    # defaultdict needs a zero-argument callable that produces the default value;
    # a lambda returning the constant None is the simplest such callable.
    styles = defaultdict(lambda: None)

    # Fill the table in place with the known score styles.
    styles.update({
        'g': "bold black on #00dd00",
        'y': "bold black on #dddd00",
        '~': "bold black on #bb7799",
        '_': "#777777 on #333333",
    })
    return styles

# Module-level STYLES table, built once: maps the score characters 'g', 'y', '~', '_'
# to rich style strings and returns None for any other key.
STYLES = build_styles_defaultdict()


In [3]:
### Wordle word list

WORDLE_WORD_LIST = [
    'aback', 'abase', 'abate', 'abbey', 'abbot', 'abhor', 'abide', 'abled', 'abode', 'abort', 'about', 'above', 'abuse', 'abyss', 'acorn', 'acrid', 'actor', 'acute', 'adage', 'adapt', 'adept', 'admin', 'admit', 'adobe', 'adopt', 'adore', 'adorn', 'adult', 'affix', 'afire', 'afoot', 'afoul', 'after', 'again', 'agape', 'agate', 'agent', 'agile', 'aging', 'aglow', 'agony', 'agora', 'agree', 'ahead', 'aider', 'aisle', 'alarm', 'album', 'alert', 'algae', 'alibi', 'alien', 'align', 'alike', 'alive', 'allay', 'alley', 'allot', 'allow', 'alloy', 'aloft', 'alone', 'along', 'aloof', 'aloud', 'alpha', 'altar', 'alter', 'amass', 'amaze', 'amber', 'amble', 'amend', 'amiss', 'amity', 'among', 'ample', 'amply', 'amuse', 'angel', 'anger', 'angle', 'angry', 'angst', 'anime', 'ankle', 'annex', 'annoy', 'annul', 'anode', 'antic', 'anvil', 'aorta', 'apart', 'aphid', 'aping', 'apnea', 'apple', 'apply', 'apron', 'aptly', 'arbor', 'ardor', 'arena', 'argue', 'arise', 'armor', 'aroma', 'arose', 'array', 'arrow', 'arson', 'artsy', 'ascot', 'ashen', 'aside', 'askew', 'assay', 'asset', 'atoll', 'atone', 'attic', 'audio', 'audit', 'augur', 'aunty', 'avail', 'avert', 'avian', 'avoid', 'await', 'awake', 'award', 'aware', 'awash', 'awful', 'awoke', 'axial', 'axiom', 'axion', 'azure', 'bacon', 'badge', 'badly', 'bagel', 'baggy', 'baker', 'baler', 'balmy', 'banal', 'banjo', 'barge', 'baron', 'basal', 'basic', 'basil', 'basin', 'basis', 'baste', 'batch', 'bathe', 'baton', 'batty', 'bawdy', 'bayou', 'beach', 'beady', 'beard', 'beast', 'beech', 'beefy', 'befit', 'began', 'begat', 'beget', 'begin', 'begun', 'being', 'belch', 'belie', 'belle', 'belly', 'below', 'bench', 'beret', 'berry', 'berth', 'beset', 'betel', 'bevel', 'bezel', 'bible', 'bicep', 'biddy', 'bigot', 'bilge', 'billy', 'binge', 'bingo', 'biome', 'birch', 'birth', 'bison', 'bitty', 'black', 'blade', 'blame', 'bland', 'blank', 'blare', 'blast', 'blaze', 'bleak', 'bleat', 'bleed', 'bleep', 'blend', 'bless', 'blimp', 'blind', 'blink', 
'bliss', 'blitz', 'bloat', 'block', 'bloke', 'blond', 'blood', 'bloom', 'blown', 'bluer', 'bluff', 'blunt', 'blurb', 'blurt', 'blush', 'board', 'boast', 'bobby', 'boney', 'bongo', 'bonus', 'booby', 'boost', 'booth', 'booty', 'booze', 'boozy', 'borax', 'borne', 'bosom', 'bossy', 'botch', 'bough', 'boule', 'bound', 'bowel', 'boxer', 'brace', 'braid', 'brail', 'brain', 'brake', 'brand', 'brash', 'brass', 'brave', 'bravo', 'brawl', 'brawn', 'bread', 'break', 'breed', 'briar', 'bribe', 'brick', 'bride', 'brief', 'brine', 'bring', 'brink', 'briny', 'brisk', 'broad', 'broil', 'broke', 'brood', 'brook', 'broom', 'broth', 'brown', 'brunt', 'brush', 'brute', 'buddy', 'budge', 'buggy', 'bugle', 'build', 'built', 'bulge', 'bulky', 'bully', 'bunch', 'bunny', 'burly', 'burnt', 'burst', 'bused', 'bushy', 'butch', 'butte', 'buxom', 'buyer', 'bylaw', 'cabal', 'cabby', 'cabin', 'cable', 'cacao', 'cache', 'cacti', 'caddy', 'cadet', 'cagey', 'cairn', 'camel', 'cameo', 'canal', 'candy', 'canny', 'canoe', 'canon', 'caper', 'caput', 'carat', 'cargo', 'carol', 'carry', 'carve', 'caste', 'catch', 'cater', 'catty', 'caulk', 'cause', 'cavil', 'cease', 'cedar', 'cello', 'chafe', 'chaff', 'chain', 'chair', 'chalk', 'champ', 'chant', 'chaos', 'chard', 'charm', 'chart', 'chase', 'chasm', 'cheap', 'cheat', 'check', 'cheek', 'cheer', 'chess', 'chest', 'chick', 'chide', 'chief', 'child', 'chili', 'chill', 'chime', 'china', 'chirp', 'chock', 'choir', 'choke', 'chord', 'chore', 'chose', 'chuck', 'chump', 'chunk', 'churn', 'chute', 'cider', 'cigar', 'cinch', 'circa', 'civic', 'civil', 'clack', 'claim', 'clamp', 'clang', 'clank', 'clash', 'clasp', 'class', 'clean', 'clear', 'cleat', 'cleft', 'clerk', 'click', 'cliff', 'climb', 'cling', 'clink', 'cloak', 'clock', 'clone', 'close', 'cloth', 'cloud', 'clout', 'clove', 'clown', 'cluck', 'clued', 'clump', 'clung', 'coach', 'coast', 'cobra', 'cocoa', 'colon', 'color', 'comet', 'comfy', 'comic', 'comma', 'conch', 'condo', 'conic', 'copse', 'coral', 'corer', 
'corny', 'couch', 'cough', 'could', 'count', 'coupe', 'court', 'coven', 'cover', 'covet', 'covey', 'cower', 'coyly', 'crack', 'craft', 'cramp', 'crane', 'crank', 'crash', 'crass', 'crate', 'crave', 'crawl', 'craze', 'crazy', 'creak', 'cream', 'credo', 'creed', 'creek', 'creep', 'creme', 'crepe', 'crept', 'cress', 'crest', 'crick', 'cried', 'crier', 'crime', 'crimp', 'crisp', 'croak', 'crock', 'crone', 'crony', 'crook', 'cross', 'croup', 'crowd', 'crown', 'crude', 'cruel', 'crumb', 'crump', 'crush', 'crust', 'crypt', 'cubic', 'cumin', 'curio', 'curly', 'curry', 'curse', 'curve', 'curvy', 'cutie', 'cyber', 'cycle', 'cynic', 'daddy', 'daily', 'dairy', 'daisy', 'dally', 'dance', 'dandy', 'datum', 'daunt', 'dealt', 'death', 'debar', 'debit', 'debug', 'debut', 'decal', 'decay', 'decor', 'decoy', 'decry', 'defer', 'deign', 'deity', 'delay', 'delta', 'delve', 'demon', 'demur', 'denim', 'dense', 'depot', 'depth', 'derby', 'deter', 'detox', 'deuce', 'devil', 'diary', 'dicey', 'digit', 'dilly', 'dimly', 'diner', 'dingo', 'dingy', 'diode', 'dirge', 'dirty', 'disco', 'ditch', 'ditto', 'ditty', 'diver', 'dizzy', 'dodge', 'dodgy', 'dogma', 'doing', 'dolly', 'donor', 'donut', 'dopey', 'doubt', 'dough', 'dowdy', 'dowel', 'downy', 'dowry', 'dozen', 'draft', 'drain', 'drake', 'drama', 'drank', 'drape', 'drawl', 'drawn', 'dread', 'dream', 'dress', 'dried', 'drier', 'drift', 'drill', 'drink', 'drive', 'droit', 'droll', 'drone', 'drool', 'droop', 'dross', 'drove', 'drown', 'druid', 'drunk', 'dryer', 'dryly', 'duchy', 'dully', 'dummy', 'dumpy', 'dunce', 'dusky', 'dusty', 'dutch', 'duvet', 'dwarf', 'dwell', 'dwelt', 'dying', 'eager', 'eagle', 'early', 'earth', 'easel', 'eaten', 'eater', 'ebony', 'eclat', 'edict', 'edify', 'eerie', 'egret', 'eight', 'eject', 'eking', 'elate', 'elbow', 'elder', 'elect', 'elegy', 'elfin', 'elide', 'elite', 'elope', 'elude', 'email', 'embed', 'ember', 'emcee', 'empty', 'enact', 'endow', 'enema', 'enemy', 'enjoy', 'ennui', 'ensue', 'enter', 'entry', 'envoy', 
'epoch', 'epoxy', 'equal', 'equip', 'erase', 'erect', 'erode', 'error', 'erupt', 'essay', 'ester', 'ether', 'ethic', 'ethos', 'etude', 'evade', 'event', 'every', 'evict', 'evoke', 'exact', 'exalt', 'excel', 'exert', 'exile', 'exist', 'expel', 'extol', 'extra', 'exult', 'eying', 'fable', 'facet', 'faint', 'fairy', 'faith', 'false', 'fancy', 'fanny', 'farce', 'fatal', 'fatty', 'fault', 'fauna', 'favor', 'feast', 'fecal', 'feign', 'fella', 'felon', 'femme', 'femur', 'fence', 'feral', 'ferry', 'fetal', 'fetch', 'fetid', 'fetus', 'fever', 'fewer', 'fiber', 'fibre', 'ficus', 'field', 'fiend', 'fiery', 'fifth', 'fifty', 'fight', 'filer', 'filet', 'filly', 'filmy', 'filth', 'final', 'finch', 'finer', 'first', 'fishy', 'fixer', 'fizzy', 'fjord', 'flack', 'flail', 'flair', 'flake', 'flaky', 'flame', 'flank', 'flare', 'flash', 'flask', 'fleck', 'fleet', 'flesh', 'flick', 'flier', 'fling', 'flint', 'flirt', 'float', 'flock', 'flood', 'floor', 'flora', 'floss', 'flour', 'flout', 'flown', 'fluff', 'fluid', 'fluke', 'flume', 'flung', 'flunk', 'flush', 'flute', 'flyer', 'foamy', 'focal', 'focus', 'foggy', 'foist', 'folio', 'folly', 'foray', 'force', 'forge', 'forgo', 'forte', 'forth', 'forty', 'forum', 'found', 'foyer', 'frail', 'frame', 'frank', 'fraud', 'freak', 'freed', 'freer', 'fresh', 'friar', 'fried', 'frill', 'frisk', 'fritz', 'frock', 'frond', 'front', 'frost', 'froth', 'frown', 'froze', 'fruit', 'fudge', 'fugue', 'fully', 'fungi', 'funky', 'funny', 'furor', 'furry', 'fussy', 'fuzzy', 'gaffe', 'gaily', 'gamer', 'gamma', 'gamut', 'gassy', 'gaudy', 'gauge', 'gaunt', 'gauze', 'gavel', 'gawky', 'gayer', 'gayly', 'gazer', 'gecko', 'geeky', 'geese', 'genie', 'genre', 'ghost', 'ghoul', 'giant', 'giddy', 'gipsy', 'girly', 'girth', 'given', 'giver', 'glade', 'gland', 'glare', 'glass', 'glaze', 'gleam', 'glean', 'glide', 'glint', 'gloat', 'globe', 'gloom', 'glory', 'gloss', 'glove', 'glyph', 'gnash', 'gnome', 'godly', 'going', 'golem', 'golly', 'gonad', 'goner', 'goody', 'gooey', 
'goofy', 'goose', 'gorge', 'gouge', 'gourd', 'grace', 'grade', 'graft', 'grail', 'grain', 'grand', 'grant', 'grape', 'graph', 'grasp', 'grass', 'grate', 'grave', 'gravy', 'graze', 'great', 'greed', 'green', 'greet', 'grief', 'grill', 'grime', 'grimy', 'grind', 'gripe', 'groan', 'groin', 'groom', 'grope', 'gross', 'group', 'grout', 'grove', 'growl', 'grown', 'gruel', 'gruff', 'grunt', 'guard', 'guava', 'guess', 'guest', 'guide', 'guild', 'guile', 'guilt', 'guise', 'gulch', 'gully', 'gumbo', 'gummy', 'guppy', 'gusto', 'gusty', 'gypsy', 'habit', 'hairy', 'halve', 'handy', 'happy', 'hardy', 'harem', 'harpy', 'harry', 'harsh', 'haste', 'hasty', 'hatch', 'hater', 'haunt', 'haute', 'haven', 'havoc', 'hazel', 'heady', 'heard', 'heart', 'heath', 'heave', 'heavy', 'hedge', 'hefty', 'heist', 'helix', 'hello', 'hence', 'heron', 'hilly', 'hinge', 'hippo', 'hippy', 'hitch', 'hoard', 'hobby', 'hoist', 'holly', 'homer', 'honey', 'honor', 'horde', 'horny', 'horse', 'hotel', 'hotly', 'hound', 'house', 'hovel', 'hover', 'howdy', 'human', 'humid', 'humor', 'humph', 'humus', 'hunch', 'hunky', 'hurry', 'husky', 'hussy', 'hutch', 'hydro', 'hyena', 'hymen', 'hyper', 'icily', 'icing', 'ideal', 'idiom', 'idiot', 'idler', 'idyll', 'igloo', 'iliac', 'image', 'imbue', 'impel', 'imply', 'inane', 'inbox', 'incur', 'index', 'inept', 'inert', 'infer', 'ingot', 'inlay', 'inlet', 'inner', 'input', 'inter', 'intro', 'ionic', 'irate', 'irony', 'islet', 'issue', 'itchy', 'ivory', 'jaunt', 'jazzy', 'jelly', 'jerky', 'jetty', 'jewel', 'jiffy', 'joint', 'joist', 'joker', 'jolly', 'joust', 'judge', 'juice', 'juicy', 'jumbo', 'jumpy', 'junta', 'junto', 'juror', 'kappa', 'karma', 'kayak', 'kebab', 'khaki', 'kinky', 'kiosk', 'kitty', 'knack', 'knave', 'knead', 'kneed', 'kneel', 'knelt', 'knife', 'knock', 'knoll', 'known', 'koala', 'krill', 'label', 'labor', 'laden', 'ladle', 'lager', 'lance', 'lanky', 'lapel', 'lapse', 'large', 'larva', 'lasso', 'latch', 'later', 'lathe', 'latte', 'laugh', 'layer', 'leach', 
'leafy', 'leaky', 'leant', 'leapt', 'learn', 'lease', 'leash', 'least', 'leave', 'ledge', 'leech', 'leery', 'lefty', 'legal', 'leggy', 'lemon', 'lemur', 'leper', 'level', 'lever', 'libel', 'liege', 'light', 'liken', 'lilac', 'limbo', 'limit', 'linen', 'liner', 'lingo', 'lipid', 'lithe', 'liver', 'livid', 'llama', 'loamy', 'loath', 'lobby', 'local', 'locus', 'lodge', 'lofty', 'logic', 'login', 'loopy', 'loose', 'lorry', 'loser', 'louse', 'lousy', 'lover', 'lower', 'lowly', 'loyal', 'lucid', 'lucky', 'lumen', 'lumpy', 'lunar', 'lunch', 'lunge', 'lupus', 'lurch', 'lurid', 'lusty', 'lying', 'lymph', 'lynch', 'lyric', 'macaw', 'macho', 'macro', 'madam', 'madly', 'mafia', 'magic', 'magma', 'maize', 'major', 'maker', 'mambo', 'mamma', 'mammy', 'manga', 'mange', 'mango', 'mangy', 'mania', 'manic', 'manly', 'manor', 'maple', 'march', 'marry', 'marsh', 'mason', 'masse', 'match', 'matey', 'mauve', 'maxim', 'maybe', 'mayor', 'mealy', 'meant', 'meaty', 'mecca', 'medal', 'media', 'medic', 'melee', 'melon', 'mercy', 'merge', 'merit', 'merry', 'metal', 'meter', 'metro', 'micro', 'midge', 'midst', 'might', 'milky', 'mimic', 'mince', 'miner', 'minim', 'minor', 'minty', 'minus', 'mirth', 'miser', 'missy', 'mocha', 'modal', 'model', 'modem', 'mogul', 'moist', 'molar', 'moldy', 'money', 'month', 'moody', 'moose', 'moral', 'moron', 'morph', 'mossy', 'motel', 'motif', 'motor', 'motto', 'moult', 'mound', 'mount', 'mourn', 'mouse', 'mouth', 'mover', 'movie', 'mower', 'mucky', 'mucus', 'muddy', 'mulch', 'mummy', 'munch', 'mural', 'murky', 'mushy', 'music', 'musky', 'musty', 'myrrh', 'nadir', 'naive', 'nanny', 'nasal', 'nasty', 'natal', 'naval', 'navel', 'needy', 'neigh', 'nerdy', 'nerve', 'never', 'newer', 'newly', 'nicer', 'niche', 'niece', 'night', 'ninja', 'ninny', 'ninth', 'noble', 'nobly', 'noise', 'noisy', 'nomad', 'noose', 'north', 'nosey', 'notch', 'novel', 'nudge', 'nurse', 'nutty', 'nylon', 'nymph', 'oaken', 'obese', 'occur', 'ocean', 'octal', 'octet', 'odder', 'oddly', 'offal', 
'offer', 'often', 'olden', 'older', 'olive', 'ombre', 'omega', 'onion', 'onset', 'opera', 'opine', 'opium', 'optic', 'orbit', 'order', 'organ', 'other', 'otter', 'ought', 'ounce', 'outdo', 'outer', 'outgo', 'ovary', 'ovate', 'overt', 'ovine', 'ovoid', 'owing', 'owner', 'oxide', 'ozone', 'paddy', 'pagan', 'paint', 'paler', 'palsy', 'panel', 'panic', 'pansy', 'papal', 'paper', 'parer', 'parka', 'parry', 'parse', 'party', 'pasta', 'paste', 'pasty', 'patch', 'patio', 'patsy', 'patty', 'pause', 'payee', 'payer', 'peace', 'peach', 'pearl', 'pecan', 'pedal', 'penal', 'pence', 'penne', 'penny', 'perch', 'peril', 'perky', 'pesky', 'pesto', 'petal', 'petty', 'phase', 'phone', 'phony', 'photo', 'piano', 'picky', 'piece', 'piety', 'piggy', 'pilot', 'pinch', 'piney', 'pinky', 'pinto', 'piper', 'pique', 'pitch', 'pithy', 'pivot', 'pixel', 'pixie', 'pizza', 'place', 'plaid', 'plain', 'plait', 'plane', 'plank', 'plant', 'plate', 'plaza', 'plead', 'pleat', 'plied', 'plier', 'pluck', 'plumb', 'plume', 'plump', 'plunk', 'plush', 'poesy', 'point', 'poise', 'poker', 'polar', 'polka', 'polyp', 'pooch', 'poppy', 'porch', 'poser', 'posit', 'posse', 'pouch', 'pound', 'pouty', 'power', 'prank', 'prawn', 'preen', 'press', 'price', 'prick', 'pride', 'pried', 'prime', 'primo', 'primp', 'print', 'prior', 'prism', 'privy', 'prize', 'probe', 'prone', 'prong', 'proof', 'prose', 'proud', 'prove', 'prowl', 'proxy', 'prude', 'prune', 'psalm', 'pubic', 'pudgy', 'puffy', 'pulpy', 'pulse', 'punch', 'pupal', 'pupil', 'puppy', 'puree', 'purer', 'purge', 'purse', 'pushy', 'putty', 'pygmy', 'quack', 'quail', 'quake', 'qualm', 'quark', 'quart', 'quash', 'quasi', 'queen', 'queer', 'quell', 'query', 'quest', 'queue', 'quick', 'quiet', 'quill', 'quilt', 'quirk', 'quite', 'quota', 'quote', 'quoth', 'rabbi', 'rabid', 'racer', 'radar', 'radii', 'radio', 'rainy', 'raise', 'rajah', 'rally', 'ralph', 'ramen', 'ranch', 'randy', 'range', 'rapid', 'rarer', 'raspy', 'ratio', 'ratty', 'raven', 'rayon', 'razor', 'reach', 
'react', 'ready', 'realm', 'rearm', 'rebar', 'rebel', 'rebus', 'rebut', 'recap', 'recur', 'recut', 'reedy', 'refer', 'refit', 'regal', 'rehab', 'reign', 'relax', 'relay', 'relic', 'remit', 'renal', 'renew', 'repay', 'repel', 'reply', 'rerun', 'reset', 'resin', 'retch', 'retro', 'retry', 'reuse', 'revel', 'revue', 'rhino', 'rhyme', 'rider', 'ridge', 'rifle', 'right', 'rigid', 'rigor', 'rinse', 'ripen', 'riper', 'risen', 'riser', 'risky', 'rival', 'river', 'rivet', 'roach', 'roast', 'robin', 'robot', 'rocky', 'rodeo', 'roger', 'rogue', 'roomy', 'roost', 'rotor', 'rouge', 'rough', 'round', 'rouse', 'route', 'rover', 'rowdy', 'rower', 'royal', 'ruddy', 'ruder', 'rugby', 'ruler', 'rumba', 'rumor', 'rupee', 'rural', 'rusty', 'sadly', 'safer', 'saint', 'salad', 'sally', 'salon', 'salsa', 'salty', 'salve', 'salvo', 'sandy', 'saner', 'sappy', 'sassy', 'satin', 'satyr', 'sauce', 'saucy', 'sauna', 'saute', 'savor', 'savoy', 'savvy', 'scald', 'scale', 'scalp', 'scaly', 'scamp', 'scant', 'scare', 'scarf', 'scary', 'scene', 'scent', 'scion', 'scoff', 'scold', 'scone', 'scoop', 'scope', 'score', 'scorn', 'scour', 'scout', 'scowl', 'scram', 'scrap', 'scree', 'screw', 'scrub', 'scrum', 'scuba', 'sedan', 'seedy', 'segue', 'seize', 'semen', 'sense', 'sepia', 'serif', 'serum', 'serve', 'setup', 'seven', 'sever', 'sewer', 'shack', 'shade', 'shady', 'shaft', 'shake', 'shaky', 'shale', 'shall', 'shalt', 'shame', 'shank', 'shape', 'shard', 'share', 'shark', 'sharp', 'shave', 'shawl', 'shear', 'sheen', 'sheep', 'sheer', 'sheet', 'sheik', 'shelf', 'shell', 'shied', 'shift', 'shine', 'shiny', 'shire', 'shirk', 'shirt', 'shoal', 'shock', 'shone', 'shook', 'shoot', 'shore', 'shorn', 'short', 'shout', 'shove', 'shown', 'showy', 'shrew', 'shrub', 'shrug', 'shuck', 'shunt', 'shush', 'shyly', 'siege', 'sieve', 'sight', 'sigma', 'silky', 'silly', 'since', 'sinew', 'singe', 'siren', 'sissy', 'sixth', 'sixty', 'skate', 'skier', 'skiff', 'skill', 'skimp', 'skirt', 'skulk', 'skull', 'skunk', 'slack', 
'slain', 'slang', 'slant', 'slash', 'slate', 'sleek', 'sleep', 'sleet', 'slept', 'slice', 'slick', 'slide', 'slime', 'slimy', 'sling', 'slink', 'sloop', 'slope', 'slosh', 'sloth', 'slump', 'slung', 'slunk', 'slurp', 'slush', 'slyly', 'smack', 'small', 'smart', 'smash', 'smear', 'smell', 'smelt', 'smile', 'smirk', 'smite', 'smith', 'smock', 'smoke', 'smoky', 'smote', 'snack', 'snail', 'snake', 'snaky', 'snare', 'snarl', 'sneak', 'sneer', 'snide', 'sniff', 'snipe', 'snoop', 'snore', 'snort', 'snout', 'snowy', 'snuck', 'snuff', 'soapy', 'sober', 'soggy', 'solar', 'solid', 'solve', 'sonar', 'sonic', 'sooth', 'sooty', 'sorry', 'sound', 'south', 'sower', 'space', 'spade', 'spank', 'spare', 'spark', 'spasm', 'spawn', 'speak', 'spear', 'speck', 'speed', 'spell', 'spelt', 'spend', 'spent', 'sperm', 'spice', 'spicy', 'spied', 'spiel', 'spike', 'spiky', 'spill', 'spilt', 'spine', 'spiny', 'spire', 'spite', 'splat', 'split', 'spoil', 'spoke', 'spoof', 'spook', 'spool', 'spoon', 'spore', 'sport', 'spout', 'spray', 'spree', 'sprig', 'spunk', 'spurn', 'spurt', 'squad', 'squat', 'squib', 'stack', 'staff', 'stage', 'staid', 'stain', 'stair', 'stake', 'stale', 'stalk', 'stall', 'stamp', 'stand', 'stank', 'stare', 'stark', 'start', 'stash', 'state', 'stave', 'stead', 'steak', 'steal', 'steam', 'steed', 'steel', 'steep', 'steer', 'stein', 'stern', 'stick', 'stiff', 'still', 'stilt', 'sting', 'stink', 'stint', 'stock', 'stoic', 'stoke', 'stole', 'stomp', 'stone', 'stony', 'stood', 'stool', 'stoop', 'store', 'stork', 'storm', 'story', 'stout', 'stove', 'strap', 'straw', 'stray', 'strip', 'strut', 'stuck', 'study', 'stuff', 'stump', 'stung', 'stunk', 'stunt', 'style', 'suave', 'sugar', 'suing', 'suite', 'sulky', 'sully', 'sumac', 'sunny', 'super', 'surer', 'surge', 'surly', 'sushi', 'swami', 'swamp', 'swarm', 'swash', 'swath', 'swear', 'sweat', 'sweep', 'sweet', 'swell', 'swept', 'swift', 'swill', 'swine', 'swing', 'swirl', 'swish', 'swoon', 'swoop', 'sword', 'swore', 'sworn', 'swung', 
'synod', 'syrup', 'tabby', 'table', 'taboo', 'tacit', 'tacky', 'taffy', 'taint', 'taken', 'taker', 'tally', 'talon', 'tamer', 'tango', 'tangy', 'taper', 'tapir', 'tardy', 'tarot', 'taste', 'tasty', 'tatty', 'taunt', 'tawny', 'teach', 'teary', 'tease', 'teddy', 'teeth', 'tempo', 'tenet', 'tenor', 'tense', 'tenth', 'tepee', 'tepid', 'terra', 'terse', 'testy', 'thank', 'theft', 'their', 'theme', 'there', 'these', 'theta', 'thick', 'thief', 'thigh', 'thing', 'think', 'third', 'thong', 'thorn', 'those', 'three', 'threw', 'throb', 'throw', 'thrum', 'thumb', 'thump', 'thyme', 'tiara', 'tibia', 'tidal', 'tiger', 'tight', 'tilde', 'timer', 'timid', 'tipsy', 'titan', 'tithe', 'title', 'toast', 'today', 'toddy', 'token', 'tonal', 'tonga', 'tonic', 'tooth', 'topaz', 'topic', 'torch', 'torso', 'torus', 'total', 'totem', 'touch', 'tough', 'towel', 'tower', 'toxic', 'toxin', 'trace', 'track', 'tract', 'trade', 'trail', 'train', 'trait', 'tramp', 'trash', 'trawl', 'tread', 'treat', 'trend', 'triad', 'trial', 'tribe', 'trice', 'trick', 'tried', 'tripe', 'trite', 'troll', 'troop', 'trope', 'trout', 'trove', 'truce', 'truck', 'truer', 'truly', 'trump', 'trunk', 'truss', 'trust', 'truth', 'tryst', 'tubal', 'tuber', 'tulip', 'tulle', 'tumor', 'tunic', 'turbo', 'tutor', 'twang', 'tweak', 'tweed', 'tweet', 'twice', 'twine', 'twirl', 'twist', 'twixt', 'tying', 'udder', 'ulcer', 'ultra', 'umbra', 'uncle', 'uncut', 'under', 'undid', 'undue', 'unfed', 'unfit', 'unify', 'union', 'unite', 'unity', 'unlit', 'unmet', 'unset', 'untie', 'until', 'unwed', 'unzip', 'upper', 'upset', 'urban', 'urine', 'usage', 'usher', 'using', 'usual', 'usurp', 'utile', 'utter', 'uvula', 'vague', 'valet', 'valid', 'valor', 'value', 'valve', 'vapid', 'vapor', 'vault', 'vaunt', 'vegan', 'venom', 'venue', 'verge', 'verse', 'verso', 'verve', 'vicar', 'video', 'vigil', 'vigor', 'villa', 'vinyl', 'viola', 'viper', 'viral', 'virus', 'visit', 'visor', 'vista', 'vital', 'vivid', 'vixen', 'vocal', 'vodka', 'vogue', 'voice', 
'voila', 'vomit', 'voter', 'vouch', 'vowel', 'vying', 'wacky', 'wafer', 'wager', 'wagon', 'waist', 'waive', 'waltz', 'warty', 'waste', 'watch', 'water', 'waver', 'waxen', 'weary', 'weave', 'wedge', 'weedy', 'weigh', 'weird', 'welch', 'welsh', 'wench', 'whack', 'whale', 'wharf', 'wheat', 'wheel', 'whelp', 'where', 'which', 'whiff', 'while', 'whine', 'whiny', 'whirl', 'whisk', 'white', 'whole', 'whoop', 'whose', 'widen', 'wider', 'widow', 'width', 'wield', 'wight', 'willy', 'wimpy', 'wince', 'winch', 'windy', 'wiser', 'wispy', 'witch', 'witty', 'woken', 'woman', 'women', 'woody', 'wooer', 'wooly', 'woozy', 'wordy', 'world', 'worry', 'worse', 'worst', 'worth', 'would', 'wound', 'woven', 'wrack', 'wrath', 'wreak', 'wreck', 'wrest', 'wring', 'wrist', 'write', 'wrong', 'wrote', 'wrung', 'wryly', 'yacht', 'yearn', 'yeast', 'yield', 'young', 'youth', 'zebra', 'zesty', 'zonal'
    ]

print(f'{len(WORDLE_WORD_LIST) = }')


len(WORDLE_WORD_LIST) = 2317


In [4]:
"""
_aste
_ater
_eter
_ight
_udge

__ade
__and
__ank
__ant
__ate
__ead
__ear
__eat
__een
__int
__ite
__nal
__ral
__rch
__own
__unt
__ute
__tal
__tch
__ven
_ois_
_rai_
sca__
sco__
sta__
ste__
sti__
sto__
stu__
the__
thi__

_ea__
_ie__
_oa__
_oi__
_ou__
ch___
sh___
si___
sk___
sl___
__ai_
__ie_
__oi_
__ou_
___ch
___en
___ey
___ly
___rd
___th
"""

def alpha_segments(wrds, line_length=12, multiple_lines_per_letter=None):
    r"""
    Convert a collection of words into a single string for printing.

    Words are grouped by their first letter. Groups appear in alphabetical order of
    that letter; inside a group the words keep the order they had in wrds. Words that
    do not start with a lowercase ASCII letter (including empty words) are ignored.

    - wrds: the words; any iterable is accepted (it is copied into a list first).
    - line_length: the maximum number of words per line.
    - multiple_lines_per_letter: may the words of one letter be spread over several lines?
        True:  groups longer than line_length continue on further lines, and a line
               holding only '\t' separates one letter from the next.
        False: every letter gets exactly one line; a group with more than line_length
               words is truncated and '...' is appended.
        None:  (default) act as True only if some group has more than line_length words.

    Every line of words starts with '\t'; lines are separated by '\n'. Each line of words
    ends with ',' except the very last line and lines that end with '...'.

    Returns '' when there is nothing to print: no usable words, or line_length <= 0.
    """
    # Accept any iterable (list, tuple, set, generator, ...), not just a list.
    wrds = [] if wrds is None else list(wrds)
    if not wrds or line_length <= 0:
        return ''

    alphabet = 'abcdefghijklmnopqrstuvwxyz'

    # Group the words by first letter in a single pass. The dict is created in
    # alphabetical order, so iterating over it later yields the groups alphabetically.
    by_letter = {ltr: [] for ltr in alphabet}
    for w in wrds:
        first = w[0] if w else ''      # an empty word has no first letter; skip it
        bucket = by_letter.get(first)
        if bucket is not None:
            bucket.append(w)

    # Drop empty groups, i.e., letters that start no words.
    lsts_of_wrds = [lst for lst in by_letter.values() if lst]
    if not lsts_of_wrds:
        # No word started with a lowercase letter, so there is nothing to print.
        return ''

    # If multiple_lines_per_letter was set by the caller, keep it. Otherwise wrap
    # only if at least one group does not fit on a single line.
    if multiple_lines_per_letter is None:
        multiple_lines_per_letter = max(len(lst) for lst in lsts_of_wrds) > line_length

    if multiple_lines_per_letter:
        # For each group:
        # a) Prefix its first word with '\n\t'. Together with the '\t' added to every
        #    line below, this puts a separator line between consecutive letters.
        # b) Cut the group into chunks of at most line_length words, one chunk per line.
        bnded_lsts_of_wrds = []
        for lst in lsts_of_wrds:
            lst = ['\n\t' + lst[0]] + lst[1:]
            bnded_lsts_of_wrds.extend(lst[i:i + line_length]
                                      for i in range(0, len(lst), line_length))
    else:
        # One line per letter: truncate long groups and mark the truncation with '...'.
        # No separator lines are added in this mode.
        bnded_lsts_of_wrds = [lst[:line_length] + (['...'] if len(lst) > line_length else [])
                              for lst in lsts_of_wrds]

    # Turn each chunk into one line: a leading '\t', the words joined by ', ',
    # and a trailing ',' unless the line ends with '...'.
    lines = ['\t' + ', '.join(lst) + ('' if lst[-1] == '...' else ',')
             for lst in bnded_lsts_of_wrds]

    # The very last line must not end with a comma.
    if lines[-1].endswith(','):
        lines[-1] = lines[-1][:-1]

    final_string = '\n'.join(lines)

    # In multi-line mode the string starts with '\t\n' (the line's own '\t' followed by
    # the '\n' of the first separator). Strip those two characters so the output starts
    # directly with '\t' and the first word.
    return final_string[2:] if multiple_lines_per_letter else final_string


# alpha_segments() tests
def test_alpha_segments():
    """
    Print the results of six sample alpha_segments() calls for visual inspection.

    Nothing is asserted. Every line uses the f-string '=' specifier, so it shows the
    call expression followed by the repr of the string that the call returned.
    """
    # 1. Empty word list.
    print(f'1. {alpha_segments([]) = }\n')
    # 2. The first 150 words, default settings.
    print(f'2. {alpha_segments(WORDLE_WORD_LIST[:150]) = }\n')
    # 3-5. The words from index 2200 on, with multiple_lines_per_letter
    #      forced on, forced off, and left at its default.
    print(f'3. {alpha_segments(WORDLE_WORD_LIST[2200:], multiple_lines_per_letter=True) = }\n')
    print(f'4. {alpha_segments(WORDLE_WORD_LIST[2200:], multiple_lines_per_letter=False) = }\n')
    print(f'5. {alpha_segments(WORDLE_WORD_LIST[2200:]) = }\n')
    # 6. The words from index 2293 on, default settings.
    print(f'6. {alpha_segments(WORDLE_WORD_LIST[2293:]) = }\n')



In [5]:
# Rich IO functions

# The one Console shared by the IO functions below: 120 columns wide, using the
# named styles from WORDLE_THEME, with rich's automatic highlighting turned off.
console = Console(width=120, theme=WORDLE_THEME, highlight=False)

def console_print(content, style=None, justify=None, end='\n', sep=' '):
    """
    Print content on the module-level rich console.

    All arguments are forwarded unchanged to console.print().
    """
    console.print(content, style=style, justify=justify, end=end, sep=sep)

def guess_a_word(previous_guesses) -> str:
    """
    Ask the user for a guess and keep asking until the guess is acceptable.

    The input is lower-cased and stripped of surrounding whitespace. It is rejected,
    with a message in the 'warning' style, if (checked in this order)
    - it is already in previous_guesses,
    - it does not have exactly WORD_LENGTH characters, or
    - it contains a character that is not an ASCII letter.

    Returns the first acceptable guess.
    """
    while True:
        guess = console.input("\nGuess word: ").lower().strip()

        if guess in previous_guesses:
            console_print(f"You've already guessed {guess}.", style="warning")
            continue

        if len(guess) != WORD_LENGTH:
            console_print(f"Your guess must be {WORD_LENGTH} letters.", style="warning")
            continue

        # The first character that is not an ASCII letter, or None if all are letters.
        invalid = next((letter for letter in guess if letter not in ascii_letters), None)
        if invalid is not None:
            console_print(
                f"Invalid letter: '{invalid}'. Please use only {ascii_lowercase}, lower or upper case.",
                style="warning",
            )
            continue

        return guess


def panel(content,
          emoji_left='',  # ":mage:",
          title_style=PANEL_TITLE_STYLE,
          title_text=''*10,
          emoji_right='',  # ":bulb:",
          foreground_color=PANEL_FOREGROUND_COLOR,
          background_color=PANEL_BACKGROUND_COLOR,
          border_color=PANEL_BORDER_COLOR):
    """
    Build a rich Panel around content.

    - content: the panel body.
    - title_style, title_text: the title is title_text with a space on each side,
      wrapped in title_style markup.
    - foreground_color, background_color: combined into the panel style
      '<foreground> on <background>'.
    - border_color: used as the border style.
    - emoji_left, emoji_right: accepted but not used by the current title format.

    The panel always expands to the available width. Returns the Panel; nothing is printed.
    """
    styled_title = f'[{title_style}] {title_text} [/]'
    body_style = f'{foreground_color} on {background_color}'
    return Panel(content,
                 title=styled_title,
                 style=body_style,
                 expand=True,
                 border_style=border_color)


def panel_print(content, title_text=''*10, justify=None):
    """
    Wrap content in a panel() and print it on the module-level console.

    - content: passed to panel() as the panel body.
    - title_text: passed to panel() as the title text.
      NOTE(review): the default ''*10 evaluates to the empty string; if ten spaces
      were intended it would have to be ' '*10 -- confirm before changing.
    - justify: forwarded to console.print().
    """
    console.print(panel(content, title_text=title_text), justify=justify)


def print_data_panels(cndt_prfl, current_step, filter):
    """
    Print three panels describing the current state of the game.

    1. The number of secret-word candidates and, except at step 1, the candidates
       themselves as laid out by alpha_segments().
    2. For each word position, up to ten of the most common letters with their counts,
       followed by up to ten of the most common letters overall.
    3. The filter, as rendered by filter.format_filter(cndt_prfl).

    - cndt_prfl: object providing current_candidates, position_profiles and
      letter_profiles. The profiles must offer most_common(); letter_profiles must
      also support len().
    - current_step: the step number; step 1 is reported as the initial state.
    - filter: object providing format_filter(). (The name shadows the builtin inside
      this function; it is kept because callers may pass it by keyword.)
    """
    def top_ten(profile):
        # 'letter: count' for the first ten entries of most_common(), comma-separated.
        return ', '.join(f'{ltr}: {cnt}' for ltr, cnt in profile.most_common()[:10])

    len_candidates = len(cndt_prfl.current_candidates)
    is_first_step = current_step == 1

    # ------------------------- Panel 1: secret-word candidates --------------------------

    kind = "remaining" if current_step > 1 else "initial"
    # add_s() is defined elsewhere in the file; it supplies the plural suffix.
    summary = f'{len_candidates} {kind} secret-word candidate{add_s(len_candidates > 1)}'
    if not is_first_step:
        summary += '\n' + alpha_segments(cndt_prfl.current_candidates)
    state = "initial" if is_first_step else "current"
    panel_print(summary, title_text=f'The {state} state of the game')

    # ------------------------- Panel 2: letter frequencies ------------------------------

    # One row per word position, numbered from 1.
    position_rows = [f'{pos}. {top_ten(counter)}'
                     for pos, counter in enumerate(cndt_prfl.position_profiles, 1)]

    # The overall list, noting the total number of distinct letters if more than ten.
    letter_profiles = cndt_prfl.letter_profiles
    n_letters = len(letter_profiles)
    out_of = '' if n_letters <= 10 else f' (out of {n_letters})'
    heading = "\n\nLetters that appear most often in the secret-word candidates:\n\t"
    overall = f'{heading}{top_ten(letter_profiles)} {out_of}'

    plural = 's' if len_candidates > 1 else ''
    panel_print('\n'.join(position_rows) + overall,
                title_text=f"Letter position frequencies in secret-word candidate{plural}")

    # ------------------------- Panel 3: filter ------------------------------------------

    panel_print(filter.format_filter(cndt_prfl), title_text='Filter')



In [6]:
# Rich Styles functions

def add_style(elt, style):
    """
    Wrap elt in Rich markup for style, converting it to upper case.

    Words are handled in lower case everywhere else; this is the one place where they
    are converted to upper case for display. If style is None, elt is returned as is
    (and is not upper-cased).
    """
    if style is None:
        return elt
    return f"[{style}]{elt.upper()}[/]"


def format_alphabet(scored_alphabet: Dict[str, str]) -> str:
    """
    Return the alphabet as one space-separated line of styled letters, preceded by a newline.

    scored_alphabet maps each letter to its one-character score.
    """
    # zip(*pairs) "unzips" the (letter, score) pairs into a tuple of letters and a
    # tuple of scores.
    letters, scores = zip(*scored_alphabet.items())
    return f"\n{join_with_scores(letters, scores, ' ')}"


def format_guesses(guesses: List[str], guess_scores: List[str]) -> List[str]:
    """
    Return one styled string per guess; the letters of each guess are separated by spaces.

    guesses and guess_scores are paired up positionally.
    """
    styled_guesses: List[str] = []
    for guess, guess_score in zip(guesses, guess_scores):
        styled_guesses.append(join_with_scores(guess, guess_score, ' '))
    return styled_guesses


def join_with_scores(letters, scores, join_elt) -> str:
    """
    Style each letter according to its score and join the results with join_elt.
    """
    return join_with_styles(letters, scores_to_styles(scores), join_elt)


def join_with_styles(letters, styles, join_elt) -> str:
    """
    Apply each style to the letter at the same position and join the results with join_elt.
    """
    return join_elt.join(add_style(letter, letter_style)
                         for letter, letter_style in zip(letters, styles))


def scores_to_styles(scores: Iterable[Literal['g', 'y', '~', '_']]) -> List[str]:
    """
    Look up the STYLES entry for each score character.

    scores is declared as an Iterable so that both a str (the score of a guess) and a
    tuple of one-character strings (the scores of the alphabet letters) are accepted.
    """
    styles: List[str] = []
    for score in scores:
        styles.append(STYLES[score])
    return styles


In [7]:

# Utility functions

def add_s(s_condition):
    """
    Return the plural suffix 's' if s_condition is truthy; otherwise return ''.
    """
    if s_condition:
        return 's'
    return ''


def lists_to_many_lists(lst, list_length):
    """
    Split lst into consecutive slices of list_length elements each.

    The final slice holds whatever is left over and may be shorter.
    """
    chunks = []
    for start in range(0, len(lst), list_length):
        chunks.append(lst[start:start + list_length])
    return chunks


def dict_to_row_strings(dct: Dict[str, Tuple[float, float]]) -> str:
    """
    Format dct as indented rows of up to five 'key: (avg, med, )' cells each.

    Every row is indented by four spaces; the cells within a row are separated by a
    comma and four spaces.
    """
    indent = '    '
    cells = [f'{key}: ({avg:3}, {med:2}, )' for key, (avg, med) in dct.items()]

    row_strings = []
    for start in range(0, len(cells), 5):
        row_strings.append(f',{indent}'.join(cells[start:start + 5]))

    return indent + f'\n{indent}'.join(row_strings)


def filter_guess_lists(guess_lists: List[List[str]]) -> List[List[str]]:
    """
    Drop every guess list that is empty or whose words are all contained in another list.

    The lists are compared as sets, so order and repetition within a list are ignored.
    When two lists hold the same set of words, the earlier one is kept. The surviving
    lists are returned in their original relative order.
    """
    # Each kept list is paired with its set so that the set is built only once.
    kept: List[Tuple[List[str], Set[str]]] = []

    for guess_list in guess_lists:

        if not guess_list:
            continue

        guess_set = set(guess_list)

        # Nothing new if some list that we are already keeping covers this one.
        if any(guess_set <= kept_set for _, kept_set in kept):
            continue

        # This list supersedes every kept list that it covers. Build a new list rather
        # than removing elements from the list being iterated over; removing during
        # iteration skips the element that follows each removal.
        kept = [(kept_list, kept_set) for kept_list, kept_set in kept
                                      if not kept_set <= guess_set]
        kept.append((guess_list, guess_set))

    return [kept_list for kept_list, _ in kept]


def get_score_from_user():
    """
    Prompt until the user enters a valid 5-character score; return it in lower case.

    A valid score consists only of 'g' (green), 'y' (yellow), and '~' (gray).
    Note that the raw entry is cut to its first 5 characters before surrounding
    whitespace is stripped, so anything typed after the fifth character is ignored.
    """
    allowed = set('gy~')
    while True:
        print('Please use the following letters to enter a 5-character score: g - Green; y - Yellow; ~ - gray')
        score = (input('    Score > ') + ' '*5)[:5].strip().lower()
        # Exactly one of the following three cases applies, so no catch-all is needed.
        if len(score) != 5:
            print(f'\n==> Your score entry "{score}" has {len(score)} letters rather than the needed 5.')
        elif not set(score) <= allowed:
            print(f'\n==> Your score entry "{score}" includes letters other than "g", "y", and "~".')
        else:
            return score


def get_user_guess(current_step, guess_list: List[str], filter, hard_mode: bool=True):
    """
    Show guess_list as a numbered menu and return the user's guess.

    The user may enter either
      - a menu item number (1 through len(guess_list)), or
      - a 5-letter word. In hard mode the word must also pass filter.word_passes().

    The prompt is repeated until one of these is entered. Only the first 5 characters
    of the entry are examined.
    """
    # The menu never changes, so build it once: 10 items per line, numbered from 1.
    menu_lines = lists_to_many_lists(guess_list, 10)
    menu = '\n'.join([" ".join([f"{nbr:2}. {menu_item}"
                                for nbr, menu_item in enumerate(menu_line, 10*line_nbr + 1)])
                      for line_nbr, menu_line in enumerate(menu_lines)])

    # On entry output_str is empty. After a rejected entry it holds the explanation,
    # which is then shown above the instructions and the menu.
    output_str = ''
    while True:
        output_str += "Enter either (a) your guess or (b) your selection from the following menu.\n"
        output_str += "To select from the menu, enter either an item number or the desired item itself.\n\n"
        output_str += menu
        panel_print(output_str, title_text='Make a guess')

        user_guess = (input(f'\nGuess {current_step}   > ') + ' '*5)[:5].strip().lower()

        if len(user_guess) <= 3:
            # Keep only the digits, e.g., '3.' -> '3'.
            item_number = sub(r'\D', '', user_guess)
            if item_number.isnumeric():
                index = int(item_number) - 1
                # Without this range check, 0 would silently select the last item
                # (index -1), and a too-large number would raise an IndexError.
                if 0 <= index < len(guess_list):
                    return guess_list[index]
                output_str = f'\nThere is no menu item {item_number}.\n'
                continue

        elif len(user_guess) == 5 and user_guess.isalpha():
            if hard_mode and not filter.word_passes(user_guess):
                print(f'Hard-mode violation: "{user_guess}" fails one or more known constraints.\n')
            else:
                return user_guess

        # No "else" in case the user enters a single non-digit character
        output_str = '\nPlease enter a one-digit number, a two-digit number, or a sequence of 5 alphabetic characters.\n'


def get_user_secret_word(current_candidates, guess_words):
    """
    Ask the user for a secret word and return it.

    The user may enter
      - "no":   a secret word is picked at random from current_candidates;
      - "help": guess_words is printed, and the user is asked again;
      - a word: it is accepted if it has WORD_LENGTH letters and is in current_candidates.

    The prompt is repeated until a secret word has been determined.
    """
    while True:
        print(f'Please enter a {WORD_LENGTH}-letter secret word.')
        print('Or enter "help" for the list of possible secret words.')
        print('Or enter "no" to have the system pick a secret word.\n')
        user_secret = input(' secret word > ').lower().strip()

        # Check for the two commands first.
        if user_secret == 'no':
            user_secret = choice(current_candidates)
            print('A secret word has been selected.')
            return user_secret

        if user_secret == 'help':
            print(alpha_segments(guess_words, line_length=19, multiple_lines_per_letter=True))
            continue

        if len(user_secret) != WORD_LENGTH:
            comparator = "more" if len(user_secret) > WORD_LENGTH else "fewer"
            # Report the required length from WORD_LENGTH, to agree with the test above.
            print(f'"{user_secret}" has {comparator} than {WORD_LENGTH} characters.')
            continue

        if user_secret not in current_candidates:
            print(f'"{user_secret}" isn\'t in the list of currently allowable secret words.')
            continue

        # Since we found no reason to reject user_secret, return it as the secret word.
        return user_secret


def int_to_str(n: int, w=2):
    """
    Return n formatted with w as its format specification, i.e., as its minimum field width.
    """
    return format(n, f'{w}')


def raw_remaining_to_pct_removed(raw, total):
    """
    Convert a count of remaining items (raw, out of total) to the percentage removed,
    rounded to an integer.
    """
    nbr_removed = total - raw
    pct_removed = 100*nbr_removed/total
    return int(round(pct_removed))


def score_a_guess(secret, guess) -> str:
    '''
    Score guess against secret. The result has one character for each guess letter.
    g = green:  the letter is in the secret word at this position;
    y = yellow: the letter is in the secret word, but at some other position;
    ~ = gray:   the letter has no (remaining) counterpart in the secret word.

    Example: secret    = 'hpaay'
             guess     = 'aaapz'
             -> score  = 'y~gy~'

    The greens are determined first: the third letter of both words is 'a', which
    yields 'g'. The remaining secret letters ('h', 'p', 'a', 'y') are then available
    as yellows. The first guess letter, 'a', claims the remaining secret 'a': 'y'.
    The second guess letter, also 'a', finds no 'a' left: '~'. The 'p' claims the
    secret 'p': 'y'. The 'z' matches nothing: '~'.
    '''

    # Pass 1: mark the exact matches 'g' and, provisionally, everything else '~'.
    score = []
    for sec_ltr, guess_ltr in zip(secret, guess):
        score.append('g' if sec_ltr == guess_ltr else '~')

    # The "holes" are the secret letters that were not matched exactly.
    secret_holes = [sec_ltr for sec_ltr, mark in zip(secret, score) if mark != 'g']

    if DEBUG: print(f'{secret_holes = }')

    # Pass 2: going left to right, let each still-gray guess letter claim one hole
    # that holds the same letter, if there is one left.
    unclaimed = Counter(secret_holes)
    for i, mark in enumerate(score):
        guess_ltr = guess[i]
        if mark == '~' and unclaimed[guess_ltr] > 0:
            score[i] = 'y'
            unclaimed[guess_ltr] -= 1

    # Return score as a string
    score_string = ''.join(score)
    if DEBUG: print(f'\n\t{score_string = }\n\n')

    return score_string


def smallest_n(dict: Dict[str, int], n=20) -> Dict[str, int]:
    """
    Return a dictionary of the n entries of dict that have the smallest values,
    ordered from smallest to largest. Entries with equal values keep their
    original relative order.
    """
    ranked_items = sorted(dict.items(), key=lambda item: item[1])
    return {guess: value for guess, value in ranked_items[:n]}



In [8]:

class Filter():
    """
    What is known so far about the secret word. All letters are in lower case.

    The filter is the combination of four attributes.
      greens:     one entry per position: the letter known to be at that position,
                  or '_' if the letter is not yet known.
      grays:      the set of letters known not to be in the word at all.
      disallowed: one set per position: the letters known not to be at that position.
      yellows:    a Counter. For each letter, the number of instances known to be in
                  the word but not yet placed. Only positive counts are stored.
    """

    def __init__(self):

        # Initially none of the letters are known.
        self.greens = ['_'] * WORD_LENGTH

        # self.grays are letters known not to be in the word. These are letters that were guessed
        # and returned gray, i.e., not in the word. Not included as grays are letters that returned
        # gray because they are known greens or known to be unmatched from a yellow.
        self.grays = set()

        # self.disallowed are the letters that are known not to be at specific positions,
        # e.g., letters that have been guessed at a position and have been yellow
        self.disallowed: List[set] = [set() for _ in range(WORD_LENGTH)]

        # For each hidden letter, a count of its still-hidden instances.
        self.yellows = Counter()


    def __str__(self):
        return f'*** {self.greens = }\n*** {self.grays = }\n*** {self.disallowed = }\n*** {self.yellows = }'


    def edit_filter(self, guess, score) -> Self:   #  -> Filter
        """
        Update the filter based on the guess and its score. Return the filter itself.

        The score characters are interpreted as follows.
        'g': the letter is at its correct position in guess.
        'y': the letter is not at its correct position in guess but is elsewhere in the word
        '~': the letter is not at any position in the word that is neither green nor assigned
        """

        # The (guess letter, score letter) pairs are traversed several times,
        # so make a concrete list of them.
        guess_score_pairs = list(zip(guess, score))

        # Update the greens first.
        for index, (guess_ltr, score_ltr) in enumerate(guess_score_pairs):

            # If this is a new green: self.greens[index] == '_'
            if score_ltr == 'g' and self.greens[index] == '_':
                # Record the green value
                self.greens[index] = guess_ltr
                # This shouldn't be necessary.
                self.grays.discard(guess_ltr)

                # One fewer instance of guess_ltr is still hidden. (Deleting a key
                # that is not in a Counter is not an error.)
                if self.yellows[guess_ltr] <= 1:
                    del self.yellows[guess_ltr]
                else:
                    self.yellows[guess_ltr] -= 1
            # If this is an existing green, i.e., self.greens[index] == guess_ltr, do nothing


        # Update the 'y's, i.e., update self.disallowed and self.yellows.

        # A yellow letter is known not to be at the position at which it was guessed.
        for index, (guess_ltr, score_ltr) in enumerate(guess_score_pairs):
            if score_ltr == 'y':
                self.disallowed[index].add(guess_ltr)

        # Count the occurrences of each yellow letter in this guess.
        guess_yellows = Counter(guess_ltr for guess_ltr, score_ltr in guess_score_pairs
                                          if score_ltr == 'y')

        for guess_ltr, nbr_yellow in guess_yellows.items():
            # Known green positions of guess_ltr that this guess did not hit. Each of
            # them accounts for one of this guess's yellows, so that yellow does not
            # indicate a still-hidden instance.
            empty_holes = sum(1 for green, sc in zip(self.greens, score)
                                if green == guess_ltr and sc != 'g')
            # The new value is the max of the old value and what this guess shows.
            nbr_hidden = max(self.yellows[guess_ltr], nbr_yellow - empty_holes)
            # Store positive counts only. A zero entry would make guess_ltr look like
            # a required-but-unplaced letter even though all its known instances are placed.
            if nbr_hidden > 0:
                self.yellows[guess_ltr] = nbr_hidden
            else:
                del self.yellows[guess_ltr]

        # Update self.grays and self.disallowed.
        # Add guess_ltr to self.grays if it is marked '~' and is in neither self.greens nor self.yellows
        # If guess_ltr is in self.greens or self.yellows, add it to self.disallowed[index] but not to self.grays
        for index, (guess_ltr, score_ltr) in enumerate(guess_score_pairs):
            if score_ltr == '~':
                self.disallowed[index].add(guess_ltr)
                # Did it get a '~' because it is in the secret word but its
                # hole was already filled when this letter was scored?
                if guess_ltr in self.greens or guess_ltr in self.yellows:
                    # This shouldn't be necessary.
                    self.grays.discard(guess_ltr)
                else:
                    self.grays.add(guess_ltr)

        return self


    def edit_filter_from_profiles(self, position_profiles) -> List[str]:
        """
        Update the filter to reflect any de facto greens, i.e., positions whose profile
        has exactly one key. Return the list of newly green letters.
        """
        profile_keys = [list(prfl.keys()) for prfl in position_profiles]
        old_greens = self.greens
        new_greens = [(keys[0] if ltr == '_' and len(keys) == 1 else ltr)
                      for ltr, keys in zip(old_greens, profile_keys)]
        self.greens = new_greens

        # Identify the new green letters.
        new_green_ltrs = [new_ltr for new_ltr, old_ltr in zip(new_greens, old_greens) if new_ltr != old_ltr]

        # Each newly placed letter has one fewer hidden instance.
        # (If new_green_ltrs is empty, the loop does nothing.)
        for ltr in new_green_ltrs:
            self.yellows[ltr] -= 1
            if self.yellows[ltr] <= 0:
                del self.yellows[ltr]

        return new_green_ltrs


    def format_filter(self, cndt_prfl):
        """
        Return a multi-line description of the filter: the placed letters, the required
        but unplaced letters (with the positions they are excluded from), the excluded
        letters, and the most common of cndt_prfl.uncategorized_letters.
        """
        str1 = f'Placed letters: {"".join(self.greens)}\n'

        unplaced = sorted(self.yellows.keys())
        # For each unplaced letter, the (1-based) positions where it is known not to be.
        disalloweds = [(ltr, [str(n + 1) for n, banned in enumerate(self.disallowed) if ltr in banned])
                       for ltr in unplaced]
        unplaced_ltrs = [f'"{ltr}" but not in position{add_s(len(ns) != 1)} {", ".join(ns)}' for ltr, ns in disalloweds]

        str2 = 'Required but unplaced letters:' + \
               (('\n\t\t' + "\n\t\t".join(unplaced_ltrs)) if len(unplaced_ltrs) > 0 else ' none')
        str3 = '\nExcluded letters: ' + \
               (('"' + '", "'.join(sorted(self.grays)) + '"') if len(self.grays) > 0 else 'none') + '\n'

        len_uncat = len(cndt_prfl.uncategorized_letters)
        nbr_to_show = 10
        str_out_of = ('' if len_uncat <= nbr_to_show else f' (out of {len_uncat})')
        uncat = ', '.join([f'{ltr}: {cnt}' for ltr, cnt in cndt_prfl.uncategorized_letters.most_common()[:nbr_to_show]])

        str4 = f'Uncategorized letter{add_s(len_uncat != 1)}' + (':' if len_uncat > 0 else '') + \
               (('\n\t' + uncat + str_out_of) if len_uncat > 0 else '')

        return str1 + str2 + str3 + str4


    def guess_to_pattern(self, trial_guess):
        """
        Return trial_guess with '~' in place of
        - every letter at a position whose green is known,
        - every letter that is disallowed at its position, and
        - for each yellow letter, its first occurrences, up to the letter's yellow count.
        """
        pattern = list(trial_guess)
        for index, ltr in enumerate(trial_guess):
            if self.greens[index] != '_' or ltr in self.disallowed[index]:
                pattern[index] = '~'

        for yellow_ltr, nbr_needed in self.yellows.items():
            # Guard against a non-positive count (e.g., one assigned directly to
            # self.yellows). Such a count would never reach 0 below, and every
            # occurrence of the letter would be replaced.
            if nbr_needed <= 0:
                continue
            for index, ltr in enumerate(trial_guess):
                if ltr == yellow_ltr:
                    pattern[index] = '~'
                    nbr_needed -= 1
                    if nbr_needed == 0:
                        break

        return ''.join(pattern)


    def word_passes(self, word):
        '''
        A word fails the filter if:
        - It contains a gray letter
        - It fails to match a known (i.e., green) letter
        - It includes a letter in one of the disallowed sets
        - It is missing a required hidden yellow letter
        '''
        # This loop makes the first three tests.
        for index, ltr in enumerate(word):
            if (ltr in self.grays or
                # If self.greens[index] not in {'_', ltr}, the secret word's
                # letter at index is known to be something other than ltr.
                self.greens[index] not in {'_', ltr} or
                ltr in self.disallowed[index]
                ): return False

        # The following makes the yellows test.

        # Count the instances of the letters in word
        letter_counter = Counter(word)

        # Count the instances of the letters in self.greens
        green_counters = Counter(self.greens)

        # self.yellows[ltr] is the known number of hidden positions in the secret word where ltr appears.
        # green_counters[ltr] is the number of known positions in the secret word where ltr appears.
        # I.e., the secret word requires at least self.yellows[ltr] + green_counters[ltr] ltr instances.
        # If letter_counter[ltr] is less than self.yellows[ltr] + green_counters[ltr], the word fails
        # because it has fewer instances of ltr than the secret word is known to require.
        for ltr in self.yellows:
            if letter_counter[ltr] < self.yellows[ltr] + green_counters[ltr]: return False

        return True



In [9]:

class Guess():
    """
    Instances are guesses along with metrics for them.

    The metrics are
      pct_words_hit:      see pct_of_words_hit();
      pct_pos_matched:    see pct_current_cndt_pos_matched();
      nbr_remaining_mean: see fwst_remaining().
    """

    # These are the top 30 guesses for the first step. Each value is the tuple
    # (pct_words_hit, pct_removed, pct_pos_matched), i.e., the tuple returned by metrics().
    step_1 = {'arise': (93, 97, 55), 'raise': (93, 97, 55), 'arose': (92, 97, 54), 'alone': (92, 97, 51),
              'irate': (92, 97, 51), 'aisle': (92, 97, 44), 'alter': (92, 97, 42), 'alert': (92, 97, 40),
              'atone': (92, 96, 46), 'later': (92, 96, 45), 'ratio': (92, 96, 32), 'audio': (92, 96, 27),
              'snare': (91, 97, 58), 'saner': (91, 96, 58), 'canoe': (91, 96, 52), 'teary': (91, 96, 52),
              'noise': (91, 96, 51), 'alien': (91, 96, 46), 'early': (91, 96, 46), 'leant': (91, 96, 46),
              'trail': (91, 96, 45), 'relay': (91, 96, 43), 'trial': (91, 96, 43), 'loser': (91, 96, 42),
              'yeast': (91, 96, 42), 'layer': (91, 96, 41), 'learn': (91, 96, 40), 'yearn': (91, 96, 36),
              'renal': (91, 96, 35), 'ocean': (91, 96, 24)
              }


    def __init__(self, guess, current_candidates=None, included_letters=None, current_step=1):
        """
        guess:              the guess word.
        current_candidates: the possible secret words. Defaults to WORDLE_WORD_LIST.
        included_letters:   letters to be ignored when computing the "hit" metrics.
                            Defaults to the empty set.
        current_step:       at step 1 the metrics are looked up in Guess.step_1
                            rather than computed.
        """
        # Resolve the defaults here rather than in the signature. That avoids a
        # mutable default and a dependency on WORDLE_WORD_LIST at class-definition time.
        if current_candidates is None:
            current_candidates = WORDLE_WORD_LIST
        if included_letters is None:
            included_letters = set()

        self.guess = guess
        self.current_candidates = current_candidates

        # These three metrics are set differently depending on whether we are doing step 1.
        self.pct_words_hit = 0
        self.pct_pos_matched = 0
        self.nbr_remaining_mean = 0

        if current_step == 1:
            len_cc = len(current_candidates)

            if guess in Guess.step_1:
                # Use the step_1 dict. It stores the percentage of candidates removed;
                # convert that back to a (mean) number of remaining candidates.
                self.pct_words_hit, pct_removed, self.pct_pos_matched = Guess.step_1[guess]
                self.nbr_remaining_mean = int(round(len_cc*(100 - pct_removed)/100))

            else:
                # No precomputed metrics: assume that the guess accomplishes nothing.
                self.pct_words_hit = self.pct_pos_matched = 0
                self.nbr_remaining_mean = len_cc

        else:
            self.pct_words_hit = Guess.pct_of_words_hit(guess, current_candidates, included_letters)

            self.pct_pos_matched = Guess.pct_current_cndt_pos_matched(guess, current_candidates, included_letters)

            # This must be an assignment. (Written with a comma in place of the '=', it
            # was a tuple expression whose value was discarded.)
            self.nbr_remaining_mean = Guess.fwst_remaining(current_candidates, guess)


    def __str__(self) -> str:
        return f"{self.guess}: {self.metrics()}"


    @staticmethod
    def fwst_remaining(current_candidates, guess) -> int:
        """
        Return the mean number of candidates that would remain after guess, averaged
        over every candidate taken in turn as the secret word. The mean is rounded
        to an integer.
        """
        # Keep track of the number of remaining candidates for each possible secret word.
        nbr_remaining_lst: List[float] = []
        for secret_word in current_candidates:
            # Score the guess with secret_word as the secret word. Pass the arguments
            # by keyword: the score must line up with the letters of guess, since it
            # is paired with guess in edit_filter().
            score = score_a_guess(secret=secret_word, guess=guess)
            # Create a filter for that combination
            trial_filter = Filter().edit_filter(guess, score)
            # Use the created filter to filter current_candidates
            nbr_remaining_candidates = sum(1 for word in current_candidates if trial_filter.word_passes(word))
            nbr_remaining_lst.append(nbr_remaining_candidates)

        # Use an arithmetic average over all the number-remaining values.
        # Use statistics.fmean (float mean), which is faster than statistics.mean.
        return int(round(fmean(nbr_remaining_lst)))


    def metrics(self) -> Tuple:
        """
        Return (pct_words_hit, percentage of candidates removed, pct_pos_matched).
        """
        len_cc = len(self.current_candidates)
        return (self.pct_words_hit,
                raw_remaining_to_pct_removed(self.nbr_remaining_mean, len_cc),
                self.pct_pos_matched,
                )


    @staticmethod
    def pct_current_cndt_pos_matched(guess, current_candidates, included_letters):
        """
        Count the (position, candidate) pairs at which guess and the candidate have the
        same letter, excluding letters in included_letters. Return 100 times that count
        divided by the number of candidates, rounded to an integer.
        """
        list_nbr_uncat: List[int]= [int(guess[pos] == cndt[pos] and cndt[pos] not in included_letters)
                                    for pos in range(5)
                                    for cndt in current_candidates]
        avg_interscts_pct = 100*(sum(list_nbr_uncat)/len(current_candidates))
        return int(round(avg_interscts_pct))


    @staticmethod
    def pct_of_words_hit(guess, current_candidates, included_letters) -> int:
        """
        Return the percentage (rounded to an integer) of candidates that share at least
        one letter with guess, not counting the letters in included_letters.
        """
        trgt_ltrs: set[str] = set(guess) - set(included_letters)
        words_hit: List[str]= [cndt for cndt in current_candidates if trgt_ltrs.intersection(set(cndt))]
        return int(round(100*len(words_hit)/len(current_candidates)))


    @staticmethod
    def y_g_reduce_fn(input: Tuple[int, int], trial_guess, next_candidate, greens, included_letters) -> Tuple[int, int]:
        """
        A reduce step: add the numbers of yellows and greens obtained by scoring
        next_candidate against trial_guess to the running (yellow_sum, green_sum)
        in input. greens and included_letters are not used.
        """
        score = score_a_guess(secret=trial_guess, guess=next_candidate)
        (yellow_sum, green_sum) = input
        return (yellow_sum + score.count('y'), green_sum + score.count('g'))


    @staticmethod
    def pct_of_posns_hit(guess, current_candidates, included_letters) -> int:
        """
        NOTE(review): despite its name, this has always computed exactly what
        pct_of_words_hit() computes. It now delegates to that method. Confirm whether
        a position-based metric (cf. pct_current_cndt_pos_matched) was intended.
        """
        return Guess.pct_of_words_hit(guess, current_candidates, included_letters)



In [10]:
## Examples and Tests

# Sample figures for 20 first guesses, keyed by guess word.
# NOTE(review): the meaning of the three fields in each tuple is not stated in this
# cell -- presumably remaining-candidate statistics from an earlier run; confirm.
fwst_rem =  {'raise': (61, 43, 70), 'arise': (64, 51, 70), 'irate': (64, 53, 70), 'arose': (66, 49, 70),
             'alter': (70, 60, 70), 'stare': (71, 51, 70), 'snare': (71, 54, 70), 'slate': (72, 51, 70),
             'alert': (72, 52, 70), 'crate': (73, 49, 70), 'trace': (74, 49, 70), 'aisle': (76, 57, 70),
             'alone': (77, 56, 70), 'crane': (79, 51, 70), 'react': (80, 48, 70), 'scare': (81, 53, 70),
             'alien': (84, 51, 70), 'caste': (85, 56, 70), 'heart': (89, 55, 70), 'plate': (89, 61, 70)}

# The size of the full word list; used by fwst_to_pct().
nbr_cndts = len(WORDLE_WORD_LIST)

def fwst_to_pct(x):
    """
    Convert x, a number of remaining candidates out of nbr_cndts, to the percentage
    of candidates removed, rounded to one decimal place.
    """
    nbr_removed = nbr_cndts - x
    return round(100*nbr_removed / nbr_cndts, 1)

# For each guess in fwst_rem, build a 4-tuple: the percentage of words hit (computed
# here over the full word list, with no included letters) followed by the fwst_rem
# fields with the first two swapped, i.e., (a, b, c) -> (b, a, c).
max_metrics = {guess :  (Guess.pct_of_words_hit(guess, WORDLE_WORD_LIST, []), b, a, c)
                for (guess, (a, b, c)) in fwst_rem.items()}

print(f'\n{max_metrics}\n')

# Scratch checks of the string operations used when reading a menu selection:
# r'\D' removes everything but the digits; r'\d' removes the digits.
# eval() is applied only to the literal strings s1 and s2 defined here.
from re import sub
print(f"{'sdf'.isnumeric() = }")
s1 = "sub(r'\\D', '', '   ds3f  ')"
print(f"{s1} -> {eval(s1)}")
s2 = "sub(r'\\d', '', '   ds3f  ')"
print(f"{s2} -> {eval(s2)}")
print(f"{'b3'.upper() = }")

## Tests

# Secret_Candidates test
# def test_Secret_Candidates():
#     test_words = ['arise', 'crane', 'crate', 'irate', 'least', 'plate', 'raise',
#                   'sauce', 'sauce', 'saute', 'shine', 'slate', 'trace']

#     # Test Words_Profile on both words and test_words
#     for wrds in [test_words]:
#         candidates = Secret_Candidates(current_candidates=WORDLE_WORD_LIST, guess_words=WORDLE_WORD_LIST, filter=Filter())
#         # print(f'\n{candidates.best_guesses = }')
#         print(f'{candidates.best_position_guesses = }\n\n')


# This data is used in the following two tests.
# Each entry is (secret, guess, expected score). The expected scores use the same
# characters that score_a_guess() produces: 'g' (green), 'y' (yellow), and '~' (gray).
secret_guess_exp_list = [('hpaax', 'aaapz', "y~gy~"), ('aabbb', 'ccaaa', "~~yy~"),
                         ('godly', 'dolly', "yg~gg"), ('madam', 'mamma', 'ggy~y'),
                         ('madam', 'magma', 'gg~yy'), ('moose', 'onion', 'y~~y~')
                         ]

# Scoring test
def test_Scoring():
    """
    Score each (secret, guess) pair in secret_guess_exp_list and print the result
    along with the expected score and "Pass" or "Fail".

    Scoring: g (green)  = Letter in correct position;
             y (yellow) = Letter in incorrect position;
             ~ (gray)   = Letter not in word.

    In the following example:  guess  = 'aaapz'
                               secret = 'hpaax'
                            -> score  = 'y~gy~'

    The matching letters are determined first. The third letter in both the guess and
    the secret is 'a'. So the third letter in the score is 'g'.

    The first letter in the guess, 'a', is associated with the fourth letter in the
    secret, 'a'. (This is why the matching letters must be determined first.) So the
    first letter in the score is 'y'.

    The second letter in the guess, 'a', has nothing left in the secret to be
    associated with. So the second letter in the score is '~'.

    The fourth letter in the guess, 'p', is associated with the second letter in the
    secret. So the fourth letter in the score is 'y'.

    The fifth letter in the guess, 'z', is not in the secret. So the fifth letter in
    the score is '~'.
    """

    for secret, guess, exp_score in secret_guess_exp_list:
        score = score_a_guess(secret=secret, guess=guess)
        verdict = "Pass" if score == exp_score else "Fail"
        # The names secret, guess, score, and exp_score appear in the output
        # (via the '=' specifier).
        print(f' {secret = } :: {guess = } -> {score = } {exp_score = } => {verdict}')


def test_Filter():
    """
    Exercise Filter by printing its state, and the results of guess_to_pattern() and
    word_passes(), after a sequence of edit_filter() calls and direct attribute
    assignments.

    Nothing is asserted. The text after '->' in some of the printed lines is the
    hand-written expected value, to be compared by eye with the value printed before it.
    The statements depend on the filter state left by the preceding ones, so their
    order matters.
    """
    # 1. A new, empty filter.
    filter = Filter()
    print(f'1.\n{filter}\n')
    print(f"{filter.guess_to_pattern('abxcd') = }")
    # 2. 'x' is known at position 2; the guess has a yellow 'x' at position 3.
    filter.greens = list("_x___")
    filter.edit_filter('abxcd', '~~y~~')
    print(f'2.\n{filter}\n')
    print(f"{filter.guess_to_pattern('abxcd') = }")

    # 3. A guess in which every letter is green.
    print(f'{score_a_guess("wxyza", "wxyza")} -> ggggg\n')
    filter.edit_filter('wxyza', 'ggggg')
    print(f'3.\n{filter}')
    print(f"{filter.guess_to_pattern('abxcd') = }")

    print(f'\n{filter.word_passes("wxyza") = } -> True')
    print(f"{filter.guess_to_pattern('wxyza') = }")

    # 4. Overwrite the filter's attributes directly.
    filter.yellows['x'] = 1
    filter.yellows['y'] = 1
    print(f'\n{filter.yellows = }')
    filter.greens = list("_z__y")
    filter.grays = set()
    filter.disallowed = [set(), set(), set('x'), set(), set()]
    print(f'{filter.word_passes("wxyza") = } -> False')
    print(f'\n4.\n{filter}')
    print(f"{filter.guess_to_pattern('yzxxx') = }")

    filter.yellows.pop('x')
    print(f'\n{filter.yellows = }')
    # NOTE(review): filter.greens is still "_z__y" here, and "wxyza" has 'x' (not 'z')
    # at the second position. So word_passes() appears to return False at this point
    # rather than the annotated True -- confirm the expected value.
    print(f'{filter.word_passes("wxyza") = } -> True')
    filter.grays.add('a')
    print(f'\n{filter.grays = }')
    print(f'{filter.word_passes("wxyza") =  } -> False')

# test_Filter()

# Test nbr_remaining_candidates guess selection
def test_fwst_rem():
    """
    Build a Secret_Candidates over the full word list with an empty filter and call
    max_candidates_reduction() on it for the guesses 'arise' and 'crane'.
    """
    candidates = Secret_Candidates(current_candidates=WORDLE_WORD_LIST,
                                   guess_words=WORDLE_WORD_LIST,
                                   filter=Filter(),
                                   current_step=False)
    candidates.max_candidates_reduction(current_candidates=WORDLE_WORD_LIST, guess_words=['arise', 'crane'])


#######################################################################################################
# Run tests
# tests = [(False, test_alpha_segments),      (False, test_Scoring),
#          (False, test_Filter),              (False, test_Secret_Candidates),
#          (False, test_fwst_rem),           # (False, is_long_test),
#          ]

# for run, test in tests:
#     if run:
#         print(f'\tRunning: {test.__name__}\n')
#         test()
#         print(f'{"="*100}', flush=True)



{'raise': (93, 43, 61, 70), 'arise': (93, 51, 64, 70), 'irate': (92, 53, 64, 70), 'arose': (92, 49, 66, 70), 'alter': (92, 60, 70, 70), 'stare': (90, 51, 71, 70), 'snare': (91, 54, 71, 70), 'slate': (90, 51, 72, 70), 'alert': (92, 52, 72, 70), 'crate': (89, 49, 73, 70), 'trace': (89, 49, 74, 70), 'aisle': (92, 57, 76, 70), 'alone': (92, 56, 77, 70), 'crane': (89, 51, 79, 70), 'react': (89, 48, 80, 70), 'scare': (90, 53, 81, 70), 'alien': (91, 51, 84, 70), 'caste': (89, 56, 85, 70), 'heart': (89, 55, 89, 70), 'plate': (89, 61, 89, 70)}

'sdf'.isnumeric() = False
sub(r'\D', '', '   ds3f  ') -> 3
sub(r'\d', '', '   ds3f  ') ->    dsf  
'b3'.upper() = 'B3'


In [11]:

class Secret_Candidates():
    """
    Profile the remaining secret-word candidates and score possible guesses against that profile.

    Given a set of possible Wordle answers, a profile of those words is:
    (a) a list of 5 Counters, one for each letter position in the 5-character Wordle words.
        Each Counter has letters as keys. The values are the number of times that letter
        appears at the Counter's position in the set of words.
    (b) a Counter for the number of words in which each letter appears at least once.

    After these Counters are calculated, guess words can be evaluated against them. The result
    can be used to select a Wordle guess.
    """

    W = TypeVar("W", bound="Wordle")

    def __init__(self, current_candidates, guess_words, filter, current_step):
        """
        Build the profiles and letter sets for current_candidates (the remaining possible
        secret words).

        self.position_profiles and self.letter_profiles characterize the collection of words.
        a) position_profiles are profiles for each letter position. For each position,
           position_profiles counts the number of occurrences of the various letters in that
           position in all the words.
        b) letter_profiles is a single Counter for the letters in all the words. It counts the
           number of words in which each letter occurs.

        Parameters:
            current_candidates: the remaining candidate secret words; must not be empty.
            guess_words: not used by the constructor; kept so existing callers keep working.
            filter: object providing greens, yellows, grays and edit_filter_from_profiles().
                    edit_filter_from_profiles() is called here with the position profiles.
            current_step: not used by the constructor; kept so existing callers keep working.

        Raises:
            ValueError: if current_candidates is empty.
        """

        # Reject any empty collection (list, tuple, set, ...), not just the literal [].
        if not current_candidates:
            raise ValueError('current_candidates is empty.')

        self.current_candidates = current_candidates

        # ===============================================================

        # Generate the profiles based on the candidates.

        self.position_profiles: List[Counter[str]]
        self.letter_profiles: Counter[str]

        self.position_profiles, self.letter_profiles = self.generate_candidates_profiles(current_candidates)

        # ==============  Update the Filter; then update included_letters and uncategorized_letters ===============

        # The following declarations are not functional. They just make these objects visible here.

        # Letters that are known to be included in the secret word
        self.included_letters: Set[str]

        # Letters that are known to be excluded from the secret word
        self.excluded_letters: Set[str]

        # The union of the included and excluded sets
        self.known_letters: Set[str]

        # Letters in the candidate secret words that are not (yet) in known letters
        self.uncategorized_letters: Counter[str]

        # Generate the sets declared above.
        self.generate_letter_sets(filter)

        # The filter reports the letters it newly treats as green given the position profiles.
        new_green_ltrs: Set[str] = set(filter.edit_filter_from_profiles(self.position_profiles))

        # Newly green letters are now known to be included, so they are no longer uncategorized.
        self.included_letters = self.included_letters.union(new_green_ltrs)
        self.uncategorized_letters = Counter({ltr: count
                                              for ltr, count in self.uncategorized_letters.items()
                                              if ltr not in new_green_ltrs})

        # The guess lists that used to be built eagerly here are no longer built. The methods
        # that compute them (guesses_with_frequent_letters, best_positn_guesses,
        # guesses_with_the_most_hits, guesses_with_the_most_unc_hits, avg_of_intersections,
        # max_candidates_reduction) remain available and can be called on demand.


    # ================================== Profile generation ===========================================

    def generate_candidates_profiles(self, candidates) -> Tuple[List[Counter[str]], Counter[str]]:
        '''
        Count letter occurrences in the candidate secret words.

        Returns (position_profiles, letter_profiles):
            position_profiles: one Counter per letter position (5 of them). Each counts how often
                               each letter occurs at that position across candidates.
            letter_profiles:   a Counter of the number of candidates in which each letter occurs.
                               A letter is counted at most once per candidate.
        '''
        position_profiles: List[Counter[str]] = [Counter() for _ in range(5)]
        letter_profiles: Counter[str] = Counter()

        for cndt in candidates:
            # Letters already credited to letter_profiles for this candidate.
            seen_ltrs = set()
            # zip pairs each letter of the word with the Counter for its position.
            for ltr, position_profile in zip(cndt, position_profiles):
                position_profile[ltr] += 1
                if ltr not in seen_ltrs:
                    letter_profiles[ltr] += 1
                    seen_ltrs.add(ltr)
        return position_profiles, letter_profiles


    def generate_letter_sets(self, filter):
        '''
        Set self.included_letters, self.excluded_letters, self.known_letters and
        self.uncategorized_letters from filter and self.letter_profiles.

        self.letter_profiles must already be set. Returns None.
        '''
        # The letters known to be included in the secret word: the greens (ignoring the "_"
        # placeholder) plus the yellows.
        self.included_letters: Set[str] = set(filter.greens).difference("_").union(filter.yellows.keys())

        # The letters known to be excluded from the secret word
        self.excluded_letters: Set[str] = set(filter.grays)

        # The union of the preceding two sets
        self.known_letters: Set[str] = self.included_letters.union(self.excluded_letters)

        # Letters in the secret word candidates that are not (yet) known to be either included or excluded
        self.uncategorized_letters: Counter[str] = Counter({ltr: self.letter_profiles[ltr]
                                                            for ltr in (set(self.letter_profiles) - self.known_letters)})

        return


    def avg_of_intersections(self, guess_words, secret_word_candidates) -> Dict[str, float]:
        """
        Score each possible guess (in guess_words) against the secret-word candidates using
        Guess.pct_current_cndt_pos_matched, and return the 20 highest-scoring guesses.

        The intent is to compare each possible guess with each secret-word candidate by
        determining how many letters each pair has in common, not counting
        self.included_letters (each possible guess and each candidate secret word has them
        all), and to average that over the candidates. That tells us how much of each
        secret-word candidate, on average, a guess would expose were it used as the guess.

        Returns a dictionary mapping each of the (up to) 20 best guesses to its score,
        highest score first.
        """
        match_scores: Counter[str] = Counter(
            {guess: Guess.pct_current_cndt_pos_matched(guess, secret_word_candidates, self.included_letters)
             for guess in guess_words})
        return dict(match_scores.most_common(20))


    def best_positn_guesses(self, guess_words, discount_knowns=False) -> Set[str]:
        """
        Find and return the best position-based guesses.

        If the word made of the most common letter at each position is itself in guess_words,
        return a set containing only that word. Otherwise score every guess with
        eval_guess_against_profiles and return the (up to) 5 highest-scoring guesses.

        If discount_knowns=True, discount known letters in scoring.
        """
        # The single most common letter at each position.
        highest_ltrs: List[str] = [cntr.most_common(1)[0][0] for cntr in self.position_profiles]
        tent_guess: str = ''.join(highest_ltrs)
        if tent_guess in guess_words:
            return {tent_guess}

        # If we've narrowed the remaining candidates down to 3 or fewer, don't use discount_knowns
        if len(self.current_candidates) <= 3:
            discount_knowns = False

        guess_scores: Counter[str] = Counter({guess: self.eval_guess_against_profiles(guess, discount_knowns)
                                              for guess in guess_words})

        return {guess for guess, _ in guess_scores.most_common(5)}


    def compute_entropy(self, guess, score_a_guess) -> float:
        """
        Return an entropy-like measure (rounded to 2 places) of how guess splits the candidates.

        Each candidate in self.current_candidates is classified by the number of '~' characters
        in score_a_guess(guess, candidate). The class sizes are then combined as
        sum(p * log2(1/p)) with p = class size / normalizer.

        NOTE(review): the normalizer is len(candidates) - 1 (or 1 for a single candidate), not
        len(candidates), so the p values need not sum to 1 and the result can be negative.
        Kept as is; confirm whether this is intended.
        """
        tildes_per_cand: Dict[str, int] = {candidate: score_a_guess(guess, candidate).count('~')
                                           for candidate in self.current_candidates}
        # Number of (distinct) candidates for each '~' count.
        class_sizes: Counter[int] = Counter(tildes_per_cand.values())

        nbr_candidates = len(self.current_candidates)
        normalizer = nbr_candidates - 1 if nbr_candidates > 1 else 1

        entropy = round(sum(class_sizes[n]/normalizer * log2(normalizer/class_sizes[n])
                            for n in sorted(class_sizes)), 2)
        return entropy


    def eval_guess_against_profiles(self, guess, discount_knowns) -> int:
        '''
        Score guess against self.position_profiles and self.letter_profiles.

        The score is the guess's positions score plus 1/4 of its letters score (rounded):
        - positions score: for each letter of guess, the number of candidates having that letter
          at that position. With discount_knowns, only letters in self.uncategorized_letters
          contribute.
        - letters score: for each distinct letter of guess, (the number of candidates containing
          that letter) - 1.
        '''
        positions_score, letters_score = 0, 0

        # Only count the letter score once even if a letter appears multiple times in a guess
        scored_ltrs = set()

        # The zip expression pairs each letter with the Counter for its position.
        for ltr, pos_counter in zip(guess, self.position_profiles):
            if not discount_knowns or ltr in self.uncategorized_letters:
                positions_score += pos_counter[ltr]
            if ltr not in scored_ltrs:
                letters_score += self.letter_profiles[ltr] - 1
                scored_ltrs.add(ltr)

        # A word's score is its position score + 1/4 * its letter score
        return positions_score + round(letters_score/4)


    def max_candidates_reduction(self,
                                 current_candidates,
                                 guess_words,
                                 current_step=-1) -> Dict[str, int]:
        """
        The objective is to determine which of the words in guess_words would eliminate the most
        words from current_candidates.

        Each guess in guess_words is evaluated with Guess.fwst_remaining(current_candidates, guess).
        The resulting value is stored per guess, and the dictionary is then reduced with
        smallest_n(..., n=20), presumably to the 20 guesses with the smallest values
        (the smaller the better) -- verify against smallest_n.

        current_step is not used; it is kept so existing callers keep working.
        """
        rem_dict: Dict[str, int] = {guess: Guess.fwst_remaining(current_candidates, guess)
                                    for guess in guess_words}

        fwst_remaining: Dict[str, int] = smallest_n(rem_dict, n=20)
        return fwst_remaining


    def guesses_with_the_most_hits(self, guess_words):
        """
        Return a Counter of the (up to) 5 guesses whose distinct letters occur in the most
        candidates, according to self.letter_profiles. The values are the summed letter counts.
        """
        hits_cntr = Counter({guess: sum(self.letter_profiles[ltr] for ltr in set(guess))
                             for guess in guess_words})
        return Counter(dict(hits_cntr.most_common(5)))


    def guesses_with_the_most_unc_hits(self, guess_words):
        """
        Return a Counter of the (up to) 5 guesses whose distinct letters score highest in
        self.uncategorized_letters. The values are the summed uncategorized-letter counts.
        """
        hits_cntr = Counter({guess: sum(self.uncategorized_letters[ltr] for ltr in set(guess))
                             for guess in guess_words})
        # Keep the scores (as guesses_with_the_most_hits does). Building the Counter from the
        # keys alone would give every guess a count of 1.
        return Counter(dict(hits_cntr.most_common(5)))


    def guesses_with_frequent_letters(self, pos_prfls, guess_words):
        """
        Find guesses made from top letters in each position.

        For each position profile, the "top" letters are those whose count is at least
        min(ceil(0.75 * max count), max count - 1). Every combination of one top letter per
        position is formed, and the combinations that are in guess_words are returned as a set.
        """
        highest_ltr_sets: List[Set[str]] = []
        for cntr in pos_prfls:
            top_count = max(cntr.values())
            # Capping the threshold at top_count - 1 also admits letters just below the top count.
            threshold = min(ceil(top_count*0.75), top_count - 1)
            highest_ltr_sets.append({ltr for ltr in cntr if cntr[ltr] >= threshold})
        if DEBUG: print(f'{highest_ltr_sets = }')

        # Set membership keeps the filtering below linear in the number of combinations.
        allowed_guesses = set(guess_words)
        combinations = (''.join(ltrs) for ltrs in product(*highest_ltr_sets))
        return {guess for guess in combinations if guess in allowed_guesses}



In [12]:
class Wordle():
    """
    Plays a game of Wordle, either automatically or with a user entering the guesses.

    play_a_game() sets up the per-game state and finish_the_game() plays the guesses
    recursively, one guess per call. long_test() plays one automatic game for every word
    in WORDLE_WORD_LIST.
    """

    def __init__(self, is_long_test=False):
        """is_long_test: if True, each guess and its score are printed on one line as the game runs."""

        # Are we performing a long test?
        self.is_long_test: bool = is_long_test


    def play_a_game(self, current_candidates, guess_words,
                    secret_word=None, hard_mode=False, interactive=False, online=False):
        """
        Initialize the state for one game and play it to completion.

        Parameters:
            current_candidates: the words from which the secret word may come.
            guess_words: the words that may be used as guesses.
            secret_word: the word to be guessed. If not given (and not online), one is chosen
                         at random from current_candidates.
            hard_mode: if True, guesses must be acceptable secret-word candidates.
            interactive: if True, the user enters the guesses.
            online: if True, the user enters both the guesses and their scores.

        Returns the number of guesses used (the value returned by finish_the_game).
        """

        # If self.candidates_are_guess_words, no need to recompute the guess_words at each turn.
        # Can use the same list of words.
        self.candidates_are_guess_words = current_candidates is guess_words

        # In hard_mode, guesses must be acceptable secret-word candidates
        self.hard_mode = hard_mode

        # If self.online is True, the user enters both the guesses and the scores of the guesses,
        # which allows this system to play the NYT Wordle. The user serves as a communication
        # channel between the system and the online game.
        self.online: Optional[bool] = online

        # If self.interactive is True, the user enters guesses. (Online play is always interactive.)
        self.interactive: Optional[bool] = self.online or interactive

        # Using either "str or None" or "Optional[str]" as the type and assigning None generates an error
        # when we apply len() to self.secret_word. Also, "Sized" as the type generates an error when
        # None is assigned. But Union[str, Any] works. None can be assigned, and len() can be applied.
        self.secret_word: Union[str, Any]
        if self.online:
            # The secret word is unknown when playing online.
            self.secret_word = None
        elif secret_word:
            self.secret_word = secret_word
        else:
            self.secret_word = choice(current_candidates)

        # The green letters and the yellow letters recorded at each guess.
        self.placed_ltrs = []
        self.unplaced_ltrs = []

        # The following keep track of the guesses, scores, etc. Used for reporting after each guess.
        # self.guesses, self.scores, and self.words_left are synchronized. The n-th position in each
        # list corresponds to the n-th guess. The 0th position is before the first guess. We use
        # that to show the number of words initially in the secret-word pool.
        self.guesses = [' '*5]
        self.scores  = [' '*5]
        # ":4" means format the number in 4 spaces
        self.words_left = [f'{len(current_candidates):4}']

        # self.scored_alphabet stores the most recent score for each letter. Initially, there is no
        # information about any letter. So each letter starts with a score of '_'.
        self.scored_alphabet: Dict[str, str] = {letter: '_' for letter in ascii_lowercase}

        return self.finish_the_game(current_candidates, guess_words)


    def finish_the_game(self, current_candidates, guess_words, filter: Optional["Filter"] = None, current_step=1):
        """
        Play one guess and then recurse for the remaining guesses.
        Use recursion instead of looping to go through the guesses.

        Parameters:
            current_candidates: the remaining secret-word candidates.
            guess_words: the words that may be used as guesses at this step.
            filter: the constraints derived from the previous guesses. A fresh Filter is
                    created when none is given.
            current_step: the number of the guess about to be made (1-based).

        Returns the step at which the secret word was found.

        Raises:
            Exception: if the known secret word is no longer among the remaining candidates.
        """

        # Create the default here rather than in the signature. A default in the signature is
        # evaluated once, so every game would share (and could modify) the same Filter object.
        if filter is None:
            filter = Filter()

        candidates_object = Secret_Candidates(current_candidates, guess_words, filter, current_step)

        # Use the per-game setting. The module-level INTERACTIVE flag is not updated by play_a_game.
        if self.interactive:
            console.clear()
            print_data_panels(candidates_object, current_step, filter)
            if current_step > 1:
                self.print_game_history()

        guess: str = self.select_a_guess(filter.greens, current_candidates, guess_words,
                                         current_step, candidates_object.included_letters, filter)

        score: str = get_score_from_user() if self.online else score_a_guess(self.secret_word, guess)

        if self.interactive:
            console_print(f'     Score: {join_with_scores(guess, score, "")}\n')

        # Update scored_alphabet. A letter's score is replaced only by a higher-ranked score.
        for letter, letter_score in zip(guess, score):
            if SCORE_RANKING[letter_score] > SCORE_RANKING[self.scored_alphabet[letter]]:
                self.scored_alphabet[letter] = letter_score

        # Record the greens and yellows as they were before this guess was scored.
        self.placed_ltrs.append("".join(filter.greens))
        joined_yellows = ".".join(sorted(filter.yellows.keys()))
        # Right-justify in 5 columns (5 spaces if there are no yellows).
        self.unplaced_ltrs.append(joined_yellows.rjust(5))

        self.guesses.append(guess)
        self.scores.append(score)

        if self.is_long_test:
            print(f'{current_step}. {guess} ({score}); ', end='')

        # Update the filter.
        new_filter = filter.edit_filter(guess, score)

        # Use the updated filter to filter the current_candidates
        remaining_current_candidates = [word for word in current_candidates if new_filter.word_passes(word)]

        self.words_left.append(f'{len(remaining_current_candidates):4}')

        # Stop if we've found the secret word.
        if set(score) == {'g'}:
            self.print_game_history(title_text='Game history', show_alphabet=False)
            return current_step

        if self.secret_word is not None and self.secret_word not in remaining_current_candidates:
            raise Exception(f'The secret_word "{self.secret_word}" is not in the {remaining_current_candidates = }')

        # In hard mode (or when the candidates double as the guess words) only the remaining
        # candidates may be guessed. Otherwise the guess words are unchanged.
        if self.candidates_are_guess_words or self.hard_mode:
            remaining_guess_words = remaining_current_candidates
        else:
            remaining_guess_words = guess_words

        # Recursive call for the remaining guesses
        return self.finish_the_game(remaining_current_candidates, remaining_guess_words, new_filter, current_step + 1)


    def game_history_text(self, show_alphabet=True) -> str:
        """
        Return the game history as text: one line per guess giving the guess number, the styled
        guess, and the number of candidates left. The styled alphabet follows on a new line if
        show_alphabet is True; otherwise 15 spaces are appended.
        """

        styled_guesses = format_guesses(self.guesses, self.scores)

        # Position 0 is the state before the first guess, so it gets no number.
        guess_nbrs = [('  ' if n == 0 else f'{n}.') for n in range(len(styled_guesses))]
        history_lines = [f'{guess_nbr} {guess}  {num_left}'
                         for guess_nbr, guess, num_left in zip(guess_nbrs, styled_guesses, self.words_left)]
        game_history = '\n'.join(history_lines)

        styled_alphabet = ('\n' + format_alphabet(self.scored_alphabet) if show_alphabet else ' '*15)

        return game_history + styled_alphabet


    @staticmethod
    def long_test():
        """
        Play one non-interactive game for each word in WORDLE_WORD_LIST (last word first) as the
        secret word, printing the running average number of guesses after each game.
        """
        total_guesses = 0
        for game_nbr, secret_word in enumerate(reversed(WORDLE_WORD_LIST), start=1):
            print(f'{game_nbr:4}. secret word: {secret_word}', end=';  ')
            wordle = Wordle(is_long_test=True)
            total_guesses += wordle.play_a_game(WORDLE_WORD_LIST, WORDLE_WORD_LIST, secret_word=secret_word)
            print(f'Avg guesses: {round(total_guesses/game_nbr, 2)}')


    def print_game_history(self, title_text='Guesses so far', show_alphabet=True):
        """Print the game history (see game_history_text) in a centered panel titled title_text."""
        panel_print(self.game_history_text(show_alphabet=show_alphabet), title_text=title_text, justify='center')


    def select_a_guess(self, greens, current_candidates, guess_words, current_step, included_letters, filter) -> str:
        """
        Suggest the next guess. There are a couple of issues.
        - The range of possible words from which to select. The two
          most widely used approaches are:
            a) the remaining candidates. This list automatically
               satisfies the constraints derived from previous guesses.
            b) a separate list of allowable guess words. This list is
               generally larger than the list of words from which the
               secret word is selected. This list must be filtered to
               eliminate words that don't satisfy the constraints derived
               from previous guesses.
        - The system used to score possible guesses to determine which has
          the best chance of being the best guess.

        If all the green letters are known, or only one candidate remains, that word is returned.
        Otherwise the possible guesses are ranked by their Guess metrics (the keys of Guess.step_1
        are used at step 1). In interactive mode the top 30 are shown and the user chooses;
        otherwise the top-ranked guess is returned.
        """

        the_guess: str
        output_str = ''

        # Is only one candidate left?
        all_greens_known = '_' not in greens
        if all_greens_known or len(current_candidates) == 1:
            the_guess = ''.join(greens) if all_greens_known else current_candidates[0]

            if self.interactive:
                output_str += f'Since "{the_guess}" is the only remaining secret-word candidate, it will be the next guess.\n'
                output_str += f'Guess {current_step} > {the_guess}'
                panel_print(output_str,  title_text=f'Default guess')
            return the_guess

        possible_guesses = guess_words if current_step > 1 else Guess.step_1.keys()
        guesses = [Guess(guess, current_candidates, included_letters, current_step)
                   for guess in possible_guesses]

        # Keep the 30 guesses with the best metrics, best first.
        sorted_guesses: List[Guess] = sorted(guesses, reverse=True, key=lambda g: g.metrics())[:30]
        words: List[str] = [g.guess for g in sorted_guesses]

        if self.interactive:
            # Show the ranked guesses, 5 to a line, and let the user choose.
            lsts_of_sorted_guesses = lists_to_many_lists(sorted_guesses, 5)
            output_str += '\n'.join([", ".join([str(g) for g in lst_of_sorted_guesses])
                                     for lst_of_sorted_guesses in lsts_of_sorted_guesses])
            panel_print(output_str, title_text=f'Sorted Guesses')
            return get_user_guess(current_step, words, filter, self.hard_mode)

        the_guess = words[0]
        panel_print(output_str, title_text=f'The guess')
        return the_guess


# NOTE(review): span_words is not referenced anywhere in the visible code; it looks like
# scratch notes listing two five-word sets -- confirm before relying on or removing it.
span_words = """FIGHT, CLOMP -x, BRAND, JUKES - x, WOOZY; derby, flank, ghost, winch, jumps -x"""

# The same list object is passed below as both the secret-word candidates and the guess words.
# play_a_game tests `current_candidates is guess_words`, so this sets candidates_are_guess_words.
wwl = WORDLE_WORD_LIST
# ww2 = WORDLE_WORD_LIST.copy()
# Play one interactive, hard-mode game with the secret word fixed to 'smack'.
# Make it an assignment so that the result is not printed when the game completes.
_ = Wordle().play_a_game(wwl, wwl, secret_word='smack', hard_mode=True, interactive=True, online=False)
# Alternative run: play a non-interactive game for every word in the word list.
# _ = Wordle.long_test()


Guess 1   > 3



Guess 2   > 1



Guess 3   > 1
