In [2]:
from collections import Counter
import math, statistics
import numpy as np
def feedback(guess, answer):
    """Return the Wordle colour pattern for ``guess`` scored against ``answer``.

    Both words are upper-cased first, so the comparison is case-insensitive.
    The result is a string with one character per letter of the (upper-cased)
    guess:

    - ``"2"``: green, same letter in the same position;
    - ``"1"``: yellow, the letter occurs in a non-green position of the
      answer and has not already been claimed by an earlier guess letter;
    - ``"0"``: grey.

    The pattern length follows the guess, so word lengths other than five
    are supported. Letters are paired positionally with ``zip``, so any
    surplus letters of a longer ``answer`` are ignored.
    """
    guess, answer = guess.upper(), answer.upper()
    pat = ["0"] * len(guess)   # start all grey, one cell per guess letter
    remain = Counter()         # answer letters not consumed by a green

    # Pass 1: mark greens and bank the answer letters at every other position.
    for i, (g, a) in enumerate(zip(guess, answer)):
        if g == a:
            pat[i] = "2"
        else:
            remain[a] += 1

    # Pass 2: hand out yellows left to right. Each banked letter can be used
    # only once, so repeated guess letters are served first-come-first-served.
    for i, g in enumerate(guess):
        if pat[i] == "0" and remain[g] > 0:
            pat[i] = "1"
            remain[g] -= 1

    return "".join(pat)
# Find S_{w,p}: the candidates still consistent with guess w and pattern p.
def alive_after(guess, pattern, cand):
    """Return the words in ``cand`` that would produce ``pattern`` for ``guess``.

    Every candidate is treated as the hypothetical answer: it survives only
    if ``feedback(guess, candidate)`` equals ``pattern``. The result is a new
    list in the same order as ``cand``.
    """
    survivors = []
    for word in cand:
        if feedback(guess, word) == pattern:
            survivors.append(word)
    return survivors
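# Recompute feedback(guess, w) for every candidate w and keep those whose pattern matches: this is the subset S_{w,p} from the definition.
# Expected remaining count E[|S_{w,p}|]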
#def exprem(guess, cand):
#    return sum(len(alive_after(guess, feedback(guess,a), cand)) for a in cand)/len(cand)
def exprem(guess, cand):
    """Return the expected number of candidates left after playing ``guess``.

    Each word in ``cand`` is taken in turn as the answer (all equally
    likely). For an answer a with pattern p(a), the surviving set is every
    candidate that yields the same pattern, so the value is

        (1 / |S|) * sum over a in S of |S_{w, p(a)}|

    Candidates sharing a pattern all contribute the same bucket size, so the
    sum equals the sum of squared bucket sizes. That needs one ``feedback``
    call per candidate instead of one per (answer, candidate) pair, and the
    integer total is the same as the pairwise count.

    A smaller result means the guess prunes more words on average.
    Raises ZeroDivisionError if ``cand`` is empty.
    """
    bucket_sizes = Counter(feedback(guess, answer) for answer in cand)
    total = sum(size * size for size in bucket_sizes.values())
    return total / len(cand)
#按公式做「加總再除以候選個數」；結果越小代表平均砍掉更多字。
def next_guess(cand):
    """Pick the word in ``cand`` with the smallest ``exprem`` against ``cand``.

    This is a one-step greedy choice: only currently surviving words are
    considered as guesses. On ties the earliest word in ``cand`` wins,
    because ``min`` returns the first minimal item.
    """
    def expected_left(word):
        return exprem(word, cand)

    return min(cand, key=expected_left)
#把「目前還活著的字」逐一算 ExpRem，挑出最小值。這是最簡單的貪婪策略。
def play(answer, start, full):
    """Simulate one game and return the number of guesses used.

    Parameters:
        answer: the hidden word.
        start:  the opening guess.
        full:   the word list; it is copied, never modified.

    The loop scores the current guess, stops when every cell is green,
    otherwise shrinks the candidate set to the words consistent with the
    pattern and takes ``next_guess`` of that set. The returned count
    includes the opening guess.

    Raises:
        ValueError: if no candidate is consistent with the feedback seen so
            far, which happens when ``answer`` is not in ``full``.
    """
    cand = list(full)          # initial candidates = whole word list
    guess = start              # opening word
    steps = 1                  # guesses played so far, including this one
    while True:
        pat = feedback(guess, answer)
        # All-green check that does not assume a word length of five.
        if set(pat) == {"2"}:
            return steps
        cand = alive_after(guess, pat, cand)   # shrink the candidate set
        if not cand:
            raise ValueError(
                f"no candidates left after guess {guess!r}; "
                f"is answer {answer!r} in the word list?"
            )
        guess = next_guess(cand)               # greedy pick for next round
        steps += 1
#核心迴圈：feedback → 篩選 cand → 選新 guess 持續到猜中。
#每輪都挑當前 ExpRem 最小的字
def load_wordlist(path, *, length=5, to_upper=True, max_words=None):
    """
    Read a text file with one word per line and return the words as a list.

    - path       : path of the file, opened as UTF-8
    - length     : keep only words of exactly this many characters (default 5)
    - to_upper   : upper-case the words if true (matches what feedback()
                   works with), otherwise lower-case them
    - max_words  : optional; stop after this many accepted words, handy for
                   small test runs. A falsy value means no limit.

    Lines are stripped of surrounding whitespace; lines of the wrong length
    or containing non-letters (hyphens, apostrophes, ...) are skipped.
    """
    normalise = str.upper if to_upper else str.lower
    collected = []
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            token = raw.strip()
            # Accept only purely alphabetic entries of the requested length.
            if len(token) == length and token.isalpha():
                collected.append(normalise(token))
                if max_words and len(collected) >= max_words:
                    break
    return collected
# Candidate answers: every accepted word in the word file.
CAND = load_wordlist("valid-wordle-words.txt")
print("候選字數：", len(CAND))
# Opening words to evaluate. They come from the same file with the same
# filters, so copy the list instead of reading and parsing the file again.
STARTS = list(CAND)
# ExpRem of each opening word against the full candidate list.
exp = {w: exprem(w, CAND) for w in STARTS}
# Average number of guesses per opening word, taken over every possible
# answer. This plays len(STARTS) * len(CAND) complete games.
avg_n = {w: statistics.mean(play(ans, w, CAND) for ans in CAND) for w in STARTS}
import pandas as pd

# One row per opening word, indexed by the word and ordered by ascending
# ExpRem (best-pruning openers first).
df = pd.DataFrame(
    {
        "Word": list(exp),
        "ExpRem": list(exp.values()),
        "AvgSteps": [avg_n[word] for word in exp],
    }
)
df = df.set_index("Word")
df = df.sort_values("ExpRem")

# Lift pandas' row limit so the whole table is printed.
pd.set_option("display.max_rows", None)
print(df)
# Pearson correlation between ExpRem and average steps across opening words.
words = list(exp)
x = [exp[w] for w in words]      # ExpRem per opening word
y = [avg_n[w] for w in words]    # average number of guesses per opening word

# The coefficient is undefined when there are no words or when either series
# has zero variance; report NaN then instead of dividing by zero.
r = math.nan
if words:
    xm, ym = sum(x) / len(x), sum(y) / len(y)
    num = sum((xi - xm) * (yi - ym) for xi, yi in zip(x, y))
    den = math.sqrt(sum((xi - xm) ** 2 for xi in x) *
                    sum((yi - ym) ** 2 for yi in y))
    if den:
        r = num / den
print(r)

候選字數： 14855


KeyboardInterrupt: 