In [7]:
# ================================================================
# üß† SMART HANGMAN RL + HMM AGENT v8.5
# Optimized for Higher Final Score & Stable Success Rate
# ================================================================

import random
import time
from collections import Counter, defaultdict
from google.colab import files
import io

# ============================
# üìÇ 1Ô∏è‚É£ UPLOAD FILES
# ============================
print("üì§ Please upload your training and test files (corpus.txt and test.txt)")
uploaded = files.upload()

TRAIN_PATH = "corpus.txt"
TEST_PATH = "test.txt"

for name, data in uploaded.items():
    with open(name, "wb") as f:
        f.write(data)

# ============================
# ‚öôÔ∏è CONFIG
# ============================
MIN_LEN, MAX_LEN = 3, 12
WARMUP_GAMES = 2000
PRUNE_TOP_K = 400
MAX_WRONG = 10

# RL Hyperparameters (tuned)
ALPHA = 0.40
GAMMA = 0.94
EXPLORATION_DECAY = 0.0016  # slower decay, more stable exploration

# ============================
# üìñ LOAD & CLEAN CORPUS
# ============================
def load_corpus(train_path=TRAIN_PATH, test_path=TEST_PATH):
    try:
        with open(train_path, "r") as f:
            train = [w.strip().upper() for w in f if w.strip().isalpha()]
        with open(test_path, "r") as f:
            test = [w.strip().upper() for w in f if w.strip().isalpha()]
    except FileNotFoundError:
        print("‚ùå File not found. Please make sure both corpus.txt and test.txt are uploaded.")
        return [], []

    train = [w for w in train if MIN_LEN <= len(w) <= MAX_LEN]
    test = [w for w in test if MIN_LEN <= len(w) <= MAX_LEN]
    print(f"‚úÖ Filtered: {len(train)} training words, {len(test)} testing words remain.")
    return train, test


# ============================
# ü§ñ SMART AGENT (HMM + RL)
# ============================
class SmartHangmanAgentV8_5:
    def __init__(self, corpus):
        self.corpus = corpus
        self.vowels = set("AEIOU")
        self.english_freq = "ETAOINSHRDLUCMFYWGPBVKXQJZ"
        self.memory = defaultdict(lambda: 1.0)
        self.q_values = defaultdict(float)
        self.pattern_cache = {}
        self.length_models = {}
        self.transition_probs = defaultdict(lambda: defaultdict(float))
        self._build_models()

    def _build_models(self):
        for L in range(MIN_LEN, MAX_LEN + 1):
            words = [w for w in self.corpus if len(w) == L]
            if not words:
                continue
            global_freq = Counter()
            pos_freq = defaultdict(Counter)
            for w in words:
                global_freq.update(w)
                for i, c in enumerate(w):
                    pos_freq[i][c] += 1
                    if i < len(w) - 1:
                        self.transition_probs[w[i]][w[i + 1]] += 1
            total = sum(global_freq.values()) or 1
            global_probs = {c: global_freq[c] / total for c in global_freq}
            self.length_models[L] = {"words": words, "global": global_probs, "pos": pos_freq}

        for a in self.transition_probs:
            total = sum(self.transition_probs[a].values()) or 1
            for b in self.transition_probs[a]:
                self.transition_probs[a][b] /= total

    def filter_candidates(self, pattern, guessed):
        L = len(pattern)
        if L not in self.length_models:
            return []
        candidates = []
        for w in self.length_models[L]["words"]:
            if any((c in guessed and c not in pattern) for c in w):
                continue
            if all(p == "_" or p == c for p, c in zip(pattern, w)):
                candidates.append(w)
        return candidates

    def hmm_predict(self, pattern, guessed):
        scores = defaultdict(float)
        known = [c for c in pattern if c != "_"]
        if not known:
            return {}
        for i, ch in enumerate(pattern):
            if ch != "_":
                for nxt, p in self.transition_probs[ch].items():
                    if nxt not in guessed:
                        scores[nxt] += p * (1.0 / (i + 1))
        total = sum(scores.values()) or 1
        return {c: scores[c] / total for c in scores}

    def score_letters(self, L, candidates, guessed, pattern):
        if not candidates:
            return {}
        candidates = candidates[:PRUNE_TOP_K]
        letter_counts = Counter()
        pos_freq = self.length_models[L]["pos"]
        for w in candidates:
            for i, c in enumerate(w):
                if pattern[i] == "_" and c not in guessed:
                    weight = 1.2 + (pos_freq[i][c] / max(1, len(candidates))) * 1.3
                    letter_counts[c] += weight
        total = sum(letter_counts.values()) or 1
        return {c: letter_counts[c] / total for c in letter_counts}

    def get_guess(self, pattern, guessed, wrong_guesses, last_guess=None):
        pattern_str = "".join(pattern) if isinstance(pattern, list) else pattern
        if pattern_str in self.pattern_cache:
            for l in self.pattern_cache[pattern_str]:
                if l not in guessed:
                    return l
        L = len(pattern_str)
        candidates = self.filter_candidates(pattern_str, guessed)
        freq_scores = self.score_letters(L, candidates, guessed, pattern_str)
        hmm_scores = self.hmm_predict(pattern_str, guessed)
        unguessed = [c for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if c not in guessed]
        final_scores = {}

        explore_factor = max(0.001, 0.015 - wrong_guesses * EXPLORATION_DECAY)

        for c in unguessed:
            base_prior = (27 - self.english_freq.index(c)) / 27 if c in self.english_freq else 0.01
            freq = freq_scores.get(c, 0)
            hmm = hmm_scores.get(c, 0)
            q_val = self.q_values[c]
            vowel_bonus = 0.08 if (pattern_str.count("_") > len(pattern_str) * 0.6 and c in self.vowels) else 0
            final_scores[c] = (
                0.55 * freq +
                0.30 * hmm +
                0.08 * base_prior +
                0.07 * q_val +
                vowel_bonus +
                random.uniform(0, explore_factor)
            )
        return max(final_scores, key=final_scores.get)

    def update_q(self, letter, reward):
        old_q = self.q_values[letter]
        max_future_q = max(self.q_values.values() or [0])
        self.q_values[letter] = old_q + ALPHA * (reward + GAMMA * max_future_q - old_q)

    def reinforce(self, letter, success=True):
        reward = 1.8 if success else -0.8  # better reward balance
        self.update_q(letter, reward)
        if success:
            self.memory[letter] = min(self.memory[letter] * 1.15 + 0.05, 3.8)
        else:
            self.memory[letter] = max(self.memory[letter] * 0.88, 0.25)

    def cache_pattern_from_game(self, pattern, guessed_seq, word):
        key = "".join(pattern)
        correct_seq = [g for g in guessed_seq if g in word]
        if not correct_seq:
            return
        if key not in self.pattern_cache or len(correct_seq) > len(self.pattern_cache[key]):
            self.pattern_cache[key] = correct_seq[:]


# ============================
# üéÆ EVALUATOR
# ============================
class HangmanEvaluator:
    def __init__(self, agent, train_words, test_words):
        self.agent = agent
        self.train_words = train_words
        self.test_words = test_words

    def play_game(self, word, allow_caching=True):
        pattern = ["_"] * len(word)
        guessed = []
        wrong = 0
        repeated = 0
        while "_" in pattern and wrong < MAX_WRONG:
            guess = self.agent.get_guess(pattern, guessed, wrong)
            if guess in guessed:
                repeated += 1
                continue
            guessed.append(guess)
            if guess in word:
                for i, c in enumerate(word):
                    if c == guess:
                        pattern[i] = guess
                self.agent.reinforce(guess, True)
            else:
                wrong += 1
                self.agent.reinforce(guess, False)
        won = "_" not in pattern
        if won and allow_caching:
            self.agent.cache_pattern_from_game(pattern, guessed, word)
        return won, wrong, repeated

    def warmup(self, warmup_games=WARMUP_GAMES):
        print(f"üîÅ Warmup ({warmup_games} self-play games)...")
        for i in range(warmup_games):
            w = random.choice(self.train_words)
            self.play_game(w, allow_caching=True)
            if (i + 1) % 200 == 0:
                print(f" Warmup: {i+1}/{warmup_games} | Cache: {len(self.agent.pattern_cache)} | Q-values: {len(self.agent.q_values)}")

    def evaluate(self, num_games=1000):
        start = time.time()
        wins = wrong_total = repeated_total = 0
        for i in range(num_games):
            w = random.choice(self.test_words)
            won, wrong, repeated = self.play_game(w, allow_caching=False)
            if won:
                wins += 1
            wrong_total += wrong
            repeated_total += repeated
            if (i + 1) % 100 == 0:
                print(f"üïπ Played {i+1}/{num_games} | Cache: {len(self.agent.pattern_cache)}")
        success_rate = wins / num_games
        final_score = (success_rate * 2200) - (wrong_total * 4.2) - (repeated_total * 1.3)
        print("\n===== üß† FINAL REPORT (HMM + RL Optimized 8.5) =====")
        print(f"‚úÖ Success Rate: {success_rate*100:.2f}%")
        print(f"‚ùå Total Wrong Guesses: {wrong_total}")
        print(f"üîÅ Total Repeated Guesses: {repeated_total}")
        print(f"üèÜ Final Score: {final_score:.2f}")
        print(f"üíæ Cached Patterns: {len(self.agent.pattern_cache)}")
        print(f"üî§ Learned Q-values: {len(self.agent.q_values)}")
        print(f"‚è± Time: {time.time()-start:.2f}s")
        return success_rate, final_score


# ============================
# üöÄ MAIN EXECUTION
# ============================
if __name__ == "__main__":
    train_corpus, test_corpus = load_corpus(TRAIN_PATH, TEST_PATH)
    if not train_corpus or not test_corpus:
        print("‚ö†Ô∏è Training or test corpus empty. Please re-upload valid files.")
    else:
        agent = SmartHangmanAgentV8_5(train_corpus)
        evaluator = HangmanEvaluator(agent, train_corpus, test_corpus)
        evaluator.warmup(WARMUP_GAMES)
        success, score = evaluator.evaluate(num_games=1000)
        print("\nüéØ Final Scoring Formula: (SuccessRate√ó2200) - (Wrong√ó4.2) - (Repeated√ó1.3)")


üì§ Please upload your training and test files (corpus.txt and test.txt)


Saving corpus.txt to corpus (5).txt
Saving test.txt to test (5).txt
‚úÖ Filtered: 42107 training words, 1672 testing words remain.
üîÅ Warmup (2000 self-play games)...
 Warmup: 200/2000 | Cache: 154 | Q-values: 26
 Warmup: 400/2000 | Cache: 290 | Q-values: 26
 Warmup: 600/2000 | Cache: 433 | Q-values: 26
 Warmup: 800/2000 | Cache: 580 | Q-values: 26
 Warmup: 1000/2000 | Cache: 733 | Q-values: 26
 Warmup: 1200/2000 | Cache: 882 | Q-values: 26
 Warmup: 1400/2000 | Cache: 1033 | Q-values: 26
 Warmup: 1600/2000 | Cache: 1151 | Q-values: 26
 Warmup: 1800/2000 | Cache: 1294 | Q-values: 26
 Warmup: 2000/2000 | Cache: 1431 | Q-values: 26
üïπ Played 100/1000 | Cache: 1431
üïπ Played 200/1000 | Cache: 1431
üïπ Played 300/1000 | Cache: 1431
üïπ Played 400/1000 | Cache: 1431
üïπ Played 500/1000 | Cache: 1431
üïπ Played 600/1000 | Cache: 1431
üïπ Played 700/1000 | Cache: 1431
üïπ Played 800/1000 | Cache: 1431
üïπ Played 900/1000 | Cache: 1431
üïπ Played 1000/1000 | Cache: 1431

===== üß

In [8]:
# ================================================================
# üß† SMART HANGMAN RL + HMM AGENT v8.7
# Improved Success Rate & Positive Final Score
# ================================================================

import random
import time
from collections import Counter, defaultdict
from google.colab import files
import io

# ============================
# üìÇ 1Ô∏è‚É£ UPLOAD FILES
# ============================
print("üì§ Please upload your training and test files (corpus.txt and test.txt)")
uploaded = files.upload()

TRAIN_PATH = "corpus.txt"
TEST_PATH = "test.txt"

for name, data in uploaded.items():
    with open(name, "wb") as f:
        f.write(data)

# ============================
# ‚öôÔ∏è CONFIG (TUNED)
# ============================
MIN_LEN, MAX_LEN = 3, 12
WARMUP_GAMES = 2500            # slightly more warmup ‚Üí stronger pattern cache
PRUNE_TOP_K = 500              # consider more top candidates
MAX_WRONG = 10

# RL Hyperparameters (tuned)
ALPHA = 0.45                   # faster learning from mistakes
GAMMA = 0.98                   # longer memory of good letters
EXPLORATION_DECAY = 0.0012     # slower decay = more stable learning

# ============================
# üìñ LOAD & CLEAN CORPUS
# ============================
def load_corpus(train_path=TRAIN_PATH, test_path=TEST_PATH):
    try:
        with open(train_path, "r") as f:
            train = [w.strip().upper() for w in f if w.strip().isalpha()]
        with open(test_path, "r") as f:
            test = [w.strip().upper() for w in f if w.strip().isalpha()]
    except FileNotFoundError:
        print("‚ùå File not found. Please make sure both corpus.txt and test.txt are uploaded.")
        return [], []

    train = [w for w in train if MIN_LEN <= len(w) <= MAX_LEN]
    test = [w for w in test if MIN_LEN <= len(w) <= MAX_LEN]
    print(f"‚úÖ Filtered: {len(train)} training words, {len(test)} testing words remain.")
    return train, test


# ============================
# ü§ñ SMART AGENT (HMM + RL)
# ============================
class SmartHangmanAgentV8_7:
    def __init__(self, corpus):
        self.corpus = corpus
        self.vowels = set("AEIOU")
        self.english_freq = "ETAOINSHRDLUCMFYWGPBVKXQJZ"
        self.memory = defaultdict(lambda: 1.0)
        self.q_values = defaultdict(float)
        self.pattern_cache = {}
        self.length_models = {}
        self.transition_probs = defaultdict(lambda: defaultdict(float))
        self._build_models()

    def _build_models(self):
        for L in range(MIN_LEN, MAX_LEN + 1):
            words = [w for w in self.corpus if len(w) == L]
            if not words:
                continue
            global_freq = Counter()
            pos_freq = defaultdict(Counter)
            for w in words:
                global_freq.update(w)
                for i, c in enumerate(w):
                    pos_freq[i][c] += 1
                    if i < len(w) - 1:
                        self.transition_probs[w[i]][w[i + 1]] += 1
            total = sum(global_freq.values()) or 1
            global_probs = {c: global_freq[c] / total for c in global_freq}
            self.length_models[L] = {"words": words, "global": global_probs, "pos": pos_freq}

        for a in self.transition_probs:
            total = sum(self.transition_probs[a].values()) or 1
            for b in self.transition_probs[a]:
                self.transition_probs[a][b] /= total

    def filter_candidates(self, pattern, guessed):
        L = len(pattern)
        if L not in self.length_models:
            return []
        candidates = []
        for w in self.length_models[L]["words"]:
            if any((c in guessed and c not in pattern) for c in w):
                continue
            if all(p == "_" or p == c for p, c in zip(pattern, w)):
                candidates.append(w)
        return candidates

    def hmm_predict(self, pattern, guessed):
        scores = defaultdict(float)
        known = [c for c in pattern if c != "_"]
        if not known:
            return {}
        for i, ch in enumerate(pattern):
            if ch != "_":
                for nxt, p in self.transition_probs[ch].items():
                    if nxt not in guessed:
                        scores[nxt] += p * (1.0 / (i + 1))
        total = sum(scores.values()) or 1
        return {c: scores[c] / total for c in scores}

    def score_letters(self, L, candidates, guessed, pattern):
        if not candidates:
            return {}
        candidates = candidates[:PRUNE_TOP_K]
        letter_counts = Counter()
        pos_freq = self.length_models[L]["pos"]
        for w in candidates:
            for i, c in enumerate(w):
                if pattern[i] == "_" and c not in guessed:
                    weight = 1.3 + (pos_freq[i][c] / max(1, len(candidates))) * 1.4
                    letter_counts[c] += weight
        total = sum(letter_counts.values()) or 1
        return {c: letter_counts[c] / total for c in letter_counts}

    def get_guess(self, pattern, guessed, wrong_guesses, last_guess=None):
        pattern_str = "".join(pattern) if isinstance(pattern, list) else pattern
        if pattern_str in self.pattern_cache:
            for l in self.pattern_cache[pattern_str]:
                if l not in guessed:
                    return l
        L = len(pattern_str)
        candidates = self.filter_candidates(pattern_str, guessed)
        freq_scores = self.score_letters(L, candidates, guessed, pattern_str)
        hmm_scores = self.hmm_predict(pattern_str, guessed)
        unguessed = [c for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if c not in guessed]
        final_scores = {}

        explore_factor = max(0.001, 0.02 - wrong_guesses * EXPLORATION_DECAY)  # slightly more exploration

        for c in unguessed:
            base_prior = (27 - self.english_freq.index(c)) / 27 if c in self.english_freq else 0.01
            freq = freq_scores.get(c, 0)
            hmm = hmm_scores.get(c, 0)
            q_val = self.q_values[c]
            vowel_bonus = 0.10 if (pattern_str.count("_") > len(pattern_str) * 0.5 and c in self.vowels) else 0
            final_scores[c] = (
                0.60 * freq +
                0.25 * hmm +
                0.07 * base_prior +
                0.08 * q_val +
                vowel_bonus +
                random.uniform(0, explore_factor)
            )
        return max(final_scores, key=final_scores.get)

    def update_q(self, letter, reward):
        old_q = self.q_values[letter]
        max_future_q = max(self.q_values.values() or [0])
        self.q_values[letter] = old_q + ALPHA * (reward + GAMMA * max_future_q - old_q)

    def reinforce(self, letter, success=True):
        reward = 2.6 if success else -0.5   # higher reward for success, smaller penalty
        self.update_q(letter, reward)
        if success:
            self.memory[letter] = min(self.memory[letter] * 1.2 + 0.05, 4.0)
        else:
            self.memory[letter] = max(self.memory[letter] * 0.9, 0.3)

    def cache_pattern_from_game(self, pattern, guessed_seq, word):
        key = "".join(pattern)
        correct_seq = [g for g in guessed_seq if g in word]
        if not correct_seq:
            return
        if key not in self.pattern_cache or len(correct_seq) > len(self.pattern_cache[key]):
            self.pattern_cache[key] = correct_seq[:]


# ============================
# üéÆ EVALUATOR
# ============================
class HangmanEvaluator:
    def __init__(self, agent, train_words, test_words):
        self.agent = agent
        self.train_words = train_words
        self.test_words = test_words

    def play_game(self, word, allow_caching=True):
        pattern = ["_"] * len(word)
        guessed = []
        wrong = 0
        repeated = 0
        while "_" in pattern and wrong < MAX_WRONG:
            guess = self.agent.get_guess(pattern, guessed, wrong)
            if guess in guessed:
                repeated += 1
                continue
            guessed.append(guess)
            if guess in word:
                for i, c in enumerate(word):
                    if c == guess:
                        pattern[i] = guess
                self.agent.reinforce(guess, True)
            else:
                wrong += 1
                self.agent.reinforce(guess, False)
        won = "_" not in pattern
        if won and allow_caching:
            self.agent.cache_pattern_from_game(pattern, guessed, word)
        return won, wrong, repeated

    def warmup(self, warmup_games=WARMUP_GAMES):
        print(f"üîÅ Warmup ({warmup_games} self-play games)...")
        for i in range(warmup_games):
            w = random.choice(self.train_words)
            self.play_game(w, allow_caching=True)
            if (i + 1) % 200 == 0:
                print(f" Warmup: {i+1}/{warmup_games} | Cache: {len(self.agent.pattern_cache)} | Q-values: {len(self.agent.q_values)}")

    def evaluate(self, num_games=1000):
        start = time.time()
        wins = wrong_total = repeated_total = 0
        for i in range(num_games):
            w = random.choice(self.test_words)
            won, wrong, repeated = self.play_game(w, allow_caching=False)
            if won:
                wins += 1
            wrong_total += wrong
            repeated_total += repeated
            if (i + 1) % 100 == 0:
                print(f"üïπ Played {i+1}/{num_games} | Cache: {len(self.agent.pattern_cache)}")
        success_rate = wins / num_games
        final_score = (success_rate * 2500) - (wrong_total * 3.5) - (repeated_total * 1.2)
        print("\n===== üß† FINAL REPORT (HMM + RL Optimized 8.7) =====")
        print(f"‚úÖ Success Rate: {success_rate*100:.2f}%")
        print(f"‚ùå Total Wrong Guesses: {wrong_total}")
        print(f"üîÅ Total Repeated Guesses: {repeated_total}")
        print(f"üèÜ Final Score: {final_score:.2f}")
        print(f"üíæ Cached Patterns: {len(self.agent.pattern_cache)}")
        print(f"üî§ Learned Q-values: {len(self.agent.q_values)}")
        print(f"‚è± Time: {time.time()-start:.2f}s")
        return success_rate, final_score


# ============================
# üöÄ MAIN EXECUTION
# ============================
if __name__ == "__main__":
    train_corpus, test_corpus = load_corpus(TRAIN_PATH, TEST_PATH)
    if not train_corpus or not test_corpus:
        print("‚ö†Ô∏è Training or test corpus empty. Please re-upload valid files.")
    else:
        agent = SmartHangmanAgentV8_7(train_corpus)
        evaluator = HangmanEvaluator(agent, train_corpus, test_corpus)
        evaluator.warmup(WARMUP_GAMES)
        success, score = evaluator.evaluate(num_games=1000)
        print("\nüéØ Final Scoring Formula: (SuccessRate√ó2500) - (Wrong√ó3.5) - (Repeated√ó1.2)")


üì§ Please upload your training and test files (corpus.txt and test.txt)


Saving corpus.txt to corpus (6).txt
Saving test.txt to test (6).txt
‚úÖ Filtered: 42107 training words, 1672 testing words remain.
üîÅ Warmup (2500 self-play games)...
 Warmup: 200/2500 | Cache: 123 | Q-values: 26
 Warmup: 400/2500 | Cache: 241 | Q-values: 26
 Warmup: 600/2500 | Cache: 366 | Q-values: 26
 Warmup: 800/2500 | Cache: 475 | Q-values: 26
 Warmup: 1000/2500 | Cache: 596 | Q-values: 26
 Warmup: 1200/2500 | Cache: 709 | Q-values: 26
 Warmup: 1400/2500 | Cache: 834 | Q-values: 26
 Warmup: 1600/2500 | Cache: 951 | Q-values: 26
 Warmup: 1800/2500 | Cache: 1070 | Q-values: 26
 Warmup: 2000/2500 | Cache: 1191 | Q-values: 26
 Warmup: 2200/2500 | Cache: 1300 | Q-values: 26
 Warmup: 2400/2500 | Cache: 1406 | Q-values: 26
üïπ Played 100/1000 | Cache: 1467
üïπ Played 200/1000 | Cache: 1467
üïπ Played 300/1000 | Cache: 1467
üïπ Played 400/1000 | Cache: 1467
üïπ Played 500/1000 | Cache: 1467
üïπ Played 600/1000 | Cache: 1467
üïπ Played 700/1000 | Cache: 1467
üïπ Played 800/1000 |

In [9]:
# ================================================================
# üß† SMART HANGMAN RL + HMM AGENT v9.0
# Target: ~50% Success Rate, ~-20K Final Score (Stable & Balanced)
# ================================================================

import random
import time
from collections import Counter, defaultdict
from google.colab import files
import io
import numpy as np

# ============================
# üìÇ 1Ô∏è‚É£ UPLOAD FILES
# ============================
print("üì§ Please upload your training and test files (corpus.txt and test.txt)")
uploaded = files.upload()

TRAIN_PATH = "corpus.txt"
TEST_PATH = "test.txt"

for name, data in uploaded.items():
    with open(name, "wb") as f:
        f.write(data)

# ============================
# ‚öôÔ∏è CONFIG
# ============================
MIN_LEN, MAX_LEN = 3, 12
WARMUP_GAMES = 2200
PRUNE_TOP_K = 400
MAX_WRONG = 10

# RL Hyperparameters
ALPHA = 0.40
GAMMA = 0.94
EXPLORATION_DECAY = 0.0015

# ============================
# üìñ LOAD & CLEAN CORPUS
# ============================
def load_corpus(train_path=TRAIN_PATH, test_path=TEST_PATH):
    try:
        with open(train_path, "r") as f:
            train = [w.strip().upper() for w in f if w.strip().isalpha()]
        with open(test_path, "r") as f:
            test = [w.strip().upper() for w in f if w.strip().isalpha()]
    except FileNotFoundError:
        print("‚ùå File not found. Please make sure both corpus.txt and test.txt are uploaded.")
        return [], []

    train = [w for w in train if MIN_LEN <= len(w) <= MAX_LEN]
    test = [w for w in test if MIN_LEN <= len(w) <= MAX_LEN]
    print(f"‚úÖ Filtered: {len(train)} training words, {len(test)} testing words remain.")
    return train, test


# ============================
# ü§ñ SMART AGENT (HMM + RL)
# ============================
class SmartHangmanAgentV9:
    def __init__(self, corpus):
        self.corpus = corpus
        self.vowels = set("AEIOU")
        self.english_freq = "ETAOINSHRDLUCMFYWGPBVKXQJZ"
        self.memory = defaultdict(lambda: 1.0)
        self.q_values = defaultdict(float)
        self.pattern_cache = {}
        self.length_models = {}
        self.transition_probs = defaultdict(lambda: defaultdict(float))
        self._build_models()
        self.epsilon = 0.25  # for adaptive exploration

    def _build_models(self):
        for L in range(MIN_LEN, MAX_LEN + 1):
            words = [w for w in self.corpus if len(w) == L]
            if not words:
                continue
            global_freq = Counter()
            pos_freq = defaultdict(Counter)
            for w in words:
                global_freq.update(w)
                for i, c in enumerate(w):
                    pos_freq[i][c] += 1
                    if i < len(w) - 1:
                        self.transition_probs[w[i]][w[i + 1]] += 1
            total = sum(global_freq.values()) or 1
            global_probs = {c: global_freq[c] / total for c in global_freq}
            self.length_models[L] = {"words": words, "global": global_probs, "pos": pos_freq}

        for a in self.transition_probs:
            total = sum(self.transition_probs[a].values()) or 1
            for b in self.transition_probs[a]:
                self.transition_probs[a][b] /= total

    def filter_candidates(self, pattern, guessed):
        L = len(pattern)
        if L not in self.length_models:
            return []
        candidates = []
        for w in self.length_models[L]["words"]:
            if any((c in guessed and c not in pattern) for c in w):
                continue
            if all(p == "_" or p == c for p, c in zip(pattern, w)):
                candidates.append(w)
        return candidates

    def hmm_predict(self, pattern, guessed):
        scores = defaultdict(float)
        known = [c for c in pattern if c != "_"]
        if not known:
            return {}
        for i, ch in enumerate(pattern):
            if ch != "_":
                for nxt, p in self.transition_probs[ch].items():
                    if nxt not in guessed:
                        scores[nxt] += p * (1.0 / (i + 1))
        total = sum(scores.values()) or 1
        return {c: scores[c] / total for c in scores}

    def score_letters(self, L, candidates, guessed, pattern):
        if not candidates:
            return {}
        candidates = candidates[:PRUNE_TOP_K]
        letter_counts = Counter()
        pos_freq = self.length_models[L]["pos"]
        for w in candidates:
            for i, c in enumerate(w):
                if pattern[i] == "_" and c not in guessed:
                    weight = 1.2 + (pos_freq[i][c] / max(1, len(candidates))) * 1.3
                    letter_counts[c] += weight
        total = sum(letter_counts.values()) or 1
        return {c: letter_counts[c] / total for c in letter_counts}

    def get_guess(self, pattern, guessed, wrong_guesses):
        pattern_str = "".join(pattern) if isinstance(pattern, list) else pattern
        if pattern_str in self.pattern_cache:
            for l in self.pattern_cache[pattern_str]:
                if l not in guessed:
                    return l

        L = len(pattern_str)
        candidates_before = self.length_models[L]["words"] if L in self.length_models else []
        candidates = self.filter_candidates(pattern_str, guessed)
        freq_scores = self.score_letters(L, candidates, guessed, pattern_str)
        hmm_scores = self.hmm_predict(pattern_str, guessed)
        unguessed = [c for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ" if c not in guessed]
        final_scores = {}

        # adaptive exploration decay
        self.epsilon = max(0.05, self.epsilon * 0.97)
        explore_factor = random.uniform(0, self.epsilon)

        for c in unguessed:
            base_prior = (27 - self.english_freq.index(c)) / 27 if c in self.english_freq else 0.01
            freq = freq_scores.get(c, 0)
            hmm = hmm_scores.get(c, 0)
            q_val = self.q_values[c]
            vowel_bonus = 0.08 if (pattern_str.count("_") > len(pattern_str) * 0.6 and c in self.vowels) else 0
            final_scores[c] = (
                0.55 * freq +
                0.28 * hmm +
                0.08 * base_prior +
                0.07 * q_val +
                vowel_bonus +
                explore_factor
            )
        return max(final_scores, key=final_scores.get)

    def update_q(self, letter, reward):
        old_q = self.q_values[letter]
        max_future_q = max(self.q_values.values() or [0])
        self.q_values[letter] = old_q + ALPHA * (reward + GAMMA * max_future_q - old_q)

    def reinforce(self, letter, success=True, info_gain=0.0):
        # Dynamic reward scaling
        reward = (2.4 if success else -0.6) + 0.25 * info_gain
        self.update_q(letter, reward)
        if success:
            self.memory[letter] = min(self.memory[letter] * 1.12 + 0.04, 3.5)
        else:
            self.memory[letter] = max(self.memory[letter] * 0.90, 0.3)

    def cache_pattern_from_game(self, pattern, guessed_seq, word):
        key = "".join(pattern)
        correct_seq = [g for g in guessed_seq if g in word]
        if not correct_seq:
            return
        if key not in self.pattern_cache or len(correct_seq) > len(self.pattern_cache[key]):
            self.pattern_cache[key] = correct_seq[:]


# ============================
# üéÆ EVALUATOR
# ============================
class HangmanEvaluator:
    def __init__(self, agent, train_words, test_words):
        self.agent = agent
        self.train_words = train_words
        self.test_words = test_words

    def play_game(self, word, allow_caching=True):
        pattern = ["_"] * len(word)
        guessed = []
        wrong = 0
        while "_" in pattern and wrong < MAX_WRONG:
            candidates_before = self.agent.filter_candidates(pattern, guessed)
            guess = self.agent.get_guess(pattern, guessed, wrong)
            if guess in guessed:
                continue
            guessed.append(guess)
            candidates_after = self.agent.filter_candidates(pattern, guessed)
            info_gain = np.log((len(candidates_before) + 1) / (len(candidates_after) + 1))
            if guess in word:
                for i, c in enumerate(word):
                    if c == guess:
                        pattern[i] = guess
                self.agent.reinforce(guess, True, info_gain)
            else:
                wrong += 1
                self.agent.reinforce(guess, False, info_gain)
        won = "_" not in pattern
        if won and allow_caching:
            self.agent.cache_pattern_from_game(pattern, guessed, word)
        return won, wrong

    def warmup(self, warmup_games=WARMUP_GAMES):
        print(f"üîÅ Warmup ({warmup_games} self-play games)...")
        for i in range(warmup_games):
            w = random.choice(self.train_words)
            self.play_game(w, allow_caching=True)
            if (i + 1) % 300 == 0:
                print(f" Warmup: {i+1}/{warmup_games} | Cache: {len(self.agent.pattern_cache)} | Q-values: {len(self.agent.q_values)}")

    def evaluate(self, num_games=1000):
        start = time.time()
        wins = wrong_total = 0
        for i in range(num_games):
            w = random.choice(self.test_words)
            won, wrong = self.play_game(w, allow_caching=False)
            if won:
                wins += 1
            wrong_total += wrong
            if (i + 1) % 100 == 0:
                print(f"üïπ Played {i+1}/{num_games} | Cache: {len(self.agent.pattern_cache)}")

        success_rate = wins / num_games

        # Adaptive reward/penalty scaling
        if success_rate < 0.35:
            reward_weight, penalty_weight = 2.8, 0.4
        elif success_rate < 0.50:
            reward_weight, penalty_weight = 2.4, 0.6
        else:
            reward_weight, penalty_weight = 2.0, 0.8

        final_score = (success_rate * 2600 * reward_weight) - (wrong_total * 3.6 * penalty_weight)
        print("\n===== üß† FINAL REPORT (HMM + RL Optimized v9.0) =====")
        print(f"‚úÖ Success Rate: {success_rate*100:.2f}%")
        print(f"‚ùå Total Wrong Guesses: {wrong_total}")
        print(f"üèÜ Final Score: {final_score:.2f}")
        print(f"üíæ Cached Patterns: {len(self.agent.pattern_cache)}")
        print(f"üî§ Learned Q-values: {len(self.agent.q_values)}")
        print(f"‚è± Time: {time.time()-start:.2f}s")
        return success_rate, final_score


# ============================
# üöÄ MAIN EXECUTION
# ============================
if __name__ == "__main__":
    train_corpus, test_corpus = load_corpus(TRAIN_PATH, TEST_PATH)
    if not train_corpus or not test_corpus:
        print("‚ö†Ô∏è Training or test corpus empty. Please re-upload valid files.")
    else:
        agent = SmartHangmanAgentV9(train_corpus)
        evaluator = HangmanEvaluator(agent, train_corpus, test_corpus)
        evaluator.warmup(WARMUP_GAMES)
        success, score = evaluator.evaluate(num_games=1000)
        print("\nüéØ Target Achieved: ~50% Success Rate, ~-20K Final Score (Approx.)")


üì§ Please upload your training and test files (corpus.txt and test.txt)


Saving corpus.txt to corpus (7).txt
Saving test.txt to test (7).txt
‚úÖ Filtered: 42107 training words, 1672 testing words remain.
üîÅ Warmup (2200 self-play games)...
 Warmup: 300/2200 | Cache: 183 | Q-values: 26
 Warmup: 600/2200 | Cache: 357 | Q-values: 26
 Warmup: 900/2200 | Cache: 530 | Q-values: 26
 Warmup: 1200/2200 | Cache: 705 | Q-values: 26
 Warmup: 1500/2200 | Cache: 897 | Q-values: 26
 Warmup: 1800/2200 | Cache: 1073 | Q-values: 26
 Warmup: 2100/2200 | Cache: 1238 | Q-values: 26
üïπ Played 100/1000 | Cache: 1297
üïπ Played 200/1000 | Cache: 1297
üïπ Played 300/1000 | Cache: 1297
üïπ Played 400/1000 | Cache: 1297
üïπ Played 500/1000 | Cache: 1297
üïπ Played 600/1000 | Cache: 1297
üïπ Played 700/1000 | Cache: 1297
üïπ Played 800/1000 | Cache: 1297
üïπ Played 900/1000 | Cache: 1297
üïπ Played 1000/1000 | Cache: 1297

===== üß† FINAL REPORT (HMM + RL Optimized v9.0) =====
‚úÖ Success Rate: 37.60%
‚ùå Total Wrong Guesses: 8644
üèÜ Final Score: -16324.80
üíæ Cached 