In [None]:
import numpy as np
import re
from collections import defaultdict, Counter
import random
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import random
from collections import deque

class AdvancedHMMHangman:
    def __init__(self, corpus_file):
        self.corpus_file = corpus_file
        self.words = []
        self.hmm_models = {}
        self.trigram_models = {}
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
        self.vocab_size = len(self.alphabet)
        self.letter_freq = None
        self.bigram_probs = None
        self.trigram_probs = None

    def load_corpus(self):
        """Load and preprocess the corpus"""
        print("Loading corpus...")
        try:
            with open(self.corpus_file, 'r') as f:
                self.words = [word.strip().lower() for word in f.readlines()]
        except FileNotFoundError:
            print(f"Error: Corpus file '{self.corpus_file}' not found.")
            # Try to load from a default path if it exists, or raise error
            # For this example, we'll assume it's in the same directory.
            # In a real scenario, you might have a fallback or user input.
            return

        self.words = [word for word in self.words if re.match('^[a-z]+$', word)]
        print(f"Loaded {len(self.words)} valid words")
        all_letters = ''.join(self.words)
        self.letter_freq = Counter(all_letters)
        total_letters = len(all_letters)
        self.letter_probs = {char: count/total_letters for char, count in self.letter_freq.items()}

        self._calculate_bigram_probs()
        self._calculate_trigram_probs()

    def _calculate_bigram_probs(self):
        """Calculate bigram probabilities across all words"""
        print("Calculating bigram probabilities...")
        bigram_counts = defaultdict(lambda: defaultdict(int))

        for word in self.words:
            for i in range(len(word) - 1):
                current_char = word[i]
                next_char = word[i + 1]
                bigram_counts[current_char][next_char] += 1

        self.bigram_probs = defaultdict(dict)
        for char1 in bigram_counts:
            total = sum(bigram_counts[char1].values())
            for char2 in bigram_counts[char1]:
                self.bigram_probs[char1][char2] = bigram_counts[char1][char2] / total

        print("Bigram probabilities calculated")

    def _calculate_trigram_probs(self):
        """Calculate trigram probabilities across all words"""
        print("Calculating trigram probabilities...")
        trigram_counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

        for word in self.words:
            for i in range(len(word) - 2):
                char1 = word[i]
                char2 = word[i + 1]
                char3 = word[i + 2]
                trigram_counts[char1][char2][char3] += 1

        self.trigram_probs = defaultdict(lambda: defaultdict(dict))
        for char1 in trigram_counts:
            for char2 in trigram_counts[char1]:
                total = sum(trigram_counts[char1][char2].values())
                for char3 in trigram_counts[char1][char2]:
                    self.trigram_probs[char1][char2][char3] = trigram_counts[char1][char2][char3] / total if total > 0 else 0

        print("Trigram probabilities calculated")

    def train_hmm(self):
        """Train enhanced HMM with trigram features"""
        print("\nTraining Enhanced HMM models with trigrams...")

        words_by_length = defaultdict(list)
        for word in self.words:
            words_by_length[len(word)].append(word)

        for length, word_list in words_by_length.items():
            if len(word_list) < 5:
                continue

            print(f"Training HMM for word length {length} with {len(word_list)} words")

            # Enhanced transition matrices including trigrams
            transition = np.ones((length, self.vocab_size, self.vocab_size)) * 1e-10
            trigram_transition = np.ones((length, self.vocab_size, self.vocab_size, self.vocab_size)) * 1e-10
            emission = np.ones((length, self.vocab_size)) * 1e-10
            position_freq = np.ones((length, self.vocab_size)) * 1e-10

            for word in word_list:
                indices = [ord(c) - ord('a') for c in word]

                for pos, char_idx in enumerate(indices):
                    emission[pos, char_idx] += 1
                    position_freq[pos, char_idx] += 1

                # Bigram transitions
                for i in range(len(indices) - 1):
                    current_char = indices[i]
                    next_char = indices[i + 1]
                    transition[i, current_char, next_char] += 1

                # Trigram transitions
                for i in range(len(indices) - 2):
                    char1 = indices[i]
                    char2 = indices[i + 1]
                    char3 = indices[i + 2]
                    trigram_transition[i, char1, char2, char3] += 1

            # Normalization
            for pos in range(length):
                if emission[pos].sum() > 0:
                    emission[pos] /= emission[pos].sum()
                if position_freq[pos].sum() > 0:
                    position_freq[pos] /= position_freq[pos].sum()
                for i in range(self.vocab_size):
                    if transition[pos, i].sum() > 0:
                        transition[pos, i] /= transition[pos, i].sum()
                    for j in range(self.vocab_size):
                        if trigram_transition[pos, i, j].sum() > 0:
                            trigram_transition[pos, i, j] /= trigram_transition[pos, i, j].sum()

            self.hmm_models[length] = {
                'transition': transition,
                'trigram_transition': trigram_transition,
                'emission': emission,
                'position_freq': position_freq,
                'initial': self._compute_initial_probs(word_list),
                'word_count': len(word_list)
            }

        print(f"Trained HMMs for {len(self.hmm_models)} different word lengths")

    def _compute_initial_probs(self, word_list):
        """Compute initial letter probabilities"""
        initial_counts = np.zeros(self.vocab_size)
        for word in word_list:
            if word:
                initial_counts[ord(word[0]) - ord('a')] += 1
        if initial_counts.sum() > 0:
            initial_counts /= initial_counts.sum()
        return initial_counts

    def get_letter_probabilities(self, masked_word, guessed_letters):
        """
        Enhanced probability calculation with trigram features
        """
        length = len(masked_word)

        # If no HMM for this length, use fallback
        if length not in self.hmm_models:
            return self._smart_fallback(guessed_letters)

        model = self.hmm_models[length]
        probs = np.zeros(self.vocab_size)
        position_scores = np.zeros(self.vocab_size)

        # Enhanced pattern analysis with trigrams
        for pos, char in enumerate(masked_word):
            if char == '_':
                # Emission probability
                position_scores += model['emission'][pos]

                # Bigram context
                if pos > 0 and masked_word[pos-1] != '_':
                    prev_char_idx = ord(masked_word[pos-1]) - ord('a')
                    position_scores += model['transition'][pos-1, prev_char_idx]

                # Trigram context
                if pos > 1 and masked_word[pos-2] != '_' and masked_word[pos-1] != '_':
                    char1_idx = ord(masked_word[pos-2]) - ord('a')
                    char2_idx = ord(masked_word[pos-1]) - ord('a')
                    position_scores += model['trigram_transition'][pos-2, char1_idx, char2_idx]

        pattern_scores = self._pattern_matching(masked_word, guessed_letters, length)

        freq_scores = np.array([self.letter_probs.get(chr(i + ord('a')), 0)
                               for i in range(self.vocab_size)])

        # Weighted combination of features
        probs = (0.25 * position_scores +
                 0.35 * pattern_scores +
                 0.20 * freq_scores +
                 0.20 * self._trigram_context_score(masked_word))

        probs = self._apply_guessed_letters_filter(probs, guessed_letters)

        return probs

    def _trigram_context_score(self, masked_word):
        """Calculate trigram-based context scores"""
        scores = np.zeros(self.vocab_size)
        length = len(masked_word)

        for i in range(length):
            if masked_word[i] == '_':
                # Look for trigram patterns
                if i >= 2 and masked_word[i-2] != '_' and masked_word[i-1] != '_':
                    char1 = masked_word[i-2]
                    char2 = masked_word[i-1]
                    for char3_idx in range(self.vocab_size):
                        char3 = chr(char3_idx + ord('a'))
                        if char1 in self.trigram_probs and char2 in self.trigram_probs[char1]:
                            scores[char3_idx] += self.trigram_probs[char1][char2].get(char3, 0)

                # Also look ahead
                if i < length - 2 and masked_word[i+1] != '_' and masked_word[i+2] != '_':
                    char2 = masked_word[i+1]
                    char3 = masked_word[i+2]
                    for char1_idx in range(self.vocab_size):
                        char1 = chr(char1_idx + ord('a'))
                        if char1 in self.trigram_probs and char2 in self.trigram_probs[char1]:
                            scores[char1_idx] += self.trigram_probs[char1][char2].get(char3, 0)

        if scores.sum() > 0:
            scores /= scores.sum()
        return scores

    def _pattern_matching(self, masked_word, guessed_letters, length):
        """Enhanced pattern matching with trigram consideration"""
        if length not in self.hmm_models:
            return np.ones(self.vocab_size) / self.vocab_size

        # Regex pattern
        pattern = masked_word.replace('_', '[a-z]')
        matching_words = [word for word in self.words
                         if len(word) == length and re.match(pattern, word)]

        if not matching_words:
            return np.ones(self.vocab_size) / self.vocab_size

        letter_counts = Counter()
        for word in matching_words:
            letter_counts.update(word)

        total = sum(letter_counts.values())
        pattern_scores = np.zeros(self.vocab_size)
        for i in range(self.vocab_size):
            letter = chr(i + ord('a'))
            pattern_scores[i] = letter_counts[letter] / total if total > 0 else 0

        return pattern_scores

    def _smart_fallback(self, guessed_letters):
        """Smart fallback using letter frequency and vowel/consonant patterns"""
        probs = np.array([self.letter_probs.get(chr(i + ord('a')), 0)
                         for i in range(self.vocab_size)])

        vowels = {'a', 'e', 'i', 'o', 'u'}
        guessed_vowels = len([l for l in guessed_letters if l in vowels])
        if guessed_vowels < 2:
            for i, letter in enumerate(self.alphabet):
                if letter in vowels:
                    probs[i] *= 2

        return self._apply_guessed_letters_filter(probs, guessed_letters)

    def _apply_guessed_letters_filter(self, probs, guessed_letters):
        """Set probability to 0 for already guessed letters and normalize"""
        for letter in guessed_letters:
            if letter in self.alphabet:
                idx = ord(letter) - ord('a')
                probs[idx] = 0

        if probs.sum() > 0:
            probs /= probs.sum()
        else:
            probs = np.ones(self.vocab_size)
            for letter in guessed_letters:
                if letter in self.alphabet:
                    idx = ord(letter) - ord('a')
                    probs[idx] = 0
            if probs.sum() > 0:
                probs /= probs.sum()
            else:
                probs = np.ones(self.vocab_size) / self.vocab_size

        return probs

    def analyze_corpus(self):
        """Comprehensive corpus analysis"""
        print("\n" + "="*50)
        print("CORPUS ANALYSIS")
        print("="*50)

        # Letter frequency
        print("\nTop 15 most common letters:")
        if self.letter_freq:
            for letter, count in self.letter_freq.most_common(15):
                print(f"{letter}: {count} ({count/len(''.join(self.words))*100:.2f}%)")
        else:
            print("Corpus not loaded, no frequencies to show.")

        # Word length distribution
        if self.words:
            word_lengths = [len(word) for word in self.words]
            print(f"\nWord length statistics:")
            print(f"Min: {min(word_lengths)}, Max: {max(word_lengths)}, Avg: {np.mean(word_lengths):.2f}")
        else:
            print("\nCorpus not loaded, no word length stats.")

        # HMM coverage
        print(f"\nHMM coverage: {len(self.hmm_models)} different word lengths")
        for length in sorted(self.hmm_models.keys()):
            count = self.hmm_models[length]['word_count']
            print(f"Length {length}: {count} words")

        return self.letter_freq

class HangmanEnvironment:
    """Single-word Hangman game exposed through a reset/step interface.

    The state vector has 42 entries: 15 board slots (letter index 0-25,
    26 for a hidden letter, 27 for padding), 26 guessed-letter flags and
    the fraction of allowed mistakes already used.
    """

    def __init__(self, word_list, max_wrong=6):
        """Store the word pool and mistake budget, then start a first game."""
        self.word_list = word_list
        self.max_wrong = max_wrong
        self.reset()

    def reset(self):
        """Pick a random target word, clear all progress, return the state."""
        secret = random.choice(self.word_list)
        self.target_word = secret
        self.masked_word = ['_' for _ in secret]
        self.guessed_letters = set()
        self.wrong_guesses = 0
        self.done = False
        self.reward = 0
        return self._get_state()

    def _get_state(self):
        """Encode board, guessed letters and mistake ratio as float32."""
        max_length = 15
        base = ord('a')

        # Words longer than max_length are truncated, shorter ones padded.
        board = [26 if ch == '_' else ord(ch) - base
                 for ch in self.masked_word[:max_length]]
        board.extend([27] * (max_length - len(board)))

        guessed_flags = [int(chr(code + base) in self.guessed_letters)
                         for code in range(26)]

        mistake_ratio = self.wrong_guesses / self.max_wrong
        return np.array(board + guessed_flags + [mistake_ratio], dtype=np.float32)

    def step(self, action, hmm_model):
        """Guess the letter with index ``action`` (0 = 'a').

        ``hmm_model`` is accepted for call compatibility and not used.

        Returns:
            ``(state, reward, done, info)``. Rewards: -2 for repeating a
            letter (``info == {"reason": "repeated"}``), +1 for a hit, -1
            for a miss, +10 when the word is completed, -5 when the mistake
            budget is exhausted.
        """
        letter = chr(action + ord('a'))

        if letter in self.guessed_letters:
            # A repeat changes nothing except the recorded reward.
            self.reward = -2
            return self._get_state(), -2, self.done, {"reason": "repeated"}

        self.guessed_letters.add(letter)

        if letter in self.target_word:
            reward = 1
            for index, char in enumerate(self.target_word):
                if char == letter:
                    self.masked_word[index] = letter
        else:
            reward = -1
            self.wrong_guesses += 1

        # Terminal outcomes override the per-guess reward.
        if '_' not in self.masked_word:
            self.done = True
            reward = 10
        elif self.wrong_guesses >= self.max_wrong:
            self.done = True
            reward = -5

        self.reward = reward
        return self._get_state(), reward, self.done, {}

class DQN(nn.Module):
    """Three-layer fully connected network used by the Hangman agent.

    The agent adds this network's output to the HMM letter probabilities,
    so the output plays the role of a per-action correction ("advantage").
    """

    def __init__(self, state_size, action_size, hidden_size=128):
        """Create two hidden layers of ``hidden_size`` units and an output layer."""
        super().__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, action_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of states to one value per action (no final activation)."""
        for hidden_layer in (self.fc1, self.fc2):
            x = self.relu(hidden_layer(x))
        return self.fc3(x)

class RLHangmanAgent:
    """RL Agent for Hangman - Now uses a truly hybrid Advantage-Learning model"""
    def __init__(self, state_size, action_size, hmm_model, lr=0.001, gamma=0.99):
        """Set up exploration schedule, replay memory, network and optimiser.

        Args:
            state_size: Length of the state vector fed to the network.
            action_size: Number of actions (one per letter).
            hmm_model: Object providing ``get_letter_probabilities``; used
                for exploration in ``act``.
            lr: Adam learning rate.
            gamma: Discount factor for the Bellman target.
        """
        self.hmm_model = hmm_model
        self.state_size, self.action_size = state_size, action_size
        self.gamma = gamma

        # Epsilon-greedy schedule; epsilon shrinks once per replay() call.
        self.epsilon, self.epsilon_min = 1.0, 0.01
        self.epsilon_decay = 0.9994

        self.batch_size = 32
        self.memory = deque(maxlen=20000)

        has_gpu = torch.cuda.is_available()
        self.device = torch.device("cuda" if has_gpu else "cpu")
        print(f"RL Agent using device: {self.device}")

        # Network output is added to the HMM probabilities to form Q-values.
        network = DQN(state_size, action_size).to(self.device)
        self.model = network
        self.optimizer = optim.Adam(network.parameters(), lr=lr)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        """Store experience in memory"""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state, masked_word, guessed_letters, verbose=False):
        """Choose action using epsilon-greedy policy"""

        def print_top_k(probs, k=5, title=""):
            print(f"    {title}:")
            indices = np.argsort(probs)[-k:][::-1]
            for i in indices:
                if probs[i] > -np.inf and probs[i] > 0.0001:
                    print(f"      {chr(i + ord('a'))}: {probs[i]:.4f}")

        # Exploration: Use HMM's probabilities to pick an action
        if np.random.random() <= self.epsilon:
            probs = self.hmm_model.get_letter_probabilities(masked_word, guessed_letters)
            if verbose:
                print("  -> Decision: EXPLORATION (HMM-guided random choice)")
                print_top_k(probs, k=5, title="HMM Probabilities")

            if probs.sum() > 0:
                return np.random.choice(self.action_size, p=probs)
            else: # Fallback if all probs are zero
                available_actions = [i for i in range(self.action_size) if chr(i + ord('a')) not in guessed_letters]
                return random.choice(available_actions) if available_actions else 0

        # Exploitation: Use Hybrid Q-Value
        else:
            # 1. Get HMM Probabilities (The "Baseline")
            # We use state[-26:] which is the hmm_probs we stored in the state vector
            hmm_probs = state[-26:]

            # 2. Get DQN Advantage (The "Correction")
            state_tensor = torch.from_numpy(state).float().unsqueeze(0).to(self.device)
            self.model.eval()
            with torch.no_grad():
                advantage_values = self.model(state_tensor).cpu().data.numpy()[0]
            self.model.train()

            # 3. Create the Hybrid Q-Value
            # Q(s,a) = P(a|s) + A(s,a)
            hybrid_q_values = hmm_probs + advantage_values

            if verbose:
                print("  -> Decision: EXPLOITATION (Hybrid Q-Value)")
                print_top_k(hmm_probs, k=5, title="HMM Probabilities (Baseline)")
                print_top_k(advantage_values, k=5, title="DQN Advantage (Correction)")
                print_top_k(hybrid_q_values, k=5, title="Hybrid Q-Value (Final)")

            # 4. Mask and choose the best action
            for i in range(self.action_size):
                if chr(i + ord('a')) in guessed_letters:
                    hybrid_q_values[i] = -np.inf

            return np.argmax(hybrid_q_values)

    def replay(self):
        """Train the model on remembered experiences"""
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)

        states = torch.from_numpy(np.array([e[0] for e in batch])).float().to(self.device)
        actions = torch.LongTensor([e[1] for e in batch]).to(self.device)
        rewards = torch.FloatTensor([e[2] for e in batch]).to(self.device)
        next_states = torch.from_numpy(np.array([e[3] for e in batch])).float().to(self.device)
        dones = torch.BoolTensor([e[4] for e in batch]).to(self.device)

        # --- Get Q-values for CURRENT state ---
        # 1. Get Advantage A(s,a) from the model
        current_advantage = self.model(states)
        # 2. Get HMM Probs P(a|s) from the state vector
        current_hmm_probs = states[:, -26:]
        # 3. Q(s,a) = P(a|s) + A(s,a)
        current_hybrid_q = current_hmm_probs + current_advantage
        # 4. Get the Q-value for the action that was actually taken
        current_q = current_hybrid_q.gather(1, actions.unsqueeze(1))

        # --- Get Q-values for NEXT state (for Bellman update) ---
        # 1. Get Advantage A(s',a) from the model
        next_advantage = self.model(next_states).detach()
        # 2. Get HMM Probs P(a|s') from the next_state vector
        next_hmm_probs = next_states[:, -26:]
        # 3. Q(s',a) = P(a|s') + A(s',a)
        next_hybrid_q = next_hmm_probs + next_advantage
        # 4. Find the max Q-value for the next state
        next_q = next_hybrid_q.max(1)[0]

        # --- Calculate Target and Loss ---
        target_q = rewards + (self.gamma * next_q * ~dones)

        loss = self.criterion(current_q.squeeze(), target_q)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

class HybridHangmanPlayer:
    """Hybrid player combining HMM and RL"""
    def __init__(self, hmm_model, use_rl=True):
        self.hmm_model = hmm_model
        self.use_rl = use_rl

        if use_rl:
            # **HYBRID STATE FIX**: State size = 15 (masked) + 26 (guessed) + 1 (wrong) + 26 (HMM probs)
            state_size = 68
            action_size = 26
            self.rl_agent = RLHangmanAgent(state_size, action_size, hmm_model)
            self.env = HangmanEnvironment(hmm_model.words)

        self.games_played = 0
        self.games_won = 0
        self.total_wrong_guesses = 0
        self.total_repeated_guesses = 0

    def train_rl(self, episodes=1000):
        """Train the RL agent"""
        if not self.use_rl or not hasattr(self, 'rl_agent'):
            print("RL agent not initialized. Skipping training.")
            return []

        print(f"Training RL agent for {episodes} episodes...")
        rewards = []

        for episode in range(episodes):
            env_state = self.env.reset()
            total_reward = 0
            done = False

            while not done:
                # Get current game state for HMM
                masked_word = ''.join(self.env.masked_word)
                guessed_letters = self.env.guessed_letters

                # **HYBRID STATE FIX**: Create the FULL hybrid state
                hmm_probs = self.hmm_model.get_letter_probabilities(masked_word, guessed_letters)
                full_state = np.concatenate((env_state, hmm_probs))

                # Pass verbose=False during training
                action = self.rl_agent.act(full_state, masked_word, guessed_letters, verbose=False)
                env_next_state, reward, done, info = self.env.step(action, self.hmm_model)

                if info.get("reason") == "repeated":
                    self.total_repeated_guesses += 1

                # **HYBRID STATE FIX**: Create the FULL next hybrid state
                next_masked_word = ''.join(self.env.masked_word)
                next_guessed_letters = self.env.guessed_letters
                next_hmm_probs = self.hmm_model.get_letter_probabilities(next_masked_word, next_guessed_letters)
                full_next_state = np.concatenate((env_next_state, next_hmm_probs))

                self.rl_agent.remember(full_state, action, reward, full_next_state, done)
                env_state = env_next_state # The env state for the next loop
                total_reward += reward

            self.rl_agent.replay()
            rewards.append(total_reward)

            if (episode+1) % 100 == 0:
                avg_reward = np.mean(rewards[-100:])
                print(f"Episode {episode+1}, Average Reward: {avg_reward:.2f}, Epsilon: {self.rl_agent.epsilon:.3f}")

        return rewards

    def play_game(self, target_word, max_wrong=6, verbose=False, use_rl=None):
        """Play a single game"""
        if use_rl is None:
            use_rl = self.use_rl

        # Reset internal game state for play_game
        masked_word = ['_'] * len(target_word)
        guessed_letters = set()
        wrong_guesses = 0
        repeated_guesses = 0
        steps = 0

        if verbose:
            print(f"\nTarget word: {target_word}")
            print(f"Starting game: {' '.join(masked_word)}")

        while wrong_guesses < max_wrong and '_' in masked_word:
            steps += 1
            letter = ''

            if use_rl and hasattr(self, 'rl_agent'):
                if verbose:
                    print(f"--- Step {steps} (Epsilon: {self.rl_agent.epsilon:.3f}) ---")

                # **HYBRID STATE FIX**: Get the full hybrid state
                state = self._get_rl_state(masked_word, guessed_letters, wrong_guesses, max_wrong)
                # Pass verbose flag to 'act' method for detailed logging
                action = self.rl_agent.act(state, ''.join(masked_word), guessed_letters, verbose=verbose)
                letter = chr(action + ord('a'))

                if verbose:
                    print(f"  -> Agent's Final Choice: '{letter}'")
            else:
                # Use HMM only
                probs = self.hmm_model.get_letter_probabilities(''.join(masked_word), guessed_letters)
                best_letter_idx = np.argmax(probs)
                letter = chr(best_letter_idx + ord('a'))

                if verbose:
                    top_choices = []
                    for i in np.argsort(probs)[-3:][::-1]:
                        if probs[i] > 0:
                            top_choices.append(f"{chr(i + ord('a'))}({probs[i]:.3f})")
                    print(f"Step {steps}: HMM guessing '{letter}' from {top_choices}")

            if letter in guessed_letters:
                repeated_guesses += 1
                if verbose:
                    print(f"  Repeated guess! ('{letter}')")
                continue

            guessed_letters.add(letter)

            if letter in target_word:
                # Reveal the letter
                for i, char in enumerate(target_word):
                    if char == letter:
                        masked_word[i] = letter
                if verbose:
                    print(f"  Correct! Word: {' '.join(masked_word)}")
            else:
                wrong_guesses += 1
                if verbose:
                    print(f"  Wrong! Mistakes: {wrong_guesses}/{max_wrong}")

        won = '_' not in masked_word
        if verbose:
            if won:
                print(f"Won in {steps} steps!")
            else:
                print(f" Lost! Word was: {target_word}")

        # Note: This tracks stats for the *class instance* if you were to reuse it.
        # The evaluate() method resets its own counters, which is correct.
        self.games_played += 1
        if won:
            self.games_won += 1
        self.total_wrong_guesses += wrong_guesses
        self.total_repeated_guesses += repeated_guesses

        return won, steps, wrong_guesses, repeated_guesses

    def _get_rl_state(self, masked_word, guessed_letters, wrong_guesses, max_wrong):
        """Get RL state representation (FULL HYBRID STATE)"""
        # --- Get Base Environment State ---
        masked_vec = [ord(c) - ord('a') if c != '_' else 26 for c in masked_word]
        guessed_vec = [1 if chr(i + ord('a')) in guessed_letters else 0 for i in range(26)]

        max_length = 15
        if len(masked_vec) < max_length:
            masked_vec += [27] * (max_length - len(masked_vec))
        else:
            masked_vec = masked_vec[:max_length]

        env_state = masked_vec + guessed_vec + [wrong_guesses / max_wrong]
        env_state_np = np.array(env_state, dtype=np.float32)

        # --- Get HMM Probability State ---
        hmm_probs = self.hmm_model.get_letter_probabilities(''.join(masked_word), guessed_letters)

        # --- Concatenate to create FULL HYBRID STATE ---
        full_state = np.concatenate((env_state_np, hmm_probs))
        return full_state

    def evaluate(self, word_list, num_games=1000, verbose=False, use_rl=None):
        """Evaluate the player"""
        if use_rl is None:
            use_rl = self.use_rl

        # Ensure RL agent is in evaluation mode (epsilon=0) for fair comparison
        original_epsilon = -1
        if use_rl and hasattr(self, 'rl_agent'):
            original_epsilon = self.rl_agent.epsilon
            self.rl_agent.epsilon = 0.0 # Pure exploitation
            print(f"\nEvaluating RL-Player (Epsilon=0) on {num_games} games...")
        else:
            print(f"\nEvaluating HMM-Player on {num_games} games...")

        wins = 0
        total_steps = 0
        total_wrong_guesses = 0
        total_repeated_guesses = 0

        test_words = random.sample(word_list, min(num_games, len(word_list)))

        for i, word in enumerate(test_words):
            if verbose and (i == 0 or (i+1) % 100 == 0):
                print(f"Played {i+1}/{len(test_words)} games...")

            won, steps, wrong_guesses, repeated_guesses = self.play_game(
                word, verbose=False, use_rl=use_rl
            )

            if won:
                wins += 1
            total_steps += steps
            total_wrong_guesses += wrong_guesses
            total_repeated_guesses += repeated_guesses

        num_games_played = len(test_words)
        success_rate = wins / num_games_played
        avg_steps = total_steps / num_games_played
        avg_wrong = total_wrong_guesses / num_games_played
        avg_repeated = total_repeated_guesses / num_games_played

        # Calculate final score according to the problem statement
        final_score = (success_rate * 2000) - (total_wrong_guesses * 5) - (total_repeated_guesses * 2)

        print(f"\n{'RL-' if use_rl else 'HMM-'}Player Results:")
        print(f"Success Rate: {success_rate:.3f} ({wins}/{num_games_played})")
        print(f"Average Steps per Game: {avg_steps:.2f}")
        print(f"Average Wrong Guesses: {avg_wrong:.2f}")
        print(f"Average Repeated Guesses: {avg_repeated:.2f}")
        print(f"Final Score: {final_score:.2f}")

        # Restore original epsilon if it was changed
        if original_epsilon != -1:
            self.rl_agent.epsilon = original_epsilon
            print(f"Restored RL Agent epsilon to {self.rl_agent.epsilon:.3f}")

        return success_rate, avg_steps, avg_wrong, avg_repeated, final_score

def demo_hybrid_solution():
    """Run the whole pipeline: load, train, demo and evaluate.

    Loads ``corpus.txt``, trains the per-length models, trains the RL agent
    for 10000 episodes, plays five verbose demo games with exploration
    switched off, and finally evaluates on ``test.txt`` (falling back to the
    training words when that file is missing).

    Returns:
        ``(hmm_solver, hybrid_player)``, or ``(None, None)`` if the corpus
        could not be loaded.
    """
    rule = "=" * 60

    def section(title):
        print("\n" + rule)
        print(title)
        print(rule)

    print("HYBRID HMM-RL SOLUTION")
    print(rule)

    # --- HMM ---
    hmm_solver = AdvancedHMMHangman('corpus.txt')
    hmm_solver.load_corpus()
    if not hmm_solver.words:
        print("Corpus loading failed. Exiting.")
        return None, None
    hmm_solver.train_hmm()
    hmm_solver.analyze_corpus()

    # --- RL on top of the HMM ---
    hybrid_player = HybridHangmanPlayer(hmm_solver, use_rl=True)

    section("TRAINING RL AGENT")
    rewards = hybrid_player.train_rl(episodes=10000)

    # --- Verbose demo games ---
    section("DETAILED RL-HYBRID DEMO")

    for word in ['python', 'hangman', 'computer', 'algorithm', 'machine']:
        print(f"\nPlaying with word: {word} (RL mode - Epsilon=0 for Demo)")
        has_agent = hasattr(hybrid_player, 'rl_agent')
        if has_agent:
            # Exploit only while demonstrating, then put epsilon back.
            original_epsilon = hybrid_player.rl_agent.epsilon
            hybrid_player.rl_agent.epsilon = 0.0

        hybrid_player.play_game(word, verbose=True, use_rl=True)

        if has_agent:
            hybrid_player.rl_agent.epsilon = original_epsilon

    # --- Evaluation ---
    section("COMPREHENSIVE EVALUATION")

    try:
        with open('test.txt', 'r') as f:
            stripped = [line.strip().lower() for line in f]
        test_words = [entry for entry in stripped if re.match('^[a-z]+$', entry)]
        print(f"Loaded {len(test_words)} test words")
    except FileNotFoundError:
        print("test.txt not found. Using training words for evaluation (not recommended).")
        test_words = hmm_solver.words

    if not test_words:
        print("No test words available. Skipping evaluation.")
        return hmm_solver, hybrid_player

    print("\nRL-Hybrid Evaluation (Epsilon=0.0):")
    rl_success, rl_steps, rl_wrong, rl_repeated, rl_score = hybrid_player.evaluate(
        test_words, num_games=2000, verbose=True, use_rl=True
    )

    section("FINAL RL-HYBRID AGENT SCORE")
    summary = (
        ('Success Rate', f"{rl_success:<10.3f}"),
        ('Avg Wrong Guesses', f"{rl_wrong:<10.2f}"),
        ('Avg Repeated Guesses', f"{rl_repeated:<10.2f}"),
        ('Final Score', f"{rl_score:<10.2f}"),
    )
    for label, value in summary:
        print(f"{label:<25} {value}")

    return hmm_solver, hybrid_player

# Run the complete solution when this cell/module is executed.
# demo_hybrid_solution() returns (None, None) if the corpus could not be
# loaded, so both names may be None afterwards.
hmm_solver, hybrid_player = demo_hybrid_solution()

HYBRID HMM-RL SOLUTION
Loading corpus...
Loaded 49979 valid words
Calculating bigram probabilities...
Bigram probabilities calculated
Calculating trigram probabilities...
Trigram probabilities calculated

Training Enhanced HMM models with trigrams...
Training HMM for word length 11 with 5452 words
Training HMM for word length 6 with 3755 words
Training HMM for word length 9 with 6787 words
Training HMM for word length 16 with 698 words
Training HMM for word length 14 with 2019 words
Training HMM for word length 10 with 6465 words
Training HMM for word length 8 with 6348 words
Training HMM for word length 12 with 4292 words
Training HMM for word length 13 with 3094 words
Training HMM for word length 5 with 2340 words
Training HMM for word length 18 with 174 words
Training HMM for word length 4 with 1169 words
Training HMM for word length 3 with 388 words
Training HMM for word length 7 with 5111 words
Training HMM for word length 15 with 1226 words
Training HMM for word length 17 with 37