<a href="https://colab.research.google.com/github/Niharika-Saha/ML-hackathon-Group-1/blob/main/HMM_Hybrid(CFP%2BCountbased)__rl_Q_table_DQN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install required packages
!pip install hmmlearn
!pip install numpy
!pip install matplotlib
!pip install seaborn
# Step 2: Import libraries
import numpy as np
import string
from collections import defaultdict, Counter
import matplotlib.pyplot as plt
import seaborn as sns
from hmmlearn import hmm
import itertools

In [None]:
# Step 3: Load and preprocess the corpus
def load_and_preprocess_corpus(file_path):
    """
    Load a one-word-per-line corpus and group its words by length.

    Each non-blank line is stripped and upper-cased. Only words made up
    entirely of the 26 ASCII letters A-Z are kept: ``str.isalpha()`` alone
    also accepts accented / non-Latin letters, which the A-Z index mapping
    built from ``string.ascii_uppercase`` later in this file cannot encode.

    Parameters:
    - file_path: Path to the corpus text file (decoded as UTF-8)

    Returns:
    - words: List of kept words in file order (duplicates preserved)
    - words_by_length: defaultdict(list) mapping word length -> list of words
    """
    allowed_letters = frozenset(string.ascii_uppercase)

    # Explicit encoding so the result does not depend on the platform default.
    # Undecodable bytes become U+FFFD, which the A-Z filter below then drops
    # together with the word that contained them.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        candidates = [line.strip().upper() for line in f if line.strip()]

    # Keep only words spelled exclusively with A-Z
    words = [word for word in candidates if allowed_letters.issuperset(word)]

    # Group words by length
    words_by_length = defaultdict(list)
    for word in words:
        words_by_length[len(word)].append(word)

    print(f"Total words after preprocessing: {len(words)}")
    print("Word length distribution:")
    for length in sorted(words_by_length):
        print(f"  Length {length}: {len(words_by_length[length])} words")

    return words, words_by_length

# Load the corpus (upload your corpus.txt first)
# `words` is the flat list of kept words; `words_by_length` holds the same
# words grouped by their length. Both are reused by the cells below.
words, words_by_length = load_and_preprocess_corpus('corpus.txt')

Total words after preprocessing: 49979
Word length distribution:
  Length 1: 46 words
  Length 2: 84 words
  Length 3: 388 words
  Length 4: 1169 words
  Length 5: 2340 words
  Length 6: 3755 words
  Length 7: 5111 words
  Length 8: 6348 words
  Length 9: 6787 words
  Length 10: 6465 words
  Length 11: 5452 words
  Length 12: 4292 words
  Length 13: 3094 words
  Length 14: 2019 words
  Length 15: 1226 words
  Length 16: 698 words
  Length 17: 375 words
  Length 18: 174 words
  Length 19: 88 words
  Length 20: 40 words
  Length 21: 16 words
  Length 22: 8 words
  Length 23: 3 words
  Length 24: 1 words


In [None]:
# Step 4: Analyze letter frequencies and patterns
def analyze_corpus(words_by_length):
    """
    Summarise how letters are distributed across the corpus.

    Prints the ten most common letters with their share of all letters and
    returns two tallies:
    - letter_freq: Counter of every letter over all words
    - positional_freq: nested defaultdict, word length -> {(position, letter): count}
    """
    # Flatten every word of every length bucket into one long string
    corpus_text = ''.join(''.join(group) for group in words_by_length.values())
    letter_freq = Counter(corpus_text)

    print("Overall letter frequency (top 10):")
    for letter, freq in letter_freq.most_common(10):
        share = freq/len(corpus_text)*100
        print(f"  {letter}: {freq} ({share:.2f}%)")

    # Per-length tally of which letter appears at which position
    positional_freq = defaultdict(lambda: defaultdict(int))
    for length, group in words_by_length.items():
        for word in group:
            for slot in enumerate(word):  # slot == (position, letter)
                positional_freq[length][slot] += 1

    return letter_freq, positional_freq

# Compute overall and per-position letter tallies for the loaded corpus
# (also prints the ten most frequent letters).
letter_freq, positional_freq = analyze_corpus(words_by_length)

Overall letter frequency (top 10):
  E: 49203 (10.37%)
  A: 42089 (8.87%)
  I: 42047 (8.86%)
  O: 35808 (7.54%)
  R: 33577 (7.07%)
  N: 33314 (7.02%)
  T: 32191 (6.78%)
  S: 29044 (6.12%)
  L: 27406 (5.77%)
  C: 21718 (4.58%)


In [None]:
# Step 5: Prepare HMM training data for each word length
def prepare_hmm_training_data(words_by_length, min_words_threshold=5):
    """
    Turn each length bucket of words into an integer matrix for HMM training.

    Every letter is mapped to its alphabet index (A=0 ... Z=25), so a bucket
    of N words of length L becomes an N x L numpy array. Buckets holding
    fewer than `min_words_threshold` words are reported and left out.

    Parameters:
    - words_by_length: Dictionary of words grouped by length
    - min_words_threshold: Minimum number of words a bucket needs to be kept

    Returns:
    - training_data: dict mapping word length -> numpy array of index sequences
    - letter_to_idx: dict mapping each uppercase letter to its index
    """
    alphabet = string.ascii_uppercase
    letter_to_idx = dict(zip(alphabet, range(len(alphabet))))

    # Debug output: which lengths exist, and are the rare long ones present?
    print(f"Available word lengths: {sorted(words_by_length.keys())}")
    has_22, has_23, has_24 = (n in words_by_length for n in (22, 23, 24))
    print(f"Lengths 22, 23, 24 present: {has_22}, {has_23}, {has_24}")

    training_data = {}
    for length, word_list in words_by_length.items():
        word_count = len(word_list)
        if word_count < min_words_threshold:
            print(f"Skipping length {length}: only {word_count} words (need at least {min_words_threshold})")
            continue

        # One row of alphabet indices per word
        encoded = [[letter_to_idx[char] for char in word] for word in word_list]
        training_data[length] = np.array(encoded)

        print(f"Length {length}: {len(encoded)} sequences")

    # Explicit report on the three longest lengths (22, 23, 24)
    for length in (22, 23, 24):
        if length in training_data:
            print(f"‚úì INCLUDED - Length {length}: {len(training_data[length])} sequences")
        elif length in words_by_length:
            print(f"‚úó EXCLUDED - Length {length}: {len(words_by_length[length])} words (below threshold)")
        else:
            print(f"‚úó NOT FOUND - Length {length}: No words in corpus")

    return training_data, letter_to_idx

# Run with lower threshold to include more word lengths
# (a threshold of 1 keeps every non-empty length bucket, overriding the
# function's default of 5).
training_data, letter_to_idx = prepare_hmm_training_data(words_by_length, min_words_threshold=1)

Available word lengths: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
Lengths 22, 23, 24 present: True, True, True
Length 11: 5452 sequences
Length 6: 3755 sequences
Length 9: 6787 sequences
Length 16: 698 sequences
Length 14: 2019 sequences
Length 10: 6465 sequences
Length 8: 6348 sequences
Length 12: 4292 sequences
Length 13: 3094 sequences
Length 5: 2340 sequences
Length 18: 174 sequences
Length 4: 1169 sequences
Length 3: 388 sequences
Length 7: 5111 sequences
Length 15: 1226 sequences
Length 17: 375 sequences
Length 22: 8 sequences
Length 19: 88 sequences
Length 2: 84 sequences
Length 1: 46 sequences
Length 20: 40 sequences
Length 21: 16 sequences
Length 23: 3 sequences
Length 24: 1 sequences
‚úì INCLUDED - Length 22: 8 sequences
‚úì INCLUDED - Length 23: 3 sequences
‚úì INCLUDED - Length 24: 1 sequences


In [None]:
# Step 6: HYBRID CFP + Count-based HMM Training
class HybridHangmanModel:
    """
    Blend of a candidate-filter oracle (CFP) and per-length count-based HMMs.

    For a masked word, both sub-models produce a probability per unguessed
    letter; the two are mixed with a weight that shifts towards the CFP the
    more confident the CFP is.
    """
    def __init__(self, corpus_words, training_data, letter_to_idx, alpha=0.3):
        """
        Parameters:
        - corpus_words: Words used to build the candidate-filter oracle
        - training_data: dict word length -> index sequences for HMM training
        - letter_to_idx: dict mapping uppercase letters to indices 0-25
        - alpha: Maximum HMM weight (0-1); the CFP gets the remainder
        """
        self.cfp = CandidateFilterOracle(corpus_words)
        self.count_hmms = train_count_hmm_models(training_data)
        self.letter_to_idx = letter_to_idx
        self.letters = string.ascii_uppercase
        self.alpha = alpha  # Weight for HMM (0-1), CFP weight = 1-alpha

        print(f"üéØ Hybrid Model Initialized: CFP weight={1-alpha:.1f}, HMM weight={alpha:.1f}")

    def get_letter_probabilities(self, masked_word, guessed_letters):
        """
        Combine CFP and count-based HMM probabilities.

        Parameters:
        - masked_word: Current word with '_' for unrevealed positions
        - guessed_letters: Collection of letters already guessed

        Returns a dict letter -> probability over unguessed letters. Falls
        back to the HMM alone when the CFP has nothing to offer, and to the
        CFP alone when the HMM produced no usable distribution.
        """
        # Get CFP probabilities (high accuracy)
        cfp_probs = self.cfp.get_letter_probabilities(masked_word, guessed_letters)

        # Get HMM probabilities (good generalization)
        hmm_probs = self._get_hmm_probabilities(masked_word, guessed_letters)

        # If CFP has no candidates, rely on HMM
        if not cfp_probs:
            return hmm_probs

        # The CFP's top probability doubles as its confidence; it is the same
        # for every letter, so compute it once instead of once per letter.
        cfp_confidence = max(cfp_probs.values())
        if cfp_confidence == 0:
            return hmm_probs

        # If HMM failed, rely on CFP
        if not hmm_probs:
            return cfp_probs

        # Dynamic weighting: the more confident the CFP, the less HMM weight
        dynamic_alpha = self.alpha * (1 - cfp_confidence)

        hybrid_probs = {
            letter: (1 - dynamic_alpha) * cfp_probs.get(letter, 0)
            + dynamic_alpha * hmm_probs.get(letter, 0)
            for letter in self.letters
            if letter not in guessed_letters
        }

        # Normalize
        total = sum(hybrid_probs.values())
        if total > 0:
            hybrid_probs = {letter: prob/total for letter, prob in hybrid_probs.items()}

        return hybrid_probs

    def _get_hmm_probabilities(self, masked_word, guessed_letters):
        """
        Get letter probabilities from the count-based HMM for this word length.

        Sums the HMM posteriors over all blank positions, zeroes out letters
        that were already guessed and normalises. Returns {} when no model
        exists for the word length, when inference fails, or when the
        posteriors contain NaN/inf (so the caller can fall back to the CFP
        instead of propagating non-finite values).
        """
        model = self.count_hmms.get(len(masked_word))
        if model is None:
            return {}

        # Observations: -1 marks a missing (blank) position
        observations = [
            -1 if char == '_' else self.letter_to_idx[char]
            for char in masked_word
        ]

        try:
            # Get posteriors using count-based HMM
            posteriors = np.asarray(model.forward_backward(observations), dtype=float)

            # Aggregate probabilities over blank positions in one step
            blank_mask = np.array([char == '_' for char in masked_word], dtype=bool)
            letter_probs = posteriors[blank_mask].sum(axis=0)

            # Letters that were already guessed cannot be proposed again
            available = np.array(
                [self.idx_to_letter(i) not in guessed_letters for i in range(26)],
                dtype=bool,
            )
            letter_probs = np.where(available, letter_probs, 0.0)

            if not np.all(np.isfinite(letter_probs)):
                return {}

            # Normalize
            total = letter_probs.sum()
            if total > 0:
                letter_probs = letter_probs / total

            return {
                self.idx_to_letter(i): float(letter_probs[i])
                for i in range(26)
                if available[i]
            }

        except Exception:
            # Deliberate best-effort: any inference failure means "no HMM
            # opinion", which the caller handles by using the CFP alone.
            return {}

    def idx_to_letter(self, idx):
        """Convert index to letter (0 -> 'A', ..., 25 -> 'Z')"""
        return chr(65 + idx)

# COUNT-BASED HMM IMPLEMENTATION (from previous)
class CountBasedHMM:
    """
    26-state HMM over letters whose parameters are estimated by counting.

    Hidden states and observed symbols are both alphabet indices 0-25.
    Initial, transition and emission tables are smoothed counts normalised
    to probabilities; an observation of -1 means "unknown letter here".
    """
    def __init__(self):
        self.transition_probs = None
        self.emission_probs = None
        self.initial_probs = None

    def train(self, sequences, alpha=0.1):
        """
        Estimate the probability tables from index sequences.

        Parameters:
        - sequences: Iterable of sequences of alphabet indices (0-25)
        - alpha: Additive smoothing applied to every count cell
        """
        n_states = 26

        # Initialize counts with smoothing
        transition_counts = np.full((n_states, n_states), alpha, dtype=float)
        emission_counts = np.full((n_states, n_states), alpha, dtype=float)
        initial_counts = np.full(n_states, alpha, dtype=float)

        # Count transitions and emissions
        for seq in sequences:
            if len(seq) == 0:
                continue

            initial_counts[seq[0]] += 1

            # Each letter "emits" itself, so only the diagonal gains counts
            for symbol in seq:
                emission_counts[symbol, symbol] += 1

            for current, following in zip(seq, seq[1:]):
                transition_counts[current, following] += 1

        # Convert to probabilities
        self.initial_probs = initial_counts / np.sum(initial_counts)
        self.transition_probs = transition_counts / np.sum(transition_counts, axis=1, keepdims=True)
        self.emission_probs = emission_counts / np.sum(emission_counts, axis=1, keepdims=True)

    def forward_backward(self, observations):
        """
        Compute per-position state posteriors with the forward-backward algorithm.

        Parameters:
        - observations: Sequence of alphabet indices, with -1 for unknown

        Returns an array of shape (len(observations), n_states) whose rows
        are normalised to sum to 1. A row whose unnormalised mass is zero is
        returned as all zeros rather than NaN.
        """
        obs = np.asarray(observations, dtype=int)
        n_positions = obs.shape[0]
        n_states = self.initial_probs.shape[0]

        # Emission likelihood of each state at each position: 1 where the
        # letter is unknown, otherwise the emission column of the seen letter.
        emit = np.ones((n_positions, n_states))
        observed = obs != -1
        emit[observed] = self.emission_probs[:, obs[observed]].T

        # Forward pass
        forward = np.zeros((n_positions, n_states))
        forward[0] = self.initial_probs * emit[0]
        for t in range(1, n_positions):
            forward[t] = (forward[t - 1] @ self.transition_probs) * emit[t]

        # Backward pass
        backward = np.ones((n_positions, n_states))
        for t in range(n_positions - 2, -1, -1):
            backward[t] = self.transition_probs @ (emit[t + 1] * backward[t + 1])

        # Combine and normalise each row, leaving zero-mass rows at zero
        posteriors = forward * backward
        row_mass = np.sum(posteriors, axis=1, keepdims=True)
        return np.divide(
            posteriors,
            row_mass,
            out=np.zeros_like(posteriors),
            where=row_mass > 0,
        )


def train_count_hmm_models(training_data):
    """
    Fit one CountBasedHMM per word length.

    `training_data` maps word length -> index sequences; lengths with no
    sequences are skipped. Returns a dict word length -> trained model.
    """
    print("üöÄ Training Count-based HMMs for Hybrid Model...")

    trained = {}
    for length, sequences in training_data.items():
        n_sequences = len(sequences)
        if n_sequences >= 1:
            print(f"   üìä Length {length}: {n_sequences} sequences")
            hmm_for_length = CountBasedHMM()
            hmm_for_length.train(sequences, alpha=0.1)
            trained[length] = hmm_for_length

    print(f"‚úÖ Trained {len(trained)} count-based HMMs")
    return trained

# CFP ORACLE (from previous)
class CandidateFilterOracle:
    """
    Letter predictor that filters the corpus down to words still consistent
    with the current Hangman board and counts the letters hiding in blanks.
    """
    def __init__(self, corpus_words):
        """Index the alphabetic corpus words (upper-cased) by length."""
        self.by_len = defaultdict(list)
        for word in corpus_words:
            w = word.strip().upper()
            if w and w.isalpha():
                self.by_len[len(w)].append(w)

    @staticmethod
    def matches_mask(word, mask, wrong_letters, revealed_letters=None):
        """
        Return True when `word` could still be the hidden word.

        Parameters:
        - word: Candidate word
        - mask: Board state, '_' for unrevealed positions
        - wrong_letters: Guessed letters known not to be in the word
        - revealed_letters: Optional precomputed set of letters visible in
          `mask`; derived from `mask` when omitted

        A guess reveals every occurrence of its letter, so a candidate that
        carries an already-revealed letter under a blank is impossible and
        is rejected as well.
        """
        if len(word) != len(mask):
            return False
        if any(letter in word for letter in wrong_letters):
            return False
        if revealed_letters is None:
            revealed_letters = {char for char in mask if char != '_'}
        for wc, mc in zip(word, mask):
            if mc == '_':
                if wc in revealed_letters:
                    return False
            elif wc != mc:
                return False
        return True

    def get_letter_probabilities(self, masked_word, guessed_letters):
        """
        Probability per unguessed letter of filling one of the blanks.

        Counts, over all consistent candidates, the letters found at blank
        positions and normalises over the unguessed letters. Falls back to a
        uniform distribution over unguessed letters when there are no
        candidates or nothing to count.
        """
        guessed = set(guessed_letters)
        revealed_letters = {char for char in masked_word if char != '_'}
        wrong_letters = guessed - revealed_letters

        # Get candidate words
        candidates = [
            word for word in self.by_len.get(len(masked_word), [])
            if self.matches_mask(word, masked_word, wrong_letters, revealed_letters)
        ]

        if not candidates:
            return self._fallback_probs(guessed)

        # Count letter frequencies in blank positions
        blank_positions = [i for i, char in enumerate(masked_word) if char == '_']
        letter_counts = Counter(
            word[pos] for word in candidates for pos in blank_positions
        )

        # Normalise over the letters that may still be guessed
        available = [l for l in string.ascii_uppercase if l not in guessed]
        total_count = sum(letter_counts[letter] for letter in available)
        if total_count <= 0:
            return self._fallback_probs(guessed)

        return {letter: letter_counts[letter] / total_count for letter in available}

    def _fallback_probs(self, guessed_letters):
        """Uniform distribution over the letters not yet guessed."""
        available = [l for l in string.ascii_uppercase if l not in guessed_letters]
        prob = 1.0 / len(available) if available else 0
        return {letter: prob for letter in available}

# üöÄ INITIALIZE HYBRID MODEL
# Builds the CFP oracle from `words` and trains one count-based HMM per
# word length in `training_data` (both happen inside the constructor).
print("üéØ Initializing Hybrid CFP + Count-based HMM Model...")
hybrid_model = HybridHangmanModel(words, training_data, letter_to_idx, alpha=0.3)

print("\n" + "="*60)
print("üèÜ HYBRID MODEL READY!")
print("="*60)
print("‚úÖ CFP Oracle: Word-based candidate filtering")
print("‚úÖ Count-based HMM: Statistical pattern learning")
print("‚úÖ Dynamic weighting: Adaptive confidence-based blending")
print("‚úÖ All word lengths supported")
# NOTE(review): the 70/30 figures are hard-coded and only match alpha=0.3
# above; they will not follow a change to `alpha`.
print(f"‚úÖ Initial blend: {70}% CFP + {30}% HMM")

# Update your main HMM reference to use the hybrid model
# (`hangman_hmm` is the name the RL agent set-up below passes as its HMM model)
hangman_hmm = hybrid_model

print("\nüéØ Ready for evaluation with 2000 games!")

üéØ Initializing Hybrid CFP + Count-based HMM Model...
üöÄ Training Count-based HMMs for Hybrid Model...
   üìä Length 11: 5452 sequences
   üìä Length 6: 3755 sequences
   üìä Length 9: 6787 sequences
   üìä Length 16: 698 sequences
   üìä Length 14: 2019 sequences
   üìä Length 10: 6465 sequences
   üìä Length 8: 6348 sequences
   üìä Length 12: 4292 sequences
   üìä Length 13: 3094 sequences
   üìä Length 5: 2340 sequences
   üìä Length 18: 174 sequences
   üìä Length 4: 1169 sequences
   üìä Length 3: 388 sequences
   üìä Length 7: 5111 sequences
   üìä Length 15: 1226 sequences
   üìä Length 17: 375 sequences
   üìä Length 22: 8 sequences
   üìä Length 19: 88 sequences
   üìä Length 2: 84 sequences
   üìä Length 1: 46 sequences
   üìä Length 20: 40 sequences
   üìä Length 21: 16 sequences
   üìä Length 23: 3 sequences
   üìä Length 24: 1 sequences
‚úÖ Trained 24 count-based HMMs
üéØ Hybrid Model Initialized: CFP weight=0.7, HMM weight=0.3

üèÜ HYBRID MO

In [None]:
# Step 9: Reinforcement Learning Agent for Hangman
import numpy as np
import random
from collections import defaultdict, deque
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

class HangmanEnvironment:
    """
    Hangman Game Environment for RL Agent

    Holds one target word and tracks the board, the guessed letters and the
    number of wrong guesses. `step` applies a single-letter guess and
    returns a shaped reward.
    """
    def __init__(self, target_word, max_wrong_guesses=6):
        """
        Parameters:
        - target_word: Word to guess (stored upper-cased)
        - max_wrong_guesses: Wrong guesses allowed before the game is lost
        """
        self.target_word = target_word.upper()
        self.max_wrong_guesses = max_wrong_guesses
        self.reset()

    def reset(self):
        """Reset the environment to initial state and return that state"""
        self.masked_word = ['_'] * len(self.target_word)
        self.guessed_letters = set()
        self.wrong_guesses = 0
        self.revealed_letters = set()
        self.done = False
        self.won = False

        return self.get_state()

    def get_state(self):
        """Get current state representation (sets are copied, not shared)"""
        return {
            'masked_word': ''.join(self.masked_word),
            'guessed_letters': self.guessed_letters.copy(),
            'wrong_guesses': self.wrong_guesses,
            'max_wrong_guesses': self.max_wrong_guesses,
            'revealed_letters': self.revealed_letters.copy(),
            'word_length': len(self.target_word)
        }

    def step(self, action_letter, hmm_probabilities=None):
        """
        Execute an action (guess a letter)

        Parameters:
        - action_letter: The single letter to guess (case-insensitive)
        - hmm_probabilities: Optional dict letter -> probability; when the
          guessed letter is present, 0.5 * its probability is added to the
          reward

        Returns: next_state, reward, done, info

        Reward scheme:
        - repeated guess: -2, state unchanged
        - correct guess: +1, +0.5 per revealed position, +10 if it completes
          the word
        - wrong guess: -1
        - win: additional +20; loss: additional -10

        Raises:
        - RuntimeError: if the game has already ended
        - ValueError: if the action is not exactly one character
        """
        if self.done:
            raise RuntimeError("Game has already ended")

        letter = action_letter.upper()
        # An empty or multi-character action would pass the substring test
        # below and be scored as a correct guess without revealing anything.
        if len(letter) != 1:
            raise ValueError(f"Expected a single letter, got {action_letter!r}")

        reward = 0
        info = {}

        # Check for repeated guess
        if letter in self.guessed_letters:
            reward = -2  # Penalty for repeated guess
            info['repeated'] = True
            info['correct'] = False
            return self.get_state(), reward, self.done, info

        # Add to guessed letters
        self.guessed_letters.add(letter)

        # Check if letter is in target word
        if letter in self.target_word:
            # Reveal every occurrence of the letter
            positions_found = [
                i for i, char in enumerate(self.target_word) if char == letter
            ]
            for i in positions_found:
                self.masked_word[i] = letter

            self.revealed_letters.add(letter)
            completed = '_' not in self.masked_word

            # Reward for correct guess
            base_reward = 1.0
            # Bonus for revealing multiple positions
            position_bonus = 0.5 * len(positions_found)
            # Bonus if this completes the word
            completion_bonus = 10.0 if completed else 0

            reward = base_reward + position_bonus + completion_bonus
            info['correct'] = True
            info['positions_found'] = positions_found
            info['completion'] = completed

        else:
            # Wrong guess penalty
            self.wrong_guesses += 1
            reward = -1.0
            info['correct'] = False
            info['wrong_guesses_remaining'] = self.max_wrong_guesses - self.wrong_guesses

        # Check game termination
        if '_' not in self.masked_word:
            self.done = True
            self.won = True
            reward += 20.0  # Big bonus for winning
            info['termination'] = 'win'
        elif self.wrong_guesses >= self.max_wrong_guesses:
            self.done = True
            self.won = False
            reward -= 10.0  # Big penalty for losing
            info['termination'] = 'loss'
        else:
            info['termination'] = 'continue'

        # Add HMM confidence bonus if probabilities are provided
        if hmm_probabilities is not None and letter in hmm_probabilities:
            hmm_confidence = hmm_probabilities[letter]
            confidence_bonus = 0.5 * hmm_confidence
            reward += confidence_bonus
            info['hmm_confidence'] = hmm_confidence

        return self.get_state(), reward, self.done, info

    def render(self):
        """Display current game state"""
        print(f"Word: {' '.join(self.masked_word)}")
        print(f"Guessed: {''.join(sorted(self.guessed_letters))}")
        print(f"Wrong guesses: {self.wrong_guesses}/{self.max_wrong_guesses}")
        print(f"Status: {'WON' if self.won else 'LOST' if self.done else 'IN PROGRESS'}")

class StateEncoder:
    """
    Turns a Hangman state dict into a fixed-size float32 feature vector.
    """
    def __init__(self, word_length=15):
        self.word_length = word_length
        self.letters = string.ascii_uppercase
        self.letter_to_idx = dict(zip(self.letters, range(len(self.letters))))

    def encode(self, state, hmm_probs=None):
        """
        Build the feature vector for `state`.

        Layout, in order:
        - board: word_length slots x 27 one-hot (26 letters + "unknown"),
          zero-padded for short words and cut off for longer ones
        - guessed-letter flags (26)
        - wrong guesses divided by the allowed maximum (1)
        - HMM probability per letter, zeros when none are given (26)
        - word length divided by 20 (1)
        """
        masked_word = state['masked_word']
        slots = self.word_length

        # 1. Board: one 27-wide one-hot row per slot; column 26 flags "unknown"
        board = np.zeros((slots, 27), dtype=np.float32)
        for pos, char in enumerate(masked_word):
            column = 26 if char == '_' else self.letter_to_idx[char]
            if pos < slots:  # positions past the fixed width are dropped
                board[pos, column] = 1.0

        # 2. Which letters have been guessed so far
        guessed_flags = np.zeros(26, dtype=np.float32)
        for letter in state['guessed_letters']:
            idx = self.letter_to_idx.get(letter)
            if idx is not None:
                guessed_flags[idx] = 1.0

        # 3. Fraction of the wrong-guess budget already used
        wrong_ratio = state['wrong_guesses'] / state['max_wrong_guesses']

        # 4. HMM probabilities in alphabet order
        if hmm_probs is None:
            hmm_vector = [0.0] * 26
        else:
            hmm_vector = [hmm_probs.get(letter, 0.0) for letter in self.letters]

        # 5. Word length, scaled by an assumed maximum of 20
        length_ratio = len(masked_word) / 20.0

        pieces = [board.ravel(), guessed_flags, [wrong_ratio], hmm_vector, [length_ratio]]
        return np.concatenate(pieces).astype(np.float32)

class DQN(nn.Module):
    """
    Deep Q-Network for Hangman RL Agent

    A fully connected network: each hidden layer is Linear -> ReLU ->
    Dropout(0.2), followed by a final Linear layer producing one value per
    output.
    """
    def __init__(self, input_dim, output_dim, hidden_dims=(512, 256, 128)):
        """
        Parameters:
        - input_dim: Size of the input feature vector
        - output_dim: Number of outputs
        - hidden_dims: Sizes of the hidden layers, in order (any sequence;
          the default is an immutable tuple so it cannot be shared/mutated
          across instances)
        """
        super().__init__()

        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.extend((
                nn.Linear(prev_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2),
            ))
            prev_dim = hidden_dim

        layers.append(nn.Linear(prev_dim, output_dim))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run `x` through the network and return its outputs."""
        return self.network(x)

class HangmanRLAgent:
    """
    Reinforcement Learning Agent for Hangman

    A DQN agent with an epsilon-greedy policy, a replay memory and a
    periodically synchronised target network. HMM letter probabilities are
    part of the encoded state.
    """
    def __init__(self, state_encoder, hmm_model, learning_rate=0.001, gamma=0.99):
        """
        Parameters:
        - state_encoder: Object providing `word_length`, `letter_to_idx` and
          `encode(state, hmm_probs)`
        - hmm_model: Object providing
          `get_letter_probabilities(masked_word, guessed_letters)`
        - learning_rate: Adam learning rate for the policy network
        - gamma: Discount factor for future rewards
        """
        self.state_encoder = state_encoder
        self.hmm_model = hmm_model
        self.gamma = gamma

        # Input layout: board one-hot + guessed flags + wrong-guess ratio
        # + HMM probabilities + word-length ratio
        self.input_dim = (state_encoder.word_length * 27) + 26 + 1 + 26 + 1
        self.output_dim = 26  # One output for each possible letter

        # Neural networks
        self.policy_net = DQN(self.input_dim, self.output_dim)
        self.target_net = DQN(self.input_dim, self.output_dim)
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)

        # RL parameters
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 64
        self.memory = deque(maxlen=10000)
        self.update_target_every = 1000
        self.steps_done = 0

        # Initialize target network
        self.update_target_network()
        # The target network is only ever used for inference; eval mode
        # switches its dropout layers off so bootstrap targets are not noisy.
        self.target_net.eval()

    def update_target_network(self):
        """Update target network with policy network weights"""
        self.target_net.load_state_dict(self.policy_net.state_dict())

    def get_action(self, state, training=True):
        """
        Choose action using epsilon-greedy policy

        Returns a letter not yet guessed, or None when every letter has been
        guessed. With `training=True` a random available letter is returned
        with probability epsilon; otherwise the available letter with the
        highest Q-value is returned.
        """
        available_letters = [l for l in string.ascii_uppercase if l not in state['guessed_letters']]

        if not available_letters:
            return None  # No available actions

        # Explore: choose random available letter. Checked first so the
        # HMM inference is skipped when its result would go unused.
        if training and random.random() < self.epsilon:
            return random.choice(available_letters)

        # Exploit: choose best action according to Q-network
        hmm_probs = self.hmm_model.get_letter_probabilities(
            state['masked_word'], state['guessed_letters']
        )
        state_tensor = self._state_to_tensor(state, hmm_probs)

        # Evaluate with dropout disabled so the greedy choice is
        # deterministic, then restore whatever mode the network was in.
        was_training = self.policy_net.training
        self.policy_net.eval()
        try:
            with torch.no_grad():
                q_values = self.policy_net(state_tensor)
        finally:
            self.policy_net.train(was_training)

        # Only letters that can still be guessed compete for the maximum
        q_by_letter = {
            letter: q_values[0, self.state_encoder.letter_to_idx[letter]].item()
            for letter in available_letters
        }
        return max(q_by_letter, key=q_by_letter.get)

    def _state_to_tensor(self, state, hmm_probs):
        """Convert state to a (1, input_dim) tensor for the neural network"""
        state_features = self.state_encoder.encode(state, hmm_probs)
        return torch.FloatTensor(state_features).unsqueeze(0)

    def remember(self, state, action, reward, next_state, done, hmm_probs):
        """
        Store experience in replay memory

        Both states are stored already encoded; the HMM probabilities for
        `next_state` are computed here.
        """
        state_features = self.state_encoder.encode(state, hmm_probs)

        # Get next state HMM probabilities
        next_hmm_probs = self.hmm_model.get_letter_probabilities(
            next_state['masked_word'], next_state['guessed_letters']
        )
        next_state_features = self.state_encoder.encode(next_state, next_hmm_probs)

        action_idx = self.state_encoder.letter_to_idx[action]

        self.memory.append((
            state_features,
            action_idx,
            reward,
            next_state_features,
            done
        ))

    def replay(self):
        """
        Train on batch from replay memory

        Returns the loss value, or None when the memory does not yet hold a
        full batch. Each call also decays epsilon and counts towards the
        target-network update interval.
        """
        if len(self.memory) < self.batch_size:
            return

        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.FloatTensor(np.array(states))
        actions = torch.LongTensor(actions).unsqueeze(1)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(np.array(next_states))
        dones = torch.BoolTensor(dones)

        # Current Q values; squeeze only the action axis so a batch of one
        # keeps its batch dimension
        current_q_values = self.policy_net(states).gather(1, actions).squeeze(1)

        # Next Q values from target network
        with torch.no_grad():
            next_q_values = self.target_net(next_states).max(1)[0]
            next_q_values[dones] = 0.0  # No future rewards for terminal states

        # Target Q values
        target_q_values = rewards + self.gamma * next_q_values

        # Compute loss
        loss = F.mse_loss(current_q_values, target_q_values)

        # Optimize
        self.optimizer.zero_grad()
        loss.backward()
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
        self.optimizer.step()

        # Update exploration rate
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        # Update target network
        self.steps_done += 1
        if self.steps_done % self.update_target_every == 0:
            self.update_target_network()

        return loss.item()

class RLTrainer:
    """
    Drives training and evaluation games for an RL agent.
    """
    def __init__(self, agent, words_dataset):
        self.agent = agent
        self.words_dataset = words_dataset

    def train(self, episodes=10000, eval_every=1000, num_eval_games=100):
        """
        Play `episodes` training games, learning after every step.

        Progress is printed every 100 episodes and the agent is evaluated
        every `eval_every` episodes. Returns a dict of per-episode rewards,
        lengths and epsilon values plus the evaluation success rates.
        """
        print("üöÄ Starting RL Training...")
        print(f"Episodes: {episodes}, Evaluation every: {eval_every} episodes")

        reward_log, length_log, success_log, epsilon_log = [], [], [], []
        training_stats = {
            'episode_rewards': reward_log,
            'episode_lengths': length_log,
            'success_rates': success_log,
            'epsilon_values': epsilon_log
        }

        agent = self.agent
        for episode in range(episodes):
            episode_number = episode + 1

            # New game on a randomly drawn word
            env = HangmanEnvironment(random.choice(self.words_dataset))
            state = env.reset()
            total_reward = 0
            steps = 0

            while not env.done:
                # HMM probabilities of the current state feed the reward
                # bonus and the stored experience
                hmm_probs = agent.hmm_model.get_letter_probabilities(
                    state['masked_word'], state['guessed_letters']
                )

                action = agent.get_action(state, training=True)
                if action is None:
                    break  # No available actions

                next_state, reward, done, _info = env.step(action, hmm_probs)

                # Store the transition, then learn from a replay batch
                agent.remember(state, action, reward, next_state, done, hmm_probs)
                agent.replay()

                state = next_state
                total_reward += reward
                steps += 1

            # Log episode statistics
            reward_log.append(total_reward)
            length_log.append(steps)
            epsilon_log.append(agent.epsilon)

            # Print progress
            if episode_number % 100 == 0:
                avg_reward = np.mean(reward_log[-100:])
                avg_length = np.mean(length_log[-100:])
                print(f"Episode {episode_number}: Avg Reward = {avg_reward:.2f}, "
                      f"Avg Length = {avg_length:.2f}, Epsilon = {agent.epsilon:.3f}")

            # Evaluate
            if episode_number % eval_every == 0:
                success_rate = self.evaluate(num_eval_games, training=False)
                success_log.append(success_rate)
                print(f"üéØ Evaluation after {episode_number} episodes: Success Rate = {success_rate:.3f}")

        return training_stats

    def evaluate(self, num_games=100, training=False):
        """
        Play `num_games` games on random dataset words and return the
        fraction that were won.
        """
        wins = 0

        for _ in range(num_games):
            env = HangmanEnvironment(random.choice(self.words_dataset))
            state = env.reset()

            while not env.done:
                action = self.agent.get_action(state, training=training)
                if action is None:
                    break

                # Get HMM probabilities for reward calculation
                hmm_probs = self.agent.hmm_model.get_letter_probabilities(
                    state['masked_word'], state['guessed_letters']
                )

                state, _reward, _done, _info = env.step(action, hmm_probs)

            if env.won:
                wins += 1

        return wins / num_games

# Step 10: Initialize and Train RL Agent
print("üéØ Initializing RL Agent...")

# Create state encoder
# (the board part of the feature vector is fixed at 15 letter slots;
# longer masked words are cut off by the encoder)
state_encoder = StateEncoder(word_length=15)

# Initialize RL agent
# (`hangman_hmm` is the hybrid CFP + HMM model assigned earlier in the notebook)
rl_agent = HangmanRLAgent(state_encoder, hangman_hmm, learning_rate=0.001, gamma=0.99)

print("‚úÖ RL Agent Initialized!")
print(f"   Input dimension: {rl_agent.input_dim}")
print(f"   Output dimension: {rl_agent.output_dim}")
print(f"   Initial epsilon: {rl_agent.epsilon}")

# Train RL agent
# NOTE(review): `trainer.train` prints its own "Starting RL Training..." line,
# so this message appears twice in the output.
print("\nüöÄ Starting RL Training...")
# NOTE(review): `words[:5000]` takes the first 5000 words in corpus file
# order; if the corpus is sorted this is not a representative sample - confirm.
trainer = RLTrainer(rl_agent, words[:5000])  # Use first 5000 words for training

# Train for a reasonable number of episodes
# (5000 episodes, evaluating on 200 games after every 1000 episodes)
training_stats = trainer.train(episodes=5000, eval_every=1000, num_eval_games=200)

print("\n‚úÖ RL Training Complete!")

# Step 11: Final Evaluation with RL Agent
def rl_official_evaluation(rl_agent, test_words, num_games=2000, max_wrong_guesses=6):
    """Official evaluation using the trained RL agent.

    Plays one game per word, taking words from the front of ``test_words``,
    then scores and prints the run with ``calculate_official_score`` and
    ``print_official_results``.

    Args:
        rl_agent: Agent exposing ``get_action(state, training=False)`` and an
            ``hmm_model`` with ``get_letter_probabilities``.
        test_words: Sequence of target words; word ``i`` is used for game ``i``.
        num_games: Upper bound on the number of games. Fewer are played when
            ``test_words`` is shorter than this.
        max_wrong_guesses: Wrong guesses allowed per game.

    Returns:
        ``(results, official_score)``. ``results`` holds the aggregate
        counters plus one detail dict per game. When no game could be played
        the score is ``0.0`` and nothing is scored or printed beyond a notice.
    """
    print("üéØ RL OFFICIAL EVALUATION - 2000 HANGMAN GAMES")
    print("=" * 60)

    results = {
        'games_won': 0,
        'games_lost': 0,
        'total_wrong_guesses': 0,
        'total_repeated_guesses': 0,
        'games_details': [],
        'score_breakdown': defaultdict(int)
    }

    # Only as many games as there are words can be played; rates and the
    # score must be computed over this number, not the requested one.
    games_to_play = max(0, min(num_games, len(test_words)))

    for game_id in range(games_to_play):
        if game_id % 500 == 0:
            print(f"üéÆ Played {game_id}/{games_to_play} games...")

        target_word = test_words[game_id]
        env = HangmanEnvironment(target_word, max_wrong_guesses)
        state = env.reset()
        repeated_guesses = 0

        while not env.done:
            action = rl_agent.get_action(state, training=False)

            if action is None:
                break

            # A repeat is an action that had already been guessed before this
            # step. Counting "guessed but not revealed" letters instead would
            # count wrong letters, which are already penalised separately.
            if action in env.guessed_letters:
                repeated_guesses += 1

            # env.step takes the HMM probabilities alongside the action.
            hmm_probs = rl_agent.hmm_model.get_letter_probabilities(
                state['masked_word'], state['guessed_letters']
            )

            state, _reward, _done, _info = env.step(action, hmm_probs)

        # Record results
        game_result = {
            'game_id': game_id + 1,
            'target_word': target_word,
            'won': env.won,
            'wrong_guesses': env.wrong_guesses,
            'repeated_guesses': repeated_guesses,
            'total_guesses': len(env.guessed_letters),
            'final_word': ''.join(env.masked_word)
        }

        if env.won:
            results['games_won'] += 1
        else:
            results['games_lost'] += 1

        results['total_wrong_guesses'] += env.wrong_guesses
        results['total_repeated_guesses'] += repeated_guesses
        results['games_details'].append(game_result)

    if games_to_play == 0:
        # Nothing was played, so there is nothing to average or score.
        print("No games to play; skipping scoring.")
        return results, 0.0

    # Calculate official score
    official_score = calculate_official_score(results, games_to_play)

    # Print results
    print_official_results(results, official_score, games_to_play)

    return results, official_score

# Run RL evaluation
# Depends on `rl_agent` from the training step and on `test_words_eval`,
# which is assigned by the HMM evaluation cell
# (load_test_words_for_evaluation('test.txt', ...)); that cell must run first.
print("\n" + "="*70)
print("üèÜ RL AGENT FINAL EVALUATION")
print("="*70)

rl_results, rl_official_score = rl_official_evaluation(rl_agent, test_words_eval, num_games=2000)

# Save RL results
def save_rl_results(results, score, filename="rl_evaluation_results.txt"):
    """Write a plain-text summary of an RL evaluation run to ``filename``.

    The report contains the final score, aggregate statistics and one line
    for each of the first ten games.

    Args:
        results: Evaluation results dict with ``games_won``,
            ``total_wrong_guesses``, ``total_repeated_guesses`` and
            ``games_details`` (a list of per-game dicts).
        score: Final score, written with two decimals.
        filename: Path of the report; an existing file is overwritten.
    """
    games = results['games_details']
    games_played = len(games)
    # An empty run still gets a report instead of a ZeroDivisionError.
    success_rate = results['games_won'] / games_played if games_played else 0.0

    with open(filename, 'w') as f:
        # Real newlines here: an escaped "\\n" would put a literal
        # backslash-n in the file and collapse the report onto one line.
        f.write("RL HANGMAN AGENT EVALUATION RESULTS\n")
        f.write("=" * 50 + "\n\n")

        f.write(f"FINAL SCORE: {score:.2f}\n\n")

        f.write("SUMMARY STATISTICS:\n")
        f.write(f"Games Played: {games_played}\n")
        f.write(f"Games Won: {results['games_won']}\n")
        f.write(f"Success Rate: {success_rate:.3f}\n")
        f.write(f"Total Wrong Guesses: {results['total_wrong_guesses']}\n")
        f.write(f"Total Repeated Guesses: {results['total_repeated_guesses']}\n\n")

        f.write("FIRST 10 GAME RESULTS:\n")
        for game in games[:10]:
            status = "WON" if game['won'] else "LOST"
            f.write(f"Game {game['game_id']}: {game['target_word']} -> {status} "
                   f"(Wrong: {game['wrong_guesses']}, Repeated: {game['repeated_guesses']})\n")

    print(f"üìÑ RL results saved to {filename}")

save_rl_results(rl_results, rl_official_score)

# Compare with original HMM performance
# `official_score` is the HMM-only score produced by official_evaluation();
# that cell must have run before this comparison.
print("\n" + "="*70)
print("üìä PERFORMANCE COMPARISON: HMM vs RL")
print("="*70)
print(f"HMM Only - Final Score: {official_score:.2f}")
print(f"RL Agent - Final Score: {rl_official_score:.2f}")
print(f"Improvement: {rl_official_score - official_score:+.2f}")

# Final assessment
assess_performance(rl_official_score)

# A real newline: the escaped "\\n" printed a literal backslash-n.
print("\nüéâ RL EVALUATION COMPLETE!")
print("üìä Check 'rl_evaluation_results.txt' for detailed results")

🎯 Initializing RL Agent...
✅ RL Agent Initialized!
   Input dimension: 459
   Output dimension: 26
   Initial epsilon: 1.0

🚀 Starting RL Training...
🚀 Starting RL Training...
Episodes: 5000, Evaluation every: 1000 episodes
Episode 100: Avg Reward = -9.05, Avg Length = 9.47, Epsilon = 0.012
Episode 200: Avg Reward = -7.52, Avg Length = 10.33, Epsilon = 0.010
Episode 300: Avg Reward = -5.57, Avg Length = 10.35, Epsilon = 0.010
Episode 400: Avg Reward = -5.13, Avg Length = 10.63, Epsilon = 0.010
Episode 500: Avg Reward = -3.61, Avg Length = 11.02, Epsilon = 0.010
Episode 600: Avg Reward = -3.47, Avg Length = 11.02, Epsilon = 0.010
Episode 700: Avg Reward = -1.71, Avg Length = 10.85, Epsilon = 0.010
Episode 800: Avg Reward = 2.58, Avg Length = 10.83, Epsilon = 0.010
Episode 900: Avg Reward = 2.70, Avg Length = 11.04, Epsilon = 0.010
Episode 1000: Avg Reward = -4.30, Avg Length = 10.95, Epsilon = 0.010
🎯 Evaluation after 1000 episodes: Success Rate = 0.220
Episode 1100: Avg R

In [None]:
# Step 8: Test the HMM with example game states
def test_hmm_predictions():
    """Print the HMM's five most likely letters for a few sample boards.

    Each sample is a masked word plus the set of letters already guessed.
    The predictions come from the module-level ``hangman_hmm`` model.
    """
    # (board pattern, letters already guessed, label printed above the result)
    scenarios = (
        ("_____", "", "Empty 5-letter word"),
        ("A____", "A", "5-letter word starting with A"),
        ("___E_", "E", "5-letter word with E at position 3"),
        ("______", "", "Empty 6-letter word"),
    )

    for pattern, already_guessed, label in scenarios:
        board = list(pattern)
        used = set(already_guessed)

        print(f"\n{label}:")
        print(f"Masked word: {' '.join(board)}")
        print(f"Guessed letters: {''.join(sorted(used))}")

        letter_probs = hangman_hmm.get_letter_probabilities(board, used)

        # Rank by probability, highest first, and keep the top five.
        ranked = sorted(letter_probs.items(), key=lambda pair: pair[1], reverse=True)
        print("Top 5 letter predictions:")
        for symbol, weight in ranked[:5]:
            print(f"  {symbol}: {weight:.4f}")

# Run the spot checks; requires the module-level `hangman_hmm` to exist.
test_hmm_predictions()


Empty 5-letter word:
Masked word: _ _ _ _ _
Guessed letters: 
Top 5 letter predictions:
  A: 0.1073
  E: 0.0981
  O: 0.0694
  R: 0.0676
  I: 0.0634

5-letter word starting with A:
Masked word: A _ _ _ _
Guessed letters: A
Top 5 letter predictions:
  E: 0.1023
  I: 0.0946
  O: 0.0846
  N: 0.0731
  S: 0.0701

5-letter word with E at position 3:
Masked word: _ _ _ E _
Guessed letters: E
Top 5 letter predictions:
  R: 0.1329
  A: 0.0755
  N: 0.0711
  L: 0.0675
  T: 0.0672

Empty 6-letter word:
Masked word: _ _ _ _ _ _
Guessed letters: 
Top 5 letter predictions:
  E: 0.1116
  A: 0.1021
  R: 0.0712
  I: 0.0709
  N: 0.0615


In [None]:
# Step 7: OFFICIAL EVALUATION - 2000 Games with Official Scoring Formula
def official_evaluation(hangman_hmm, test_words, num_games=2000, max_wrong_guesses=6):
    """
    Official evaluation as per problem statement.

    Plays one game per word, taking words from the front of ``test_words``,
    then scores and prints the run with ``calculate_official_score`` and
    ``print_official_results``.

    Args:
        hangman_hmm: Model handed to ``play_single_game`` for every game.
        test_words: Sequence of target words; word ``i`` is used for game ``i``.
        num_games: Upper bound on the number of games. Fewer are played when
            ``test_words`` is shorter than this.
        max_wrong_guesses: Wrong guesses allowed per game.

    Returns:
        ``(results, official_score)``. ``results`` holds the aggregate
        counters plus one detail dict per game. When no game could be played
        the score is ``0.0`` and nothing is scored or printed beyond a notice.
    """
    from collections import defaultdict

    # Only as many games as there are words can be played; rates and the
    # score must be computed over this number, not the requested one.
    games_to_play = max(0, min(num_games, len(test_words)))

    print("üéØ OFFICIAL EVALUATION - 2000 HANGMAN GAMES")
    print("=" * 60)
    print(f"Playing {games_to_play} games with {max_wrong_guesses} wrong guesses allowed per game")
    print()

    # Initialize metrics
    results = {
        'games_won': 0,
        'games_lost': 0,
        'total_wrong_guesses': 0,
        'total_repeated_guesses': 0,
        'games_details': [],
        'score_breakdown': defaultdict(int)
    }

    # Play specified number of games
    for game_id in range(games_to_play):
        if game_id % 500 == 0:
            print(f"üéÆ Played {game_id}/{games_to_play} games...")

        target_word = test_words[game_id]
        game_result = play_single_game(hangman_hmm, target_word, max_wrong_guesses, game_id + 1)

        # Aggregate results
        if game_result['won']:
            results['games_won'] += 1
        else:
            results['games_lost'] += 1

        results['total_wrong_guesses'] += game_result['wrong_guesses']
        results['total_repeated_guesses'] += game_result['repeated_guesses']
        results['games_details'].append(game_result)

    if games_to_play == 0:
        # Nothing was played, so there is nothing to average or score.
        print("No games to play; skipping scoring.")
        return results, 0.0

    # Calculate official score
    official_score = calculate_official_score(results, games_to_play)

    # Print comprehensive results
    print_official_results(results, official_score, games_to_play)

    return results, official_score

def play_single_game(hangman_hmm, target_word, max_wrong_guesses, game_id):
    """
    Play a single Hangman game and return detailed results.

    Each turn the model's highest-probability letter is guessed. If the model
    proposes a letter that was already guessed, the repeat is counted and the
    best letter that has not been guessed yet is played instead, so the game
    always makes progress.

    Args:
        hangman_hmm: Object with ``get_letter_probabilities(masked_word,
            guessed_letters)`` returning a letter -> probability mapping.
        target_word: The word to guess.
        max_wrong_guesses: The game ends once this many wrong guesses are made.
        game_id: Identifier copied into the returned record.

    Returns:
        Dict with ``game_id``, ``target_word``, ``won``, ``wrong_guesses``,
        ``repeated_guesses``, ``total_guesses`` (distinct letters guessed),
        ``final_word`` and ``game_log``.
    """
    masked_word = ['_'] * len(target_word)
    guessed_letters = set()
    wrong_guesses = 0
    repeated_guesses = 0
    game_log = []

    # Game loop
    while wrong_guesses < max_wrong_guesses and '_' in masked_word:
        # Get letter probabilities from HMM
        probs = hangman_hmm.get_letter_probabilities(masked_word, guessed_letters)

        # Highest-probability letter, or None when the model offers nothing.
        next_letter = max(probs.items(), key=lambda x: x[1])[0] if probs else None

        # Check for repeated guess
        if next_letter is not None and next_letter in guessed_letters:
            repeated_guesses += 1
            game_log.append(f"Repeated guess: {next_letter}")
            # The board and guessed set are unchanged, so re-querying the
            # model would see the same inputs and could repeat forever. Play
            # its best letter that has not been guessed yet instead.
            fresh = {letter: p for letter, p in probs.items() if letter not in guessed_letters}
            next_letter = max(fresh.items(), key=lambda x: x[1])[0] if fresh else None

        if next_letter is None:
            # No usable probabilities: fall back to the first unguessed A-Z.
            available_letters = [chr(i) for i in range(65, 91) if chr(i) not in guessed_letters]
            if not available_letters:
                break
            next_letter = available_letters[0]

        # Add to guessed letters
        guessed_letters.add(next_letter)

        # Check if letter is in target word
        if next_letter in target_word:
            # Update masked word
            for i, char in enumerate(target_word):
                if char == next_letter:
                    masked_word[i] = next_letter
            game_log.append(f"Correct: {next_letter} -> {' '.join(masked_word)}")
        else:
            wrong_guesses += 1
            game_log.append(f"Wrong: {next_letter} ({wrong_guesses}/{max_wrong_guesses} wrong)")

    # Determine game outcome
    won = '_' not in masked_word
    actual_word = ''.join(target_word)
    guessed_word = ''.join(masked_word)

    return {
        'game_id': game_id,
        'target_word': actual_word,
        'won': won,
        'wrong_guesses': wrong_guesses,
        'repeated_guesses': repeated_guesses,
        'total_guesses': len(guessed_letters),
        'final_word': guessed_word,
        'game_log': game_log
    }

def calculate_official_score(results, num_games):
    """
    Calculate official score using the formula:
    Final Score = (Success Rate * 2000) - (Total Wrong Guesses * 5) - (Total Repeated Guesses * 2)

    Args:
        results: Evaluation results dict. Reads ``games_won``,
            ``total_wrong_guesses`` and ``total_repeated_guesses``; the score
            components are written into ``results['score_breakdown']``.
        num_games: Number of games the success rate is computed over.

    Returns:
        The final score. With ``num_games == 0`` the success rate is taken as
        0 instead of raising ``ZeroDivisionError``.
    """
    # No games means no successes; avoid dividing by zero.
    success_rate = results['games_won'] / num_games if num_games else 0.0
    total_wrong_guesses = results['total_wrong_guesses']
    total_repeated_guesses = results['total_repeated_guesses']

    success_component = success_rate * 2000
    wrong_penalty = total_wrong_guesses * 5
    repeated_penalty = total_repeated_guesses * 2

    score = success_component - wrong_penalty - repeated_penalty

    # Store breakdown for analysis
    breakdown = results['score_breakdown']
    breakdown['success_component'] = success_component
    breakdown['wrong_penalty'] = wrong_penalty
    breakdown['repeated_penalty'] = repeated_penalty
    breakdown['success_rate'] = success_rate
    breakdown['total_games'] = num_games

    return score

def print_official_results(results, official_score, num_games):
    """
    Write the evaluation report to stdout.

    The report covers win/loss counts, wrong- and repeated-guess totals with
    per-game averages, the score breakdown stored in
    ``results['score_breakdown']``, the win rate per word length, and the
    share of guesses that were correct.

    Args:
        results: Aggregated evaluation results, including ``games_details``.
        official_score: Final score to display.
        num_games: Denominator for the success rate and per-game averages.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("üèÜ OFFICIAL EVALUATION RESULTS")
    print(rule)

    # Win/loss overview
    print("üìä GAME STATISTICS:")
    print(f"   Total Games Played: {num_games}")
    wins = results['games_won']
    print(f"   Games Won: {wins}")
    print(f"   Games Lost: {results['games_lost']}")
    print(f"   Success Rate: {wins/num_games:.3f} ({wins}/{num_games})")

    # Guess totals and per-game averages
    print("\nüéØ PERFORMANCE METRICS:")
    wrong_total = results['total_wrong_guesses']
    print(f"   Total Wrong Guesses: {wrong_total}")
    repeat_total = results['total_repeated_guesses']
    print(f"   Total Repeated Guesses: {repeat_total}")
    print(f"   Average Wrong Guesses per Game: {wrong_total/num_games:.2f}")
    print(f"   Average Repeated Guesses per Game: {repeat_total/num_games:.2f}")

    # Score components
    print("\nüí∞ SCORE BREAKDOWN:")
    parts = results['score_breakdown']
    print(f"   Success Component: {parts['success_component']:.2f}")
    print(f"   Wrong Guesses Penalty: -{parts['wrong_penalty']:.2f}")
    print(f"   Repeated Guesses Penalty: -{parts['repeated_penalty']:.2f}")
    print(f"   FINAL SCORE: {official_score:.2f}")

    print("\nüìà ADDITIONAL INSIGHTS:")

    # Per word length: [games won, games lost]
    tally = defaultdict(lambda: [0, 0])
    for record in results['games_details']:
        slot = 0 if record['won'] else 1
        tally[len(record['target_word'])][slot] += 1

    print("   Performance by Word Length:")
    for size in sorted(tally):
        won, lost = tally[size]
        total = won + lost
        if total > 0:
            success_rate = won / total
            print(f"     Length {size}: {won}/{total} won ({success_rate:.1%})")

    # Share of distinct guesses that hit a letter of the word
    attempts = 0
    hits = 0
    for record in results['games_details']:
        hits += record['total_guesses'] - record['wrong_guesses']
    for record in results['games_details']:
        attempts += record['total_guesses']
    efficiency = hits / attempts if attempts > 0 else 0

    print(f"   Guess Efficiency: {efficiency:.1%} ({hits}/{attempts} correct guesses)")

# Load test words for evaluation
def load_test_words_for_evaluation(file_path, num_words=2000):
    """Read evaluation words from ``file_path``, one word per line.

    Lines are stripped and upper-cased; blank lines and entries containing
    non-alphabetic characters are dropped. At most ``num_words`` words are
    returned, in file order. If the file holds fewer usable words than
    requested, a warning is printed and every usable word is returned.
    """
    usable = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            candidate = raw_line.strip().upper()
            if candidate and candidate.isalpha():
                usable.append(candidate)

    if len(usable) >= num_words:
        return usable[:num_words]

    print(f"‚ö†Ô∏è  Warning: Only {len(usable)} test words available, using all of them")
    return usable

# RUN OFFICIAL EVALUATION
# Loads up to 2000 words from 'test.txt' and plays them with the HMM model.
# Leaves `test_words_eval`, `results` and `official_score` as globals; other
# cells read them for the RL evaluation and the HMM-vs-RL comparison.
print("Loading test words for official evaluation...")
test_words_eval = load_test_words_for_evaluation('test.txt', num_words=2000)

print(f"Loaded {len(test_words_eval)} test words for evaluation")
print("Starting official 2000-game evaluation...")

# Run the official evaluation
results, official_score = official_evaluation(hangman_hmm, test_words_eval, num_games=2000, max_wrong_guesses=6)

# Save detailed results
def save_detailed_results(results, official_score, filename="evaluation_results.txt"):
    """Write the evaluation report to ``filename`` as plain text.

    The file contains the final score, the summary statistics, the score
    breakdown taken from ``results['score_breakdown']`` and one line for each
    of the first ten games. An existing file is overwritten.
    """
    with open(filename, 'w') as report:
        parts = results['score_breakdown']
        header = [
            "OFFICIAL HANGMAN EVALUATION RESULTS\n",
            "=" * 50 + "\n\n",
            f"FINAL SCORE: {official_score:.2f}\n\n",
            "SUMMARY STATISTICS:\n",
            f"Games Played: {parts['total_games']}\n",
            f"Games Won: {results['games_won']}\n",
            f"Success Rate: {parts['success_rate']:.3f}\n",
            f"Total Wrong Guesses: {results['total_wrong_guesses']}\n",
            f"Total Repeated Guesses: {results['total_repeated_guesses']}\n\n",
            "SCORE BREAKDOWN:\n",
            f"Success Component: {parts['success_component']:.2f}\n",
            f"Wrong Guesses Penalty: -{parts['wrong_penalty']:.2f}\n",
            f"Repeated Guesses Penalty: -{parts['repeated_penalty']:.2f}\n\n",
            "FIRST 10 GAME RESULTS:\n",
        ]
        report.writelines(header)

        # One line per game, first ten only
        for entry in results['games_details'][:10]:
            outcome = "LOST"
            if entry['won']:
                outcome = "WON"
            report.write(
                f"Game {entry['game_id']}: {entry['target_word']} -> {outcome} "
                f"(Wrong: {entry['wrong_guesses']}, Repeated: {entry['repeated_guesses']})\n"
            )

    print(f"üìÑ Detailed results saved to {filename}")

# Save results
# Writes the HMM evaluation report to the default 'evaluation_results.txt'.
save_detailed_results(results, official_score)

# Performance assessment
def assess_performance(score):
    """Print a qualitative rating and feedback line for a final score.

    Ratings are chosen by the highest threshold the score reaches: 1500,
    1000, 500 or 0. Anything below 0 gets the lowest rating.
    """
    print("\n" + "=" * 50)
    print("üìã PERFORMANCE ASSESSMENT")
    print("=" * 50)

    # (minimum score, rating, feedback), checked from best to worst
    tiers = (
        (1500, "EXCELLENT üèÜ",
         "Outstanding performance! Your agent is highly efficient."),
        (1000, "VERY GOOD ü•à",
         "Strong performance with good success rate and efficiency."),
        (500, "GOOD ü•â",
         "Solid performance with room for optimization."),
        (0, "FAIR üìä",
         "Basic functionality achieved, needs improvement in efficiency."),
    )

    # Default applies when no threshold is reached
    assessment = "NEEDS WORK üîß"
    feedback = "Focus on improving success rate and reducing wrong guesses."
    for floor, rating, advice in tiers:
        if score >= floor:
            assessment, feedback = rating, advice
            break

    print(f"FINAL SCORE: {score:.2f}")
    print(f"ASSESSMENT: {assessment}")
    print(f"FEEDBACK: {feedback}")

# Final assessment
# Rates the HMM-only score produced by official_evaluation().
assess_performance(official_score)

print(f"\nüéâ EVALUATION COMPLETE!")
print(f"üìä Check 'evaluation_results.txt' for detailed game-by-game results")

Loading test words for official evaluation...
Loaded 2000 test words for evaluation
Starting official 2000-game evaluation...
🎯 OFFICIAL EVALUATION - 2000 HANGMAN GAMES
Playing 2000 games with 6 wrong guesses allowed per game

🎮 Played 0/2000 games...
🎮 Played 500/2000 games...
🎮 Played 1000/2000 games...
🎮 Played 1500/2000 games...

🏆 OFFICIAL EVALUATION RESULTS
📊 GAME STATISTICS:
   Total Games Played: 2000
   Games Won: 703
   Games Lost: 1297
   Success Rate: 0.351 (703/2000)

🎯 PERFORMANCE METRICS:
   Total Wrong Guesses: 10213
   Total Repeated Guesses: 0
   Average Wrong Guesses per Game: 5.11
   Average Repeated Guesses per Game: 0.00

💰 SCORE BREAKDOWN:
   Success Component: 703.00
   Wrong Guesses Penalty: -51065.00
   Repeated Guesses Penalty: -0.00
   FINAL SCORE: -50362.00

📈 ADDITIONAL INSIGHTS:
   Performance by Word Length:
     Length 2: 0/2 won (0.0%)
     Length 3: 0/9 won (0.0%)
     Length 4: 4/37 won (10.8%)
     Length 5: 10/91 won (

In [None]:
# Step 7: UNIVERSAL EVALUATION - Works for both HMM and RL agents
def universal_evaluation(agent, test_words, num_games=2000, max_wrong_guesses=6, agent_type="hmm"):
    """
    Universal evaluation that works for both HMM and RL agents.

    Plays one game per word, taking words from the front of ``test_words``,
    then scores and prints the run with ``calculate_official_score`` and
    ``print_universal_results``.

    Args:
        agent: The HMM model or RL agent to evaluate.
        test_words: Sequence of target words; word ``i`` is used for game ``i``.
        num_games: Upper bound on the number of games. Fewer are played when
            ``test_words`` is shorter than this.
        max_wrong_guesses: Wrong guesses allowed per game.
        agent_type: ``"hmm"`` (case-insensitive) plays through
            ``play_single_game_hmm``; any other value plays through
            ``play_single_game_rl``. Also used as the label in the report.

    Returns:
        ``(results, official_score)``. When no game could be played the score
        is ``0.0`` and nothing is scored or printed beyond a notice.
    """
    from collections import defaultdict

    # Only as many games as there are words can be played; rates and the
    # score must be computed over this number, not the requested one.
    games_to_play = max(0, min(num_games, len(test_words)))

    print(f"üéØ UNIVERSAL EVALUATION - {games_to_play} HANGMAN GAMES")
    print("=" * 60)
    print(f"Agent Type: {agent_type.upper()}")
    print(f"Playing {games_to_play} games with {max_wrong_guesses} wrong guesses allowed per game")
    print()

    # Initialize metrics
    results = {
        'games_won': 0,
        'games_lost': 0,
        'total_wrong_guesses': 0,
        'total_repeated_guesses': 0,
        'games_details': [],
        'score_breakdown': defaultdict(int)
    }

    use_hmm = agent_type.lower() == "hmm"

    # Play specified number of games
    for game_id in range(games_to_play):
        if game_id % 500 == 0:
            print(f"üéÆ Played {game_id}/{games_to_play} games...")

        target_word = test_words[game_id]

        if use_hmm:
            game_result = play_single_game_hmm(agent, target_word, max_wrong_guesses, game_id + 1)
        else:  # RL agent
            game_result = play_single_game_rl(agent, target_word, max_wrong_guesses, game_id + 1)

        # Aggregate results
        if game_result['won']:
            results['games_won'] += 1
        else:
            results['games_lost'] += 1

        results['total_wrong_guesses'] += game_result['wrong_guesses']
        results['total_repeated_guesses'] += game_result['repeated_guesses']
        results['games_details'].append(game_result)

    if games_to_play == 0:
        # Nothing was played, so there is nothing to average or score.
        print("No games to play; skipping scoring.")
        return results, 0.0

    # Calculate official score
    official_score = calculate_official_score(results, games_to_play)

    # Print comprehensive results
    print_universal_results(results, official_score, games_to_play, agent_type)

    return results, official_score

def _best_unguessed_letter(probs, guessed_letters):
    """Return the most probable letter not guessed yet, else the first unguessed A-Z letter, else None."""
    fresh = {
        letter: prob
        for letter, prob in (probs or {}).items()
        if letter not in guessed_letters
    }
    if fresh:
        return max(fresh.items(), key=lambda x: x[1])[0]
    for code in range(65, 91):
        if chr(code) not in guessed_letters:
            return chr(code)
    return None


def play_single_game_hmm(hmm_agent, target_word, max_wrong_guesses, game_id):
    """
    Play a single game with HMM agent.

    Each turn the model's highest-probability letter is guessed. If the model
    proposes a letter that was already guessed, the repeat is counted and the
    best letter that has not been guessed yet is played instead, so the game
    always makes progress.

    Args:
        hmm_agent: Object with ``get_letter_probabilities(masked_word,
            guessed_letters)`` returning a letter -> probability mapping.
        target_word: The word to guess.
        max_wrong_guesses: The game ends once this many wrong guesses are made.
        game_id: Identifier copied into the returned record.

    Returns:
        Dict with ``game_id``, ``target_word``, ``won``, ``wrong_guesses``,
        ``repeated_guesses``, ``total_guesses`` (distinct letters guessed),
        ``final_word`` and ``game_log``.
    """
    masked_word = ['_'] * len(target_word)
    guessed_letters = set()
    wrong_guesses = 0
    repeated_guesses = 0
    game_log = []

    # Game loop
    while wrong_guesses < max_wrong_guesses and '_' in masked_word:
        # Get letter probabilities from HMM
        probs = hmm_agent.get_letter_probabilities(masked_word, guessed_letters)

        if probs:
            # Choose letter with highest probability
            next_letter = max(probs.items(), key=lambda x: x[1])[0]

            # Check for repeated guess
            if next_letter in guessed_letters:
                repeated_guesses += 1
                game_log.append(f"Repeated guess: {next_letter}")
                # The board and guessed set are unchanged, so re-querying the
                # model would see the same inputs and could repeat forever.
                next_letter = _best_unguessed_letter(probs, guessed_letters)
        else:
            # No probabilities available, use fallback
            next_letter = _best_unguessed_letter(None, guessed_letters)

        if next_letter is None:
            # Every candidate letter has been used; nothing left to try.
            break

        # Add to guessed letters
        guessed_letters.add(next_letter)

        # Check if letter is in target word
        if next_letter in target_word:
            # Update masked word
            for i, char in enumerate(target_word):
                if char == next_letter:
                    masked_word[i] = next_letter
            game_log.append(f"Correct: {next_letter} -> {' '.join(masked_word)}")
        else:
            wrong_guesses += 1
            game_log.append(f"Wrong: {next_letter} ({wrong_guesses}/{max_wrong_guesses} wrong)")

    # Determine game outcome
    won = '_' not in masked_word
    actual_word = ''.join(target_word)
    guessed_word = ''.join(masked_word)

    return {
        'game_id': game_id,
        'target_word': actual_word,
        'won': won,
        'wrong_guesses': wrong_guesses,
        'repeated_guesses': repeated_guesses,
        'total_guesses': len(guessed_letters),
        'final_word': guessed_word,
        'game_log': game_log
    }

def play_single_game_rl(rl_agent, target_word, max_wrong_guesses, game_id):
    """
    Play a single game with RL agent.

    The agent picks each action from the current state. If it proposes a
    letter that was already guessed, the repeat is counted and a letter that
    has not been guessed yet is played instead, so the game always advances.

    Args:
        rl_agent: Agent exposing ``get_action(state, training=False)``. If it
            has an ``hmm_model`` attribute, that model's letter probabilities
            are passed to ``env.step`` and used to pick the substitute letter.
        target_word: The word to guess.
        max_wrong_guesses: Wrong-guess limit handed to ``HangmanEnvironment``.
        game_id: Identifier copied into the returned record.

    Returns:
        Dict with ``game_id``, ``target_word``, ``won``, ``wrong_guesses``,
        ``repeated_guesses``, ``total_guesses``, ``final_word`` and
        ``game_log``.
    """
    # Create environment for RL agent
    env = HangmanEnvironment(target_word, max_wrong_guesses)
    state = env.reset()

    game_log = []
    repeated_guesses = 0

    # Game loop
    while not env.done:
        # RL agent chooses action directly
        action = rl_agent.get_action(state, training=False)

        if action is None:
            break

        # Get HMM probabilities (if available)
        hmm_probs = None
        if hasattr(rl_agent, 'hmm_model'):
            hmm_probs = rl_agent.hmm_model.get_letter_probabilities(
                state['masked_word'], state['guessed_letters']
            )

        # Check for repeated guess (RL agents shouldn't do this, but just in case)
        if action in env.guessed_letters:
            repeated_guesses += 1
            game_log.append(f"Repeated guess: {action}")

            # Neither the state nor the environment has changed, so asking
            # the agent again could return the same action forever. Play the
            # most probable unguessed letter, else the first unguessed A-Z.
            # NOTE(review): assumes actions are uppercase letters, like the
            # letters used by the HMM game loop — confirm against the env.
            candidates = {
                letter: prob
                for letter, prob in (hmm_probs or {}).items()
                if letter not in env.guessed_letters
            }
            if candidates:
                action = max(candidates.items(), key=lambda x: x[1])[0]
            else:
                unguessed = [chr(i) for i in range(65, 91) if chr(i) not in env.guessed_letters]
                if not unguessed:
                    break
                action = unguessed[0]

        # Take action in environment
        next_state, reward, done, info = env.step(action, hmm_probs)

        # Log the result
        if info.get('correct', False):
            game_log.append(f"Correct: {action} -> {' '.join(next_state['masked_word'])}")
        else:
            if not info.get('repeated', False):
                game_log.append(f"Wrong: {action} ({env.wrong_guesses}/{max_wrong_guesses} wrong)")

        state = next_state

    # Determine game outcome
    won = env.won
    actual_word = target_word
    guessed_word = ''.join(env.masked_word)

    return {
        'game_id': game_id,
        'target_word': actual_word,
        'won': won,
        'wrong_guesses': env.wrong_guesses,
        'repeated_guesses': repeated_guesses,
        'total_guesses': len(env.guessed_letters),
        'final_word': guessed_word,
        'game_log': game_log
    }

def calculate_official_score(results, num_games):
    """
    Calculate the official score using the formula:
    Final Score = (Success Rate * 2000) - (Total Wrong Guesses * 5) - (Total Repeated Guesses * 2)

    Args:
        results: Evaluation results dict. Reads ``games_won``,
            ``total_wrong_guesses`` and ``total_repeated_guesses``; the score
            components are written into ``results['score_breakdown']``.
        num_games: Number of games the success rate is computed over.

    Returns:
        The final score. With ``num_games == 0`` the success rate is taken as
        0 instead of raising ``ZeroDivisionError``.
    """
    if num_games:
        success_rate = results['games_won'] / num_games
    else:
        # No games means no successes; avoid dividing by zero.
        success_rate = 0.0
    total_wrong_guesses = results['total_wrong_guesses']
    total_repeated_guesses = results['total_repeated_guesses']

    score = (success_rate * 2000) - (total_wrong_guesses * 5) - (total_repeated_guesses * 2)

    # Store breakdown for analysis
    results['score_breakdown']['success_component'] = success_rate * 2000
    results['score_breakdown']['wrong_penalty'] = total_wrong_guesses * 5
    results['score_breakdown']['repeated_penalty'] = total_repeated_guesses * 2
    results['score_breakdown']['success_rate'] = success_rate
    results['score_breakdown']['total_games'] = num_games

    return score

def print_universal_results(results, official_score, num_games, agent_type):
    """
    Write the evaluation report for one agent to stdout.

    The report is labelled with the upper-cased ``agent_type`` and covers
    win/loss counts, wrong- and repeated-guess totals with per-game averages,
    the score breakdown stored in ``results['score_breakdown']``, the win
    rate per word length, and the share of guesses that were correct.
    """
    divider = "=" * 70
    print("\n" + divider)
    print(f"üèÜ {agent_type.upper()} AGENT EVALUATION RESULTS")
    print(divider)

    # Win/loss overview
    print("üìä GAME STATISTICS:")
    print(f"   Agent Type: {agent_type.upper()}")
    print(f"   Total Games Played: {num_games}")
    victories = results['games_won']
    print(f"   Games Won: {victories}")
    print(f"   Games Lost: {results['games_lost']}")
    print(f"   Success Rate: {victories/num_games:.3f} ({victories}/{num_games})")

    # Guess totals and per-game averages
    print("\nüéØ PERFORMANCE METRICS:")
    misses = results['total_wrong_guesses']
    print(f"   Total Wrong Guesses: {misses}")
    repeats = results['total_repeated_guesses']
    print(f"   Total Repeated Guesses: {repeats}")
    print(f"   Average Wrong Guesses per Game: {misses/num_games:.2f}")
    print(f"   Average Repeated Guesses per Game: {repeats/num_games:.2f}")

    # Score components
    print("\nüí∞ SCORE BREAKDOWN:")
    components = results['score_breakdown']
    print(f"   Success Component: {components['success_component']:.2f}")
    print(f"   Wrong Guesses Penalty: -{components['wrong_penalty']:.2f}")
    print(f"   Repeated Guesses Penalty: -{components['repeated_penalty']:.2f}")
    print(f"   FINAL SCORE: {official_score:.2f}")

    print("\nüìà ADDITIONAL INSIGHTS:")

    # Games played and games won, keyed by word length
    from collections import defaultdict
    played_by_length = defaultdict(int)
    won_by_length = defaultdict(int)

    for game in results['games_details']:
        word_len = len(game['target_word'])
        played_by_length[word_len] += 1
        if game['won']:
            won_by_length[word_len] += 1

    print("   Performance by Word Length:")
    for word_len in sorted(played_by_length):
        total = played_by_length[word_len]
        won = won_by_length.get(word_len, 0)
        if total > 0:
            print(f"     Length {word_len}: {won}/{total} won ({won / total:.1%})")

    # Share of distinct guesses that hit a letter of the word
    details = results['games_details']
    total_correct_guesses = sum(g['total_guesses'] - g['wrong_guesses'] for g in details)
    total_guesses = sum(g['total_guesses'] for g in details)
    if total_guesses > 0:
        efficiency = total_correct_guesses / total_guesses
    else:
        efficiency = 0

    print(f"   Guess Efficiency: {efficiency:.1%} ({total_correct_guesses}/{total_guesses} correct guesses)")

# 🚀 RUN UNIVERSAL EVALUATION FOR BOTH AGENT TYPES
def evaluate_all_agents(hmm_agent, rl_agents_dict, test_words, num_games=2000):
    """
    Run the universal evaluation for the HMM agent and every RL agent.

    The HMM agent is evaluated first and stored under ``'HMM_Agent'``. Each
    entry of ``rl_agents_dict`` is then evaluated with its lower-cased key as
    the agent type and stored under its key. A comparison of all agents is
    printed at the end.

    Returns:
        Dict mapping agent name to ``{'results': ..., 'score': ...}``.
    """
    print("ü§ñ COMPREHENSIVE AGENT EVALUATION")
    print("=" * 60)

    # HMM baseline first, so it is the first entry of the summary
    print("\nüîç Evaluating HMM Agent...")
    outcome, total = universal_evaluation(
        hmm_agent, test_words, num_games, agent_type="hmm"
    )
    all_results = {'HMM_Agent': {'results': outcome, 'score': total}}

    # Then every RL agent, in the order the mapping yields them
    for rl_agent_name in rl_agents_dict:
        print(f"\nüîç Evaluating {rl_agent_name}...")
        outcome, total = universal_evaluation(
            rl_agents_dict[rl_agent_name],
            test_words,
            num_games,
            agent_type=rl_agent_name.lower(),
        )
        all_results[rl_agent_name] = {'results': outcome, 'score': total}

    print_comparison(all_results)

    return all_results

def print_comparison(all_results):
    """
    Print comparison between all agents.

    For every agent the success rate, final score and average wrong guesses
    per game are printed, followed by the best-scoring agent.

    Args:
        all_results: Dict mapping agent name to ``{'results': ..., 'score':
            ...}``, as built by ``evaluate_all_agents``.
    """
    print("\n" + "=" * 70)
    print("üèÜ FINAL AGENT COMPARISON")
    print("=" * 70)

    best_score = -float('inf')
    best_agent = None

    for agent_name, data in all_results.items():
        score = data['score']
        results = data['results']
        breakdown = results['score_breakdown']
        success_rate = breakdown['success_rate']

        # Use the game count recorded when the run was scored; fall back to
        # the historical 2000 only when it is missing or zero.
        games = breakdown.get('total_games') or 2000

        print(f"\n{agent_name}:")
        print(f"  Success Rate: {success_rate:.3f} ({results['games_won']}/{games})")
        print(f"  Final Score: {score:.2f}")
        print(f"  Avg Wrong Guesses: {results['total_wrong_guesses']/games:.2f}")

        if score > best_score:
            best_score = score
            best_agent = agent_name

    print(f"\nüéØ BEST PERFORMING AGENT: {best_agent}")
    print(f"üèÜ BEST SCORE: {best_score:.2f}")

# Usage Example:
# After your current HMM evaluation, add this:
# Requires `hangman_hmm` and `test_words_eval` from the HMM evaluation cell.
# NOTE(review): `dqn_agent` and `qtable_agent` are not defined in the visible
# part of this file — confirm they exist before running this cell.

print("\n" + "="*70)
print("üöÄ STARTING RL AGENT EVALUATION")
print("="*70)

# Your RL agents (assuming you've trained them)
# Each key is the agent's label in the comparison; lower-cased, it is also the
# agent_type passed to universal_evaluation, where anything other than "hmm"
# is played through play_single_game_rl.
rl_agents_dict = {
    "DQN_Agent": dqn_agent,
    "QTable_Agent": qtable_agent
}

# Evaluate all agents
# The HMM agent is evaluated first, then every entry of rl_agents_dict.
all_agent_results = evaluate_all_agents(
    hangman_hmm,
    rl_agents_dict,
    test_words_eval,
    num_games=2000
)

print("\nüéâ UNIVERSAL EVALUATION COMPLETE!")