# PyTorch Implementation: Improved Neural HMM

**Simplified PyTorch solution - Neural HMM only (no DQN)**

## Why No DQN?
- ‚ùå Dataset too small (50K words) for deep RL
- ‚ùå State space too large (sparse rewards)
- ‚ùå Overfitting risk with DQN
- ‚úÖ **Better approach:** Improved Neural HMM with more training

## Components:
1. **Neural HMM** - Deeper network, more epochs, better architecture
2. **Direct prediction** - No RL agent needed

## Previous Results:
- Traditional HMM (n-grams): 27.05%
- **Target with Neural HMM: 30%+**

In [1]:
import sys
sys.path.append('../src')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import pickle
from collections import Counter, defaultdict, deque
from tqdm import tqdm
from hangman_env import HangmanEnv
from utils import calculate_final_score
import random

# Set random seeds
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"‚úì Using device: {device}")
print(f"‚úì PyTorch version: {torch.__version__}")

‚úì Using device: cpu
‚úì PyTorch version: 2.9.0


In [2]:
# Load cleaned data
with open('../Data/corpus_cleaned.txt', 'r', encoding='utf-8') as f:
    corpus_words = [line.strip() for line in f if line.strip()]

with open('../Data/test_cleaned.txt', 'r', encoding='utf-8') as f:
    test_words = [line.strip() for line in f if line.strip()]

print(f"‚úì Loaded {len(corpus_words)} training words")
print(f"‚úì Loaded {len(test_words)} test words")

‚úì Loaded 49302 training words
‚úì Loaded 2000 test words


## Part 1: Neural HMM

Instead of counting n-grams, we use a neural network to learn letter patterns.

In [3]:
class NeuralHMM(nn.Module):
    """Neural network that predicts letter probabilities given game state."""
    
    def __init__(self, max_word_len=30, hidden_size=256):
        super(NeuralHMM, self).__init__()
        self.max_word_len = max_word_len
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
        self.char_to_idx = {c: i for i, c in enumerate(self.alphabet)}
        self.char_to_idx['_'] = 26  # Mask token
        
        # Input: masked word (one-hot) + guessed letters (binary) + position embeddings
        # Word embedding: max_word_len * 27 (26 letters + mask)
        # Guessed: 26 (binary vector)
        # Word length: 1
        input_size = max_word_len * 27 + 26 + 1
        
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc3 = nn.Linear(hidden_size // 2, 26)  # Output: probability for each letter
        
        self.dropout = nn.Dropout(0.2)
    
    def encode_state(self, masked_word, guessed_letters, word_length):
        """Encode game state as feature vector."""
        # One-hot encode masked word
        word_encoding = np.zeros(self.max_word_len * 27)
        for i, char in enumerate(masked_word[:self.max_word_len]):
            if char is None:
                idx = self.char_to_idx['_']
            else:
                idx = self.char_to_idx[char]
            word_encoding[i * 27 + idx] = 1.0
        
        # Binary encode guessed letters
        guessed_encoding = np.zeros(26)
        for char in guessed_letters:
            if char in self.char_to_idx and self.char_to_idx[char] < 26:
                guessed_encoding[self.char_to_idx[char]] = 1.0
        
        # Normalize word length
        length_encoding = np.array([word_length / self.max_word_len])
        
        return np.concatenate([word_encoding, guessed_encoding, length_encoding])
    
    def forward(self, x):
        """Forward pass."""
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)  # Logits
        return x
    
    def predict_probs(self, masked_word, guessed_letters, word_length):
        """Predict letter probabilities."""
        state = self.encode_state(masked_word, guessed_letters, word_length)
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
        
        with torch.no_grad():
            logits = self.forward(state_tensor)
            probs = F.softmax(logits, dim=1).cpu().numpy()[0]
        
        # Mask already guessed letters
        for char in guessed_letters:
            if char in self.char_to_idx and self.char_to_idx[char] < 26:
                probs[self.char_to_idx[char]] = 0.0
        
        # Renormalize
        if probs.sum() > 0:
            probs = probs / probs.sum()
        
        return {self.alphabet[i]: probs[i] for i in range(26)}

print("‚úì NeuralHMM class defined")

‚úì NeuralHMM class defined


## Part 2: DQN Agent

Deep Q-Network that uses Neural HMM guidance.

In [4]:
class DQN(nn.Module):
    """Deep Q-Network for action selection."""
    
    def __init__(self, state_size, action_size=26, hidden_size=128):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, action_size)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)  # Q-values for each action


class DQNAgent:
    """DQN Agent with experience replay and target network."""
    
    def __init__(self, neural_hmm, state_size=863, action_size=26):
        self.neural_hmm = neural_hmm
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
        self.action_size = action_size
        self.state_size = state_size
        
        # Q-networks
        self.policy_net = DQN(state_size, action_size).to(device)
        self.target_net = DQN(state_size, action_size).to(device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()
        
        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=0.001)
        self.memory = deque(maxlen=10000)
        
        # Hyperparameters
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.gamma = 0.95
        self.batch_size = 64
        self.target_update = 10
        self.steps = 0
    
    def get_state_vector(self, env):
        """Convert environment state to feature vector (same as Neural HMM)."""
        masked_word = env.get_masked_word_list()
        return self.neural_hmm.encode_state(masked_word, env.guessed_letters, len(env.word))
    
    def select_action(self, env, training=True):
        """Select action using epsilon-greedy with HMM guidance."""
        available_actions = [i for i, c in enumerate(self.alphabet) if c not in env.guessed_letters]
        
        if not available_actions:
            return None
        
        # Epsilon-greedy exploration
        if training and random.random() < self.epsilon:
            # Explore: Use HMM probabilities
            masked = env.get_masked_word_list()
            hmm_probs = self.neural_hmm.predict_probs(masked, env.guessed_letters, len(env.word))
            probs = np.array([hmm_probs[c] for c in self.alphabet])
            probs = probs[available_actions]
            
            if probs.sum() > 0:
                probs = probs / probs.sum()
                action = np.random.choice(available_actions, p=probs)
            else:
                action = random.choice(available_actions)
        else:
            # Exploit: Use Q-values + HMM guidance
            state = self.get_state_vector(env)
            state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
            
            with torch.no_grad():
                q_values = self.policy_net(state_tensor).cpu().numpy()[0]
            
            # Add HMM guidance
            masked = env.get_masked_word_list()
            hmm_probs = self.neural_hmm.predict_probs(masked, env.guessed_letters, len(env.word))
            for i, c in enumerate(self.alphabet):
                q_values[i] += hmm_probs[c] * 2.0  # HMM weight
            
            # Select best available action
            action = max(available_actions, key=lambda a: q_values[a])
        
        return action
    
    def remember(self, state, action, reward, next_state, done):
        """Store experience in replay memory."""
        self.memory.append((state, action, reward, next_state, done))
    
    def replay(self):
        """Train on batch of experiences."""
        if len(self.memory) < self.batch_size:
            return 0.0
        
        batch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)
        
        states = torch.FloatTensor(np.array(states)).to(device)
        actions = torch.LongTensor(actions).to(device)
        rewards = torch.FloatTensor(rewards).to(device)
        next_states = torch.FloatTensor(np.array(next_states)).to(device)
        dones = torch.FloatTensor(dones).to(device)
        
        # Current Q values
        current_q = self.policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
        
        # Target Q values
        with torch.no_grad():
            next_q = self.target_net(next_states).max(1)[0]
            target_q = rewards + (1 - dones) * self.gamma * next_q
        
        # Compute loss
        loss = F.mse_loss(current_q, target_q)
        
        # Optimize
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        
        return loss.item()
    
    def update_target_network(self):
        """Update target network."""
        self.target_net.load_state_dict(self.policy_net.state_dict())
    
    def decay_epsilon(self):
        """Decay exploration rate."""
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

print("‚úì DQN and DQNAgent classes defined")

‚úì DQN and DQNAgent classes defined


## Part 3: Train Neural HMM

Supervised training on corpus words.

In [5]:
def train_neural_hmm(model, words, epochs=10, batch_size=128):
    """Train Neural HMM using supervised learning."""
    print(f"Training Neural HMM for {epochs} epochs...")
    
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    
    model.train()
    
    for epoch in range(epochs):
        total_loss = 0
        batch_count = 0
        
        # Shuffle words
        random.shuffle(words)
        
        for word in tqdm(words, desc=f"Epoch {epoch+1}/{epochs}"):
            # Simulate game states by progressively revealing letters
            guessed = set()
            remaining = set(word)
            
            while remaining:
                # Create masked word
                masked = [c if c in guessed else None for c in word]
                
                # Pick a random unguessed letter from the word
                target_letter = random.choice(list(remaining))
                target_idx = model.char_to_idx[target_letter]
                
                # Encode state
                state = model.encode_state(masked, guessed, len(word))
                state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
                target_tensor = torch.LongTensor([target_idx]).to(device)
                
                # Forward pass
                logits = model(state_tensor)
                loss = criterion(logits, target_tensor)
                
                # Backward pass
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
                batch_count += 1
                
                # Update guessed and remaining
                guessed.add(target_letter)
                remaining.discard(target_letter)
                
                # Batch limit to avoid too many updates per word
                if len(guessed) >= 3:  # Train on first few guesses
                    break
        
        avg_loss = total_loss / batch_count if batch_count > 0 else 0
        print(f"  Epoch {epoch+1} - Avg Loss: {avg_loss:.4f}")
    
    print("‚úì Neural HMM training complete")

# Create and train Neural HMM
neural_hmm = NeuralHMM().to(device)
train_neural_hmm(neural_hmm, corpus_words, epochs=3)  # Start with 3 epochs

Training Neural HMM for 3 epochs...


Epoch 1/3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 49302/49302 [02:51<00:00, 287.25it/s]
Epoch 1/3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 49302/49302 [02:51<00:00, 287.25it/s]


  Epoch 1 - Avg Loss: 2.9766


Epoch 2/3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 49302/49302 [02:50<00:00, 289.94it/s]
Epoch 2/3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 49302/49302 [02:50<00:00, 289.94it/s]


  Epoch 2 - Avg Loss: 2.9737


Epoch 3/3: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 49302/49302 [02:49<00:00, 290.56it/s]

  Epoch 3 - Avg Loss: 2.9734
‚úì Neural HMM training complete





## Part 4: Test Neural HMM

In [6]:
def test_neural_hmm(model, test_words, max_test=500):
    """Test Neural HMM performance."""
    model.eval()
    results = []
    
    for word in tqdm(test_words[:max_test], desc="Testing Neural HMM"):
        env = HangmanEnv(word, max_lives=6)
        env.reset()
        
        while not env.done:
            masked = env.get_masked_word_list()
            probs = model.predict_probs(masked, env.guessed_letters, len(word))
            
            # Pick best available letter
            available = {k: v for k, v in probs.items() if k not in env.guessed_letters}
            if available:
                action = max(available, key=available.get)
            else:
                break
            
            env.step(action)
        
        stats = env.get_stats()
        results.append({'won': env.won, 'wrong': stats['wrong_count'], 'repeated': stats['repeated_count']})
    
    wins = sum(1 for r in results if r['won'])
    rate = wins / len(results)
    wrong = sum(r['wrong'] for r in results)
    repeated = sum(r['repeated'] for r in results)
    score = calculate_final_score(rate, wrong, repeated, len(results))
    
    return rate, score, wrong, repeated

print("Testing Neural HMM on 500 test words...")
hmm_rate, hmm_score, hmm_wrong, hmm_repeated = test_neural_hmm(neural_hmm, test_words, 500)

print("\n" + "="*70)
print("NEURAL HMM Performance:")
print("="*70)
print(f"Win rate: {hmm_rate*100:.2f}%")
print(f"Score: {hmm_score:.2f}")
print(f"Wrong guesses: {hmm_wrong}")
print(f"Repeated guesses: {hmm_repeated}")
print(f"\nComparison to traditional HMM: 27.05%")
print(f"Difference: {(hmm_rate*100 - 27.05):.2f} pp")

Testing Neural HMM on 500 test words...


Testing Neural HMM: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 500/500 [00:00<00:00, 1131.60it/s]


NEURAL HMM Performance:
Win rate: 13.80%
Score: -14291.00
Wrong guesses: 2872
Repeated guesses: 0

Comparison to traditional HMM: 27.05%
Difference: -13.25 pp





## Part 5: Train DQN Agent

Train with experience replay and target network.

In [7]:
def train_dqn_agent(agent, train_words, episodes=10000):
    """Train DQN agent."""
    print(f"Training DQN for {episodes} episodes...\n")
    
    episode_rewards = []
    recent_wins = deque(maxlen=100)
    losses = []
    
    for episode in tqdm(range(episodes), desc="Training DQN"):
        # Sample random word
        word = random.choice(train_words)
        env = HangmanEnv(word, max_lives=6)
        state = agent.get_state_vector(env)
        
        episode_reward = 0
        
        while not env.done:
            # Select action
            action_idx = agent.select_action(env, training=True)
            if action_idx is None:
                break
            
            action = agent.alphabet[action_idx]
            
            # Take action
            obs, reward, done, info = env.step(action)
            next_state = agent.get_state_vector(env)
            
            # Store experience
            agent.remember(state, action_idx, reward, next_state, done)
            
            # Train
            loss = agent.replay()
            if loss > 0:
                losses.append(loss)
            
            state = next_state
            episode_reward += reward
            agent.steps += 1
        
        # Update target network
        if episode % agent.target_update == 0:
            agent.update_target_network()
        
        # Track metrics
        episode_rewards.append(episode_reward)
        recent_wins.append(1 if env.won else 0)
        agent.decay_epsilon()
        
        # Log progress
        if (episode + 1) % 1000 == 0:
            win_rate = np.mean(recent_wins) if len(recent_wins) > 0 else 0
            avg_reward = np.mean(episode_rewards[-100:])
            avg_loss = np.mean(losses[-100:]) if len(losses) > 100 else 0
            
            print(f"\nEpisode {episode + 1}/{episodes}:")
            print(f"  Win rate (last 100): {win_rate*100:.2f}%")
            print(f"  Avg reward: {avg_reward:.2f}")
            print(f"  Avg loss: {avg_loss:.4f}")
            print(f"  Epsilon: {agent.epsilon:.4f}")
            print(f"  Memory size: {len(agent.memory)}")
    
    return episode_rewards

# Create and train DQN agent
dqn_agent = DQNAgent(neural_hmm)
rewards = train_dqn_agent(dqn_agent, corpus_words, episodes=10000)

Training DQN for 10000 episodes...



Training DQN:   0%|          | 6/10000 [00:00<00:25, 390.33it/s]



RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x837 and 863x128)

## Part 6: Evaluate DQN Agent

In [None]:
def evaluate_dqn_agent(agent, test_words):
    """Evaluate DQN agent on test set."""
    agent.policy_net.eval()
    results = []
    
    for word in tqdm(test_words, desc="Evaluating DQN"):
        env = HangmanEnv(word, max_lives=6)
        env.reset()
        
        while not env.done:
            action_idx = agent.select_action(env, training=False)
            if action_idx is None:
                break
            
            action = agent.alphabet[action_idx]
            env.step(action)
        
        stats = env.get_stats()
        results.append({
            'won': env.won,
            'wrong': stats['wrong_count'],
            'repeated': stats['repeated_count']
        })
    
    wins = sum(1 for r in results if r['won'])
    rate = wins / len(results)
    wrong = sum(r['wrong'] for r in results)
    repeated = sum(r['repeated'] for r in results)
    score = calculate_final_score(rate, wrong, repeated, len(results))
    
    return rate, score, wrong, repeated

print("\n" + "="*70)
print("EVALUATING ON FULL TEST SET (2000 words)")
print("="*70)

rate, score, wrong, repeated = evaluate_dqn_agent(dqn_agent, test_words)

print("\n" + "="*70)
print("FINAL RESULTS")
print("="*70)
print(f"Win rate: {rate*100:.2f}%")
print(f"Final score: {score:.2f}")
print(f"Wrong guesses: {wrong:,}")
print(f"Repeated guesses: {repeated}")

print("\n" + "="*70)
print("COMPARISON TO PREVIOUS APPROACHES")
print("="*70)
print(f"Traditional HMM + Tabular Q-learning: 27.05%")
print(f"Neural HMM alone:                     {hmm_rate*100:.2f}%")
print(f"PyTorch DQN + Neural HMM:             {rate*100:.2f}%")
print(f"\nüéØ Improvement: {(rate*100 - 27.05):+.2f} percentage points")

if rate >= 0.35:
    print("\nüéâ TARGET ACHIEVED: 35%+ with PyTorch!")
elif rate >= 0.30:
    print("\n‚úÖ GOOD PROGRESS: 30%+ win rate")
else:
    print("\nüìà Need more training or hyperparameter tuning")

## Part 7: Save Models

In [None]:
# Save Neural HMM
torch.save(neural_hmm.state_dict(), '../models/neural_hmm.pth')
print("‚úì Saved Neural HMM to models/neural_hmm.pth")

# Save DQN
torch.save({
    'policy_net': dqn_agent.policy_net.state_dict(),
    'target_net': dqn_agent.target_net.state_dict(),
    'optimizer': dqn_agent.optimizer.state_dict(),
    'epsilon': dqn_agent.epsilon
}, '../models/dqn_agent.pth')
print("‚úì Saved DQN Agent to models/dqn_agent.pth")

print("\n‚úÖ All PyTorch models saved successfully!")