# Simple Neural HMM for Hangman

**No DQN - Just a well-trained Neural Network**

## Why Simple?
- ✅ Dataset is small (50K words) - perfect for supervised learning
- ✅ Direct letter prediction is simpler and more effective
- ✅ No need for complex RL (sparse rewards, huge state space)
- ✅ Traditional HMM got 27% - we can beat it with neural nets!

## Architecture:
- **Input:** Masked word + guessed letters + word length (837 dims = 30×27 + 26 + 1)
- **Hidden:** 3 layers (512 → 256 → 128)
- **Output:** 26 letter probabilities
- **Training:** 10 epochs with data augmentation

In [None]:
import sys
sys.path.append('../src')

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from collections import Counter
from tqdm import tqdm
from hangman_env import HangmanEnv
from utils import calculate_final_score
import random

# Seed every random number generator used in this notebook with one value
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"âœ“ Device: {device}")
print(f"âœ“ PyTorch: {torch.__version__}")

In [None]:
# Load data
# Each non-blank line of a file becomes one list entry with surrounding
# whitespace removed. The encoding is pinned to UTF-8 so the result does not
# depend on the platform's locale default (e.g. cp1252 on Windows).
with open('../Data/corpus_cleaned.txt', 'r', encoding='utf-8') as f:
    corpus_words = [w for w in (line.strip() for line in f) if w]

with open('../Data/test_cleaned.txt', 'r', encoding='utf-8') as f:
    test_words = [w for w in (line.strip() for line in f) if w]

print(f"âœ“ Training words: {len(corpus_words):,}")
print(f"âœ“ Test words: {len(test_words):,}")

In [None]:
class ImprovedNeuralHMM(nn.Module):
    """Feed-forward network that scores the 26 letters for a Hangman state.

    A game state is encoded as the concatenation of a per-position one-hot
    of the masked word (27 symbols: a-z plus a blank), a 26-element
    guessed-letter indicator and the normalised word length. Three hidden
    layers (512 -> 256 -> 128, each followed by batch norm, ReLU and dropout)
    map that vector to 26 logits.
    """

    def __init__(self, max_word_len=30):
        """Build the layers.

        Args:
            max_word_len: Number of word positions that are encoded; longer
                words are truncated by ``encode_state``.
        """
        super().__init__()
        self.max_word_len = max_word_len
        self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
        self.char_to_idx = {c: i for i, c in enumerate(self.alphabet)}
        # Index 26 is the blank symbol used for hidden/unknown positions.
        self.char_to_idx['_'] = 26

        # Input size: word (max_word_len * 27) + guessed (26) + length (1).
        # With the default max_word_len of 30 this is 810 + 26 + 1 = 837.
        input_size = max_word_len * 27 + 26 + 1

        # Deeper network
        self.fc1 = nn.Linear(input_size, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = nn.BatchNorm1d(128)
        self.fc4 = nn.Linear(128, 26)

        self.dropout = nn.Dropout(0.3)

    def _letter_indices(self, guessed_letters):
        """Return the 0-25 indices of the entries that are real letters.

        Anything that is not in the alphabet (including the blank symbol
        ``'_'``) is ignored.
        """
        indices = []
        for char in guessed_letters:
            idx = self.char_to_idx.get(char, 26)
            if idx < 26:
                indices.append(idx)
        return indices

    def encode_state(self, masked_word, guessed_letters, word_length):
        """Encode a game state as a flat NumPy feature vector.

        Args:
            masked_word: Sequence with one entry per word position; a letter
                for revealed positions, anything else (``None``, ``'_'``,
                unknown characters) is encoded as the blank symbol. Only the
                first ``max_word_len`` entries are used.
            guessed_letters: Iterable of letters guessed so far.
            word_length: Length of the word, normalised by ``max_word_len``.

        Returns:
            1-D float array of length ``max_word_len * 27 + 26 + 1``.
        """
        # One-hot word: position i occupies slots [i * 27, i * 27 + 27).
        # Positions beyond the end of the word stay all-zero.
        word_enc = np.zeros(self.max_word_len * 27)
        for i, char in enumerate(masked_word[:self.max_word_len]):
            # .get() falls back to the blank index for None / '' / unknowns
            word_enc[i * 27 + self.char_to_idx.get(char, 26)] = 1.0

        # Binary guessed
        guessed_enc = np.zeros(26)
        for idx in self._letter_indices(guessed_letters):
            guessed_enc[idx] = 1.0

        # Normalized length
        length_enc = np.array([word_length / self.max_word_len])

        return np.concatenate([word_enc, guessed_enc, length_enc])

    def forward(self, x):
        """Return the 26 letter logits for a batch of encoded states.

        Args:
            x: Float tensor of encoded states, one row per state.
        """
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.relu(self.bn3(self.fc3(x)))
        x = self.dropout(x)
        x = self.fc4(x)
        return x

    def predict_probs(self, masked_word, guessed_letters, word_length):
        """Predict a probability for each letter given one game state.

        Already-guessed letters get probability 0 and the remaining
        probabilities are renormalised to sum to 1 (when any mass is left).

        Args:
            masked_word: See ``encode_state``.
            guessed_letters: See ``encode_state``.
            word_length: See ``encode_state``.

        Returns:
            Dict mapping each of the 26 letters to its probability.
        """
        state = self.encode_state(masked_word, guessed_letters, word_length)
        # Run on whatever device the weights live on rather than relying on
        # a module-level `device` global.
        param_device = next(self.parameters()).device
        state_tensor = torch.as_tensor(
            state, dtype=torch.float32, device=param_device
        ).unsqueeze(0)

        # BatchNorm1d cannot normalise a single-sample batch in training
        # mode (and dropout would add noise), so infer in eval mode and then
        # restore whatever mode the caller had set.
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                logits = self(state_tensor)
                probs = F.softmax(logits, dim=1).cpu().numpy()[0]
        finally:
            self.train(was_training)

        # Mask guessed
        for idx in self._letter_indices(guessed_letters):
            probs[idx] = 0.0

        # Normalize
        total = probs.sum()
        if total > 0:
            probs = probs / total

        return {self.alphabet[i]: probs[i] for i in range(26)}

print("âœ“ ImprovedNeuralHMM defined")

In [None]:
def generate_training_data(words, samples_per_word=3):
    """Build (state, next-letter) training samples from a word list.

    For every word, ``samples_per_word`` random reveal orders are simulated.
    Each step records the masked word as it looks before the step, the set
    of letters revealed so far, the word length and the letter revealed
    next. A simulation ends once four distinct letters have been revealed.
    The combined sample list is shuffled before it is returned.
    """
    samples = []

    for word in tqdm(words, desc="Generating training data"):
        for _ in range(samples_per_word):
            # Random order in which the word's letters get revealed
            reveal_order = list(word)
            random.shuffle(reveal_order)

            revealed = set()
            for target in reveal_order:
                # Repeated letters only produce a sample the first time
                if target in revealed:
                    continue

                samples.append({
                    'masked': [ch if ch in revealed else None for ch in word],
                    'guessed': set(revealed),
                    'length': len(word),
                    'target': target
                })
                revealed.add(target)

                # Only the first few guesses are sampled (early game focus)
                if len(revealed) >= 4:
                    break

    random.shuffle(samples)
    return samples

print("âœ“ Data generation function defined")

In [None]:
def train_model(model, training_samples, epochs=10, batch_size=256):
    """Train ``model`` in place with mini-batch cross-entropy.

    Uses Adam (lr 0.001, weight decay 1e-5) and a ReduceLROnPlateau scheduler
    that halves the learning rate when the epoch loss stops improving for
    more than two epochs. Samples are visited in the order given.

    Args:
        model: Network exposing ``encode_state`` and ``char_to_idx``.
        training_samples: Sequence of dicts with the keys ``'masked'``,
            ``'guessed'``, ``'length'`` and ``'target'``.
        epochs: Number of passes over ``training_samples``.
        batch_size: Number of samples per optimisation step.

    Raises:
        ValueError: If ``training_samples`` is empty.
    """
    if not training_samples:
        raise ValueError("training_samples is empty; nothing to train on")

    print(f"\nTraining for {epochs} epochs on {len(training_samples):,} samples...\n")

    # Put batches on the device that holds the model's weights instead of
    # depending on a module-level `device` global.
    model_device = next(model.parameters()).device

    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        loss_sum = 0.0
        samples_seen = 0

        # Mini-batch training
        for start in tqdm(range(0, len(training_samples), batch_size), desc=f"Epoch {epoch+1}/{epochs}"):
            batch = training_samples[start:start + batch_size]

            # BatchNorm1d cannot compute batch statistics from one sample in
            # training mode, so drop a trailing one-sample remainder batch.
            if len(batch) == 1 and start > 0 and batch_size > 1:
                continue

            # Prepare batch
            states = np.array([
                model.encode_state(s['masked'], s['guessed'], s['length'])
                for s in batch
            ])
            targets = [model.char_to_idx[s['target']] for s in batch]

            states = torch.as_tensor(states, dtype=torch.float32, device=model_device)
            targets = torch.as_tensor(targets, dtype=torch.long, device=model_device)

            # Forward + backward
            optimizer.zero_grad()
            logits = model(states)
            loss = criterion(logits, targets)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # a short final batch does not skew the epoch average.
            loss_sum += loss.item() * len(batch)
            samples_seen += len(batch)

        avg_loss = loss_sum / samples_seen
        scheduler.step(avg_loss)

        print(f"  Avg Loss: {avg_loss:.4f} | LR: {optimizer.param_groups[0]['lr']:.6f}")

    print("\nâœ“ Training complete!")

print("âœ“ Training function defined")

In [None]:
# Build the training set: three random reveal orders for every corpus word
SAMPLES_PER_WORD = 3
training_samples = generate_training_data(
    corpus_words,
    samples_per_word=SAMPLES_PER_WORD,
)

In [None]:
# Instantiate the network, move it to the selected device, then train it
model = ImprovedNeuralHMM()
model = model.to(device)
train_model(
    model,
    training_samples,
    epochs=10,
    batch_size=256,
)

In [None]:
def evaluate_model(model, test_words):
    """Play one greedy Hangman game per test word and summarise the outcome.

    The model is switched to eval mode. In every turn the not-yet-guessed
    letter with the highest predicted probability is played, until the
    environment reports the game as done.

    Returns:
        Tuple ``(rate, score, wrong, repeated)``: the fraction of games won,
        the value returned by ``calculate_final_score``, and the total wrong
        and repeated guess counts over all games.
    """
    model.eval()
    games = []

    for word in tqdm(test_words, desc="Evaluating"):
        env = HangmanEnv(word, max_lives=6)
        env.reset()

        while not env.done:
            masked = env.get_masked_word_list()
            letter_probs = model.predict_probs(masked, env.guessed_letters, len(word))

            # Keep only letters that have not been played yet
            candidates = {
                letter: prob
                for letter, prob in letter_probs.items()
                if letter not in env.guessed_letters
            }
            if not candidates:
                break

            env.step(max(candidates, key=candidates.get))

        stats = env.get_stats()
        games.append({
            'won': env.won,
            'wrong': stats['wrong_count'],
            'repeated': stats['repeated_count']
        })

    n_games = len(games)
    rate = sum(1 for game in games if game['won']) / n_games
    wrong = sum(game['wrong'] for game in games)
    repeated = sum(game['repeated'] for game in games)
    score = calculate_final_score(rate, wrong, repeated, n_games)

    return rate, score, wrong, repeated

print("âœ“ Evaluation function defined")

In [None]:
# Evaluate on full test set
# Win rate (%) of the traditional n-gram HMM that this model is compared to.
BASELINE_WIN_PCT = 27.05

print("\n" + "="*70)
# Report the real test-set size instead of a hard-coded count
print(f"EVALUATING ON {len(test_words)} TEST WORDS")
print("="*70)

rate, score, wrong, repeated = evaluate_model(model, test_words)
win_pct = rate * 100

print("\n" + "="*70)
print("FINAL RESULTS")
print("="*70)
print(f"Win rate: {win_pct:.2f}%")
print(f"Score: {score:.2f}")
print(f"Wrong guesses: {wrong:,}")
print(f"Repeated: {repeated}")

print("\n" + "="*70)
print("COMPARISON")
print("="*70)
print(f"Traditional HMM (n-grams):  {BASELINE_WIN_PCT:.2f}%")
print(f"Neural HMM (PyTorch):       {win_pct:.2f}%")
print(f"\nðŸŽ¯ Improvement: {(win_pct - BASELINE_WIN_PCT):+.2f} pp")

# The success message claims the baseline was beaten, so compare against the
# baseline itself; otherwise a positive improvement could print "Close!".
if win_pct > BASELINE_WIN_PCT:
    print("\nâœ… SUCCESS: Beat traditional HMM with neural network!")
else:
    print("\nðŸ“ˆ Close! May need more training or architecture tuning.")

In [None]:
# Save model
import os

model_path = '../models/neural_hmm_simple.pth'
# torch.save does not create missing parent directories; create the folder
# first so the trained weights are not lost to a missing-directory error.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
torch.save(model.state_dict(), model_path)
print("\nâœ“ Model saved to models/neural_hmm_simple.pth")