In [54]:
import os                                                                                                                                                                                                                                                                                                                                
import random
import string
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader

##########################################
# 1. The Custom LSTM Model
##########################################                                                                                
class HangmanGuessModel(nn.Module):
    """
    A model that takes the masked word + guessed letters as input
    and outputs a distribution over the 26 possible letters (a-z).
    It uses an embedding + LSTM to capture sequential patterns
    resembling the word structure.
    """
    def __init__(self, embed_dim=64, hidden_dim=128, num_layers=1):
        super(HangmanGuessModel, self).__init__()
        # Our vocabulary is just the alphabet + underscore (_), plus a few possible spaces
        self.alphabet = list(string.ascii_lowercase)  # 26 letters
        self.extra_symbols = ['_', ' ']               # underscores and space for masked word
        self.vocab = self.alphabet + self.extra_symbols
        self.vocab_size = len(self.vocab)  # 28 in total (26 letters + '_' + ' ')
        
        # Embedding layer
        self.embedding = nn.Embedding(self.vocab_size, embed_dim)
        
        # LSTM to process the sequence of characters
        self.lstm = nn.LSTM(input_size=embed_dim, hidden_size=hidden_dim, num_layers=num_layers, batch_first=True)
        
        # Fully connected output layer
        self.fc = nn.Linear(hidden_dim, 26)  # 26 possible guesses (a-z)
        
    def forward(self, masked_word_str, guessed_letters,device):
        """
        masked_word_str: something like '_ _ a _ e'
        guessed_letters: a list of characters already guessed.
        
        Returns: A 1 x 26 tensor representing the probability
                 for each letter a-z.
        """
        device=next(self.parameters()).device
        # Convert masked_word_str and guessed_letters to indices from self.vocab
        raw_word = masked_word_str.split()
        
        input_indices = []
        for symbol in raw_word:
            if symbol in self.vocab:
                input_indices.append(self.vocab.index(symbol))
            else:
                # If something unexpected occurs, map it to underscore
                input_indices.append(self.vocab.index('_'))
        
        # Add guessed letters as well
        for letter in guessed_letters:
            if letter in self.alphabet:
                input_indices.append(self.vocab.index(letter))
        
        # Convert to tensor of shape [1, seq_len]
        input_tensor = torch.tensor(input_indices, dtype=torch.long).unsqueeze(0).to(device)  # batch of size 1
        # Embeddings
        embedded = self.embedding(input_tensor)  # shape: [1, seq_len, embed_dim]
        
        # LSTM
        output, (h, c) = self.lstm(embedded)  # output: [1, seq_len, hidden_dim]
        
        # Take the last hidden state from LSTM
        last_hidden = output[:, -1, :]  # shape: [1, hidden_dim]
        
        # Fully connected layer
        logits = self.fc(last_hidden)  # shape: [1, 26]
        
        # Return raw logits; caller can apply softmax or other logic
        return logits


##########################################
# 2. Improved Guess Function
##########################################

def improved_guess_fn(model, masked_word, guessed_letters):
    """
    Uses the trained model to predict the next letter.
    Applies a probability mask to exclude already-guessed letters.
    """
    model.eval()
    with torch.no_grad():
        logits = model(masked_word, guessed_letters,device)  # shape: [1, 26]
        probabilities = torch.softmax(logits, dim=1)  # shape: [1, 26]
    
    # Create a mask for already-guessed letters
    mask = torch.ones_like(probabilities)
    
    # Zero out the probabilities of letters we've already guessed
    for letter in guessed_letters:
        if letter in string.ascii_lowercase:
            idx = ord(letter) - 97
            mask[0, idx] = 0
    
    masked_prob = probabilities * mask
    predicted_index = torch.argmax(masked_prob, dim=1).item()
    
    return chr(predicted_index + 97)  # Convert back to 'a'-'z'


##########################################
# 3. Example of a Custom Dataset
##########################################
class HangmanDataset(Dataset):
    """
    Creates samples for training from a list of words.
    Each sample is artificially generated by partially revealing
    the word and specifying guessed letters. The label is the next correct letter.
    
    This is an oversimplification, but it often suffices to teach
    the model to guess missing letters.
    """
    def __init__(self, word_list, num_samples=10000, max_mask_frac=0.5):
        """
        word_list: list of possible words
        num_samples: how many samples to create
        max_mask_frac: fraction of letters we mask at max
        """
        self.word_list = word_list
        self.samples = []
        self.max_mask_frac = max_mask_frac
        
        self.alphabet = list(string.ascii_lowercase)
        self.extra_symbols = ['_', ' ']
        self.vocab = self.alphabet + self.extra_symbols
        self.vocab_map = {c: i for i, c in enumerate(self.vocab)}
        
        self._generate_samples(num_samples)
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        return self.samples[idx]
    
    def _generate_samples(self, num_samples):
        for _ in range(num_samples):
            word = random.choice(self.word_list)
            if len(word) == 0:
                continue
            
            # Mask some letters
            mask_count = random.randint(1, max(1, int(len(word) * self.max_mask_frac)))
            letters_to_keep = random.sample(range(len(word)), len(word) - mask_count)
            
            # Build masked word
            masked_chars = []
            for i, c in enumerate(word):
                if i in letters_to_keep:
                    masked_chars.append(c)
                else:
                    masked_chars.append('_')
            
            # Pick a target letter to guess
            masked_positions = [i for i, c in enumerate(masked_chars) if c == '_']
            target_pos = random.choice(masked_positions)
            target_letter = word[target_pos]
            
            # Generate guessed letters
            guessed_letters = []
            for i in letters_to_keep:
                if random.random() < 0.8:  # 80% chance to include it
                    guessed_letters.append(word[i])
            
            masked_word_str = ' '.join(masked_chars)  # e.g. "_ a _ _ t"
            label_idx = ord(target_letter) - 97
            
            self.samples.append((masked_word_str, guessed_letters, label_idx))


##########################################
# 4. Collate Function for DataLoader
##########################################
def hangman_collate_fn(batch):
    """
    Collates the batch into inputs and labels.
    """
    masked_words = []
    guessed_letters_batch = []
    labels = []
    
    for masked_word_str, guessed_list, label_idx in batch:
        masked_words.append(masked_word_str)
        guessed_letters_batch.append(guessed_list)
        labels.append(label_idx)
    
    labels = torch.tensor(labels, dtype=torch.long)
    return masked_words, guessed_letters_batch, labels


##########################################
# 5. The Training Loop
##########################################
# def train_hangman_rnn(
#     model, 
#     train_loader,
#     epochs=5,
#     lr=1e-3,
#     device=torch.device("cpu")
# ):                                                                                
    # """                                                            
    # Trains the HangmanGuessModel on the provided dataloader.
    # """
    # model.to(device)
    # optimizer = optim.Adam(model.parameters(), lr=lr)
    # criterion = nn.CrossEntropyLoss()
    
    # model.train()
    # for epoch in range(epochs):
    #     total_loss = 0.0
    #     for batch_idx, (masked_words, guessed_letters_batch, labels) in enumerate(train_loader):
    #         batch_losses = []
    #         for mw_str, g_letters, lbl in zip(masked_words, guessed_letters_batch, labels):
    #             logits = model(mw_str, g_letters)
    #             logits = logits.to(device)
    #             lbl = lbl.to(device)
    #             loss = criterion(logits, lbl.unsqueeze(0))  # shape: [1, 26], [1]
    #             batch_losses.append(loss)
            
    #         # Combine all sample losses in the batch
    #         batch_loss = torch.stack(batch_losses).mean()
            
    #         # Backprop
    #         optimizer.zero_grad()
    #         batch_loss.backward()
    #         optimizer.step()
            
    #         total_loss += batch_loss.item()
        
    #     print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")


##########################################
# 6. Putting It All Together in CynapticsHangman
##########################################
class CynapticsHangman():
    """
    Modified version where we plug in our improved model and guess function.
    """
    def __init__(self):
        self.guessed_letters = []
        self.train_dataset_location = r"C:\Users\gupta\Downloads\train.txt" 
        self.validation_dataset_location = r"C:\Users\gupta\Downloads\valid.txt"                    
        self.train_dictionary = self.build_dictionary(self.train_dataset_location)
        self.validation_dictionary = self.build_dictionary(self.validation_dataset_location)
        
        self.lives_remaining = 6
        # Our improved model is an LSTM-based approach
        self.model = HangmanGuessModel()
    
    def build_dictionary(self, dictionary_file_location):
        text_file = open(dictionary_file_location,"r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary
    
    # New return_status method definition
    def return_status(self, unmasked_word, masked_word, guessed_letter):
        """
        This method updates the lives remaining and the current masked word based on the guessed letter.
        It returns the updated status of the game: "ongoing", "success", or "failed".
        """
        # Check if the guessed letter is correct
        if guessed_letter in unmasked_word.replace(' ', ''):
            # Reveal all instances of guessed_letter in the masked word
            word_no_spaces = unmasked_word.replace(' ', '')
            new_mask_chars = []
            masked_chars = masked_word.split()
            
            # Reveal the guessed letter in the masked word
            for i, c in enumerate(word_no_spaces):
                if c == guessed_letter or masked_chars[i] != '_':
                    new_mask_chars.append(c)
                else:
                    new_mask_chars.append('_')
            masked_word = ' '.join(new_mask_chars)
            
            # Check if the word is completely revealed (i.e., no more '_')
            if '_' in masked_word:
                return "ongoing", self.lives_remaining, "Guess correct", masked_word
            else:
                return "success", self.lives_remaining, "Guess correct", masked_word
        else:
            # If guessed letter is incorrect, reduce lives
            self.lives_remaining -= 1
            if self.lives_remaining == 0:
                return "failed", self.lives_remaining, "Guess incorrect", masked_word
            else:
                return "ongoing", self.lives_remaining, "Guess incorrect", masked_word
    
    def guess(self, masked_word, lives_left):
        # Use the improved guess function with our trained model
        next_guess = improved_guess_fn(self.model, masked_word, self.guessed_letters)
        return next_guess
    
    # load/save weights
    def init_model(self, model_path):
        self.model = HangmanGuessModel()
        state_dict = torch.load(model_path, map_location=device if device else "cpu")
        self.model.load_state_dict( state_dict)
        self.model.eval()
        print(f"Model weights loaded from {model_path}")
    
    def save_model_weights(self, file_path):
        torch.save(self.model.state_dict(), file_path)
        print(f"Model weights saved to {file_path}")
    
    ######################################
    # The training routine
    ######################################
    def train(self, total_epochs=10, batch_size=32, train_samples=20000):
        print("Creating training dataset...")
        dataset = HangmanDataset(self.train_dictionary, num_samples=train_samples)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=hangman_collate_fn)
        
        print("Training started...")
        train_hangman_rnn(
            model=self.model,
            train_loader=loader,
            epochs=total_epochs,
            lr=1e-3,
            device=device if device else torch.device("cpu")                    
        )
        
        # Save the weights
        weights_file = "hangman_model_weights_lstm.pth"
        self.save_model_weights(weights_file)
    
    ###################
    # Hangman Game
    ###################
    def start_game(self, game_id, verbose=True):
        self.guessed_letters = []
        self.guessed_letters.append(' ')
        word = random.choice(self.validation_dictionary)
        masked_word = '_' * len(word)
        masked_word = ' '.join(masked_word)
        word = ' '.join(word)
        self.lives_remaining = 6
        
        if verbose:
            print("New game started! Game ID: {0}\nLives: {1}\nWord: {2}\nMasked: {3}".format(game_id, self.lives_remaining, word, masked_word))
        
        self.max_tries = len(word) + 6
        
        while self.lives_remaining > 0 and self.max_tries > 0:
            guessed_letter = self.guess(masked_word, self.lives_remaining)
            self.guessed_letters.append(guessed_letter)
            
            if verbose:
                print(f"Guessing letter: {guessed_letter}")
            
            # Using the new return_status method
            status, self.lives_remaining, res, masked_word = self.return_status(word, masked_word, guessed_letter)
            
            if verbose:
                print("Status:", res, "Masked Word:", masked_word, "Lives:", self.lives_remaining)
            
            if status == "success":
                if verbose:
                    print(f"Game {game_id} success!\n")
                return True, 1
            elif status == "failed":
                if verbose:
                    print(f"Game {game_id} failed.\n")
                return False, 0
            
        # If we exit the loop without success or fail, we ran out of tries
        if verbose:
            print(f"Game {game_id} ended (out of tries).")
        return False, 0


In [66]:
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")                                                                                                    
  
  
  
  
  
    print("Using device:", device)
    
    # Initialize the CynapticsHangman game agent
    agent = CynapticsHangman()


In [70]:
   # Or, if the model is already trained, load the weights    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = r"C:\users\gupta\hangman_model_weights_lstm.pth"   
agent = CynapticsHangman()
agent.init_model(model_path)                    
# Start a game
total_games = 100
successes = 0
for game_id in range(1, total_games + 1):
    success, _ = agent.start_game(game_id, verbose=True)
    if success:
        successes += 1

    # Print the success rate
print(f"\nSuccess rate: {successes}/{total_games} games")


  state_dict = torch.load(model_path, map_location=device if device else "cpu")


Model weights loaded from C:\users\gupta\hangman_model_weights_lstm.pth
New game started! Game ID: 1
Lives: 6
Word: t h e r m o c l i n a l
Masked: _ _ _ _ _ _ _ _ _ _ _ _
Guessing letter: t
Status: Guess correct Masked Word: t _ _ _ _ _ _ _ _ _ _ _ Lives: 6
Guessing letter: r
Status: Guess correct Masked Word: t _ _ r _ _ _ _ _ _ _ _ Lives: 6
Guessing letter: i
Status: Guess correct Masked Word: t _ _ r _ _ _ _ i _ _ _ Lives: 6
Guessing letter: n
Status: Guess correct Masked Word: t _ _ r _ _ _ _ i n _ _ Lives: 6
Guessing letter: s
Status: Guess incorrect Masked Word: t _ _ r _ _ _ _ i n _ _ Lives: 5
Guessing letter: e
Status: Guess correct Masked Word: t _ e r _ _ _ _ i n _ _ Lives: 5
Guessing letter: m
Status: Guess correct Masked Word: t _ e r m _ _ _ i n _ _ Lives: 5
Guessing letter: o
Status: Guess correct Masked Word: t _ e r m o _ _ i n _ _ Lives: 5
Guessing letter: c
Status: Guess correct Masked Word: t _ e r m o c _ i n _ _ Lives: 5
Guessing letter: l
Status: Guess correct Ma