In [43]:
import random
import re
import collections
import nltk
from nltk.corpus import words
nltk.download('words')

[nltk_data] Downloading package words to
[nltk_data]     C:\Users\naidu\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!


True

In [48]:
class HangmanGame:
    """Self-playing Hangman: picks a secret word and guesses it from n-gram statistics.

    The guesser is trained on the words in ``words_250000_train.txt``; secret
    words are drawn from the NLTK ``words`` corpus, excluding every word that
    appears in the training file.

    Attributes:
        guessed_letters: Letters guessed so far in the current game, in order.
        successful_games: Number of games won.
        total_games: Number of games actually played.
        word_list: Set of candidate secret words (NLTK ``words`` corpus).
        max_lives: Number of wrong guesses allowed per game.
        full_dictionary: List of training words read from the dictionary file.
        excluded_words: Set of training words; never used as secrets.
        full_dictionary_common_letter_sorted: ``(letter, count)`` pairs over the
            training words, most frequent first.
        ngram_2 .. ngram_5: ``{ngram: count}`` tables built from the training words.
        current_dictionary: Reset to ``full_dictionary`` at the start of each game.
    """

    # Multiplier applied to an n-gram's count when it votes for a letter, keyed
    # by n-gram length. Dict order (longest first) is also the scoring order.
    _NGRAM_WEIGHTS = {5: 5, 4: 4, 3: 3, 2: 1}
    _ALPHABET = 'abcdefghijklmnopqrstuvwxyz'

    def __init__(self):
        """Load the word lists and precompute letter and n-gram statistics."""
        self.guessed_letters = []
        self.successful_games = 0
        self.total_games = 0
        self.word_list = set(words.words())
        self.max_lives = 6

        full_dictionary_location = "words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        # Same contents build_excluded_words_set() would return, without
        # reading the file a second time.
        self.excluded_words = set(self.full_dictionary)
        self.full_dictionary_common_letter_sorted = collections.Counter(
            "".join(self.full_dictionary)
        ).most_common()
        self.ngram_2, self.ngram_3, self.ngram_4, self.ngram_5 = self.build_ngram_dicts()

        self.current_dictionary = []

    def build_excluded_words_set(self, full_dictionary_location):
        """Return the set of lines (one word per line) found in the given file."""
        return set(self.build_dictionary(full_dictionary_location))

    def generate_random_word(self):
        """Return a uniformly random entry of the NLTK ``words`` corpus."""
        return random.choice(words.words())

    def build_ngram_dicts(self):
        """Count every 2-, 3-, 4- and 5-letter substring of the training words.

        Words rejected by ``is_valid_word`` are skipped.

        Returns:
            A 4-tuple of plain dicts ``(ngram_2, ngram_3, ngram_4, ngram_5)``
            mapping each n-gram to its number of occurrences.
        """
        sizes = (2, 3, 4, 5)
        tables = {n: collections.Counter() for n in sizes}

        for word in self.full_dictionary:
            if not self.is_valid_word(word):
                continue
            for n in sizes:
                table = tables[n]
                for start in range(len(word) - n + 1):
                    table[word[start:start + n]] += 1

        return tuple(dict(tables[n]) for n in sizes)

    def is_valid_word(self, word, threshold=3):
        """Return False if any character repeats more than ``threshold`` times in a row.

        An empty string (e.g. a blank line in the dictionary file) is reported
        as invalid instead of raising ``IndexError``.
        """
        if not word:
            return False

        run_length = 1
        for previous, current in zip(word, word[1:]):
            if current == previous:
                run_length += 1
                if run_length > threshold:
                    return False
            else:
                run_length = 1
        return True

    def generate_patterns(self, word, n):
        """Return every length-``n`` window of ``word`` with exactly one ``_``.

        Args:
            word: The partially revealed word, one character per letter, with
                ``_`` for unknown positions (no separators).
            n: Window length; only 2, 3, 4 and 5 are supported.

        Returns:
            The matching windows in left-to-right order, or ``[]`` for an
            unsupported ``n``. Every window is exactly ``n`` characters long.
        """
        if n not in (2, 3, 4, 5):
            return []
        # len(word) - n + 1 full windows exist; going further would yield
        # truncated windows that are not real n-gram patterns.
        windows = (word[start:start + n] for start in range(len(word) - n + 1))
        return [window for window in windows if window.count("_") == 1]

    def guess(self, word, verbose=True):
        """Pick the next letter to guess.

        Each one-blank window of the word is matched against the n-gram table
        of the same length; every matching n-gram votes for the letter at the
        blank position with ``count * weight`` (5-grams x5, 4-grams x4,
        3-grams x3, 2-grams x1). The unguessed letter with the highest score
        wins. If no unguessed letter received a vote, the most frequent
        unguessed training letter is used, then a random unguessed letter.

        Args:
            word: The current state with characters separated by single
                spaces, e.g. ``"c _ t"``.
            verbose: When True (default), print each generated pattern.

        Returns:
            A single-character string.
        """
        # Characters sit at even indices of the space-separated display form.
        clean_word = word[::2]
        letter_counts = {}

        for n, weight in self._NGRAM_WEIGHTS.items():
            ngram_table = getattr(self, f"ngram_{n}")
            for pattern in self.generate_patterns(clean_word, n):
                if verbose:
                    print(f"Generated {n}-gram pattern: {pattern}")
                # The pattern has exactly one blank, so a literal prefix/suffix
                # comparison replaces compiling a regex for every n-gram.
                blank = pattern.index("_")
                prefix, suffix = pattern[:blank], pattern[blank + 1:]
                for ngram, count in ngram_table.items():
                    if (
                        len(ngram) == len(pattern)
                        and ngram.startswith(prefix)
                        and ngram.endswith(suffix)
                    ):
                        letter = ngram[blank]
                        letter_counts[letter] = letter_counts.get(letter, 0) + count * weight

        already_guessed = set(self.guessed_letters)
        candidates = {
            letter: score
            for letter, score in letter_counts.items()
            if letter not in already_guessed
        }
        if candidates:
            return max(candidates, key=candidates.get)

        # No usable n-gram evidence: fall back to overall letter frequency
        # rather than returning a placeholder that is guaranteed to be wrong.
        for letter, _ in self.full_dictionary_common_letter_sorted:
            if letter not in already_guessed:
                return letter

        remaining = [letter for letter in self._ALPHABET if letter not in already_guessed]
        return random.choice(remaining or self._ALPHABET)

    def build_dictionary(self, dictionary_file_location):
        """Read the file and return its lines (without line endings) as a list."""
        with open(dictionary_file_location, "r") as text_file:
            full_dictionary = text_file.read().splitlines()
        return full_dictionary

    def display_word(self, word, guessed_letters):
        """Return ``word`` space-separated, with unguessed characters shown as ``_``."""
        shown = (letter if letter in guessed_letters else "_" for letter in word)
        return " ".join(shown).strip()

    def start_game(self, word=None, verbose=True):
        """Play one full game and report whether the word was guessed.

        Args:
            word: Secret word to use. If it is None, not in ``word_list``, or
                present in ``excluded_words``, a random valid word is chosen.
            verbose: When True (default), print the progress of the game.

        Returns:
            True if every letter was revealed before running out of lives,
            False otherwise (including when no valid word is available).
        """
        def log(*parts):
            if verbose:
                print(*parts)

        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary

        # Constant-time membership checks; the full candidate list is only
        # materialised when a random pick is actually needed.
        if word is None or word not in self.word_list or word in self.excluded_words:
            log("Choosing a random word from valid words list.")
            valid_words = [w for w in self.word_list if w not in self.excluded_words]
            if not valid_words:
                log("No valid words available.")
                return False
            word = random.choice(valid_words)

        # Counted only once a game can really be played, so the success rate
        # is not diluted by games that never started.
        self.total_games += 1

        # NOTE(review): guesses come from the training n-grams / letter
        # frequencies; if a secret contains characters that never occur there
        # (e.g. capital letters) it cannot be completed. Confirm whether
        # secrets should be normalised before play.
        lives = self.max_lives
        guessed_word = ["_"] * len(word)

        log("Word:", self.display_word(word, self.guessed_letters))

        while lives > 0:
            guess_letter = self.guess(" ".join(guessed_word), verbose=verbose)
            is_repeat = guess_letter in self.guessed_letters
            if not is_repeat:
                self.guessed_letters.append(guess_letter)

            # A repeated letter makes no progress, so it is treated as a miss;
            # this guarantees the loop terminates.
            if guess_letter in word and not is_repeat:
                for i, letter in enumerate(word):
                    if letter == guess_letter:
                        guessed_word[i] = guess_letter
                if "_" not in guessed_word:
                    self.successful_games += 1
                    log("Word:", self.display_word(word, self.guessed_letters))
                    log("Congratulations! You've guessed the word!")
                    return True
            else:
                lives -= 1
                log("Incorrect guess. Lives remaining:", lives)
                log("Guessed letters:", " ".join(self.guessed_letters))

            log("Word:", self.display_word(word, self.guessed_letters))

        log("Out of lives. The word was:", word)
        return False

    def print_success_rate(self):
        """Print the percentage of games won; prints nothing if no game was played."""
        if self.total_games > 0:
            success_rate = (self.successful_games / self.total_games) * 100
            print("Success Rate:", success_rate, "%")

In [49]:
# Build the game object. The constructor loads the NLTK `words` corpus and
# reads "words_250000_train.txt" from the working directory to build the
# letter-frequency and n-gram tables used for guessing.
hangman_game = HangmanGame()

# Play one game. With word=None a random word is drawn from the NLTK words
# that are not in the training dictionary. The call returns True when the word
# was guessed and False otherwise; as the last expression of the cell, that
# value is also shown as the cell output.
hangman_game.start_game(word=None)
    

Choosing a random word from valid words list.
Word: _ _ _ _ _ _ _ _ _ _
Word: _ _ _ _ _ _ e _ _ _
Generated 2-gram pattern: _e
Generated 2-gram pattern: e_
Incorrect guess. Lives remaining: 5
Guessed letters: e r
Word: _ _ _ _ _ _ e _ _ _
Generated 2-gram pattern: _e
Generated 2-gram pattern: e_
Incorrect guess. Lives remaining: 4
Guessed letters: e r n
Word: _ _ _ _ _ _ e _ _ _
Generated 2-gram pattern: _e
Generated 2-gram pattern: e_
Incorrect guess. Lives remaining: 3
Guessed letters: e r n s
Word: _ _ _ _ _ _ e _ _ _
Generated 2-gram pattern: _e
Generated 2-gram pattern: e_
Incorrect guess. Lives remaining: 2
Guessed letters: e r n s d
Word: _ _ _ _ _ _ e _ _ _
Generated 2-gram pattern: _e
Generated 2-gram pattern: e_
Word: t _ _ _ _ _ e t _ _
Generated 3-gram pattern: _et
Generated 3-gram pattern: et_
Generated 2-gram pattern: t_
Generated 2-gram pattern: _e
Generated 2-gram pattern: t_
Word: t _ i _ _ _ e t _ _
Generated 3-gram pattern: t_i
Generated 3-gram pattern: _et
Generated

False

In [47]:
# Report the share of games won so far on this HangmanGame instance.
# NOTE(review): this cell's recorded execution count (47) is lower than that of
# the class-definition cell (48) and the game cell (49), so the saved output may
# come from an earlier kernel state -- re-run after the cells above.
hangman_game.print_success_rate()

Success Rate: 100.0 %
