In [27]:
import string
import random
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# List of common words
word_list = open(r'/content/sample_data/Hangman Word List.txt').readlines()

# Split the word list into training and testing sets
train_words, test_words = train_test_split(word_list, test_size=0.2, random_state=42)

# Function to train the n-gram model
def train_ngram_model(word_list, n=3):
    model = {}
    for word in word_list:
        word = '<' + word + '>' # Add start and end tokens
        grams = ngrams(word, n)
        for gram in grams:
            key = gram[:-1]
            next_char = gram[-1]
            if key in model:
                model[key][next_char] += 1
            else:
                model[key] = Counter({next_char: 1})
    return model

# Function to get the word length from the user
def get_word_length():
    while True:
        try:
            word_length = int(input("Enter the length of the word: "))
            if word_length > 0:
                return word_length
            else:
                print("Word length must be a positive integer.")
        except ValueError:
            print("Invalid input. Please enter a positive integer.")

# Function to get the user's feedback on the machine's guess
def get_user_feedback(guess, word_length):
    feedback = ['_'] * word_length
    while True:
        user_input = input(f"Is '{guess}' in the word? Enter positions (e.g., 1 3) or 'n' if not: ").lower()
        if user_input == 'n':
            return feedback
        try:
            positions = [int(pos) - 1 for pos in user_input.split()]
            if all(0 <= pos < word_length for pos in positions):
                for pos in positions:
                    feedback[pos] = guess
                return feedback
            else:
                print("Invalid positions. Please try again.")
        except ValueError:
            print("Invalid input. Please try again.")

# Function to generate the machine's guess using the n-gram model, word similarity, and most used letters
def generate_guess(ngram_model, word_state, used_letters, same_length_words, most_used_letters):
    # Prioritize guessing vowels in the initial guesses
    vowels = set('aeiou')
    if len(used_letters) < 3:
        for vowel in vowels:
            if vowel not in used_letters:
                return vowel

    # Use the n-gram model
    context = '<' + ''.join(word_state)
    if len(context) >= 3:
        context = context[-3:]
        if context in ngram_model:
            next_char_counter = ngram_model[context]
            for char, _ in next_char_counter.most_common():
                if char not in used_letters:
                    return char

    # Use word similarity with same-length words
    vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 3))
    word_vectors = vectorizer.fit_transform(same_length_words)
    query_vector = vectorizer.transform([''.join(word_state)])
    similarities = cosine_similarity(query_vector, word_vectors)[0]
    sorted_indices = similarities.argsort()[::-1]

    for idx in sorted_indices:
        word = same_length_words[idx]
        for char in word:
            if char not in used_letters and char not in word_state:
                return char

    # Prioritize guessing the most used letters
    for char in most_used_letters:
        if char not in used_letters and char not in word_state:
            return char

    # If all else fails, choose a random unused letter
    return random.choice([char for char in string.ascii_lowercase if char not in used_letters])

# Function to play the game
def play_game(word_list, ngram_model, most_used_letters):
    word = random.choice(word_list)
    word_length = len(word)
    same_length_words = [w for w in word_list if len(w) == word_length]
    used_letters = set()
    remaining_guesses = 10
    word_state = ['_'] * word_length

    #print(f"The word has {word_length} letters: {''.join(word_state)}")

    while remaining_guesses > 0:
        guess = generate_guess(ngram_model, word_state, used_letters, same_length_words, most_used_letters)
        used_letters.add(guess)
        feedback = ['_'] * word_length
        for i, char in enumerate(word):
            if char == guess:
                feedback[i] = char

        if all(char == '_' for char in feedback):
            remaining_guesses -= 1
            #print(f"Oops! The letter '{guess}' is not in the word. Remaining guesses: {remaining_guesses}")
        else:
            for i, char in enumerate(feedback):
                if char != '_':
                    word_state[i] = char
            #print(f"Good guess! The word is now: {''.join(word_state)}")

        if '_' not in word_state:
            print(f"Congratulations! The machine guessed the word: {''.join(word_state)}")
            return True

    print(f"Sorry, the machine ran out of guesses. The word was: {word}")
    return False

# Train the n-gram model on the training set
ngram_model = train_ngram_model(train_words)

# Get the most used letters from the training set
letter_counts = Counter(''.join(train_words))
most_used_letters = [char for char, count in letter_counts.most_common()]

# Play the game and evaluate the accuracy
num_correct = 0
num_games = 100

for _ in range(num_games):
    if play_game(test_words, ngram_model, most_used_letters):
        num_correct += 1

accuracy = num_correct / num_games
print(f"Accuracy: {accuracy:.2f}")

Sorry, the machine ran out of guesses. The word was: vr

Sorry, the machine ran out of guesses. The word was: unanticipating

Congratulations! The machine guessed the word: japes

Congratulations! The machine guessed the word: retroacts

Congratulations! The machine guessed the word: silic

Congratulations! The machine guessed the word: anthrarufin

Sorry, the machine ran out of guesses. The word was: grubbing

Congratulations! The machine guessed the word: gramercies

Congratulations! The machine guessed the word: rippier

Congratulations! The machine guessed the word: thrawart

Congratulations! The machine guessed the word: homologs

Congratulations! The machine guessed the word: scrawls

Sorry, the machine ran out of guesses. The word was: cardshark

Congratulations! The machine guessed the word: biddies

Sorry, the machine ran out of guesses. The word was: rembert

Congratulations! The machine guessed the word: hydramnion

Congratulations! The machine guessed the word: transforming

In [29]:
import string
import random
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

# List of common words
word_list = open(r'/content/sample_data/Hangman Word List.txt').readlines()

# Split the word list into training and testing sets
train_words, test_words = train_test_split(word_list, test_size=0.2, random_state=42)

# Function to train the n-gram model
def train_ngram_model(word_list, n=3):
    model = {}
    for word in word_list:
        word = '<' + word + '>' # Add start and end tokens
        grams = ngrams(word, n)
        for gram in grams:
            key = gram[:-1]
            next_char = gram[-1]
            if key in model:
                model[key][next_char] += 1
            else:
                model[key] = Counter({next_char: 1})
    return model

# Function to get the word length from the user
def get_word_length():
    while True:
        try:
            word_length = int(input("Enter the length of the word: "))
            if word_length > 0:
                return word_length
            else:
                print("Word length must be a positive integer.")
        except ValueError:
            print("Invalid input. Please enter a positive integer.")

# Function to get the user's feedback on the machine's guess
def get_user_feedback(guess, word_length):
    feedback = ['_'] * word_length
    while True:
        user_input = input(f"Is '{guess}' in the word? Enter positions (e.g., 1 3) or 'n' if not: ").lower()
        if user_input == 'n':
            return feedback
        try:
            positions = [int(pos) - 1 for pos in user_input.split()]
            if all(0 <= pos < word_length for pos in positions):
                for pos in positions:
                    feedback[pos] = guess
                return feedback
            else:
                print("Invalid positions. Please try again.")
        except ValueError:
            print("Invalid input. Please try again.")

# Function to generate the machine's guess using the n-gram model, word similarity, and most used letters
def generate_guess(ngram_model, word_state, used_letters, same_length_words, most_used_letters):
    # Prioritize guessing vowels in the initial guesses
    vowels = set('aeiou')
    if len(used_letters) < 3:
        for vowel in vowels:
            if vowel not in used_letters:
                return vowel

    # Use the n-gram model
    context = '<' + ''.join(word_state)
    if len(context) >= 3:
        context = context[-3:]
        if context in ngram_model:
            next_char_counter = ngram_model[context]
            for char, _ in next_char_counter.most_common():
                if char not in used_letters:
                    return char

    # Use word similarity with same-length words
    vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 3))
    word_vectors = vectorizer.fit_transform(same_length_words)
    query_vector = vectorizer.transform([''.join(word_state)])
    similarities = cosine_similarity(query_vector, word_vectors)[0]
    sorted_indices = similarities.argsort()[::-1]

    for idx in sorted_indices:
        word = same_length_words[idx]
        for char in word:
            if char not in used_letters and char not in word_state:
                return char

    # Prioritize guessing the most used letters
    for char in most_used_letters:
        if char not in used_letters and char not in word_state:
            return char

    # If all else fails, choose a random unused letter
    return random.choice([char for char in string.ascii_lowercase if char not in used_letters])

# Function to play the game
def play_game(word_list, ngram_model, most_used_letters):
    word = random.choice(word_list)
    word_length = len(word)
    same_length_words = [w for w in word_list if len(w) == word_length]
    used_letters = set()
    remaining_guesses = 10
    word_state = ['_'] * word_length

    #print(f"The word has {word_length} letters: {''.join(word_state)}")

    while remaining_guesses > 0:
        guess = generate_guess(ngram_model, word_state, used_letters, same_length_words, most_used_letters)
        used_letters.add(guess)
        feedback = ['_'] * word_length
        for i, char in enumerate(word):
            if char == guess:
                feedback[i] = char

        if all(char == '_' for char in feedback):
            remaining_guesses -= 1
            #print(f"Oops! The letter '{guess}' is not in the word. Remaining guesses: {remaining_guesses}")
        else:
            for i, char in enumerate(feedback):
                if char != '_':
                    word_state[i] = char
            #print(f"Good guess! The word is now: {''.join(word_state)}")

        if '_' not in word_state:
            print(f"Congratulations! The machine guessed the word: {''.join(word_state)}")
            return True

    print(f"Sorry, the machine ran out of guesses. The word was: {word}")
    return False

# Train the n-gram model on the training set
ngram_model = train_ngram_model(train_words)

# Get the most used letters from the training set
letter_counts = Counter(''.join(train_words))
most_used_letters = [char for char, count in letter_counts.most_common()]

# Play the game and evaluate the accuracy
num_correct = 0
num_games = 10

for _ in range(num_games):
    if play_game(test_words, ngram_model, most_used_letters):
        num_correct += 1

accuracy = num_correct / num_games
print(f"Accuracy: {accuracy:.2f}")

Congratulations! The machine guessed the word: jay

Congratulations! The machine guessed the word: shies

Congratulations! The machine guessed the word: scrapiness

Congratulations! The machine guessed the word: trisha

Congratulations! The machine guessed the word: fletcherized

Congratulations! The machine guessed the word: misthrive

Congratulations! The machine guessed the word: churners

Congratulations! The machine guessed the word: apina

Congratulations! The machine guessed the word: blackbine

Congratulations! The machine guessed the word: gulancha

Accuracy: 1.00
