In [None]:
import json
import requests
import random
import string
import secrets
import time
import re
import collections

from collections import defaultdict

try:
    from urllib.parse import parse_qs, urlencode, urlparse
except ImportError:
    from urlparse import parse_qs, urlparse
    from urllib import urlencode

from requests.packages.urllib3.exceptions import InsecureRequestWarning

requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

In [None]:
import numpy as np

def softmax_weights(max_n, temperature=4):
    """Return softmax weights over the n-gram orders ``1..max_n``.

    Higher orders receive exponentially more weight; ``temperature`` flattens
    the distribution as it grows.

    Args:
        max_n: Largest n-gram order to weight (inclusive).
        temperature: Divisor applied to the orders before exponentiation.

    Returns:
        dict mapping each order ``n`` to its weight. The weights sum to 1.
        An empty dict is returned when ``max_n < 1``.
    """
    n_values = np.arange(1, max_n + 1)
    if n_values.size == 0:
        return {}

    scaled = n_values / temperature
    # Subtracting the maximum leaves the softmax unchanged mathematically but
    # keeps exp() from overflowing to inf (which would turn every weight into NaN).
    exp_values = np.exp(scaled - np.max(scaled))
    weights = exp_values / np.sum(exp_values)  # Normalize

    return dict(zip(n_values, weights))


def balanced_sigmoid_weights(ratio, ngram_range=(2, 5), mid_ratio=0.55, scale=1.5):
    """Weight each n-gram order with a sigmoid that tilts as ``ratio`` moves away from ``mid_ratio``.

    When ``ratio`` equals ``mid_ratio`` every order gets the same weight.
    Above it, orders larger than the mean order gain weight; below it,
    smaller orders gain weight.

    Args:
        ratio: Value compared against ``mid_ratio`` to decide the tilt.
        ngram_range: ``(lowest, highest)`` orders, both inclusive.
        mid_ratio: Ratio at which all orders are weighted equally.
        scale: Steepness of the sigmoid.

    Returns:
        dict mapping each order to its weight; the weights sum to 1.
    """
    lowest_n, highest_n = ngram_range[0], ngram_range[1]
    orders = np.arange(lowest_n, highest_n + 1)

    # The sigmoid is centred on the mean order (3.5 for the default 2..5 range).
    centre = np.mean(orders)
    exponent = -scale * (ratio - mid_ratio) * (orders - centre)
    raw = 1 / (1 + np.exp(exponent))

    # Rescale so the weights form a distribution.
    return dict(zip(orders, raw / np.sum(raw)))

# API Usage Examples

In [None]:
class HangmanAPI(object):
    def __init__(self, access_token=None, session=None, timeout=None):
        self.hangman_url = self.determine_hangman_url()
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []

        full_dictionary_location = "words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)

        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()

        self.current_dictionary = []
        self.fuzzy_dictionary = []

        self.ngram_models = {n: self.build_ngram(self.full_dictionary, n) for n in range(1, 8)}
        print(self.ngram_models.keys())
        print(self.ngram_models[7])

        self.forward_counts, self.backward_counts = self.compute_conditional_counts(self.full_dictionary)
        self.first_probs, self.last_probs = self.compute_first_last_counts()


    @staticmethod
    def determine_hangman_url():
        links = ['https://trexsim.com', 'https://sg.trexsim.com']

        data = {link: 0 for link in links}

        for link in links:

            requests.get(link)

            for i in range(10):
                s = time.time()
                requests.get(link)
                data[link] = time.time() - s

        link = sorted(data.items(), key=lambda x: x[1])[0][0]
        link += '/trexsim/hangman'
        return link

    def guess(self, word): # word input example: "_ p p _ e "
        ###############################################
        # Replace with your own "guess" function here #
        ###############################################

        # clean the word so that we strip away the space characters
        # replace "_" with "." as "." indicates any character in regular expressions
        print('input word: ', word)
        clean_word = word[::2].replace("_",".")
        print('clean word: ', clean_word)

        # find length of input word
        len_input = len(word)
        # find length of passed word
        len_word = len(clean_word)
        print('len input ', len_input)
        print('len clean ', len_word)

        # grab current dictionary of possible words from self object, initialize new possible words dictionary to empty
        current_dictionary = self.current_dictionary
        new_dictionary = []

        # iterate through all of the words in the old plausible dictionary
        for dict_word in current_dictionary:

            # continue if the word is not of the appropriate length
            if len(dict_word) != len_word:
                continue

            # if dictionary word is a possible match then add it to the current dictionary
            if re.match(clean_word,dict_word):
                #print('dict word: ', dict_word, ' with length ', len(dict_word))
                new_dictionary.append(dict_word)

        # overwrite old possible words dictionary with updated version
        self.current_dictionary = new_dictionary


        # count occurrence of all characters in possible word matches
        full_dict_string = "".join(new_dictionary)

        c = collections.Counter(full_dict_string)
        sorted_letter_count = c.most_common()

        guess_letter = '!'

        # return most frequently occurring letter in all possible words that hasn't been guessed yet
        for letter,instance_count in sorted_letter_count:
            if letter not in self.guessed_letters:
                guess_letter = letter
                break

        # if no word matches in training dictionary, default back to ordering of full dictionary
        if guess_letter == '!':
            sorted_letter_count = self.full_dictionary_common_letter_sorted
            for letter,instance_count in sorted_letter_count:
                if letter not in self.guessed_letters:
                    guess_letter = letter
                    break

        return guess_letter


    # Implementing n-gram model for Hangman
    def build_ngram(self, words, n):
        """Builds an n-gram frequency dictionary from a list of words."""
        ngram_counts = defaultdict(int)
        for word in words:
            if len(word) >= n:
                for i in range(len(word) - n + 1):
                    ngram = word[i:i+n]
                    ngram_counts[ngram] += 1
        return ngram_counts

    def compute_first_last_counts(self):
        first_counts = defaultdict(int)
        last_counts = defaultdict(int)

        for word in self.full_dictionary:
            if len(word) > 0:
                first_counts[word[0]] += 1
                last_counts[word[-1]] += 1

        # Normalize to get probabilities
        total_first = sum(first_counts.values())
        total_last = sum(last_counts.values())

        first_probs = {k: v / total_first for k, v in first_counts.items()}
        last_probs = {k: v / total_last for k, v in last_counts.items()}

        return first_probs, last_probs

    # cond count
    def compute_conditional_counts(self, words, ngram_range=(2, 5)):
        forward_counts = {n: defaultdict(int) for n in range(ngram_range[0], ngram_range[1] + 1)}
        backward_counts = {n: defaultdict(int) for n in range(ngram_range[0], ngram_range[1] + 1)}

        for word in words:
            length = len(word)

            for n in range(ngram_range[0], ngram_range[1] + 1):
                if length >= n:
                    for i in range(length - n + 1):
                        # Forward n-gram
                        ngram = word[i:i + n]
                        forward_counts[n][ngram] += 1

                        # Backward n-gram (reversed)
                        backward_ngram = word[i:i + n][::-1]  # Reverse the n-gram
                        backward_counts[n][backward_ngram] += 1

        return forward_counts, backward_counts

    # positional encoding using cond count
    def apply_positional_encoding(self, word, ngram_range=(2, 5)):

        fir_missing = False
        las_missing = False
        masked_word = word[::2]
        known_positions = [i for i, char in enumerate(masked_word) if char != "_"]
        total_known = len(known_positions)
        total_length = len(masked_word)
        thres = 0.1
        if total_known / total_length < thres:
            return None  # Skip if less than certain % are known

        # Calculate weights based on known ratio (done once per call)
        sigmoid_weights = balanced_sigmoid_weights(ratio = total_known / total_length, ngram_range = ngram_range, mid_ratio = 0.5 * (1+thres))

        forward_char_positional_probs = defaultdict(float)
        backward_char_positional_probs = defaultdict(float)
        mask_char_positional_probs = defaultdict(float)
        first_probs = defaultdict(float)
        last_probs = defaultdict(float)

        # Iterate over missing letters
        for i, char in enumerate(masked_word):
            if char == "_":
                for n in range(ngram_range[0], ngram_range[1] + 1):

                    # Forward Conditional Probability
                    if i - (n - 1) >= 0:  # Enough context on the left
                        context = masked_word[i - (n - 1):i] + "."  # "." for missing letter
                        possible_matches = [k for k in self.forward_counts[n] if re.match(context, k)]

                        # Calculate conditional probabilities
                        total_match_count = sum(self.forward_counts[n][k] for k in possible_matches)
                        for match in possible_matches:
                            forward_char_positional_probs[match[-1]] += (self.forward_counts[n][match] / total_match_count) * sigmoid_weights[n]

                    # Backward Conditional Probability
                    if i + (n - 1) < total_length:  # Enough context on the right
                        context = masked_word[i + 1:i + n][::-1] + "."  # Reverse the context with "."
                        possible_matches = [k for k in self.backward_counts[n] if re.match(context, k)]

                        # Calculate conditional probabilities
                        total_match_count = sum(self.backward_counts[n][k] for k in possible_matches)
                        for match in possible_matches:
                            backward_char_positional_probs[match[0]] += (self.backward_counts[n][match] / total_match_count) * sigmoid_weights[n]

                    # Mask Conditional Probability (Middle Missing Letter)
                    if 0 < i < total_length - 1 and n in [3, 4, 5]:
                        if n == 3:
                            context = masked_word[i - 1:i] + "." + masked_word[i + 1:i + 2]
                            m_idx = 1
                        elif n == 4 and i <= total_length - 3:
                            context = masked_word[i - 1:i] + "." + masked_word[i + 1:i + 3]
                            m_idx = 1
                        elif n == 4 and i >= 2:
                            context = masked_word[i - 2:i] + "." + masked_word[i + 1:i + 2]
                            m_idx = 2
                        elif n == 5 and i >= 2 and i <= total_length - 3:
                            context = masked_word[i - 2:i] + "." + masked_word[i + 1:i + 3]
                            m_idx = 2

                        possible_matches = [k for k in self.forward_counts[n] if re.match(context, k)]
                        total_match_count = sum(self.forward_counts[n][k] for k in possible_matches)

                        for match in possible_matches:
                            mask_char_positional_probs[match[m_idx]] += (self.forward_counts[n][match] / total_match_count) * sigmoid_weights[n]

                # First and Last Letter Probability
                if i == 0:  # Missing first letter
                    fir_missing = True
                    for char, prob in self.first_probs.items():
                        first_probs[char] += prob
                if i == total_length - 1:  # Missing last letter
                    las_missing = True
                    for char, prob in self.last_probs.items():
                        last_probs[char] += prob



        # Combine the forward and backward results
        combined_probs = defaultdict(float)
        for char in forward_char_positional_probs:
            combined_probs[char] += forward_char_positional_probs[char]

        for char in backward_char_positional_probs:
            combined_probs[char] += backward_char_positional_probs[char]

        # Normalize probabilities
        total_prob = sum(combined_probs.values())
        if total_prob > 0:
            for char in combined_probs:
                combined_probs[char] /= total_prob

        mask_total_prob = sum(mask_char_positional_probs.values())
        if mask_total_prob > 0:
            for char in mask_char_positional_probs:
                combined_probs[char] = 0.67 * combined_probs[char] + 0.33 * mask_char_positional_probs[char] / mask_total_prob

        fir_las_probs = defaultdict(float)
        if fir_missing:
            for char, prob in first_probs.items():
                fir_las_probs[char] += prob
        if las_missing:
            for char, prob in last_probs.items():
                fir_las_probs[char] += prob

        total_fir_las_prob = sum(fir_las_probs.values())
        if total_fir_las_prob > 0 and len(fir_las_probs) > 0:
            for char in fir_las_probs:
                combined_probs[char] = 0.8 * combined_probs[char] + 0.2 * fir_las_probs[char] / total_fir_las_prob

        return combined_probs

    # n-gram guess method
    def guess_ngram(self, word):
        # Step 1: Clean the word input and prepare it for matching
        # print('Input word:', word)
        clean_word = word[::2].replace("_", ".")
        print('Clean word:', clean_word)

        # Step 2: Define word lengths
        len_input = len(word)
        len_word = len(clean_word)
        # print('Input length:', len_input)
        # print('Clean word length:', len_word)

        # Step 3: Get the current dictionary of possible words
        current_dictionary = self.current_dictionary
        new_dictionary = []

        # Step 4: Filter possible words based on the clean word
        for dict_word in current_dictionary:
            # Skip words that don't match the length
            if len(dict_word) != len_word:
                continue

            # Check if the word matches the pattern using regex
            if re.match(clean_word, dict_word):
                new_dictionary.append(dict_word)

        print('size of new_dict: ', len(new_dictionary))
        if len(new_dictionary) == 0: # directly call fallback approach
            print("Fallback triggered: No matching words in new_dict======")
            return self.fallback_global_ngram()


        # Update the current dictionary
        self.current_dictionary = new_dictionary

        # Step 5: Build n-gram frequencies
        total_char_frequencies = collections.defaultdict(float)
        print('\n', list(self.ngram_models.keys())[-1], ' settings')

        for n in range(1, list(self.ngram_models.keys())[-1]+1):
            ngram_counts = collections.Counter()
            for word in new_dictionary:
                for i in range(len(word) - n + 1):
                    ngram = word[i:i + n]
                    ngram_counts[ngram] += 1

            # Calculate total characters in all n-grams
            total_chars = sum(count * n for count in ngram_counts.values())

            char_frequencies = collections.Counter()
            for ngram, count in ngram_counts.items():
                for char in ngram:
                    char_frequencies[char] += count

            #print('\n there are # char here ', len(char_frequencies) , ' with total count of chars ', total_chars)
            # Normalize by total characters across all n-grams
            normalized_frequencies = {char: freq / total_chars for char, freq in char_frequencies.items()}
            #print('sum of normalized freq ' , sum(normalized_frequencies.values()) , ' in setting ', n)

            # Update the total_char_frequencies by averaging over n-gram settings
            for char, freq in normalized_frequencies.items():
                total_char_frequencies[char] += freq / (list(self.ngram_models.keys())[-1])  # Average over n-gram settings

            if n == 5:
              print('total_char_freq len' , len(total_char_frequencies))
              print('total_char_freq sum' , round(sum(total_char_frequencies.values()),1) , '\n')

        # Step 6: Select the next letter to guess
        guess_letter = '!'
        sorted_letter_count = sorted(total_char_frequencies.items(), key=lambda x: -x[1])

        for letter, _ in sorted_letter_count:
            if letter not in self.guessed_letters:
                guess_letter = letter
                break

        # Fallback using global n-gram models if no match
        if guess_letter == '!':
            print("Fallback triggered: end of func......")
            global_char_frequencies = collections.defaultdict(float)
            global_ngram_char_frequencies = []

            # Calculate normalized frequencies for each global n-gram
            for n in range(1, list(self.ngram_models.keys())[-1]+1):
                global_ngram_counts = self.ngram_models[n]
                total_global_chars = sum(count * n for count in global_ngram_counts.values())

                char_frequencies = collections.Counter()
                for ngram, count in global_ngram_counts.items():
                    for char in ngram:
                        char_frequencies[char] += count

                # Normalize by total characters for each n-gram setting
                normalized_frequencies = {char: freq / total_global_chars for char, freq in char_frequencies.items()}
                global_ngram_char_frequencies.append(normalized_frequencies)

            # Average the normalized frequencies across n-gram settings
            for normalized_frequencies in global_ngram_char_frequencies:
                for char, freq in normalized_frequencies.items():
                    global_char_frequencies[char] += freq / (list(self.ngram_models.keys())[-1])  # Average over n-gram settings

            # Select the most common letter from the global fallback
            sorted_global_letter_count = sorted(global_char_frequencies.items(), key=lambda x: -x[1])
            for letter, _ in sorted_global_letter_count:
                if letter not in self.guessed_letters:
                    guess_letter = letter
                    break

        return guess_letter


    # n-gram guess method with softmax weight
    def guess_ngram_sftmx(self, word):

        # Step 1: Clean the word input and prepare it for matching
        #print('Input word:', word)
        clean_word = word[::2].replace("_", ".")
        print('\n  Clean word:', clean_word)

        # Step 2: Define word lengths
        len_input = len(word)
        len_word = len(clean_word)
        # print('Input length:', len_input)
        # print('Clean word length:', len_word)

        # Step 3: Get the current dictionary of possible words
        current_dictionary = self.current_dictionary
        new_dictionary = []

        # Step 4: Filter possible words based on the clean word
        for dict_word in current_dictionary:
            # Skip words that don't match the length
            if len(dict_word) != len_word:
                continue

            # Check if the word matches the pattern using regex
            if re.match(clean_word, dict_word):
                new_dictionary.append(dict_word)

        #print('size of matched new_dict: ', len(new_dictionary))
        if (len(new_dictionary) == 0) or ( (len(new_dictionary) < 20) and (len(new_dictionary)==len(self.current_dictionary)) ): # directly call fallback approach
            print("Fallback triggered ======")
            fuzzy_new_dictionary = self.fallback_fuzzy_ngram(clean_word, current_dictionary)
            self.fuzzy_dictionary = fuzzy_new_dictionary
            if len(fuzzy_new_dictionary) == 0:
                #print("global dictionary ::::::")
                return self.fallback_global_ngram()
            else:
                #print("fuzzy matching used ::::::")
                new_dictionary = fuzzy_new_dictionary

        # Update the current dictionary
        self.current_dictionary = new_dictionary

        # Step 5: Build n-gram frequencies
        total_char_frequencies = collections.defaultdict(float)
        #print(list(self.ngram_models.keys())[-1], ' basic settings......')

        for n in range(1, list(self.ngram_models.keys())[-1]+1):

            # at this setting level
            ngram_counts = collections.Counter()
            for word in new_dictionary:
                for i in range(len(word) - n + 1):
                    ngram = word[i:i + n]
                    ngram_counts[ngram] += 1

            # Calculate total characters in all n-grams
            total_chars = sum(count * n for count in ngram_counts.values())

            char_frequencies = collections.Counter()
            for ngram, count in ngram_counts.items():
                for char in ngram:
                    char_frequencies[char] += count

            #print('\n there are # char here ', len(char_frequencies) , ' with total count of chars ', total_chars)
            # Normalize by total characters across all n-grams at this level
            normalized_frequencies = {char: freq / total_chars for char, freq in char_frequencies.items()}
            #print('sum of normalized freq ' , sum(normalized_frequencies.values()) , ' in setting ', n)

            # Update the total_char_frequencies by averaging over n-gram settings
            for char, freq in normalized_frequencies.items():
                total_char_frequencies[char] += freq * softmax_weights(list(self.ngram_models.keys())[-1])[n]  # sftmx Average over n-gram settings

        # Normalize since some n-gram setting doesnot exist for certain words
        total_char_frequencies_sum = sum(total_char_frequencies.values())
        if total_char_frequencies_sum > 0:
            for char in total_char_frequencies:
                total_char_frequencies[char] /= total_char_frequencies_sum

        # Step 6: Select the next letter to guess
        guess_letter = '!'
        sorted_letter_count = sorted(total_char_frequencies.items(), key=lambda x: -x[1])
        for letter, _ in sorted_letter_count:
            if letter not in self.guessed_letters:
                guess_letter = letter
                return guess_letter, total_char_frequencies

        # Fallback using global n-gram models if no match
        if guess_letter == '!':
            print("Fallback triggered: end of guess func ::::::")
            return self.fallback_global_ngram()

    def fallback_fuzzy_ngram(self, clean_word, current_dictionary):
        base_ratio = 0.4  # Initial mismatch ratio
        max_mismatch = int(len(clean_word) * base_ratio)
        fuzzy_matches = []

        # Initial pass with base ratio
        for dict_word in current_dictionary:
            if len(dict_word) == len(clean_word):
                mismatch_count = sum(1 for a, b in zip(dict_word, clean_word) if b != '.' and a != b)
                if mismatch_count <= max_mismatch:
                    fuzzy_matches.append(dict_word)

        # Incrementally search for more matches if not enough
        remaining_words = [w for w in current_dictionary if w not in fuzzy_matches]

        while ( (len(fuzzy_matches) == len(self.fuzzy_dictionary)) or (len(fuzzy_matches) < 20) ) and (max_mismatch <= int(len(clean_word) * 0.5) + 1):
            max_mismatch += 1  # Increase tolerance
            for dict_word in remaining_words:
                mismatch_count = sum(1 for a, b in zip(dict_word, clean_word) if b != '.' and a != b)
                if mismatch_count <= max_mismatch:
                    fuzzy_matches.append(dict_word)

            # Update remaining words to avoid duplicate checks
            remaining_words = [w for w in remaining_words if w not in fuzzy_matches]

        print(f"Fuzzy match size: {len(fuzzy_matches)} with num letters mismatch: {max_mismatch}")
        return fuzzy_matches


    def fallback_global_ngram(self):
        global_char_frequencies = collections.defaultdict(float)
        global_ngram_char_frequencies = []

        # Calculate normalized frequencies for each global n-gram
        for n in range(1, list(self.ngram_models.keys())[-1] + 1):
            global_ngram_counts = self.ngram_models[n]
            total_global_chars = sum(count * n for count in global_ngram_counts.values())

            char_frequencies = collections.Counter()
            for ngram, count in global_ngram_counts.items():
                for char in ngram:
                    char_frequencies[char] += count

            # Normalize by total characters for each n-gram setting
            normalized_frequencies = {char: freq / total_global_chars for char, freq in char_frequencies.items()}
            global_ngram_char_frequencies.append(normalized_frequencies)

        # Average the normalized frequencies across n-gram settings
        for i in range(1, list(self.ngram_models.keys())[-1]+1):
            normalized_frequencies = global_ngram_char_frequencies[i-1]
            for char, freq in normalized_frequencies.items():
                global_char_frequencies[char] += freq * softmax_weights(list(self.ngram_models.keys())[-1])[i]  # sftmx Average over n-gram settings

        # Normalize since some n-gram setting doesnot exist for certain words
        global_char_frequencies_sum = sum(global_char_frequencies.values())
        if global_char_frequencies_sum > 0:
            for char in global_char_frequencies:
                global_char_frequencies[char] /= global_char_frequencies_sum

        guess_letter = '!'
        # Select the most common letter from the global fallback
        sorted_global_letter_count = sorted(global_char_frequencies.items(), key=lambda x: -x[1])
        for letter, _ in sorted_global_letter_count:
            if letter not in self.guessed_letters:
                guess_letter = letter
                break

        return guess_letter, global_char_frequencies


    # n-gram guess method with softmax weight, cond positional encoding
    def guess_with_positional_encoding(self, word):

        guess_letter, total_char_frequencies = self.guess_ngram_sftmx(word)  # Existing n-gram logic
        positional_probs = self.apply_positional_encoding(word)

        #print('\npositional version: total_char_freq len' , len(total_char_frequencies))
        #print('positional version: total_char_freq sum' , round(sum(total_char_frequencies.values()),1))

        if (positional_probs is None) or (len(positional_probs)==0):
            return guess_letter, total_char_frequencies

        # print('positional version: positional_probs len' , len(positional_probs))
        # print('positional version: positional_probs sum' , round(sum(positional_probs.values()),1))

        combined_probs = defaultdict(float)
        weight_positional = 0.8  # (Dynamically) adjust based on % guessed letters
        weight_ngram = 1 - weight_positional

        # Merge probabilities
        for char in total_char_frequencies:
            combined_probs[char] += total_char_frequencies[char] * weight_ngram

        for char in positional_probs:
            combined_probs[char] += positional_probs[char] * weight_positional

        #print('positional version: combined_probs len' , len(combined_probs))
        #print('positional version: combined_probs sum' , round(sum(combined_probs.values()),1))
        # Final guess
        #guess_letter = max(combined_probs, key=combined_probs.get)
        guess_letter = '!'
        sorted_letter_count = sorted(combined_probs.items(), key=lambda x: -x[1])

        for letter, _ in sorted_letter_count:
            if letter not in self.guessed_letters:
                guess_letter = letter
                break

        return guess_letter, combined_probs

    # NN-version guess
    def guess_with_NN(self, word):

        # step0:
        known_positions = [i for i, char in enumerate(word[::2]) if char != "_"]
        total_known = len(known_positions)
        total_length = len(word[::2])

        # Step 1: Clean the word input and prepare it for NN inference
        clean_word = word[::2]
        print('  Clean word for NN:', clean_word)
        batch_inputs = [clean_word]

        # step2: call the NN inference block
        aux_tensor_realtime, rg_tensor = construct_auxiliary_tensor_realtime(batch_inputs, self.guessed_letters)
        aux_tensor_realtime = aux_tensor_realtime.to(device)
        batch_outputs, batch_outputs_reveal_tensor = GameInferModel(batch_inputs, aux_tensor_realtime, device=device)

        # step 2.5: for beginning and ending status, alloc weight towards PE and NN
        if (total_known / total_length < 0.15 or total_known <= 1) or (total_known / total_length > 0.85 or total_length - total_known <= 2):
            guess_letter_from_pe, combined_probs_from_pe = self.guess_with_positional_encoding(word)
            print('from PE version: ', guess_letter_from_pe)
            pe_prob_tensor = torch.zeros(1, 26, dtype=torch.float32, device=device)
            for letter in combined_probs_from_pe.keys():
                pe_prob_tensor[0][ord(letter) - ord('a')] = combined_probs_from_pe[letter]
            assert abs(pe_prob_tensor[0].sum() - 0.0) > 0.01, "none positive probability in pe_prob_tensor."
            pe_prob_tensor[0] = pe_prob_tensor[0] / torch.sum(pe_prob_tensor[0])
            # step3: output highest prob guess
            top_values_pred, top_indices_pred = torch.topk(batch_outputs*0.1 + torch.log(pe_prob_tensor + 1e-32)*0.9 - 1e32 * rg_tensor, k=5, dim=1)
        else:
            # step3: output highest prob guess
            top_values_pred, top_indices_pred = torch.topk(batch_outputs - 1e32 * rg_tensor, k=5, dim=1)

        # Convert indices to letters (a-z)
        letters_pred = [[chr(97 + idx) for idx in row] for row in top_indices_pred.tolist()]
        print('top_NN_letters: ', letters_pred)

        assert type(letters_pred[0][0])==str, "guessed letter is not str, API real-time version"
        return letters_pred[0][0]

    ##########################################################
    # You'll likely not need to modify any of the code below #
    ##########################################################

    def build_dictionary(self, dictionary_file_location):
        text_file = open(dictionary_file_location,"r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary

    def start_game(self, practice=True, verbose=True):
        # reset guessed letters to empty set and current plausible dictionary to the full dictionary
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary
        self.fuzzy_dictionary = []

        response = self.request("/new_game", {"practice":practice})
        if response.get('status')=="approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))
            while tries_remains>0:
                # get guessed letter from user code
                #guess_letter = self.guess(word)
                #guess_letter = self.guess_ngram(word)
                #guess_letter, _ = self.guess_ngram_sftmx(word)
                #guess_letter, _ = self.guess_with_positional_encoding(word)
                guess_letter = self.guess_with_NN(word)

                # append guessed letter to guessed letters field in hangman object
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))

                try:
                    res = self.request("/guess_letter", {"request":"guess_letter", "game_id":game_id, "letter":guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print('Other exception caught on request.')
                    raise e

                if verbose:
                    print("Sever response: {0}".format(res))
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status=="success":
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                    return True
                elif status=="failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False
                elif status=="ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status=="success"

    def my_status(self):
        return self.request("/my_status", {})

    def request(
            self, path, args=None, post_args=None, method=None):
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"

        # Add `access_token` to post_args or args if it has not already been
        # included.
        if self.access_token:
            # If post_args exists, we assume that args either does not exists
            # or it does not need `access_token`.
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token

        time.sleep(0.2)

        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)

        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')

        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

class HangmanAPIError(Exception):
    """Exception carrying an error payload returned by the Hangman API.

    Attributes:
        result: the raw payload handed to the constructor.
        type: ``result["error_code"]`` when available; otherwise the nested
            ``result["error"]`` entry's ``"type"`` (when that branch supplies
            the message); otherwise ``""``.
        code: the nested ``result["error"]`` entry's ``"code"`` when that
            branch supplies the message; otherwise ``None``.
        message: the first available of ``result["error_description"]``,
            ``result["error"]["message"]`` and ``result["error_msg"]``;
            falls back to ``result`` itself.
    """

    def __init__(self, result):
        self.result = result
        self.code = None

        try:
            error_type = result["error_code"]
        except (KeyError, TypeError):
            error_type = ""
        self.type = error_type

        self._resolve_message(result)
        super().__init__(self.message)

    def _resolve_message(self, result):
        """Fill ``message`` (and possibly ``code``/``type``) from the payload."""
        # Preferred source: a flat "error_description" entry.
        try:
            self.message = result["error_description"]
            return
        except (KeyError, TypeError):
            pass

        # Next: a nested "error" mapping, which may also carry a code and type.
        try:
            self.message = result["error"]["message"]
            self.code = result["error"].get("code")
            if not self.type:
                self.type = result["error"].get("type", "")
            return
        except (KeyError, TypeError):
            pass

        # Last resort: a flat "error_msg" entry, else the payload itself.
        try:
            self.message = result["error_msg"]
        except (KeyError, TypeError):
            self.message = result

In [None]:
import os
from getpass import getpass

# The access token is a credential, so it must not be stored in the notebook.
# Read it from the HANGMAN_ACCESS_TOKEN environment variable, or prompt for it
# interactively when the variable is unset/empty.
hangman_access_token = os.environ.get("HANGMAN_ACCESS_TOKEN") or getpass("Hangman access token: ")

# `timeout` is stored on the client and forwarded to `requests` as the
# per-request timeout.
api = HangmanAPI(access_token=hangman_access_token, timeout=2000)

Output hidden; open in https://colab.research.google.com to view.

## Playing practice games:
You can use the command below to play up to 100,000 practice games.

In [None]:
# Play one practice game with verbose logging, then report the cumulative practice win rate.
api.start_game(practice=1,verbose=True)
[total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Unpack the four status counters returned by my_status()
# NOTE(review): raises ZeroDivisionError if the reported practice-run count is 0.
practice_success_rate = total_practice_successes / total_practice_runs
print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
dict word:  belemnoid  with length  9
dict word:  belesprit  with length  9
dict word:  belialist  with length  9
dict word:  belibeled  with length  9
dict word:  beliefful  with length  9
dict word:  believers  with length  9
dict word:  believeth  with length  9
dict word:  belington  with length  9
dict word:  belinurus  with length  9
dict word:  belitoeng  with length  9
dict word:  belittled  with length  9
dict word:  belittler  with length  9
dict word:  belittles  with length  9
dict word:  bellbrook  with length  9
dict word:  bellemina  with length  9
dict word:  bellerive  with length  9
dict word:  bellicism  with length  9
dict word:  bellicose  with length  9
dict word:  bellmaker  with length  9
dict word:  bellmouth  with length  9
dict word:  bellnosed  with length  9
dict word:  bellonian  with length  9
dict word:  bellowing  with length  9
dict word:  bellville  with length  9
dict word:  bellworts  with length  9
dict word

benchmark algo: 22 cumulative practice runs SO FAR, 0.136 ratio

In [None]:
# Play 56 practice games back to back, printing the cumulative practice win rate after each one.
for i in range(56):
    print('Playing ', i, ' th game')
    api.start_game(practice=1,verbose=True)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Unpack the four status counters returned by my_status()
    # NOTE(review): raises ZeroDivisionError if the reported practice-run count is 0.
    practice_success_rate = total_practice_successes / total_practice_runs
    print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
Guessing letter: z
Sever response: {'game_id': '879fbead0483', 'status': 'ongoing', 'tries_remains': 4, 'word': 'i r i _ _ _ e d '}
Clean word: iri...ed
size of new_dict:  3

 5  settings
total_char_freq len 7
total_char_freq sum 1.0 

Guessing letter: s
Sever response: {'game_id': '879fbead0483', 'status': 'ongoing', 'tries_remains': 4, 'word': 'i r i s _ _ e d '}
Clean word: iris..ed
size of new_dict:  0
Guessing letter: a
Sever response: {'game_id': '879fbead0483', 'status': 'ongoing', 'tries_remains': 4, 'word': 'i r i s a _ e d '}
Clean word: irisa.ed
size of new_dict:  0
Guessing letter: o
Sever response: {'game_id': '879fbead0483', 'status': 'ongoing', 'tries_remains': 3, 'word': 'i r i s a _ e d '}
Clean word: irisa.ed
size of new_dict:  0
Guessing letter: n
Sever response: {'game_id': '879fbead0483', 'status': 'ongoing', 'tries_remains': 2, 'word': 'i r i s a _ e d '}
Clean word: irisa.ed
size of new_dict:  0
Guessing letter: t
Sever re

pure n-gram algo v1: 200 cumulative practice runs SO FAR, 0.190 ratio

In [None]:
# Play 5 practice games back to back, printing the cumulative practice win rate after each one.
for i in range(5):
    print('Playing ', i, ' th game')
    api.start_game(practice=1,verbose=True)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Unpack the four status counters returned by my_status()
    # NOTE(review): raises ZeroDivisionError if the reported practice-run count is 0.
    practice_success_rate = total_practice_successes / total_practice_runs
    print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

Playing  0  th game
Successfully start a new game! Game ID: 262ab7ac9b18. # of tries remaining: 6. Word: _ _ _ _ _ _ _ _ _ _ .
Clean word: ..........
size of new_dict:  26953
7  settings
total_char_freq len 26
total_char_freq sum 1.0 

Guessing letter: e
Sever response: {'game_id': '262ab7ac9b18', 'status': 'ongoing', 'tries_remains': 5, 'word': '_ _ _ _ _ _ _ _ _ _ '}
Clean word: ..........
size of new_dict:  26953
7  settings
total_char_freq len 26
total_char_freq sum 1.0 

Guessing letter: i
Sever response: {'game_id': '262ab7ac9b18', 'status': 'ongoing', 'tries_remains': 5, 'word': '_ _ _ _ _ i _ _ _ _ '}
Clean word: .....i....
size of new_dict:  2894
7  settings
total_char_freq len 26
total_char_freq sum 1.0 

Guessing letter: n
Sever response: {'game_id': '262ab7ac9b18', 'status': 'ongoing', 'tries_remains': 5, 'word': '_ _ _ n _ i _ n _ _ '}
Clean word: ...n.i.n..
size of new_dict:  1
7  settings
total_char_freq len 8
total_char_freq sum 1.0 

Guessing letter: t
Sever response: 

pure n-gram algo v2 (up to 7-gram, add fallback logic, softmax weight across prob of settings): 240 cumulative practice runs SO FAR, 0.192 ratio

In [None]:
# Play 2 practice games back to back, printing the cumulative practice win rate after each one.
for i in range(2):
    print('Playing ', i, ' th game')
    api.start_game(practice=1,verbose=True)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Unpack the four status counters returned by my_status()
    # NOTE(review): raises ZeroDivisionError if the reported practice-run count is 0.
    practice_success_rate = total_practice_successes / total_practice_runs
    # Disabled throttle: would restrict the progress print to every 10th game.
    #if i % 10 == 0:
    print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

Playing  0  th game
Successfully start a new game! Game ID: f58b1f83b8a3. # of tries remaining: 6. Word: _ _ _ _ _ _ _ _ _ _ _ .

  Clean word: ...........
size of matched new_dict:  22786
Guessing letter: e
Sever response: {'game_id': 'f58b1f83b8a3', 'status': 'ongoing', 'tries_remains': 6, 'word': '_ _ _ _ _ _ _ _ _ e _ '}

  Clean word: .........e.
size of matched new_dict:  4871
Guessing letter: r
Sever response: {'game_id': 'f58b1f83b8a3', 'status': 'ongoing', 'tries_remains': 5, 'word': '_ _ _ _ _ _ _ _ _ e _ '}

  Clean word: .........e.
size of matched new_dict:  4871
Guessing letter: i
Sever response: {'game_id': 'f58b1f83b8a3', 'status': 'ongoing', 'tries_remains': 5, 'word': '_ _ _ _ _ _ i _ _ e _ '}

  Clean word: ......i..e.
size of matched new_dict:  634
Guessing letter: s
Sever response: {'game_id': 'f58b1f83b8a3', 'status': 'ongoing', 'tries_remains': 5, 'word': 's _ _ _ _ _ i _ _ e _ '}

  Clean word: s.....i..e.
size of matched new_dict:  103
Guessing letter: n
Sever 

n-gram algo with conditional positional encoding v1 (short-to-medium-length conditional window; the shorter the conditional window, the lower the activation threshold on the % of known letters for positional encoding; refined fallback logic to widen the search space instead of falling back to the global letter frequencies): 1050 cumulative practice runs SO FAR, 0.306 ratio

In [None]:
# Play 31 practice games back to back, printing the cumulative practice win rate after each one.
for i in range(31):
    print('Playing ', i, ' th game')
    api.start_game(practice=1,verbose=True)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Unpack the four status counters returned by my_status()
    # NOTE(review): raises ZeroDivisionError if the reported practice-run count is 0.
    practice_success_rate = total_practice_successes / total_practice_runs
    # Disabled throttle: would restrict the progress print to every 100th game.
    #if i % 100 == 0:
    print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

Playing  0  th game
Successfully start a new game! Game ID: 908d5a633b5d. # of tries remaining: 6. Word: _ _ _ _ _ _ _ _ .

  Clean word: ........
size of matched new_dict:  30452
Guessing letter: e
Sever response: {'game_id': '908d5a633b5d', 'status': 'ongoing', 'tries_remains': 6, 'word': 'e _ _ _ _ _ _ e '}

  Clean word: e......e
size of matched new_dict:  222
Guessing letter: r
Sever response: {'game_id': '908d5a633b5d', 'status': 'ongoing', 'tries_remains': 5, 'word': 'e _ _ _ _ _ _ e '}

  Clean word: e......e
size of matched new_dict:  222
Guessing letter: s
Sever response: {'game_id': '908d5a633b5d', 'status': 'ongoing', 'tries_remains': 4, 'word': 'e _ _ _ _ _ _ e '}

  Clean word: e......e
size of matched new_dict:  222
Guessing letter: n
Sever response: {'game_id': '908d5a633b5d', 'status': 'ongoing', 'tries_remains': 3, 'word': 'e _ _ _ _ _ _ e '}

  Clean word: e......e
size of matched new_dict:  222
Guessing letter: d
Sever response: {'game_id': '908d5a633b5d', 'status':

n-gram algo with cond positional encoding v2 (added BOW and EOW prob and middle letter masked prob): 3050 cumulative practice runs SO FAR, 0.384 ratio

# train NN, (
# with vectorized feature expression / fusion, train on incorrect letters numerically;
# optional using mask in loss_fn and vowel / consonant relationship;
# since no hyper-param tuning, val is literally the OOS, the test set is never used in training process, but it is used to calculate some kind of rank IC across 26 letters when we put the NN in the game.
# )

## framework

In [None]:
import torch
from torch.utils.data import Dataset, random_split, DataLoader, SubsetRandomSampler, Subset
import torch.nn as nn
import torch.optim as optim

import numpy as np
from collections import Counter
import random
import time

In [None]:
# Reload the necessary packages and dataset to process the training data
from itertools import combinations
from collections import defaultdict

# Function to generate intermediate words by masking unique letters
def generate_sampled_intermediate_words(word, sample_rate=0.04):
    """Sample partially masked versions of ``word`` for training.

    For every mask size ``x`` from 1 to the number of distinct letters in
    ``word``, a random subset of the ``x``-letter combinations is drawn with
    the global ``random`` module, and every occurrence of the chosen letters
    is replaced by ``'_'``.

    Args:
        word: the full word to mask.
        sample_rate: fraction of the combinations of each size to keep. At
            least one combination per size is always kept, and the count is
            capped at the number of combinations available.

    Returns:
        A list of masked strings, each the same length as ``word``. An empty
        ``word`` yields an empty list.
    """
    # Sorted rather than taken in raw set order: set iteration order for
    # strings depends on per-process hash randomization, which would make
    # seeded runs differ between kernel restarts.
    unique_letters = sorted(set(word))
    intermediate_words = []

    # Mask 1 .. len(unique_letters) distinct letters at a time.
    for x in range(1, len(unique_letters) + 1):
        combos = list(combinations(unique_letters, x))
        # Keep at least one sample, but never ask for more than exist
        # (random.sample raises ValueError in that case).
        num_samples = min(len(combos), max(1, int(len(combos) * sample_rate)))

        for combo in random.sample(combos, num_samples):
            hidden_letters = set(combo)
            masked_word = ''.join('_' if letter in hidden_letters else letter for letter in word)
            intermediate_words.append(masked_word)

    return intermediate_words


In [None]:
# Generate training data (intermediate_word, original_word) pairs
# Collect (masked_word, original_word) pairs for every dictionary word.
training_data = []

for i, word in enumerate(api.full_dictionary):
    intermediate_words = generate_sampled_intermediate_words(word)
    for masked in intermediate_words:
        training_data.append((masked, word))
    # Progress report every 50,000 words: percentage done, then pairs so far.
    if i % 5e4 == 0:
        print(f"Processed {i/len(api.full_dictionary)*100} % words")
        print(len(training_data))

Processed 0.0 % words
1
Processed 21.997360316761988 % words
839741
Processed 43.994720633523976 % words
1711710
Processed 65.99208095028597 % words
2716547
Processed 87.98944126704795 % words
3791046


In [None]:
# Function to generate labels for each masked word sample
def generate_labels(training_data):
    """Build one 26-entry label tensor per ``(masked_word, original_word)`` pair.

    For each letter a-z the entry is:
      * 1.0 if the letter is visible in the masked word,
      * its share of the hidden positions if it is a hidden letter of the
        original word (count of that letter / number of hidden positions),
      * 0.0 otherwise.

    Args:
        training_data: iterable of ``(masked_word, original_word)`` pairs,
            where hidden positions are written as ``'_'``.

    Returns:
        A list of ``torch.float32`` tensors of length 26, in input order.

    Raises:
        AssertionError: if the hidden/revealed letter counts of a pair are
            inconsistent with each other.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    labels = []

    for masked_word, original_word in training_data:
        # Letters still visible in the masked word, with multiplicity.
        shown = Counter(ch for ch in masked_word if ch != '_')
        # Letters of the original word that are fully hidden, with multiplicity.
        hidden = {ch: cnt for ch, cnt in Counter(original_word).items() if ch not in shown}
        hidden_total = sum(hidden.values())

        assert hidden_total == masked_word.count('_'), 'assert error due to unmatched count of missing letters'
        assert sum(shown.values()) == len(original_word) - hidden_total, 'revealed count error'

        row = np.zeros(26)
        for idx, letter in enumerate(alphabet):
            if letter in shown:
                row[idx] = 1.0
            elif letter in hidden:
                # hidden_total > 0 here because `hidden` is non-empty.
                row[idx] = hidden[letter] / hidden_total

        labels.append(torch.tensor(row, dtype=torch.float32))

    return labels

In [None]:
# Custom Dataset to handle masked word (string) and label (tensor)
class MaskedWordDataset(Dataset):
    """Dataset pairing each masked word (a string) with its label.

    Both sequences are stored exactly as passed in and are indexed in
    parallel; the dataset length is taken from ``masked_words``.
    """

    def __init__(self, masked_words, labels):
        # Kept as given: no copying or conversion happens here.
        self.masked_words, self.labels = masked_words, labels

    def __len__(self):
        """Return the number of stored masked words."""
        return len(self.masked_words)

    def __getitem__(self, idx):
        """Return the ``(masked_word, label)`` pair stored at ``idx``."""
        masked_word = self.masked_words[idx]
        label = self.labels[idx]
        return masked_word, label

# Example masked words and labels (from previous function)

In [None]:
# Labels are derived from the full (masked, original) pairs ...
generated_labels = generate_labels(training_data)
# ... while the model inputs are just the masked strings, in the same order.
masked_words = [pair[0] for pair in training_data]

In [None]:
# Create Dataset: wrap the masked strings and their label tensors so they can be indexed in parallel.
dataset = MaskedWordDataset(masked_words, generated_labels)

In [None]:
# Load the previously saved dataset from file.
# NOTE(review): torch.load unpickles the file contents, so only load files you created or trust.
# NOTE(review): no `weights_only` argument is passed, so behavior follows the installed PyTorch's
# default — TODO confirm this file still loads after a PyTorch upgrade.
loaded_dataset_raw = torch.load('masked_word_dataset_v0108.pth')
print(f"Loaded dataset size: {len(loaded_dataset_raw)}")


  loaded_dataset_raw = torch.load('masked_word_dataset_v0108.pth')


Loaded dataset size: 4366026


In [None]:
# Training even one epoch on the full dataset is too slow (even with a GPU), so reduce it:
# keep every 100th sample, starting at index 7 (indices 7, 107, 207, ...).
loaded_dataset = Subset( loaded_dataset_raw , list(range(7, len(loaded_dataset_raw), 100)) )

In [None]:
# Define split sizes (80%, 10%, 10%); the test split absorbs the rounding remainder.
train_size = int(0.8 * len(loaded_dataset))
val_size = int(0.1 * len(loaded_dataset))
test_size = len(loaded_dataset) - train_size - val_size

# Perform the random split with a fixed seed. Training is resumed from
# checkpoints across kernel restarts; an unseeded split would reshuffle the
# train/val/test membership on every restart, so samples already trained on
# could end up in the validation or test split.
SPLIT_SEED = 42
train_dataset, val_dataset, test_dataset = random_split(
    loaded_dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

In [None]:
# Custom collate function to handle string inputs
def collate_fn(batch):
    """Collate ``(masked_word, label)`` samples into a batch.

    Args:
        batch: sequence of ``(masked_word, label_tensor)`` pairs.

    Returns:
        ``(words, labels)`` where ``words`` is a list of the masked strings
        and ``labels`` is the label tensors stacked along a new first
        dimension.
    """
    words, targets = zip(*batch)
    word_list = list(words)
    stacked_targets = torch.stack(targets)
    return word_list, stacked_targets

In [None]:
# Create DataLoaders for each split. Only the training loader shuffles; all three use
# batches of 16 and the custom collate_fn so masked words stay a list of strings.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)

# Display a sample batch for sanity check (only the first batch is printed).
for inputs, labels in train_loader:
    print("Sample Batch (Masked Words):", inputs)
    print("Sample Batch (Labels):", labels)
    break

Sample Batch (Masked Words): ['__ck_o__ow_', 'un_omp________', '_nd___ut_blene__', 'aci_ra__', 'ma__s_r_ant', '__________', 'occipi_o__on__li_', '_y_a_____piz_', 'r___r__rm_nc_', 're_u_________t___', '__________', 'thre_d__r_', 'u_____e_bility', '_oxo_opho_o__', '__t_r___ly_acc_arid_', '__mbichrom_t_']
Sample Batch (Labels): tensor([[0.1667, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.1667, 1.0000, 0.0000, 0.0000, 0.3333, 1.0000, 0.0000, 0.0000, 0.0000,
         0.1667, 0.1667, 0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.1111, 0.1111, 0.0000, 0.2222, 0.0000, 0.0000, 0.0000, 0.1111,
         0.0000, 0.0000, 0.1111, 1.0000, 1.0000, 1.0000, 1.0000, 0.0000, 0.1111,
         0.2222, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.1429, 1.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.2857,
         0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.1429, 0.0000, 0.0000,
         0.4286, 1.0000, 1.0000, 0.0000

In [None]:
# Function to construct auxiliary tensor
def construct_auxiliary_tensor(batch_inputs, batch_labels, sampling_rate):
    """Build the per-letter auxiliary feature tensor for a batch of masked words.

    The result has shape ``(len(batch_inputs), 26, 4)``; for sample ``i`` and
    letter ``k`` (0 = 'a' ... 25 = 'z') the four features are:

      0. letter status: 1 if the letter is visible in the masked word, -1 if
         it was sampled as a known-wrong letter, 0 otherwise;
      1. forward index of the letter's last visible occurrence, -1 if the
         letter is not visible;
      2. distance of that occurrence from the end of the word, -1 if the
         letter is not visible;
      3. 1 for vowels (a, e, i, o, u), 0 otherwise.

    Args:
        batch_inputs: sequence of masked words ('_' marks hidden positions;
            visible characters are expected to be lowercase a-z).
        batch_labels: per-sample label rows with 26 entries; entries equal to
            0 mark letters that are not in the original word.
        sampling_rate: fraction of each sample's zero-label letters to flag
            as known-wrong. At least one is flagged whenever any exist, and
            the count is capped at the number available.

    Returns:
        A float tensor of shape ``(batch_size, 26, 4)`` on the CPU.

    Note:
        Negative sampling draws from the global ``random`` module.
        Earlier revisions wrote the -1 markers through chained indexing
        (``aux_tensor[i][idx_tensor][0] = -1``), which modifies a temporary
        copy, so the markers were never stored. Models trained with that
        revision never saw a -1 in feature 0.
    """
    batch_size, num_classes = len(batch_inputs), 26
    aux_tensor = torch.zeros(batch_size, num_classes, 4)

    # Feature 3: vowel indicator, identical for every sample (broadcast over the batch).
    vowels = {'a', 'e', 'i', 'o', 'u'}
    vowel_resonant_feature = torch.tensor([1 if chr(ord('a') + i) in vowels else 0 for i in range(num_classes)])
    aux_tensor[:, :, 3] = vowel_resonant_feature

    # Features 1-2: positional indices default to -1 (letter not visible).
    aux_tensor[:, :, 1:3] = -1

    for i, (masked_word, label) in enumerate(zip(batch_inputs, batch_labels)):
        # Step 1: mark visible letters with 1.
        for letter in set(masked_word) - {'_'}:
            aux_tensor[i, ord(letter) - ord('a'), 0] = 1

        # Step 2: flag a random subset of the wrong letters (label == 0) with -1.
        negative_indices = torch.nonzero(label == 0).view(-1).tolist()
        if negative_indices:
            num_sampled = min(
                len(negative_indices),
                max(1, int(len(negative_indices) * sampling_rate)),
            )
            sampled_negatives = random.sample(negative_indices, num_sampled)
            # Single indexing expression so the write lands in aux_tensor itself.
            aux_tensor[i, sampled_negatives, 0] = -1

        # Step 3: positional indices; a later occurrence overwrites an earlier one.
        word_length = len(masked_word)
        for pos, letter in enumerate(masked_word):
            if letter != '_':  # If the letter is revealed
                letter_idx = ord(letter) - ord('a')
                aux_tensor[i, letter_idx, 1] = pos  # Forward index
                aux_tensor[i, letter_idx, 2] = word_length - pos - 1  # Backward index

    return aux_tensor  # shape: batch_size x 26 x 4

In [None]:
# Iterate through one batch and construct auxiliary tensor
# Sanity check on the first training batch only: build the auxiliary tensor with a 0.2
# negative-sampling rate and print sample index 9 (so the batch must hold at least 10 items).
for batch_inputs, batch_labels in train_loader:
    aux_tensor = construct_auxiliary_tensor(batch_inputs, batch_labels, 0.2)
    print("Batch Inputs:", type(batch_inputs), batch_inputs[9])
    print("Batch Labels:", batch_labels.shape, batch_labels[9])
    print("Auxiliary Tensor:", aux_tensor.shape, aux_tensor[9])
    break

Batch Inputs: <class 'list'> gly__gel_ti_
Batch Labels: torch.Size([16, 26]) tensor([0.2500, 0.0000, 0.2500, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 1.0000,
        0.0000, 0.0000, 1.0000, 0.0000, 0.2500, 0.2500, 0.0000, 0.0000, 0.0000,
        0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0000])
Auxiliary Tensor: torch.Size([16, 26, 4]) tensor([[ 0., -1., -1.,  1.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  0.],
        [ 1.,  6.,  5.,  1.],
        [ 0., -1., -1.,  0.],
        [ 1.,  5.,  6.,  0.],
        [ 0., -1., -1.,  0.],
        [ 1., 10.,  1.,  1.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  0.],
        [ 1.,  7.,  4.,  0.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  1.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  0.],
        [ 0., -1., -1.,  0.],
        [ 1.,  9.,  2.,  0.],
        [ 0., -1., -1.,  1.],
        [ 0., -1.,

In [None]:
import wandb

In [None]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
# Device setup
# Pick the compute device: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


In [None]:
# Load the saved training checkpoint, remapping its tensors onto the active device.
# NOTE(review): torch.load unpickles the file contents, so only load checkpoints you created or trust.
checkpoint = torch.load('model_checkpoint_bc_5325.pth',map_location=device)

  checkpoint = torch.load('model_checkpoint_bc_5325.pth',map_location=device)


In [None]:
# Define the training loop
def train_model(train_loader, val_loader, model, optimizer, init_lr, loss_fn, save_model_path="model_checkpoint", num_epochs=1, checkpoint_path=None, noArchitectChange=True,
                resume_global_batch_index=5326, resume_start_epoch=2, wandb_run_id="p85rkbzr", wandb_resume="must"):
    """Train ``model`` with periodic checkpointing, validation and W&B logging.

    Args:
        train_loader: DataLoader yielding ``(masked_words, labels)`` batches.
        val_loader: DataLoader passed to ``evaluate_validation_loss``.
        model: called as ``model(batch_inputs, aux_tensor, device=device)``
            and expected to return ``(outputs, reveal_tensor)``.
        optimizer: optimizer over ``model``'s parameters.
        init_lr: base learning rate used to recompute the LR when resuming.
        loss_fn: called as ``loss_fn(masked_outputs, normalized_masked_labels)``.
        save_model_path: filename prefix for saved checkpoints.
        num_epochs: total number of epochs (exclusive upper bound of the
            0-indexed epoch loop).
        checkpoint_path: checkpoint to restore model and optimizer state
            from; only used when ``noArchitectChange`` is true.
        noArchitectChange: when true, all parameters are trainable and the
            checkpoint (if given) is restored. When false, every parameter
            whose name lacks "projection", "fc1_5" and "fc2" is frozen and
            no checkpoint is loaded here.
        resume_global_batch_index: global batch counter to start from. The
            default matches the previously hard-coded value; pass 0 for a
            fresh run.
        resume_start_epoch: 0-indexed epoch to start from. The default
            matches the previously hard-coded value; pass 0 for a fresh run.
        wandb_run_id: W&B run id passed to ``wandb.init``. The default is the
            previously hard-coded run.
        wandb_resume: W&B ``resume`` mode passed to ``wandb.init``.

    Returns:
        ``(train_loss_series, val_loss_series)``: pandas Series indexed by
        global batch index, one entry per validation point.

    Side effects:
        Writes ``{save_model_path}_bc_*.pth`` checkpoints, logs to W&B, and
        prints progress. The W&B run is finished on every exit path that
        returns normally.
    """
    # Function-scope import so the function does not depend on a notebook-level `pd`.
    import pandas as pd

    wandb.init(
        # set the wandb project where this run will be logged
        project="HangmanWinter2024",
        # track hyperparameters and run metadata
        config={
            "epochs": num_epochs,
            "batch_size": train_loader.batch_size,
            "optimizer": optimizer.__class__.__name__,
            "loss_fn": loss_fn.__class__.__name__,
            "learning_rate": optimizer.param_groups[0]["lr"],
              },
        id=wandb_run_id,
        resume=wandb_resume,
        settings=wandb.Settings(init_timeout=300),  # Increase timeout
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)  # Move model to the selected device

    total_batches = len(train_loader)  # Get the total number of batches
    model.train()

    train_loss_series = pd.Series(dtype=float)
    val_loss_series = pd.Series(dtype=float)

    # Position to resume from (global batch counter and 0-indexed epoch).
    global_batch_index = resume_global_batch_index
    start_epoch = resume_start_epoch
    # Keep `epoch` defined even when the epoch loop below never runs
    # (start_epoch >= num_epochs); the final checkpoint reads it.
    epoch = start_epoch
    if start_epoch >= num_epochs:
        print(f"No epochs to run: start_epoch {start_epoch} >= num_epochs {num_epochs}.")

    # Restore model and optimizer from checkpoint
    if checkpoint_path and noArchitectChange:
        print(f"Loading checkpoint from {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path,map_location=device)

        # Load model state
        model.load_state_dict(checkpoint["model_state_dict"])
        # Load optimizer state
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

        print(f"Resuming training from global_batch_index {global_batch_index} and start_epoch {start_epoch + 1}")

        # Recompute the learning rate: 1% decay per completed epoch's worth of batches.
        current_lr = init_lr * (0.99 ** (global_batch_index/total_batches ))
        # Update learning rate for all parameter groups
        for param_group in optimizer.param_groups:
            param_group["lr"] = current_lr
            print(f"Model state loaded. Optimizer updated with learning rate: {current_lr:.8f}")

        # Update W&B configuration
        wandb.config.update({
            "resumed": True,
            "global_batch_index": global_batch_index,
        }, allow_val_change=True)

    else:
        print("Starting from scratch w/o pre-loaded optimizier state.")

    # Trainability does not change during training, so set it once up front.
    if noArchitectChange == False:
        # Freeze everything except the newly added layers (warm-up / fine-tuning);
        # remember to unfreeze externally once the new layers are warmed up.
        for name, param in model.named_parameters():
            if ("projection" not in name) and ("fc1_5" not in name) and ("fc2" not in name):
                param.requires_grad = False
    else:
        # make sure all the layers are trainable
        for name, param in model.named_parameters():
            param.requires_grad = True

    # Accumulate training loss for averaging
    cumulative_train_loss = 0.0
    train_batches_since_log = 0

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, factor=0.11, threshold=0.005)
    for epoch in range(start_epoch, num_epochs):
        print(f"\nStarting epoch {epoch + 1}/{num_epochs}")

        for batch_index, (batch_inputs, batch_labels) in enumerate(train_loader):

            # Skip processed batches in the first epoch. Skipped batches do not
            # advance global_batch_index, so the threshold stays aligned.
            if epoch == start_epoch and batch_index < global_batch_index - start_epoch*total_batches:
                continue

            print(f"Batch {batch_index}: head few words: {batch_inputs[:3]}")

            # Dynamic negative-sampling rate: ramps linearly from 0.2 to 0.5 within an epoch.
            sampling_rate = 0.2 + (batch_index/total_batches) * (0.5-0.2)
            batch_labels = batch_labels.to(device)
            # Construct auxiliary tensor with the dynamic sampling rate
            aux_tensor = construct_auxiliary_tensor(batch_inputs, batch_labels, sampling_rate)
            aux_tensor = aux_tensor.to(device)

            # Forward pass: the model takes the masked words plus the auxiliary tensor.
            outputs, outputs_reveal_tensor = model(batch_inputs, aux_tensor, device=device)

            loss_mask = (1.0 - outputs_reveal_tensor)  # Mask to exclude revealed letters
            batch_labels_masked = batch_labels * loss_mask
            masked_outputs = outputs * loss_mask
            # penalize neg entropy (max entropy)
            # NOTE(review): treats `outputs` as log-probabilities — confirm against the model.
            entropy_loss = -(masked_outputs.exp() * masked_outputs).sum(dim=-1).mean()

            # Targets are renormalized over the unrevealed letters only.
            loss = loss_fn(masked_outputs, batch_labels_masked / batch_labels_masked.sum(dim=-1, keepdim=True) ) - 0.01 * entropy_loss # Compute loss

            # Accumulate the loss
            cumulative_train_loss += loss.item()
            train_batches_since_log += 1

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Log metrics to W&B
            wandb.log({
                "epoch": epoch + 1,
                "batch": batch_index,
                "global_batch_index": global_batch_index,
                "train_loss": cumulative_train_loss / train_batches_since_log,
                "letter_sample_rate": sampling_rate,
            })

            # Checkpoint every 25 global batches (before validation, which is slow).
            if global_batch_index % 0.25e2 == 0 and global_batch_index > 0:

                checkpoint_path = f"{save_model_path}_bc_{global_batch_index}.pth"
                torch.save({
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "global_batch_index": global_batch_index,
                    "epoch": epoch + 1,
                }, checkpoint_path)

                # reduce lr if the running training loss is on a plateau
                scheduler.step(cumulative_train_loss/train_batches_since_log)

                print(f"cur learng rate: {optimizer.param_groups[0]['lr']:.8f}")
                wandb.log({'learning_rate': optimizer.param_groups[0]['lr']})
                print(f"Model checkpoint saved at: {checkpoint_path}\n")

                # Early stopping logic: the learning rate has collapsed.
                if optimizer.param_groups[0]['lr'] <= 1e-8:
                    print(
                        f"Early stopping triggered at Global Batch {global_batch_index}. "
                        f"Learning rate ({optimizer.param_groups[0]['lr']:.8f}) is too low. "
                    )
                    wandb.log({"early_stopping": True, "stopped_at_batch": global_batch_index})

                    checkpoint_path = f"{save_model_path}_bc_{global_batch_index-1}_ErlStp.pth"
                    torch.save({
                        "model_state_dict": model.state_dict(),
                        "optimizer_state_dict": optimizer.state_dict(),
                        "global_batch_index": global_batch_index,
                        "epoch": epoch + 1,
                    }, checkpoint_path)
                    print(f"Model checkpoint saved at: {checkpoint_path}\n")

                    # Close the W&B run on this exit path too.
                    wandb.finish()
                    return train_loss_series, val_loss_series

            # Validate and report every 200 global batches.
            if global_batch_index % 2e2 == 0 and global_batch_index > 0:

                # Evaluate validation loss
                val_loss = evaluate_validation_loss(val_loader, model, loss_fn, sampling_rate, device)

                # Calculate average training loss since last log
                avg_train_loss = cumulative_train_loss / train_batches_since_log
                train_batches_since_log = 0  # Reset counter
                cumulative_train_loss = 0.0  # Reset cumulative loss

                # Log validation loss to W&B
                wandb.log({
                    "global_batch_index": global_batch_index,
                    "eval_train_loss": avg_train_loss,
                    "validation_loss": val_loss,
                })
                print(
                    f"Global Batch {global_batch_index}/{total_batches * num_epochs}: "
                    f"Sampling Rate = {sampling_rate:.2f} "
                    f"Training Loss = {avg_train_loss:.3f}, Validation Loss = {val_loss:.3f}\n"
                )

                # Store the losses in the pandas Series
                train_loss_series.at[global_batch_index] = avg_train_loss
                val_loss_series.at[global_batch_index] = val_loss

                # Early stopping logic: stop as soon as validation loss rises
                # above the previous validation point.
                if len(val_loss_series) >= 2:
                    last_few_losses = val_loss_series.iloc[-2:-1]  # The previous validation loss only
                    if val_loss > last_few_losses.max():
                        print(
                            f"Early stopping triggered at Global Batch {global_batch_index}. "
                            f"Latest validation loss ({val_loss:.3f}) is higher than the last few. "
                        )
                        wandb.log({"early_stopping": True, "stopped_at_batch": global_batch_index})

                        checkpoint_path = f"{save_model_path}_bc_{global_batch_index-1}_ErlStp.pth"
                        torch.save({
                            "model_state_dict": model.state_dict(),
                            "optimizer_state_dict": optimizer.state_dict(),
                            "global_batch_index": global_batch_index,
                            "epoch": epoch + 1,
                        }, checkpoint_path)
                        print(f"Model checkpoint saved at: {checkpoint_path}\n")

                        # Close the W&B run on this exit path too.
                        wandb.finish()
                        return train_loss_series, val_loss_series

            global_batch_index += 1

    # Final checkpoint after the last epoch (or immediately, if no epoch ran).
    checkpoint_path = f"{save_model_path}_bc_{global_batch_index-1}.pth"
    torch.save({
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "global_batch_index": global_batch_index,
                    "epoch": epoch + 1,
                }, checkpoint_path)
    print(f"Model checkpoint saved at: {checkpoint_path}\n")
    # Finish W&B run
    wandb.finish()
    return train_loss_series, val_loss_series

# Define validation loss evaluation
def evaluate_validation_loss(val_loader, model, loss_fn, sampling_rate, device):
    """Estimate the validation loss on a random 25% subsample of the validation set.

    The model is put in eval mode for the duration of the call and is always
    switched back to train mode afterwards, including when evaluation raises,
    so a failed validation pass cannot leave the caller training with
    dropout/BatchNorm in inference mode.

    Args:
        val_loader: DataLoader whose ``dataset``, ``batch_size``, ``num_workers``
            and ``pin_memory`` are reused to build the subsampled loader. Each
            batch is unpacked as ``(inputs, labels)``.
        model: Called as ``model(inputs, aux_tensor, device=device)``; must
            return ``(outputs, reveal_tensor)``.
        loss_fn: Called as ``loss_fn(masked_outputs, normalized_masked_labels)``.
        sampling_rate: Forwarded unchanged to ``construct_auxiliary_tensor``.
        device: Device the labels and the auxiliary tensor are moved to.

    Returns:
        float: Mean per-batch loss over the subsampled loader.

    Side effects:
        Re-seeds the global ``random`` and ``torch`` RNGs from the wall clock,
        and prints the number of sampled batches plus the first input of every
        batch.
    """
    model.eval()  # Set model to evaluation mode
    try:
        val_loss = 0.0

        # Seed from the wall clock so successive calls draw different subsamples.
        seed = int(time.time())
        random.seed(seed)
        torch.manual_seed(seed)

        dataset_size = len(val_loader.dataset)
        sample_size = max(1, int(dataset_size * 0.25))

        sampled_indices = random.sample(range(dataset_size), sample_size)
        sampler = SubsetRandomSampler(sampled_indices)
        sampled_val_loader = DataLoader(
            val_loader.dataset,
            batch_size=val_loader.batch_size,
            sampler=sampler,
            num_workers=val_loader.num_workers,
            pin_memory=val_loader.pin_memory,
        )
        print(f"len of sampled val_loader: {len(sampled_val_loader)}")

        with torch.no_grad():  # Disable gradient computation
            for val_inputs, val_labels in sampled_val_loader:

                print(f"head word: {val_inputs[0]}")
                val_labels = val_labels.to(device)
                aux_tensor = construct_auxiliary_tensor(val_inputs, val_labels, sampling_rate)
                aux_tensor = aux_tensor.to(device)

                val_outputs, val_outputs_reveal_tensor = model(val_inputs, aux_tensor, device=device)

                # 1 for letters still hidden, 0 for letters the reveal tensor marks as shown.
                loss_mask = (1.0 - val_outputs_reveal_tensor)

                val_labels_masked = val_labels * loss_mask
                masked_val_outputs = val_outputs * loss_mask
                # Entropy of the masked output distribution; subtracted below with
                # weight 0.01, i.e. higher entropy lowers the reported loss.
                entropy_loss = -(masked_val_outputs.exp() * masked_val_outputs).sum(dim=-1).mean()

                # NOTE(review): the target is renormalised by its per-row sum; a row whose
                # masked labels sum to 0 would produce NaN here - confirm the dataset never
                # yields fully revealed words.
                loss = loss_fn(
                    masked_val_outputs,
                    val_labels_masked / val_labels_masked.sum(dim=-1, keepdim=True),
                ) - 0.01 * entropy_loss
                # Accumulate the loss
                val_loss += loss.item()

        # Average validation loss over the sampled batches
        return val_loss / len(sampled_val_loader)
    finally:
        # Always hand the model back in training mode, even if evaluation failed.
        model.train()

In [None]:
class MyModel(nn.Module):
    def __init__(self, full_dictionary_location="words_250000_train.txt"):
        """Load the word list, precompute n-gram statistics and create the layers.

        Args:
            full_dictionary_location: Path of the newline-separated word list
                read by ``build_dictionary``.

        The statistics are stored as plain Python attributes; only the
        ``nn.Linear`` / ``nn.BatchNorm1d`` layers below are registered as
        trainable modules. Layer widths are 15 -> 64 -> 128 -> 32 -> 1, with a
        separate 15 -> 32 ``projection`` used by ``forward`` as a skip path.
        """
        super(MyModel, self).__init__()

        # Load the full dictionary
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        # Precompute n-gram models (1-gram to 7-gram)
        self.ngram_models = {
            n: self.build_ngram(self.full_dictionary, n) for n in range(1, 8)
        }
        # Compute forward and backward conditional counts
        self.forward_counts, self.backward_counts = self.compute_conditional_counts(
            self.full_dictionary
        )
        # Compute first and last letter probabilities
        self.first_probs, self.last_probs = self.compute_first_last_counts()

        # Layers (creation order determines the order of named_parameters())
        self.fc1 = nn.Linear(15, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc1_5 = nn.Linear(64, 128)
        self.bn1_5 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 32)
        self.bn2 = nn.BatchNorm1d(32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.25)  # Single dropout rate for simplicity

        # One activation per hidden stage; see forward() for where each is applied.
        #self.activation_relu = nn.ReLU()
        self.activation_gelu = nn.GELU()
        #self.activation_silu = nn.SiLU()
        self.activation_leakyrelu = nn.LeakyReLU(negative_slope=0.05)
        self.activation_tanh = nn.Tanh()

        # Linear map from the 15 input features to the 32-wide fc2 output (skip connection).
        self.projection = nn.Linear(15, 32)

    def build_dictionary(self, dictionary_file_location):
        text_file = open(dictionary_file_location,"r")
        full_dictionary = text_file.read().splitlines()
        text_file.close()
        return full_dictionary

    # Implementing n-gram model for Hangman
    def build_ngram(self, words, n):
        """Builds an n-gram frequency dictionary from a list of words."""
        ngram_counts = defaultdict(int)
        for word in words:
            if len(word) >= n:
                for i in range(len(word) - n + 1):
                    ngram = word[i:i+n]
                    ngram_counts[ngram] += 1
        return ngram_counts

    # cond count
    def compute_conditional_counts(self, words, ngram_range=(2, 5)):
        forward_counts = {n: defaultdict(int) for n in range(ngram_range[0], ngram_range[1] + 1)}
        backward_counts = {n: defaultdict(int) for n in range(ngram_range[0], ngram_range[1] + 1)}

        for word in words:
            length = len(word)

            for n in range(ngram_range[0], ngram_range[1] + 1):
                if length >= n:
                    for i in range(length - n + 1):
                        # Forward n-gram
                        ngram = word[i:i + n]
                        forward_counts[n][ngram] += 1

                        # Backward n-gram (reversed)
                        backward_ngram = word[i:i + n][::-1]  # Reverse the n-gram
                        backward_counts[n][backward_ngram] += 1

        return forward_counts, backward_counts

    def compute_first_last_counts(self):
        first_counts = defaultdict(int)
        last_counts = defaultdict(int)

        for word in self.full_dictionary:
            if len(word) > 0:
                first_counts[word[0]] += 1
                last_counts[word[-1]] += 1

        # Normalize to get probabilities
        total_first = sum(first_counts.values())
        total_last = sum(last_counts.values())

        first_probs = {k: v / total_first for k, v in first_counts.items()}
        last_probs = {k: v / total_last for k, v in last_counts.items()}

        return first_probs, last_probs

    def masked_word_to_tensor(self, masked_words, ngram_range=(2, 5), device=None):
        """Build per-letter n-gram features for a batch of masked words.

        Args:
            masked_words: Sequence of strings in which ``'_'`` marks a hidden
                letter. Letters are indexed with ``ord(char) - ord('a')``, so
                the code assumes lowercase a-z - TODO confirm callers guarantee it.
            ngram_range: Inclusive ``(smallest, largest)`` n-gram size used by
                the conditional-probability features (Step 5).
            device: Device on which the two returned tensors are allocated.

        Returns:
            tuple: ``(guess_tensor, reveal_tensor)``.
            ``reveal_tensor`` has shape ``[batch, 26]`` with 1.0 for every
            letter visible in the word. ``guess_tensor`` has shape
            ``[batch, 26, 8 + ngram_range[1] - ngram_range[0]]`` with this
            layout on the last axis:

            * 0     - character frequencies from the 100 most similar
                      same-length dictionary words (Step 2)
            * 1, 2  - first-/last-letter priors, filled only when the first /
                      last position is hidden (Step 3)
            * 3..6  - matches for four context patterns around each hidden
                      position (Step 4)
            * 7..   - one slot per n in ``ngram_range``: normalised sum of the
                      forward and backward conditional scores (Step 5)
        """
        batch_size = len(masked_words)
        reveal_tensor = torch.zeros(batch_size, 26, dtype=torch.float32, device=device)  # Revealed letters for safety
        guess_tensor = torch.zeros(batch_size, 26, 8 + ngram_range[1] - ngram_range[0], dtype=torch.float32, device=device)

        for i, word in enumerate(masked_words):
            # Step 1: Revealed Letters
            for char in word:
                if char != '_':
                    reveal_tensor[i][ord(char) - ord('a')] = 1.0

            # Step 2: Basic n-gram logic (First feature)
            # Score every same-length dictionary word by the number of positions that
            # agree with the masked word. A hidden position ('.') always counts as a
            # match, so the ranking is driven by the revealed letters only.
            target_word = word.replace('_', '.')
            similar_words = [w for w in self.full_dictionary if len(w) == len(word)]
            similar_scores = [
                sum(1 for a, b in zip(target_word, w) if a == b or a == '.')
                for w in similar_words
            ]
            # Sorting (score, word) tuples in reverse breaks score ties by the word itself.
            top_words = [
                w for _, w in sorted(zip(similar_scores, similar_words), reverse=True)[:100]
            ]

            # Compute local n-gram distributions
            total_char_frequencies = collections.defaultdict(float)

            for n in range(1, 8):  # Process 1-gram to 7-gram for the local words
                ngram_counts = collections.Counter()
                for top_word in top_words:
                    for j in range(len(top_word) - n + 1):
                        ngram = top_word[j:j + n]
                        ngram_counts[ngram] += 1

                # Calculate total characters in all n-grams
                total_chars = sum(count * n for count in ngram_counts.values())

                # Normalize n-grams to character frequencies
                char_frequencies = collections.Counter()
                for ngram, count in ngram_counts.items():
                    for char in ngram:
                        char_frequencies[char] += count

                # Normalize and merge with overall character frequencies
                if total_chars > 0:
                    normalized_frequencies = {
                        char: freq / total_chars for char, freq in char_frequencies.items()
                    }
                    # NOTE(review): softmax_weights(7) does not depend on the loop variables
                    # and is recomputed for every character; it could be hoisted.
                    for char, freq in normalized_frequencies.items():
                        total_char_frequencies[char] += freq * softmax_weights(7)[n]

            # Normalize total_char_frequencies
            total_char_frequencies_sum = sum(total_char_frequencies.values())
            if total_char_frequencies_sum > 0:
                for char in total_char_frequencies:
                    total_char_frequencies[char] /= total_char_frequencies_sum

            # Populate the first feature in guess_tensor
            for char, freq in total_char_frequencies.items():
                guess_tensor[i, ord(char) - ord('a'), 0] = freq

            # Step 3: First and Last Letter Probabilities (Second and Third features)
            if word[0] == '_':  # Missing first letter
                for char, prob in self.first_probs.items():
                    guess_tensor[i, ord(char) - ord('a'), 1] += prob
            if word[-1] == '_':  # Missing last letter
                for char, prob in self.last_probs.items():
                    guess_tensor[i, ord(char) - ord('a'), 2] += prob

            # Step 4: Masked Middle Probabilities (4th to 7th features)
            # The context is sliced from the raw masked word, so a neighbouring hidden
            # letter stays a literal '_' in the regex and only the centre is a wildcard.
            # NOTE(review): because this is an if/elif chain, the second n == 4 pattern
            # (feature 5) is reached only when the first n == 4 test fails, i.e. at
            # j == len(word) - 2 - confirm that is intended.
            for j, char in enumerate(word):
                if char == '_':  # Only process missing letters
                    for n in [3, 4, 5]:
                        if n == 3 and 1 <= j < len(word) - 1:
                            context = word[j - 1:j] + '.' + word[j + 1:j + 2]
                            ts_fl_idx = 3
                        elif n == 4 and 1 <= j < len(word) - 2:
                            context = word[j - 1:j] + '.' + word[j + 1:j + 3]
                            ts_fl_idx = 4
                        elif n == 4 and 2 <= j < len(word) - 1:
                            context = word[j - 2:j] + '.' + word[j + 1:j + 2]
                            ts_fl_idx = 5
                        elif n == 5 and 2 <= j <= len(word) - 3:
                            context = word[j - 2:j] + '.' + word[j + 1:j + 3]
                            ts_fl_idx = 6
                        else:
                            continue  # Skip invalid contexts

                        # Match possible n-grams in the forward counts
                        possible_matches = [k for k in self.forward_counts[n] if re.match(context, k)]
                        total_match_count = sum(self.forward_counts[n][k] for k in possible_matches)

                        # Every character of a matching n-gram (context letters included)
                        # receives the n-gram's share of the matched count.
                        for match in possible_matches:
                            for char in match:
                                guess_tensor[i, ord(char) - ord('a'), ts_fl_idx] += (
                                    self.forward_counts[n][match] / total_match_count
                                )

            # Step 5: Forward and Backward Conditional Probabilities (Last 4 features)
            for n in range(ngram_range[0], ngram_range[1] + 1):  # Iterate over n-gram sizes
                # Accumulators are created without `device`; they are copied into
                # guess_tensor by the slice assignment at the end of this loop body.
                forward_probs = torch.zeros(26, dtype=torch.float32)
                backward_probs = torch.zeros(26, dtype=torch.float32)

                for j, char in enumerate(word):
                    if char == '_':  # Only process missing letters
                        # Forward Conditional Probability
                        if j - (n - 1) >= 0:  # Enough context on the left
                            context = word[j - (n - 1):j] + "."  # "." for missing letter
                            possible_matches = [k for k in self.forward_counts[n] if re.match(context, k)]
                            total_match_count = sum(self.forward_counts[n][k] for k in possible_matches)

                            # Populate forward probabilities
                            for match in possible_matches:
                                forward_probs[ord(match[-1]) - ord('a')] += (
                                    self.forward_counts[n][match] / total_match_count
                                )

                        # Backward Conditional Probability
                        if j + (n - 1) < len(word):  # Enough context on the right
                            context = word[j + 1:j + n][::-1] + "."  # Reverse the context with "."
                            possible_matches = [k for k in self.backward_counts[n] if re.match(context, k)]
                            total_match_count = sum(self.backward_counts[n][k] for k in possible_matches)

                            # Populate backward probabilities
                            # NOTE(review): the wildcard is the LAST character of `context`,
                            # yet the score is credited to match[0], which is the right-hand
                            # context letter farthest from position j - confirm whether
                            # match[-1] was intended. Changing it would alter the features
                            # existing checkpoints were trained on.
                            for match in possible_matches:
                                backward_probs[ord(match[0]) - ord('a')] += (
                                    self.backward_counts[n][match] / total_match_count
                                )

                # Combine forward and backward probabilities
                combined_probs = forward_probs + backward_probs
                combined_probs_sum = combined_probs.sum()
                if combined_probs_sum > 0:
                    combined_probs /= combined_probs_sum  # Normalize

                # Populate the guess tensor for this n-gram size
                guess_tensor[i, :, 7 + n - ngram_range[0]] = combined_probs
        return guess_tensor, reveal_tensor

    def forward(self, masked_words, aux_tensor, device=None):
        guess_tensor, reveal_tensor = self.masked_word_to_tensor(masked_words, device=device) # [batch_size, 26, 11]
        aux_tensor = aux_tensor.to(device) # [batch_size, 26, 4]

        x = torch.cat([guess_tensor, aux_tensor], dim=-1)  # Concatenate along feature dim, last dim. [batch_size, 26, 15]

        # Skip connection
        identity = x  # Store the input for the skip connection
        identity = self.projection(identity)  # [batch_size, 26, 32]

        # Forward pass with updated dropout and BatchNorm placement
        x = self.fc1(x) # [batch_size, 26, 64]
        x = self.bn1(x.permute(0, 2, 1))
        x = self.activation_gelu(x.permute(0, 2, 1))
        x = self.dropout(x)

        x = self.fc1_5(x) # [batch_size, 26, 128]
        x = self.bn1_5(x.permute(0, 2, 1))
        x = self.activation_leakyrelu(x.permute(0, 2, 1))
        x = self.dropout(x)

        x = self.fc2(x) # [batch_size, 26, 32]
        x = self.bn2(x.permute(0, 2, 1))
        x += identity.permute(0, 2, 1)  # Residual connection
        x = self.activation_tanh(x.permute(0, 2, 1))
        x = self.dropout(x)

        x = self.fc3(x) # [batch_size, 26, 1]
        x = torch.squeeze(x, dim=-1)  # [batch_size, 26]
        x = torch.log_softmax(x, dim=-1)  # Log probabilities for KLDivLoss

        return x, reveal_tensor # [batch_size, 26]


In [None]:
# Model instance. The constructor reads its default dictionary file
# ("words_250000_train.txt") and precomputes the n-gram tables before creating the layers.
model = MyModel()

In [None]:
# Pull the saved model weights out of an already-loaded checkpoint dict.
# NOTE(review): `checkpoint` is not assigned in the cells directly above; presumably it was
# loaded earlier in the notebook - confirm this cell survives Restart & Run All.
state_dct_tmp = checkpoint["model_state_dict"]

In [None]:
# Show the parameter and buffer names stored in the checkpoint.
state_dct_tmp.keys()

odict_keys(['fc1.weight', 'fc1.bias', 'bn1.weight', 'bn1.bias', 'bn1.running_mean', 'bn1.running_var', 'bn1.num_batches_tracked', 'fc1_5.weight', 'fc1_5.bias', 'bn1_5.weight', 'bn1_5.bias', 'bn1_5.running_mean', 'bn1_5.running_var', 'bn1_5.num_batches_tracked', 'fc2.weight', 'fc2.bias', 'bn2.weight', 'bn2.bias', 'bn2.running_mean', 'bn2.running_var', 'bn2.num_batches_tracked', 'fc3.weight', 'fc3.bias', 'projection.weight', 'projection.bias'])

In [None]:
# # Modify new layer weight to handle size mismatch
# with torch.no_grad():  # Avoid tracking gradients during manual initialization

#     # Load the weights from the state_dict
#     old_fc2_weight = state_dct_tmp['fc2.weight']  # Shape: [32, 64]
#     old_fc2_bias = state_dct_tmp['fc2.bias'].unsqueeze(-1)      # Shape: [32, 1]

#     old_proj_weight = state_dct_tmp['projection.weight']  # Shape: [1, 15]
#     old_proj_bias = state_dct_tmp['projection.bias'].unsqueeze(-1)      # Shape: [1, 1]

#     # Initialize the new weights and copy the existing weights
#     new_fc2_weight = torch.zeros(32, 128)  # Shape: [32, 128]
#     new_fc2_weight[:, :64] = old_fc2_weight  # Copy the first few input dimensions

#     new_proj_weight = torch.zeros(32, 15)  # Shape: [32, 15]
#     new_proj_weight[:1, :] = old_proj_weight  # Copy the first few input dimensions

#     # Initialize the new dimensions using Xavier initialization
#     nn.init.xavier_uniform_(old_fc2_bias) # Shape: [32, 1]
#     nn.init.xavier_uniform_(old_proj_bias) # Shape: [32, 1]
#     nn.init.xavier_uniform_(new_fc2_weight[:, 64:])
#     nn.init.xavier_uniform_(new_proj_weight[1:, :])

#     fc1_5_wt = model.fc1_5.weight
#     fc1_5_bs = model.fc1_5.bias.unsqueeze(-1)
#     nn.init.xavier_uniform_(fc1_5_wt)
#     nn.init.xavier_uniform_(fc1_5_bs)

#     # Update the model new layer weight
#     model.fc1_5.weight.data.copy_(fc1_5_wt)
#     model.fc1_5.bias.data.copy_(fc1_5_bs.squeeze(-1))
#     model.fc2.weight.data.copy_(new_fc2_weight)
#     model.fc2.bias.data.copy_(old_fc2_bias.squeeze(-1))
#     model.projection.weight.data.copy_(new_proj_weight)
#     model.projection.bias.data.copy_(old_proj_bias.squeeze(-1))

# # Load the rest of the state dictionary
# state_dct_tmp.pop('fc2.weight')  # Remove from the checkpoint state dict
# state_dct_tmp.pop('fc2.bias')    # Remove from the checkpoint state dict
# state_dct_tmp.pop('projection.weight')  # Remove from the checkpoint state dict
# state_dct_tmp.pop('projection.bias')    # Remove from the checkpoint state dict
# model.load_state_dict(state_dct_tmp, strict=False)  # Load remaining parameters

In [None]:
# Print the name of every trainable parameter; each `ele` is a (name, parameter) pair.
# Unlike the checkpoint keys, this listing has no BatchNorm buffers (running_mean etc.).
for ele in model.named_parameters():
  print(ele[0])

fc1.weight
fc1.bias
bn1.weight
bn1.bias
fc1_5.weight
fc1_5.bias
bn1_5.weight
bn1_5.bias
fc2.weight
fc2.bias
bn2.weight
bn2.bias
fc3.weight
fc3.bias
projection.weight
projection.bias


In [None]:
# Initial learning rate, used both to build the optimizer and as train_model's init_lr argument.
init_lr = 1e-5

In [None]:
# optimizer = optim.Adam(model.parameters(), lr=init_lr)

In [None]:
# SGD over all model parameters with momentum 0.9 and weight decay 1e-4.
optimizer = torch.optim.SGD(model.parameters(), lr=init_lr, momentum=0.9, weight_decay=1e-4)

In [None]:
# KL-divergence loss between the model's log-probabilities and a target distribution.
# MyModel.forward() returns log_softmax output, which is the input form KLDivLoss expects;
# 'batchmean' divides the summed loss by the batch size.
loss_fn = nn.KLDivLoss(reduction='batchmean')  # Assumes log-softmax output from the model

In [None]:
# Move the model's parameters and buffers to `device` (defined elsewhere in the notebook).
# As the last expression of the cell, the returned module's repr is displayed.
model.to(device)

MyModel(
  (fc1): Linear(in_features=15, out_features=64, bias=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1_5): Linear(in_features=64, out_features=128, bias=True)
  (bn1_5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=32, bias=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (activation_gelu): GELU(approximate='none')
  (activation_leakyrelu): LeakyReLU(negative_slope=0.05)
  (activation_tanh): Tanh()
  (projection): Linear(in_features=15, out_features=32, bias=True)
)

In [None]:
# Run the training loop. train_model returns the training and validation loss series,
# and writes checkpoints named f"{save_model_path}_bc_{global_batch_index-1}.pth".
train_loss, val_loss = train_model(
    train_loader=train_loader,
    val_loader=val_loader,
    model=model,
    optimizer=optimizer,
    init_lr=init_lr,
    loss_fn=loss_fn,
    save_model_path="model_checkpoint",
    num_epochs=3,
    # Presumably resumes from this existing checkpoint - TODO confirm in train_model.
    checkpoint_path="model_checkpoint_bc_5325.pth",
    # Presumably signals that the checkpoint matches the current architecture - TODO confirm.
    noArchitectChange=True,
)

In [None]:
# Finish W&B run. train_model calls wandb.finish() itself on normal completion, but its
# early-stopping branch returns before reaching that call, so this closes the run in that case.
wandb.finish()

In [None]:
# Function to construct auxiliary tensor, API real-time version
def construct_auxiliary_tensor_realtime(batch_inputs, this_guessed_letters):
    """Build the auxiliary per-letter features for a single live game state.

    Args:
        batch_inputs: Sequence holding exactly one masked word ('_' = hidden).
        this_guessed_letters: Letters guessed so far in the current game.

    Returns:
        tuple: ``(aux_tensor, rg_tensor)``.
        ``aux_tensor`` is ``[1, 26, 4]`` with, per letter:
        0 - 1 if revealed, -1 if guessed but absent from the word, else 0;
        1 - index of the letter's last occurrence from the left (-1 if hidden);
        2 - that occurrence's distance from the right end (-1 if hidden);
        3 - 1 for vowels (a, e, i, o, u), else 0.
        ``rg_tensor`` is ``[1, 26]`` on the global ``device`` with 1 for every
        letter that is already revealed or guessed.

    Raises:
        AssertionError: If ``batch_inputs`` does not contain exactly one word.
    """
    num_classes = 26
    batch_size = len(batch_inputs)
    assert batch_size==1, "batch_size not equal to 1 in construct auxiliary tensor, API real-time version"

    base = ord('a')
    aux_tensor = torch.zeros(batch_size, num_classes, 4)
    rg_tensor = torch.zeros(batch_size, 26, dtype=torch.float32, device=device)

    # Channel 3: vowel indicator, identical for every sample in the batch.
    vowel_flags = torch.tensor([1 if chr(base + k) in 'aeiou' else 0 for k in range(num_classes)])
    aux_tensor[:, :, 3] = vowel_flags

    # Channels 1-2 default to -1 (letter not visible anywhere in the word).
    aux_tensor[:, :, 1:3] = -1

    guessed = set(this_guessed_letters)

    for row, masked_word in enumerate(batch_inputs):
        shown = set(masked_word) - {'_'}

        # Channel 0: +1 for revealed letters, -1 for wrong guesses.
        for ch in shown:
            aux_tensor[row, ord(ch) - base, 0] = 1
        for ch in guessed - shown:
            aux_tensor[row, ord(ch) - base, 0] = -1

        # Channels 1-2: positions of revealed letters; a repeated letter keeps
        # the values of its last occurrence.
        word_len = len(masked_word)
        for pos, ch in enumerate(masked_word):
            if ch == '_':
                continue
            col = ord(ch) - base
            aux_tensor[row, col, 1] = pos
            aux_tensor[row, col, 2] = word_len - pos - 1

        # Letters that must not be proposed again.
        for ch in guessed | shown:
            rg_tensor[row, ord(ch) - base] = 1

    return aux_tensor, rg_tensor

Use checkpoint 1200 (`model_checkpoint_bc_1200.pth`) to run the v1 game.

In [None]:
# Load the v1 checkpoint, remapping its tensors onto `device`, and show its top-level keys.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source;
# consider weights_only=True if the installed torch supports it. The echoed source line in this
# cell's output looks like the tail of a torch.load warning - confirm.
checkpoint = torch.load('model_checkpoint_bc_1200.pth', map_location=device)
checkpoint.keys()

  checkpoint = torch.load('model_checkpoint_bc_1200.pth', map_location=device)


In [None]:
# Model instance used for game-time inference
GameInferModel = MyModel()
# Optimizer. It exists so the checkpoint's optimizer state can be loaded further down.
# NOTE(review): Adam is used here while the training cell above builds SGD; the optimizer type
# must match the one that produced the checkpoint - confirm for the v1 run.
GameInferoptimizer = optim.Adam(GameInferModel.parameters())

In [None]:
# Move the inference model to `device`; the returned module's repr is displayed.
# NOTE(review): the saved output of this cell shows fc1 with in_features=12 and no
# fc1_5/projection layers, which differs from the MyModel class defined above - the output
# comes from an earlier class definition, so this section depends on hidden notebook state.
GameInferModel.to(device)

MyModel(
  (fc1): Linear(in_features=12, out_features=64, bias=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (activation): ReLU()
)

In [None]:
# Load model state. load_state_dict is strict by default, so the checkpoint's keys must
# match the architecture of the MyModel class that built GameInferModel.
GameInferModel.load_state_dict(checkpoint["model_state_dict"])
# Load optimizer state
GameInferoptimizer.load_state_dict(checkpoint["optimizer_state_dict"])

In [None]:
# Switch to inference mode: dropout is disabled and BatchNorm uses its running statistics.
GameInferModel.eval()

MyModel(
  (fc1): Linear(in_features=12, out_features=64, bias=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (activation): ReLU()
)

In [None]:
from scipy.stats import spearmanr

In [None]:
# Inspect the first test batch in detail: run the model on every 4th sample and compare the
# letters ranked highest by the labels with those ranked highest by the model.
with torch.no_grad():
    for tst_inputs, tst_labels in test_loader:
        tst_labels = tst_labels.to(device)
        aux_tensor = construct_auxiliary_tensor(tst_inputs, tst_labels, 0.2)
        aux_tensor = aux_tensor.to(device)
        print('inputs: ',tst_inputs[0::4])
        print('labels: ',tst_labels[0::4])
        print('aux: ', aux_tensor[0::4])
        tst_outputs, tst_outputs_reveal_tensor = GameInferModel(tst_inputs[0::4], aux_tensor[0::4], device=device)
        print('reveal: ',tst_outputs_reveal_tensor)
        print('outputs: ',tst_outputs)

        # Subtracting the reveal mask lowers already-revealed letters, so the top-5
        # is taken among the letters that are still hidden.
        top_values, top_indices = torch.topk(tst_labels[0::4] - tst_outputs_reveal_tensor, k=5, dim=1)
        # Convert indices to letters (a-z)
        letters = [[chr(97 + idx) for idx in row] for row in top_indices.tolist()]
        # Print results
        print("\nTop 5 Letters:")
        print(letters)

        # Same idea for the predictions: a large penalty (1e4) pushes revealed letters
        # below every log-probability.
        top_values_pred, top_indices_pred = torch.topk(tst_outputs - 1e4 * tst_outputs_reveal_tensor, k=5, dim=1)
        # Convert indices to letters (a-z)
        letters_pred = [[chr(97 + idx) for idx in row] for row in top_indices_pred.tolist()]
        # Print results
        print("\nTop 5 Letters Pred:")
        print(letters_pred)

        # Convert tensors to numpy arrays
        tensor1_np = (tst_labels[0::4] - tst_outputs_reveal_tensor).cpu().numpy()
        tensor2_np = (tst_outputs - 1e4 * tst_outputs_reveal_tensor).cpu().numpy()
        # Calculate Spearman rank correlations for each row (label ranking vs. predicted ranking)
        spearman_correlations = [spearmanr(row1, row2).correlation for row1, row2 in zip(tensor1_np, tensor2_np)]
        # Compute mean and median of Spearman correlations
        mean_correlation = np.mean(spearman_correlations)
        median_correlation = np.median(spearman_correlations)
        print("\n Rank Corr Mean Med: ", round(mean_correlation,2), round(median_correlation,2))
        # Only the first batch is inspected.
        break

inputs:  ['____am__e', 'non__ur__e_', '_a_____s', '_nte____lt']
labels:  tensor([[1.0000, 0.0000, 0.1667, 0.1667, 1.0000, 0.0000, 0.0000, 0.0000, 0.1667,
         0.0000, 0.0000, 0.0000, 1.0000, 0.3333, 0.1667, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.4000, 0.2000, 1.0000, 0.0000, 0.0000, 0.4000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 1.0000, 0.0000, 0.0000, 1.0000,
         0.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [1.0000, 0.0000, 0.1667, 0.0000, 0.0000, 0.0000, 0.0000, 0.1667, 0.0000,
         0.0000, 0.0000, 0.0000, 0.1667, 0.0000, 0.1667, 0.1667, 0.0000, 0.1667,
         1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
        [0.2000, 0.0000, 0.0000, 0.0000, 1.0000, 0.2000, 0.0000, 0.0000, 0.2000,
         0.0000, 0.0000, 1.0000, 0.0000, 1.0000, 0.0000, 0.0000, 0.0000, 0.2000,
         0.0000, 1.0000, 0.2000, 0.0000, 0.0000, 0.0000, 

In [None]:
# Repeat the ranking comparison on every 20th test batch, using all samples of the batch.
with torch.no_grad():
    for tst_index, (tst_inputs, tst_labels) in enumerate(test_loader):
        if tst_index % 20 == 0:
            print('\n index: ', tst_index)
            tst_labels = tst_labels.to(device)
            aux_tensor = construct_auxiliary_tensor(tst_inputs, tst_labels, 0.2)
            aux_tensor = aux_tensor.to(device)
            tst_outputs, tst_outputs_reveal_tensor = GameInferModel(tst_inputs, aux_tensor, device=device)

            # Top-5 label letters among those not yet revealed.
            top_values, top_indices = torch.topk(tst_labels - tst_outputs_reveal_tensor, k=5, dim=1)
            # Convert indices to letters (a-z)
            letters = [[chr(97 + idx) for idx in row] for row in top_indices.tolist()]
            # Print results
            print("\nTop 5 Letters:")
            print(letters)

            # Top-5 predicted letters; revealed letters are pushed down by the 1e4 penalty.
            top_values_pred, top_indices_pred = torch.topk(tst_outputs - 1e4 * tst_outputs_reveal_tensor, k=5, dim=1)
            # Convert indices to letters (a-z)
            letters_pred = [[chr(97 + idx) for idx in row] for row in top_indices_pred.tolist()]
            # Print results
            print("\nTop 5 Letters Pred:")
            print(letters_pred)

            # Convert tensors to numpy arrays
            tensor1_np = (tst_labels - tst_outputs_reveal_tensor).cpu().numpy()
            tensor2_np = (tst_outputs - 1e4 * tst_outputs_reveal_tensor).cpu().numpy()
            # Calculate Spearman rank correlations for each row
            spearman_correlations = [spearmanr(row1, row2).correlation for row1, row2 in zip(tensor1_np, tensor2_np)]
            # Compute mean and median of Spearman correlations
            mean_correlation = np.mean(spearman_correlations)
            median_correlation = np.median(spearman_correlations)
            print("\n Rank Corr Mean Med: ", round(mean_correlation,2), round(median_correlation,2))


 index:  0

Top 5 Letters:
[['n', 'i', 'c', 'o', 'd'], ['r', 'o', 'e', 't', 'a'], ['e', 'd', 'o', 'm', 'a'], ['a', 'g', 'd', 'i', 'e'], ['c', 'h', 'd', 'b', 'a'], ['y', 'v', 'c', 'b', 'a'], ['i', 'h', 'd', 'r', 'e'], ['a', 'i', 'b', 'l', 'e'], ['h', 'c', 'm', 'p', 'o'], ['e', 'c', 'a', 't', 'b'], ['o', 'i', 'e', 'd', 'b'], ['e', 'm', 'f', 'o', 'i'], ['f', 'a', 'i', 'u', 'r'], ['e', 'h', 'a', 'o', 'c'], ['o', 'c', 'a', 'd', 'b'], ['o', 'r', 'c', 'b', 'a']]

Top 5 Letters Pred:
[['i', 'n', 'r', 's', 'o'], ['e', 's', 'n', 't', 'i'], ['e', 't', 'o', 'i', 'a'], ['e', 'i', 'a', 'r', 'n'], ['s', 'i', 't', 'd', 'c'], ['e', 'y', 's', 'r', 'g'], ['i', 'r', 'h', 'e', 'a'], ['e', 'a', 'c', 'i', 'r'], ['r', 't', 'e', 'n', 'i'], ['e', 'a', 'i', 't', 's'], ['e', 'i', 'o', 'u', 'l'], ['t', 'i', 'e', 'o', 'r'], ['r', 'a', 's', 'd', 'i'], ['a', 'e', 'o', 'n', 'm'], ['e', 'o', 'y', 't', 'u'], ['r', 'i', 'a', 'o', 't']]

 Rank Corr Mean Med:  0.5 0.53

 index:  20

Top 5 Letters:
[['i', 's', 'e', 'u', 'n

In [None]:
# Play 910 practice games through `api` (created elsewhere in the notebook) and report the
# cumulative practice success rate after every 10th game.
for i in range(910):
    print('Playing ', i, ' th game')
    api.start_game(practice=1,verbose=True)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
    # Cumulative rate as reported by my_status(), not just over the games played in this loop.
    practice_success_rate = total_practice_successes / total_practice_runs
    if (i+1) % 10 == 0:
        print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
Sever response: {'game_id': '02390bc84064', 'status': 'ongoing', 'tries_remains': 1, 'word': '_ i r e e a t i n g '}
  Clean word for NN: _ireeating

  Clean word: .ireeating
Fuzzy match size: 393 with num letters mismatch: 7
from_PE:  u
top_NN_letters:  [['d', 'b', 'o', 'u', 'h']]
Guessing letter: d
Sever response: {'game_id': '02390bc84064', 'status': 'failed', 'tries_remains': 0, 'word': '_ i r e e a t i n g '}
Failed game: 02390bc84064. Because of: # of tries exceeded!
Playing  834  th game
Successfully start a new game! Game ID: c818dd737943. # of tries remaining: 6. Word: _ _ _ _ _ _ _ _ _ _ _ _ .
  Clean word for NN: ____________

  Clean word: ............
from_PE:  e
top_NN_letters:  [['e', 'i', 'o', 'a', 'n']]
Guessing letter: e
Sever response: {'game_id': 'c818dd737943', 'status': 'ongoing', 'tries_remains': 6, 'word': '_ _ _ _ e _ _ e _ _ _ _ '}
  Clean word for NN: ____e__e____
top_NN_letters:  [['s', 'n', 'r', 't', 'd']]
Guessing l

Neural network version 1: 6,000 cumulative practice runs so far, with a success ratio of 0.401.

Use checkpoint 5550 to run the v2 games.

In [None]:
# Read the checkpoint file named below onto `device`; the cell's last
# expression displays the checkpoint's top-level keys.
ckpt_path = 'model_checkpoint_bc_5550.pth'
checkpoint = torch.load(ckpt_path, map_location=device)
checkpoint.keys()

  checkpoint = torch.load('model_checkpoint_bc_5550.pth', map_location=device)


dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])

In [None]:
# Model instance: a freshly constructed MyModel (its weights are filled in
# from the checkpoint by a later cell).
GameInferModel = MyModel()
# Optimizer: SGD over the model's parameters, created with no extra arguments
# so that the saved optimizer state can be loaded into it later.
GameInferoptimizer = optim.SGD(GameInferModel.parameters())

In [None]:
# Move the model to `device`; as the cell's last expression, the returned module is displayed.
GameInferModel.to(device)

MyModel(
  (fc1): Linear(in_features=15, out_features=64, bias=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1_5): Linear(in_features=64, out_features=128, bias=True)
  (bn1_5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=32, bias=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (activation_gelu): GELU(approximate='none')
  (activation_leakyrelu): LeakyReLU(negative_slope=0.05)
  (activation_tanh): Tanh()
  (projection): Linear(in_features=15, out_features=32, bias=True)
)

In [None]:
# spearmanr is used by the evaluation cells that follow; import it before any other work.
from scipy.stats import spearmanr

# Load model state
GameInferModel.load_state_dict(checkpoint["model_state_dict"])
# Load optimizer state
GameInferoptimizer.load_state_dict(checkpoint["optimizer_state_dict"])
# Switch the model to evaluation mode. The trailing semicolon keeps the cell
# silent, matching the original cell, which ended on an import and showed nothing.
GameInferModel.eval();

In [None]:
# Spot-check the loaded model on every 80th test batch: print the top-5 letters
# ranked by the label scores and by the predicted scores, then the mean/median
# per-row Spearman rank correlation between the two score matrices.
with torch.no_grad():
    for tst_index, (tst_inputs, tst_labels) in enumerate(test_loader):
        if tst_index % 80 != 0:
            continue

        print('\n index: ', tst_index)
        tst_labels = tst_labels.to(device)
        aux_tensor = construct_auxiliary_tensor(tst_inputs, tst_labels, 0.2).to(device)
        tst_outputs, tst_outputs_reveal_tensor = GameInferModel(tst_inputs, aux_tensor, device=device)

        # Reference scores: labels minus the reveal tensor.
        label_scores = tst_labels - tst_outputs_reveal_tensor
        # Predicted scores: outputs minus 1e4 times the reveal tensor
        # (presumably to push already-revealed letters out of the top-k — TODO confirm).
        pred_scores = tst_outputs - 1e4 * tst_outputs_reveal_tensor

        # Column index 0..25 maps to 'a'..'z'.
        top_indices = torch.topk(label_scores, k=5, dim=1).indices
        letters = [[chr(ord('a') + idx) for idx in row] for row in top_indices.tolist()]
        print("\nTop 5 Letters:")
        print(letters)

        top_indices_pred = torch.topk(pred_scores, k=5, dim=1).indices
        letters_pred = [[chr(ord('a') + idx) for idx in row] for row in top_indices_pred.tolist()]
        print("\nTop 5 Letters Pred:")
        print(letters_pred)

        # One Spearman coefficient per row of the batch.
        label_rows = label_scores.cpu().numpy()
        pred_rows = pred_scores.cpu().numpy()
        spearman_correlations = []
        for label_row, pred_row in zip(label_rows, pred_rows):
            spearman_correlations.append(spearmanr(label_row, pred_row).correlation)

        mean_correlation = np.mean(spearman_correlations)
        median_correlation = np.median(spearman_correlations)
        print("\n Rank Corr Mean Med: ", round(mean_correlation,2), round(median_correlation,2))


 index:  0

Top 5 Letters:
[['c', 'n', 'd', 's', 't'], ['r', 'h', 'p', 'i', 'q'], ['d', 'u', 'o', 'a', 'y'], ['l', 's', 't', 'r', 'q'], ['h', 'n', 'c', 'r', 'o'], ['i', 'n', 'u', 'v', 'p'], ['i', 'o', 'u', 'e', 'c'], ['e', 'l', 's', 'i', 'o'], ['p', 'r', 'c', 's', 't'], ['r', 'i', 'g', 'n', 't'], ['o', 'i', 's', 'n', 'r'], ['w', 'o', 'f', 'e', 'd'], ['t', 'i', 'u', 'n', 'r'], ['l', 'e', 'b', 'i', 's'], ['n', 'r', 'v', 'i', 's'], ['o', 'e', 'r', 'c', 's']]

Top 5 Letters Pred:
[['s', 'n', 'r', 'c', 'd'], ['r', 'i', 'a', 'n', 's'], ['d', 'r', 'i', 't', 'a'], ['l', 'r', 'e', 's', 'c'], ['n', 'e', 's', 'r', 'l'], ['i', 'e', 's', 'c', 'o'], ['o', 'e', 'a', 'i', 'c'], ['l', 'e', 'r', 'i', 't'], ['p', 'r', 't', 'e', 'n'], ['r', 'n', 'i', 'l', 's'], ['i', 'e', 'r', 'n', 'o'], ['e', 'i', 't', 's', 'o'], ['a', 'n', 's', 'r', 'i'], ['e', 'i', 'a', 'r', 's'], ['r', 'n', 's', 'l', 'i'], ['e', 'i', 'r', 'a', 't']]

 Rank Corr Mean Med:  0.5 0.5

 index:  80

Top 5 Letters:
[['l', 'i', 'r', 'm', 'q'

In [None]:
# Baseline: practice wins recorded before this batch, so the batch's own hit rate can be isolated.
tps = total_practice_successes
print(tps)
amt_this = 70
for i in range(amt_this):
    print('Playing ', i, ' th game')
    api.start_game(practice=1,verbose=True)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
    practice_success_rate = total_practice_successes / total_practice_runs
    if (i+1) % 10 == 0:
        print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))
        # Hit rate of this batch so far: divide by the games played up to now (i + 1).
        # Dividing by amt_this understated every report except the final one.
        print(round((total_practice_successes - tps) / (i + 1) , 3 ) )
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

2781
Playing  0  th game
Successfully start a new game! Game ID: b678492654da. # of tries remaining: 6. Word: _ _ _ _ _ _ _ _ _ _ .
  Clean word for NN: __________

  Clean word: ..........
from PE version:  e
top_NN_letters:  [['e', 'i', 'a', 'o', 'r']]
Guessing letter: e
Sever response: {'game_id': 'b678492654da', 'status': 'ongoing', 'tries_remains': 6, 'word': '_ _ _ _ _ _ _ _ _ e '}
  Clean word for NN: _________e

  Clean word: .........e
from PE version:  a
top_NN_letters:  [['a', 's', 'i', 't', 'r']]
Guessing letter: a
Sever response: {'game_id': 'b678492654da', 'status': 'ongoing', 'tries_remains': 6, 'word': '_ a _ _ _ _ _ _ _ e '}
  Clean word for NN: _a_______e
top_NN_letters:  [['i', 't', 'l', 'r', 's']]
Guessing letter: i
Sever response: {'game_id': 'b678492654da', 'status': 'ongoing', 'tries_remains': 6, 'word': '_ a _ _ _ _ _ i _ e '}
  Clean word for NN: _a_____i_e
top_NN_letters:  [['n', 't', 'r', 'l', 's']]
Guessing letter: n
Sever response: {'game_id': 'b678492654da

neural network version 2: 7000 cumulative practice runs SO FAR, 0.401 ratio

Among several candidate checkpoints, use the test-set hit rate and the real-time in-game hit rate to select the best model. Selected: model_checkpoint_bc_5500.pth (the checkpoint submitted further down).

In [None]:
# Candidate checkpoint files, numbered 3500 through 5500 inclusive in increments of 100.
ckpt_candidates = ["model_checkpoint_bc_%d.pth" % step for step in range(3500, 5501, 100)]

In [None]:
# Score every candidate checkpoint on every 5th test batch.
# res_candidates maps checkpoint file name -> [mean, median, std] of the per-row
# Spearman rank correlation, each averaged over the sampled batches.
res_candidates = {}
for cand in ckpt_candidates:
    print(cand)
    checkpoint = torch.load(cand, map_location=device)
    print(checkpoint.keys())

    # Model instance
    GameInferModel = MyModel()
    # Optimizer
    GameInferoptimizer = optim.SGD(GameInferModel.parameters())

    GameInferModel.to(device)

    # Load model state
    GameInferModel.load_state_dict(checkpoint["model_state_dict"])
    # Load optimizer state
    GameInferoptimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    GameInferModel.eval()

    # Running sums of the per-batch mean / median / std correlation.
    res_tmp = [0 , 0 , 0]
    with torch.no_grad():
        for tst_index, (tst_inputs, tst_labels) in enumerate(test_loader):
            if tst_index % 5 != 0:
                continue
            tst_labels = tst_labels.to(device)
            aux_tensor = construct_auxiliary_tensor(tst_inputs, tst_labels, 0.2)
            aux_tensor = aux_tensor.to(device)
            tst_outputs, tst_outputs_reveal_tensor = GameInferModel(tst_inputs, aux_tensor, device=device)

            # Only the correlation statistics are kept; the top-5 letter decoding
            # done by the display cell was never used here and has been dropped.
            tensor1_np = (tst_labels - tst_outputs_reveal_tensor).cpu().numpy()
            tensor2_np = (tst_outputs - 1e4 * tst_outputs_reveal_tensor).cpu().numpy()
            # Calculate Spearman rank correlations for each row
            spearman_correlations = [spearmanr(row1, row2).correlation for row1, row2 in zip(tensor1_np, tensor2_np)]

            res_tmp[0] += np.mean(spearman_correlations)
            res_tmp[1] += np.median(spearman_correlations)
            res_tmp[2] += np.std(spearman_correlations)

    # Batches 0, 5, 10, ... were sampled, i.e. ceil(len(test_loader) / 5) of them.
    n_sampled = np.ceil(len(test_loader) / 5)
    res_tmp = [total / n_sampled for total in res_tmp]
    print(res_tmp)
    res_candidates[cand] = res_tmp

model_checkpoint_bc_4200.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])


  checkpoint = torch.load(cand, map_location=device)


[0.5113684009413206, 0.5274296792153219, 0.12684245589909918]
model_checkpoint_bc_4300.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5116099097318955, 0.5266996889927479, 0.12748914722155108]
model_checkpoint_bc_4400.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.507734108742387, 0.5216285204425489, 0.12693975096089877]
model_checkpoint_bc_4500.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5071937932265851, 0.5209542141257962, 0.1273876168663449]
model_checkpoint_bc_4600.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5104514979821988, 0.524096440763613, 0.1280729190997645]
model_checkpoint_bc_4700.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5106681103155294, 0.524953744299711, 0.12800751741748495]
model_checkpoint_bc_4800.pth
dict_keys(['model_state_

In [None]:
# Import pandas here: the only visible `import pandas as pd` sits in the next cell,
# so a top-to-bottom run could otherwise hit a NameError on this one.
# Harmless if pandas was already imported earlier.
import pandas as pd

# One row per checkpoint; the three columns follow the order of each res_candidates value.
res_candidates_out = pd.DataFrame(res_candidates).T
res_candidates_out.columns = ['Mean', 'Median', 'Std']

In [None]:
import pandas as pd

# Rebuild the metrics table from the two CSV exports, stacked row-wise;
# the first column of each file becomes the index.
metrics_csv_files = ['metrics_out_v0201.csv', 'metrics_out_v0202.csv']
res_candidates_out = pd.concat([pd.read_csv(csv_path, index_col=0) for csv_path in metrics_csv_files])

In [None]:
# Composite score per checkpoint: midpoint of its 'Mean' and 'Median' columns (added in place).
res_candidates_out['Average'] = res_candidates_out['Mean'].add(res_candidates_out['Median']).div(2)
# Rank by the composite score, best first.
res_candidates_out_sorted = res_candidates_out.sort_values('Average', ascending=False)
# Display the seven best-scoring rows, re-ordered by ascending 'Std'.
top_by_average = res_candidates_out_sorted.head(7)
top_by_average.sort_values('Std')

Unnamed: 0,Mean,Median,Std,Average
model_checkpoint_bc_4200.pth,0.511368,0.52743,0.126842,0.519399
model_checkpoint_bc_4300.pth,0.51161,0.5267,0.127489,0.519155
model_checkpoint_bc_5400.pth,0.511544,0.525496,0.12774,0.51852
model_checkpoint_bc_5500.pth,0.511611,0.525863,0.127778,0.518737
model_checkpoint_bc_5300.pth,0.511501,0.52569,0.127909,0.518595
model_checkpoint_bc_5000.pth,0.511622,0.526172,0.12841,0.518897
model_checkpoint_bc_4800.pth,0.511552,0.526645,0.128568,0.519099


In [None]:
# further test more granular grid checkpoints
ckpt_candidates = ["model_checkpoint_bc_%d.pth" % step for step in (4150, 4250, 4350, 5350, 5450)]

In [None]:
# Score every candidate checkpoint on every 5th test batch.
# res_candidates maps checkpoint file name -> [mean, median, std] of the per-row
# Spearman rank correlation, each averaged over the sampled batches.
res_candidates = {}
for cand in ckpt_candidates:
    print(cand)
    checkpoint = torch.load(cand, map_location=device)
    print(checkpoint.keys())

    # Model instance
    GameInferModel = MyModel()
    # Optimizer
    GameInferoptimizer = optim.SGD(GameInferModel.parameters())

    GameInferModel.to(device)

    # Load model state
    GameInferModel.load_state_dict(checkpoint["model_state_dict"])
    # Load optimizer state
    GameInferoptimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    GameInferModel.eval()

    # Running sums of the per-batch mean / median / std correlation.
    res_tmp = [0 , 0 , 0]
    with torch.no_grad():
        for tst_index, (tst_inputs, tst_labels) in enumerate(test_loader):
            if tst_index % 5 != 0:
                continue
            tst_labels = tst_labels.to(device)
            aux_tensor = construct_auxiliary_tensor(tst_inputs, tst_labels, 0.2)
            aux_tensor = aux_tensor.to(device)
            tst_outputs, tst_outputs_reveal_tensor = GameInferModel(tst_inputs, aux_tensor, device=device)

            # Only the correlation statistics are kept; the top-5 letter decoding
            # done by the display cell was never used here and has been dropped.
            tensor1_np = (tst_labels - tst_outputs_reveal_tensor).cpu().numpy()
            tensor2_np = (tst_outputs - 1e4 * tst_outputs_reveal_tensor).cpu().numpy()
            # Calculate Spearman rank correlations for each row
            spearman_correlations = [spearmanr(row1, row2).correlation for row1, row2 in zip(tensor1_np, tensor2_np)]

            res_tmp[0] += np.mean(spearman_correlations)
            res_tmp[1] += np.median(spearman_correlations)
            res_tmp[2] += np.std(spearman_correlations)

    # Batches 0, 5, 10, ... were sampled, i.e. ceil(len(test_loader) / 5) of them.
    n_sampled = np.ceil(len(test_loader) / 5)
    res_tmp = [total / n_sampled for total in res_tmp]
    print(res_tmp)
    res_candidates[cand] = res_tmp


model_checkpoint_bc_4150.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])


  checkpoint = torch.load(cand, map_location=device)


[0.5117686764252046, 0.527170559544205, 0.1274395019568907]
model_checkpoint_bc_4250.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5109352074325573, 0.5250024599811209, 0.127469496943653]
model_checkpoint_bc_4350.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5038032005808346, 0.516660685031892, 0.12690066339074427]
model_checkpoint_bc_5350.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5115115663056263, 0.5254624862450021, 0.12775198970379698]
model_checkpoint_bc_5450.pth
dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])
[0.5114842896316943, 0.5255777686760783, 0.12782618232036505]


In [None]:
# One row per checkpoint; the column labels follow the order of each res_candidates value.
metric_columns = ['Mean', 'Median', 'Std']
res_candidates_out = pd.DataFrame(res_candidates).transpose()
res_candidates_out.columns = metric_columns

In [None]:
# Display the fine-grid metrics table built in the previous cell.
res_candidates_out

Unnamed: 0,Mean,Median,Std
model_checkpoint_bc_4150.pth,0.511769,0.527171,0.12744
model_checkpoint_bc_4250.pth,0.510935,0.525002,0.127469
model_checkpoint_bc_4350.pth,0.503803,0.516661,0.126901
model_checkpoint_bc_5350.pth,0.511512,0.525462,0.127752
model_checkpoint_bc_5450.pth,0.511484,0.525578,0.127826


In [None]:
# Stack the two CSV exports, then append the current res_candidates_out rows below them.
# NOTE(review): res_candidates_out is read and then rebound, so re-running this cell
# appends the same rows again.
csv_v0201 = pd.read_csv('metrics_out_v0201.csv', index_col=0)
csv_v0202 = pd.read_csv('metrics_out_v0202.csv', index_col=0)
csv_metrics = pd.concat([csv_v0201, csv_v0202])
res_candidates_out = pd.concat([csv_metrics, res_candidates_out])

In [None]:
# Sort by the 'Mean' column in descending order
res_candidates_out_sorted = res_candidates_out.sort_values(by='Mean', ascending=False)
# Display the top 10 rows by 'Mean', re-ordered by ascending 'Std'
res_candidates_out_sorted.head(10).sort_values(by='Std', ascending=True)

Unnamed: 0,Mean,Median,Std,Average
model_checkpoint_bc_4150.pth,0.511769,0.527171,0.12744,0.51947
model_checkpoint_bc_4300.pth,0.51161,0.5267,0.127489,0.519155
model_checkpoint_bc_5200.pth,0.511497,0.525412,0.127641,0.518455
model_checkpoint_bc_5400.pth,0.511544,0.525496,0.12774,0.51852
model_checkpoint_bc_5350.pth,0.511512,0.525462,0.127752,0.518487
model_checkpoint_bc_5500.pth,0.511611,0.525863,0.127778,0.518737
model_checkpoint_bc_5450.pth,0.511484,0.525578,0.127826,0.518531
model_checkpoint_bc_5300.pth,0.511501,0.52569,0.127909,0.518595
model_checkpoint_bc_5000.pth,0.511622,0.526172,0.12841,0.518897
model_checkpoint_bc_4800.pth,0.511552,0.526645,0.128568,0.519099


Submit model_checkpoint_bc_5500.pth and run the remaining practice games.

In [None]:
# Read the checkpoint file named below onto `device`; the cell's last
# expression displays the checkpoint's top-level keys.
ckpt_path = 'model_checkpoint_bc_5500.pth'
checkpoint = torch.load(ckpt_path, map_location=device)
checkpoint.keys()

  checkpoint = torch.load('model_checkpoint_bc_5500.pth', map_location=device)


dict_keys(['model_state_dict', 'optimizer_state_dict', 'global_batch_index', 'epoch'])

In [None]:
# Model instance: a freshly constructed MyModel (its weights are filled in
# from the checkpoint by a later cell).
GameInferModel = MyModel()
# Optimizer: SGD over the model's parameters, created with no extra arguments
# so that the saved optimizer state can be loaded into it later.
GameInferoptimizer = optim.SGD(GameInferModel.parameters())

In [None]:
# Move the model to `device`; as the cell's last expression, the returned module is displayed.
GameInferModel.to(device)

MyModel(
  (fc1): Linear(in_features=15, out_features=64, bias=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1_5): Linear(in_features=64, out_features=128, bias=True)
  (bn1_5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=32, bias=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (activation_gelu): GELU(approximate='none')
  (activation_leakyrelu): LeakyReLU(negative_slope=0.05)
  (activation_tanh): Tanh()
  (projection): Linear(in_features=15, out_features=32, bias=True)
)

In [None]:
# Load model state: copy the checkpoint's saved weights into the inference model.
GameInferModel.load_state_dict(checkpoint["model_state_dict"])
# Load optimizer state saved in the same checkpoint.
GameInferoptimizer.load_state_dict(checkpoint["optimizer_state_dict"])
# Switch to evaluation mode; as the cell's last expression, the returned module is displayed.
GameInferModel.eval()

MyModel(
  (fc1): Linear(in_features=15, out_features=64, bias=True)
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1_5): Linear(in_features=64, out_features=128, bias=True)
  (bn1_5): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc2): Linear(in_features=128, out_features=32, bias=True)
  (bn2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (activation_gelu): GELU(approximate='none')
  (activation_leakyrelu): LeakyReLU(negative_slope=0.05)
  (activation_tanh): Tanh()
  (projection): Linear(in_features=15, out_features=32, bias=True)
)

In [None]:
# Baseline: practice wins recorded before this batch, so the batch's own hit rate can be isolated.
tps = total_practice_successes
print(tps)
amt_this = 250
for i in range(amt_this):
    print('Playing ', i, ' th game')
    api.start_game(practice=1,verbose=True)
    [total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
    practice_success_rate = total_practice_successes / total_practice_runs
    if (i+1) % 50 == 0:
        print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))
        # Hit rate of this batch so far: divide by the games played up to now (i + 1).
        # Dividing by amt_this understated every report except the final one.
        print(round((total_practice_successes - tps) / (i + 1) , 3 ) )
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  Clean word for NN: ____i_i______
top_NN_letters:  [['t', 's', 'c', 'n', 'a']]
Guessing letter: t
Sever response: {'game_id': 'd737d4ac1bf8', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ _ _ _ i _ i _ _ _ _ _ _ '}
  Clean word for NN: ____i_i______
top_NN_letters:  [['s', 'c', 'n', 'a', 'l']]
Guessing letter: s
Sever response: {'game_id': 'd737d4ac1bf8', 'status': 'ongoing', 'tries_remains': 3, 'word': '_ _ _ _ i _ i _ _ _ _ _ _ '}
  Clean word for NN: ____i_i______
top_NN_letters:  [['c', 'n', 'a', 'l', 'r']]
Guessing letter: c
Sever response: {'game_id': 'd737d4ac1bf8', 'status': 'ongoing', 'tries_remains': 3, 'word': 'c _ _ c i _ i _ _ _ _ _ _ '}
  Clean word for NN: c__ci_i______
top_NN_letters:  [['n', 'a', 'o', 'l', 'r']]
Guessing letter: n
Sever response: {'game_id': 'd737d4ac1bf8', 'status': 'ongoing', 'tries_remains': 3, 'word': 'c _ n c i _ i _ _ _ _ _ _ '}
  Clean word for NN: c_nci_i______
top_NN_letters:  [['o', 'a', 'l', 'r

Submitted: 8000 cumulative practice runs so far, 0.403 success ratio. As shown in the last output window, the real-time hit rate is around 0.48 for the last round of practice runs.

## Playing recorded games:
Please finalize your code prior to running the cell below. Once this code executes once successfully your submission will be finalized. Our system will not allow you to rerun any additional games.

Please note that it is expected that after you successfully run this block of code that subsequent runs will result in the error message "Your account has been deactivated".

Once you've run this section of the code your submission is complete. Please send us your source code via email.

In [None]:
for i in range(511):
    print('Playing ', i, ' th game')
    # Final recorded run (practice=0, verbose=False). Per the notice above, running this
    # finalizes the submission — do not do this until you are satisfied with your submission
    api.start_game(practice=0,verbose=False)
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
top_NN_letters:  [['t', 'n', 's', 'l', 'd']]
  Clean word for NN: _a__a___
top_NN_letters:  [['n', 's', 'l', 'd', 'i']]
  Clean word for NN: _a__a___
top_NN_letters:  [['s', 'l', 'd', 'i', 'm']]
  Clean word for NN: sa__a__s
top_NN_letters:  [['l', 'i', 'u', 'b', 'o']]
  Clean word for NN: sal_a__s
top_NN_letters:  [['i', 'u', 'o', 'v', 'y']]
  Clean word for NN: sal_a_is

  Clean word: sal.a.is
Fuzzy match size: 723 with num letters mismatch: 3
from PE version:  m
top_NN_letters:  [['o', 'm', 'c', 'v', 'd']]
  Clean word for NN: sal_a_is

  Clean word: sal.a.is
Fuzzy match size: 723 with num letters mismatch: 6
from PE version:  m
top_NN_letters:  [['m', 'c', 'v', 'd', 'p']]
  Clean word for NN: salma_is

  Clean word: salma.is
Fuzzy match size: 104 with num letters mismatch: 3
from PE version:  c
top_NN_letters:  [['c', 'g', 'd', 'k', 'b']]
Playing  388  th game
  Clean word for NN: ________

  Clean word: ........
from PE version:  e
top_NN_l

HangmanAPIError: {'error': 'You have reached 1000 of games', 'status': 'denied'}

## To check your game statistics
1. Simply use "my_status" method.
2. Returns your total number of games, and number of wins.

In [None]:
# my_status() yields four counters, unpacked in the order listed here.
(total_practice_runs,
 total_recorded_runs,
 total_recorded_successes,
 total_practice_successes) = api.my_status()
# Fraction of recorded games that were won.
success_rate = total_recorded_successes / total_recorded_runs
print('overall success rate = %.3f' % success_rate)

overall success rate = 0.412


In [None]:
# Display the recorded-run counter obtained from api.my_status() in the cell above.
total_recorded_runs

1000

In [None]:
# Display the recorded-success counter obtained from api.my_status() in the cell above.
total_recorded_successes

412