In [42]:
from hangman import hangman, get_dictionary
import wordfreq
import json
from decimal import Decimal, getcontext

In [68]:

big_dictionary = get_dictionary("words.txt")

freq_dict = {}
for word in big_dictionary:
    word = word.lower()
    weight = Decimal(wordfreq.word_frequency(word, "en", wordlist='small', minimum=0.0))
    if weight > 0.0 and len(word) > 1:
        freq_dict[word] = float(weight)  

with open('webppl-model/word_freq.json', 'w') as f:
    json.dump(freq_dict, f, indent=2)


In [49]:
getcontext().prec = 50
weight = Decimal(wordfreq.word_frequency("moonrise", "en", wordlist='small', minimum=0.0))
print(weight)
print(weight > 0.0)

0
False


In [69]:


letter_freq = {}
for word in freq_dict:
    for letter in word:
        if letter in letter_freq:
            letter_freq[letter] += freq_dict[word]
        else:
            letter_freq[letter] = freq_dict[word]
            

total = sum(letter_freq.values())
for letter in letter_freq:
    letter_freq[letter] /= total
    
with open('webppl/letter_freq.json', 'w') as f:
    json.dump(letter_freq, f, indent=2)

FileNotFoundError: [Errno 2] No such file or directory: 'webppl/letter_freq.json'

In [70]:
""" turn csv into data of the form 

var partial = ["_", "o", "_", "_", "e", "_", "_", "_", "_"]
var in_letters = ["o", "e"]
var not_in_letters = ["s"]
var previousGuesses = in_letters.concat(not_in_letters)

var data = [
  {
    previousGuesses: previousGuesses,
    actualGuess: "l",
    partialWordPattern: partial,
    word: "wonderful"
  },
]


where each guess is piece of data. each row of the csv is a series of guesses eg. 
8	calendar	e	t 	a	l	n	i	o	s	r	c	d	
""" 
import csv
import json

def make_data_from_csv(csv_file_path, data_file_path):
   
    def update_partial(partial, word, guess):
        return [char if char == guess or char in partial else "_" for char in word]

    output_data = []

    with open(csv_file_path, "r") as csv_file:
        csv_reader = csv.reader(csv_file, delimiter="\t")
        next(csv_reader) 
        for row in csv_reader:
            row = row[0].split(",")
            
            if len(row) < 4 or row[0] == "":
                continue

            step_number, word, *guesses = row
            word = word.strip().lower()
            broken_words = ["moonrise"]
            if word in broken_words:
                continue
            guesses = [guess.strip() for guess in guesses if guess.strip()]

        
            partial = ["_" for _ in word]
            in_letters = []
            not_in_letters = []

      
            for i, guess in enumerate(guesses):
                
                data_point = {
                    "previousGuesses": in_letters + not_in_letters,
                    "actualGuess": guess,
                    "partialWordPattern": partial.copy(), 
                    "word": word
                }

          
                output_data.append(data_point)

             
                if guess in word:
                    in_letters.append(guess)
                    partial = update_partial(partial, word, guess)
                else:
                    not_in_letters.append(guess)

  
    with open(data_file_path, "w") as output_file:
        json.dump(output_data, output_file, indent=2)



In [65]:
make_data_from_csv("dataset.csv", "webppl-model/data.js")

In [71]:

def make_successful_unsuccessful_data(data_file_path, successful_data_file_path, unsuccessful_data_file_path):
    with open(data_file_path, "r") as data_file:
        data = json.load(data_file)

  
    successful_data = [
        {
            "previousGuesses": d["previousGuesses"],
            "actualGuess": d["actualGuess"],
            "partialWordPattern": [char if char in d["previousGuesses"] else "_" for char in d["word"]],
            "word": d["word"]
        }
        for d in data if d["actualGuess"] in d["word"]
    ]

    unsuccessful_data = [
        {
            "previousGuesses": d["previousGuesses"],
            "actualGuess": d["actualGuess"],
            "partialWordPattern": [char if char in d["previousGuesses"] else "_" for char in d["word"]],
            "word": d["word"]
        }
        for d in data if d["actualGuess"] not in d["word"]
    ]

    with open(successful_data_file_path, "w") as output_file:
        json.dump(successful_data, output_file, indent=2)

    
    with open(unsuccessful_data_file_path, "w") as output_file:
        json.dump(unsuccessful_data, output_file, indent=2)


In [55]:
make_successful_unsuccessful_data("webppl-model/data.js", "webppl-model/successful_data.js", "webppl-model/unsuccessful_data.js")

In [56]:
import json

def split_data_by_game_step(data_file_path, output_folder_path):
    """
    Splits the game data into steps of the game (e.g., first guess, second guess) 
    and saves them as separate files.

    Args:
        data_file_path (str): Path to the input data file (JSON format).
        output_folder_path (str): Path to the folder where step-wise files will be saved.

    Output:
        Saves separate JSON files for each step in the game.
    """
    with open(data_file_path, "r") as data_file:
        data = json.load(data_file)

    
    steps = {}

    
    for record in data:
        step = len(record["previousGuesses"]) + 1  
        if step not in steps:
            steps[step] = []
        steps[step].append(record)

 
    for step, guesses in steps.items():
        output_file_path = f"{output_folder_path}/step_{step}.json"
        with open(output_file_path, "w") as output_file:
            json.dump(guesses, output_file, indent=2)

    print(f"Data successfully split into steps and saved in {output_folder_path}.")


In [58]:
split_data_by_game_step("webppl-model/data.js", "webppl-model/data_steps")

Data successfully split into steps and saved in webppl-model/data_steps.


In [72]:

def split_data_by_word(data_file_path, output_folder_path):
    """
    Splits the game data into steps of the game (e.g., first guess, second guess) 
    and saves them as separate files.

    Args:
        data_file_path (str): Path to the input data file (JSON format).
        output_folder_path (str): Path to the folder where step-wise files will be saved.

    Output:
        Saves separate JSON files for each step in the game.
    """
    with open(data_file_path, "r") as data_file:
        data = json.load(data_file)

 
    words = {}

    for record in data:
        word = record["word"]
        if word not in words:
            words[word] = []
        words[word].append(record)

    for word, guesses in words.items():
        output_file_path = f"{output_folder_path}/{word}.json"
        with open(output_file_path, "w") as output_file:
            json.dump(guesses, output_file, indent=2)

    print(f"Data successfully split into words and saved in {output_folder_path}.")

In [66]:
split_data_by_word("webppl-model/data.js", "webppl-model/data_words")

Data successfully split into words and saved in webppl-model/data_words.


In [67]:

#print each word