In [2]:
import json
import itertools
from itertools import product
from itertools import permutations
import pandas

import spacy
nlp = spacy.load("en_core_web_lg")

def yieldPermutations(definition):
    """Lazily yield each string of the Cartesian product of ``definition``.

    ``definition`` is an iterable of iterables of strings; one element is
    taken from each inner iterable and the picks are concatenated in order.
    """
    yield from map("".join, product(*definition))

def checkWord(word):
    """Return whether ``word`` is contained in the module-level ``words``."""
    is_known = word in words
    return is_known

def filterQuery(queryList):
    """Return the text of every spaCy token in ``queryList`` worth keeping.

    Each string in ``queryList`` is run through ``nlp``; a token is dropped
    when its coarse POS is PRON, ADP or PROPN, when its fine-grained tag is
    VBZ or VBD, or when spaCy marks it as a stop word or punctuation.
    """
    kept = []
    for piece in queryList:
        for tok in nlp(piece):
            # Part-of-speech based exclusions.
            if tok.pos_ in ['PRON', 'ADP', 'PROPN'] or tok.tag_ in ['VBZ', 'VBD']:
                continue
            # Lexical exclusions.
            if tok.is_stop or tok.is_punct:
                continue
            kept.append(tok.text)
    return kept

def filterWords(wordList):
    """Return the text of every spaCy token in ``wordList`` that passes the filter.

    Each string in ``wordList`` is run through ``nlp``; tokens whose coarse
    POS is PRON or ADP, and tokens flagged as stop words or punctuation, are
    discarded. Output order follows the iteration order of ``wordList``.
    """
    def is_relevant(tok):
        if tok.pos_ in ['PRON', 'ADP']:
            return False
        return not (tok.is_stop or tok.is_punct)

    return [
        tok.text
        for entry in wordList
        for tok in nlp(entry)
        if is_relevant(tok)
    ]

def computeQueryList(puzzleId):
    """Build the filtered list of query tokens for one puzzle.

    Reads the ``Comment`` and ``Question`` cells at index ``puzzleId`` from
    the module-level ``df``, lower-cases them, splits the combined text on
    whitespace and passes the pieces through ``filterQuery``.

    Args:
        puzzleId: Index into ``df`` selecting the puzzle.

    Returns:
        The list returned by ``filterQuery`` for the combined text.
    """
    comment = df['Comment'][puzzleId]
    question = df['Question'][puzzleId]
    # Join with a space: plain concatenation fused the last word of the
    # comment with the first word of the question into one bogus token.
    query = comment.lower() + ' ' + question.lower()
    return filterQuery(query.split())

def compareSimilarity(queryList, filteredList):
    """Score every (query token, candidate word) pair by spaCy similarity.

    Args:
        queryList: Strings describing the puzzle context.
        filteredList: Candidate words to score against the query tokens.

    Returns:
        A list of ``(token1, token2, similarity)`` tuples sorted by
        similarity, highest first. Pairs are skipped when either parsed
        string is falsy or has a falsy ``vector_norm``.
    """
    # Parse each string exactly once; the pipeline call is by far the most
    # expensive step and was previously repeated for every pair.
    query_docs = [(token, nlp(token)) for token in queryList]
    candidate_docs = [(token, nlp(token)) for token in filteredList]

    similarity_list = []
    for token1, doc1 in query_docs:
        if not (doc1 and doc1.vector_norm):
            continue
        for token2, doc2 in candidate_docs:
            if doc2 and doc2.vector_norm:
                similarity_list.append((token1, token2, doc1.similarity(doc2)))

    return sorted(similarity_list, key=lambda x: x[2], reverse=True)

def probableWords(filteredList, puzzleId, topFraction=0.2):
    """Return the candidate words most similar to the puzzle's query text.

    Args:
        filteredList: Candidate words to rank.
        puzzleId: Puzzle index, forwarded to ``computeQueryList``.
        topFraction: Fraction of the ranked (query token, word) pairs to
            keep. Defaults to 0.2, i.e. the top 20%.

    Returns:
        The distinct candidate words appearing in the kept pairs, in order
        of first appearance (best similarity first).
    """
    # Rank every (query token, candidate) pair by contextual similarity.
    queryList = computeQueryList(puzzleId)
    sorted_similarity_list = compareSimilarity(queryList, filteredList)

    # Keep only the best-scoring fraction of the pairs.
    cutoff = int(len(sorted_similarity_list) * topFraction)
    top_solutions = sorted_similarity_list[:cutoff]

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(pair[1] for pair in top_solutions))

def remainingLetters(originalList, wordList):
    """Return ``originalList`` with one occurrence removed per item of ``wordList``.

    Items of ``wordList`` that are not (or no longer) available are ignored.
    ``originalList`` itself is not modified.

    Args:
        originalList: The pool of letters.
        wordList: The letters to take out of the pool.

    Returns:
        A new list holding the letters left over, in their original order.
    """
    res = list(originalList)
    for a in wordList:
        # Test against the shrinking copy, not the original: otherwise a
        # letter used more often than it is available raises ValueError.
        if a in res:
            res.remove(a)
    return res

def validWordCombinationGenerator(probableList, letters_list, reqWordLengths):
    """Combine each probable word with further words built from the leftover letters.

    For every ``token`` in ``probableList`` the token's letters are removed
    from ``letters_list`` and ``generateNLengthWords`` is asked for words of
    length ``reqWordLengths[0]`` made from what remains. The sorted pair
    ``(token, first such word)`` is recorded, and then, for each such word,
    a second search on the letters left after that word records a sorted
    triple ``(token, word, first second-level word)``.

    Args:
        probableList: Candidate first words.
        letters_list: The full pool of available letters.
        reqWordLengths: Required word lengths; only index 0 is read here.

    Returns:
        A list of sorted lists (pairs and/or triples of words), without
        duplicates.

    NOTE(review): ``puzzleId`` is not a parameter; it is resolved from module
    scope, so this function only works after the script has set it.
    """
    # Keep a handle on the full pool so it can be restored after each token.
    remaining_store = letters_list
    resultList = []
    
    for token in probableList:
        # Letters still available once `token` has been spelled out.
        remaining_letters = remainingLetters(letters_list, list(token))

        inter_remaining_store = remaining_letters
        possibleWords = generateNLengthWords(remaining_letters, reqWordLengths[0], puzzleId)
        # Record the two-word combination using only the best-ranked second word.
        if len(possibleWords) >=1 and not sorted((token, possibleWords[0])) in resultList:
                resultList.append(sorted((token, possibleWords[0])))

        if len(possibleWords)>0:
            for each in possibleWords:
                # Start again from the letters left after `token` alone.
                letters_list = inter_remaining_store
                remaining_letters = remainingLetters(letters_list, list(each))
                # NOTE(review): this second-level search reuses reqWordLengths[0]
                # rather than a later entry -- confirm that is intended.
                possible_words2 = generateNLengthWords(remaining_letters, reqWordLengths[0], puzzleId)
                if len(possible_words2) >=1 and not sorted((token, each, possible_words2[0])) in resultList:
                    resultList.append(sorted((token, each, possible_words2[0])))

        # Restore the full pool before trying the next token.
        letters_list = remaining_store
    return resultList

def generateNLengthWords(lettersList, wordLength, puzzleId):
    """Find plausible words of a given length spellable from ``lettersList``.

    Args:
        lettersList: The letters available; each may be used once.
        wordLength: Number of letters in the words to generate.
        puzzleId: Puzzle index, forwarded to ``probableWords``.

    Returns:
        The list produced by ``probableWords`` for the surviving candidates.
    """
    # Stream the permutations straight into a set instead of materialising
    # all of them in a list first. Keep only strings that are known words
    # with a non-negative entry in the frequency dictionary.
    candidates = {
        word
        for word in map(''.join, itertools.permutations(lettersList, wordLength))
        if checkWord(word) and data[word] >= 0
    }

    # Drop punctuation, stop words and words excluded by part of speech.
    filtered_words = filterWords(candidates)

    # Order by dictionary frequency value (ascending). The word itself is a
    # tie-breaker so the order does not depend on set iteration order, which
    # varies between runs under string hash randomisation.
    sorted_filtered_list = sorted(filtered_words, key=lambda x: (data[x], x))
    return probableWords(sorted_filtered_list, puzzleId)

def generateValidCombinations(relatedList, lettersList, solutionWordLengths):
    """Return the valid word combinations, once per entry of ``solutionWordLengths``.

    Args:
        relatedList: Candidate first words.
        lettersList: The full pool of available letters.
        solutionWordLengths: Lengths of the remaining words of the solution.

    Returns:
        A list with one element per entry of ``solutionWordLengths``; every
        element is the same tuple of combinations returned by
        ``validWordCombinationGenerator``. Empty when no lengths are given.
    """
    repeats = len(solutionWordLengths)
    if repeats == 0:
        return []

    # The generator call does not depend on the individual length, so run
    # the expensive search once and repeat the result.
    combinations = tuple(
        validWordCombinationGenerator(relatedList, lettersList, solutionWordLengths)
    )
    return [combinations] * repeats

def preProcessCsv(puzzleId):
    """Return the ';'-separated entries of the puzzle's ``JumbledWords`` cell.

    The cell is read from the module-level ``df`` at index ``puzzleId``;
    surrounding whitespace is stripped from every entry.
    """
    raw_cell = df['JumbledWords'][puzzleId]
    return [entry.strip() for entry in raw_cell.split(';')]

def preProcessWord(jumbledWord):
    """Split one jumbled-word entry into the word and its hint positions.

    Leading/trailing double quotes and parentheses are stripped, single
    quotes are removed, and the text is split on commas. The first field is
    the word; the second field, minus its first two and last characters, is
    read as whitespace-separated integers.

    Returns:
        A ``(word, positions)`` tuple where ``positions`` is a list of ints.
    """
    cleaned = jumbledWord.strip('"''")(').replace("'", '')
    fields = cleaned.split(',')
    word = fields[0]
    position_text = fields[1][2:-1]
    positions = [int(piece) for piece in position_text.split()]
    return word, positions

def preProcessReqWordLength(puzzleId):
    """Return the puzzle's required word lengths, largest first.

    The ``RequiredWordLengths`` cell is read from the module-level ``df`` at
    index ``puzzleId``; its first and last characters are dropped and the
    rest is parsed as whitespace-separated integers.
    """
    raw_cell = df['RequiredWordLengths'][puzzleId]
    lengths = [int(piece) for piece in raw_cell[1:-1].split()]
    lengths.sort(reverse=True)
    return lengths
    
# Script body: load the data, unscramble each jumbled word of the chosen
# puzzle, collect the hint letters and search for the final answer.
# The helper functions above read `data`, `df`, `words` and `puzzleId` as
# module-level names, so these must stay global.
anagram_dict = {}

# Load the word -> frequency dictionary; the file handle is only needed for
# the load itself, so it is closed before any user interaction happens.
with open('./freq_dict.json') as json_file:
    data = json.load(json_file)

df = pandas.read_csv('./puzzleData.csv')
puzzleId = int(input('Enter a puzzle number '))

words = data.keys()

# Sorted-letter signature of every dictionary word (normalised to lower case).
anagram_keys = [''.join(sorted(word.strip().lower())) for word in words]

# Group the dictionary entries by sorted-letter signature so that all
# anagrams of a word share one key.
for key, value in data.items():
    anagram_dict.setdefault(''.join(sorted(key)), []).append((key, value))

possible_options = []
endResultHintsList = []
endResultPossibleHintCombinations = []
endResultHintLetters = []
jumbled_words = preProcessCsv(puzzleId)

for jumbledWord in jumbled_words:
    jumbledWord, letter_positions = preProcessWord(jumbledWord)
    endResultPossibleHintCombinations = []

    # Look the jumbled word up by its signature; skip it when the
    # dictionary holds no anagram for it.
    signature = ''.join(sorted(jumbledWord))
    if signature not in anagram_dict:
        continue
    possible_options = anagram_dict[signature]
    print("\nPossible options: ",  possible_options)

    # For every candidate unscrambling, pick the letters at the 1-based
    # hint positions.
    for option in possible_options:
        endResultHintLetters = [option[0][position - 1] for position in letter_positions]
        endResultPossibleHintCombinations.append(endResultHintLetters)
    endResultHintsList.append(endResultPossibleHintCombinations)

# One choice of hint letters per jumbled word.
possible_combinations = list(itertools.product(*endResultHintsList))
finalResult = []

# The required lengths depend only on the puzzle, not on the combination.
reqWords = preProcessReqWordLength(puzzleId)

for tuple_combination in possible_combinations:
    # Flatten the chosen hint letters into a single pool.
    letters_list = [letter for arr in tuple_combination for letter in arr]

    initialList = tuple(generateNLengthWords(letters_list, reqWords[0], puzzleId))

    if len(reqWords) == 1:
        finalResult.append(initialList)
    else:
        # Run the expensive multi-word search once and reuse its result.
        valid_combinations = generateValidCombinations(initialList,
                                                       letters_list,
                                                       reqWords[1:])
        if len(valid_combinations) > 0:
            finalResult.append(valid_combinations)

print('Final Result: ', finalResult)
            

        
        
        

Enter a puzzle number 3

Possible options:  [('dinky', 0)]

Possible options:  [('agile', 0), ('galei', 0), ('agiel', 0)]

Possible options:  [('encore', 0)]

Possible options:  [('devout', 0)]
Final Result:  [('addition',), ('toddling',), ('addition',)]
