# Homework 1 — Natural Language Processing



In [1]:
import nltk
import random

In [None]:
nltk.download()

## Task 1.
* Write a function which picks rhymes for a word using CMU Pronouncing    Dictionary (nltk.corpus.cmudict). Two words usually rhyme if their pronunciation from the stressed syllable till the end of the word is the same.

In [4]:
all_dataset = nltk.corpus.cmudict.entries()
target_dataset = [entry for entry in all_dataset if len(entry[0]) > 2 and entry[0].isalpha()]

# Main Function for adding rhymes of a word into a list
def rhymes_for_word(input_w):
    all_rhymes = []
    input_pron = [pron for word, pron in target_dataset if word == input_w]
    for i_p in input_pron:
        
        index_stress = stress_index_finder(i_p)
        
        for word, pron in target_dataset:
            if len(pron[index_stress:]) != len(i_p[index_stress:]):
                continue
            if stress_index_finder(pron) == index_stress:
                if pron[index_stress:] == i_p[index_stress:]:
                    all_rhymes.append(word)
                    
    if len(all_rhymes) == 0:
        return "No rhyme founded!"   
    return all_rhymes

# Function for finding if the word is stressed or not
def if_stressed_word(inp):
    if '1' in list(inp) or '2' in list(inp):
        return True
    return False

# Function for finding the index of stress
def stress_index_finder(inp):
    for i in range(len(inp)):
        if if_stressed_word(inp[i]):
            return i

### Test our program
* You can call the finction rhymes_for_word(input) with an input argument such a word.

In [5]:
rhymes_for_word('book')

['book',
 'chook',
 'cook',
 'cooke',
 'gook',
 'hook',
 'hooke',
 'look',
 'mook',
 'nook',
 'rook',
 'rooke',
 'schook',
 'shook',
 'took',
 'tooke',
 'zook']

## Task 2.
* Improve our text generator using trigrams (nltk.trigram) instead of bigrams. The idea is to select the next word based on two previous words, not just one. It is acceptable if you have to start the generation from two initial words instead of one. Apply the generator to texts from different corpora.

### Hint for Testing Program:
* Just run the following cell to execute the procedure.

In [31]:
trigrams = list(nltk.trigrams(nltk.corpus.genesis.words('english-kjv.txt')))
cfd = nltk.ConditionalFreqDist(((w1, w2), w3) for w1, w2, w3 in trigrams)

def trigram_generator(cfdist, word1, word2, num = 50):
    word_list = [word1, word2]
    print(word_list[0], end = ' ')
    for i in range(num):
        w1 = word_list[i]
        w2 = word_list[i + 1]
        print(word_list[-1], end = ' ')
        lucky_word = [w for w, f in cfdist[w1, w2].most_common()]
        frequency = [f for w, f in cfdist[w1, w2].most_common()]
        word = random.choices(lucky_word, weights = frequency)[0]
        word_list.append(word)

### Test our program
* You can call the finction trigram_generator(inputs) with corresponding input arguments.

In [36]:
cfd['God', 'created']
trigram_generator(cfd, 'God','created')

God created man , the father of the famine was in the days of thy life ; It shall come to pass , that I have lift up her eyes upon him , Thou shalt be over my cattle , and said , I have blessed him , Bethel , he 

## Task 3.
* Write a code for Hangman game (https://en.wikipedia.org/wiki/Hangman_(game)). The code should select a random word from a dictionary (e. g. nltk.corpus.words) and show it to the user, replacing letters with dots. The user has to guess the word, naming one letter per move. If the named letter is there within the word, then all its occurrences are shown, otherwise the user loses an attempt. The user wins if (s)he opens all the letters before all attempts are spent, otherwise (s)he fails. You do not have to draw the hangman, just count the attempts left.

### Hint for Testing Program:
* Just run the following cells to execute the procedure.

In [37]:
def get_word():
    word = random.choice(nltk.corpus.words.words())
    return word.upper()

def play(word):
    word_completion = "_" * len(word)
    guessed = False
    guessed_letters = []
    guessed_words = []
    tries = len(word)
    print('*' * 100)
    print('\n')
    print("Let's play Hangman!")
    print(f'\nLength of the word: ' + str(len(word)))
    print(f'Possible chance: ' + str(tries))
    print(f'Word: ' + word_completion)
    print("\n")
    while not guessed and tries > 0:
        print('-' * 50)
        guess = input("Please guess a letter or word: ").upper()
        if len(guess) == 1 and guess.isalpha():
            if guess in guessed_letters:
                print("You already guessed the letter", guess)
            elif guess not in word:
                print(guess, "is not in the word.")
                tries -= 1
                guessed_letters.append(guess)
            else:
                print("Good job,", guess, "is in the word!")
                guessed_letters.append(guess)
                word_as_list = list(word_completion)
                indices = [i for i, letter in enumerate(word) if letter == guess]
                for index in indices:
                    word_as_list[index] = guess
                word_completion = "".join(word_as_list)
                if "_" not in word_completion:
                    guessed = True
        elif len(guess) == len(word) and guess.isalpha():
            if guess in guessed_words:
                print("You already guessed the word", guess)
            elif guess != word:
                print(guess, "is not the word.")
                tries -= 1
                guessed_words.append(guess)
            else:
                guessed = True
                word_completion = word
        else:
            print("Not a valid guess.")
        print("\n")
        print(f'Remaining chance: ' + str(tries))
        print(word_completion)
        print("\n")
    if guessed:
        print("Congrats, you guessed the word! You win!")
    else:
        print("Sorry, you ran out of tries. The word was " + word + ". Maybe next time!")

def main():
    word = get_word()
    play(word)
    while input("Play Again? (Y/N) ").upper() == "Y":
        word = get_word()
        play(word)

if __name__ == "__main__":
    main()

****************************************************************************************************


Let's play Hangman!

Length of the word: 6
Possible chance: 6
Word: ______


--------------------------------------------------
Please guess a letter or word: q
Q is not in the word.


Remaining chance: 5
______


--------------------------------------------------
Please guess a letter or word: w
W is not in the word.


Remaining chance: 4
______


--------------------------------------------------
Please guess a letter or word: e
Good job, E is in the word!


Remaining chance: 4
_____E


--------------------------------------------------
Please guess a letter or word: r
R is not in the word.


Remaining chance: 3
_____E


--------------------------------------------------
Please guess a letter or word: t
T is not in the word.


Remaining chance: 2
_____E


--------------------------------------------------
Please guess a letter or word: y
Y is not in the word.


Remaining chance: 1
_