In [1]:
import csv
import numpy as np

# Load the pre-trained HMM matrices A and O from CSV.
# Per the original note, they were trained with a threshold of 0.0001 on the
# L1 norm of (A+O).  NOTE(review): the training code is not in this notebook
# -- confirm how these files were produced.
def _load_csv_matrix(path):
    """Read a comma-separated numeric table from `path` into a float32 array.

    Parameters
    ----------
    path : str
        CSV file whose cells are all parseable as numbers.

    Returns
    -------
    numpy.ndarray
        Array built with dtype code 'f' from the rows of the file.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(path, 'r', newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=',', quotechar='"'))
    return np.asarray(rows, 'f')


A_array = _load_csv_matrix("A_matrix_10.csv")
O_array = _load_csv_matrix("O_matrix_10.csv")



In [2]:
from HMM_proj2 import HiddenMarkovModel

In [3]:
import codecs
import datetime
import os
import re

# open the file.

def loadShakespeareSonnets(path='shakespeare.txt'):
    """Parse a sonnet text file into nested lists of words.

    A line whose last space-separated token is all digits is treated as a
    sonnet header and starts a new sonnet.  A blank line after at least one
    header closes the current sonnet.  Every other line is split into words
    and stripped of all characters except word characters, whitespace and
    apostrophes.

    Parameters
    ----------
    path : str, optional
        File to read.  Defaults to 'shakespeare.txt' in the working directory.

    Returns
    -------
    list
        sonnets[k][j][m] is the m-th word (original case) of line j of sonnet k.
    """
    sonnets = []
    with open(path, 'r') as f:
        sonnet = []
        sonnetToAppend = False
        for line in f:
            stripped = line.strip()
            if stripped.split(' ')[-1].isdigit():
                sonnetToAppend = True
                continue
            # Treat whitespace-only lines as blank too, not just a bare '\n'.
            if not stripped:
                if sonnetToAppend:
                    sonnets.append(sonnet)
                    sonnet = []
                    sonnetToAppend = False
                continue
            # split() (rather than split(' ')) avoids empty-string "words"
            # when a line contains consecutive spaces.
            sonnet.append([re.sub(r'[^\w\s\']', '', w) for w in stripped.split()])
        # Flush the last sonnet only if it has content; when the file ends
        # with a blank line it was already appended above.
        if sonnet:
            sonnets.append(sonnet)
    return sonnets

# Parse the corpus once.  Despite the singular name, `sonnet` holds every
# sonnet: sonnet[k][j][m] is word m of line j of sonnet k.
sonnet = loadShakespeareSonnets()

def getUniqueWords(sonnets):
    """Return the lower-cased vocabulary of `sonnets` as a sorted list.

    Parameters
    ----------
    sonnets : iterable
        Sonnets, each an iterable of lines, each an iterable of word strings.

    Returns
    -------
    list of str
        Every distinct lower-cased word, in sorted order.

    Notes
    -----
    The position of a word in this list is used as its integer id.  Returning
    `list(set)` made that id depend on the per-process string hash seed, so
    ids would not match between sessions.  Sorting makes the mapping
    reproducible.  NOTE(review): matrices trained elsewhere must have been
    built with the same ordering -- confirm.
    """
    unique = set()
    for poem in sonnets:
        for sentence in poem:
            unique.update(word.lower() for word in sentence)
    return sorted(unique)

dictionary = getUniqueWords(sonnet)
# The integer id i corresponds to the word dictionary[i].

# word -> integer id
convert = {word: idx for idx, word in enumerate(dictionary)}

# Same nesting as `sonnet`, with each word replaced by its integer id.
# Iterating over the corpus itself (instead of a hard-coded range(154))
# keeps this in step with however many sonnets were loaded.
number_sonnet = [
    [[convert[word.lower()] for word in line_words] for line_words in sonnet_lines]
    for sonnet_lines in sonnet
]

def flatten(list):
    """Concatenate the items of each element of `list` into one new list.

    Only one level of nesting is removed.
    """
    flat = []
    for sublist in list:
        flat.extend(sublist)
    return flat
#print(flatten(number_sonnet))

# Inverse lookup: integer id -> word.
# `convert`, `number_sonnet` and `flatten` are already defined earlier in this
# cell; the verbatim recomputation that used to live here was redundant.
invert = dict(enumerate(dictionary))





# frequency[i] = number of occurrences of word id i in number_sonnet.
frequency = [0] * len(dictionary)
for sonnet_ids in number_sonnet:
    for line_ids in sonnet_ids:
        for word_id in line_ids:
            frequency[word_id] += 1


# counting syllables. 
import pronouncing

# Fallback heuristic for counting syllables. It is only used when the word is not found by the `pronouncing` library (see syllable_count).
def easy_syllable_count(text):
    """Estimate the number of syllables in `text` with a vowel-group heuristic.

    The word is lower-cased and stripped of leading/trailing ".:;?!)(".
    Each run of consecutive vowels (a, e, i, o, u, y) counts once, then the
    count is adjusted for the endings 'e' (-1), 'le' (+1) and 'es' (-1), and
    raised to at least 1.  Finally the count is reduced by 10% and rounded.

    Parameters
    ----------
    text : str
        A single word, possibly with surrounding punctuation.

    Returns
    -------
    int
        Estimated syllable count; 0 if nothing remains after stripping.
    """
    vowels = 'aeiouy'
    text = text.lower().strip(".:;?!)(")
    if not text:
        # Previously `text[0]` raised IndexError for empty or
        # punctuation-only input.
        return 0

    # Count the starts of vowel runs.
    count = 0
    previous_was_vowel = False
    for char in text:
        is_vowel = char in vowels
        if is_vowel and not previous_was_vowel:
            count += 1
        previous_was_vowel = is_vowel

    if text.endswith('e'):
        count -= 1
    if text.endswith('le'):
        count += 1
    if text.endswith('es'):
        count -= 1
    if count == 0:
        count += 1
    # 10% damping kept from the original heuristic; it only changes the
    # result for larger counts.
    count = count - (0.1*count)
    return (round(count))


def syllable_count(word):
    """Return the syllable count of `word`.

    Uses the first pronunciation returned by the `pronouncing` package when
    one exists, and falls back to easy_syllable_count otherwise.
    """
    pronunciations = pronouncing.phones_for_word(word)
    if pronunciations:
        # Dictionary hit: count syllables from the first listed pronunciation.
        return pronouncing.syllable_count(pronunciations[0])
    return easy_syllable_count(word)
        


# determining the stress states if we know the starting stress and the number of syllables.

# Determine the stress symbol on which each word starts, from the number of syllables that precede it in its line.
def start(k,j,m):
    """Return the stress symbol on which word m of line j in sonnet k begins.

    Reads the module-level `number` table: the syllable counts of the words
    before position m in that line are summed, and the result is 'x' when the
    sum is even and '/' when it is odd.
    """
    syllables_before = int(sum(number[k][j][0:m]))
    return 'x' if syllables_before % 2 == 0 else '/'



# if you put word starting condition and total length, it will give the stress states.
# if you put '/' and '3', it gives '/x/'
def function(start, number):
    """Build an alternating stress string of `number` symbols.

    The string begins with '/' when `start` is '/', and with 'x' for any other
    value of `start`; symbols then alternate.  `number` is converted with
    int() first.  Example: function('/', 3) -> '/x/'.
    """
    length = int(number)
    # Symbol used at even positions and symbol used at odd positions.
    even_symbol, odd_symbol = ('/', 'x') if start == '/' else ('x', '/')
    return ''.join(
        even_symbol if position % 2 == 0 else odd_symbol
        for position in range(length)
    )
    
# saving the number of syllables for each word
# number[k][j][m]   : syllable count of word m in line j of sonnet k
# syllable[k][j][m] : stress string of that word
# Both are shaped from `sonnet` itself rather than a hard-coded 154, so they
# stay aligned with the flattened corpus whatever its size.
number = [[[0 for _ in line_words] for line_words in sonnet_lines] for sonnet_lines in sonnet]
syllable = [[['' for _ in line_words] for line_words in sonnet_lines] for sonnet_lines in sonnet]

for k, sonnet_lines in enumerate(sonnet):
    for j, line_words in enumerate(sonnet_lines):
        for m, token in enumerate(line_words):
            # Order matters: start() reads number[k][j][0:m], i.e. the counts
            # already stored for the earlier words of this line.
            number[k][j][m] = syllable_count(token)
            syllable[k][j][m] = function(start(k, j, m), number[k][j][m])




# Flatten sonnets -> lines -> words, and likewise for the stress strings.
sonnet_flat = flatten(flatten(sonnet))
stress_flat = flatten(flatten(syllable))
low_sonnet = list(map(str.lower, sonnet_flat))

# lower-cased word -> stress string.  When a word occurs more than once, the
# entry from its last occurrence wins.
stress_dictionary = {
    lowered: pattern for lowered, pattern in zip(low_sonnet, stress_flat)
}



In [4]:
# Build the model from the two matrices loaded from CSV in the first cell.
# NOTE(review): presumably A_array is the transition matrix and O_array the
# observation matrix -- confirm against HMM_proj2.HiddenMarkovModel.
HMM = HiddenMarkovModel(A_array,O_array)

In [57]:
for iterate in range(1):
    # Draw one emission sequence (a sequence of word ids) from the model.
    x = HMM.generate_emission()

    # Map ids back to words, then look up the stress string of each word.
    poem = [invert[word_id] for word_id in x]
    meter = [stress_dictionary[generated_word] for generated_word in poem]
    print(poem)
    

['known', 'crave', "another's", 'wont', 'idolatry']


In [389]:
##############################################
# function that returns the last phoneme containing the last vowel 
###################################################
def rhyme_check(word):
    """Return the last phoneme that carries a stress digit.

    Parameters
    ----------
    word : list of str
        Pronunciations as space-separated phoneme strings (the shape returned
        by pronouncing.phones_for_word).  Only word[0] is inspected.

    Returns
    -------
    str
        The substring of word[0] from the start of the phoneme containing the
        last digit up to and including that digit, e.g. 'AY1' for 'W AY1 T'.
        Returns '' when `word` is empty or word[0] contains no digit; the
        original ran past the start of the string and raised IndexError.
    """
    if not word:
        return ''
    phones = word[0]

    # Scan backward for the last digit (a stress marker on a vowel phoneme).
    end_idx = -1
    for idx in range(len(phones) - 1, -1, -1):
        if phones[idx].isdigit():
            end_idx = idx
            break
    if end_idx == -1:
        return ''

    # The phoneme starts just after the preceding space, or at index 0 when
    # there is none (rfind returns -1, so +1 gives 0).
    start_idx = phones.rfind(' ', 0, end_idx) + 1
    return phones[start_idx:end_idx + 1]


####################
# first generate a b c d e f g and save the end-word from each line.
####################
# Generate lines until seven of them end in a word that the `pronouncing`
# library can look up; keep each line and its final word.
end_word = []
first_poem = []
while len(end_word) != 7:
    x = HMM.generate_emission()
    last_word = invert[x[-1]]

    # Lines whose final word has no known pronunciation cannot be rhymed
    # later, so they are discarded.
    if pronouncing.phones_for_word(last_word) != []:
        first_poem.append(x)
        end_word.append(last_word)

######################
# obtain rhyme  
#####################    
#print(end_word, len(end_word))
# rhyme_block[i] is the rhyme phoneme (see rhyme_check) of end_word[i].
rhyme_block = [
    rhyme_check(pronouncing.phones_for_word(final_word))
    for final_word in end_word
]
    
#print(rhyme_block)

#############################
# Now add rhyme in the poem.
#############################
# For each of the seven saved rhyme phonemes, in order, keep sampling lines
# until one ends in a word with the same rhyme phoneme.
# NOTE: the comparison does not exclude the original end word itself, so a
# line may "rhyme" by repeating the same word.
second_poem = []
count = 0
while len(second_poem) != 7:
    x = HMM.generate_emission()
    endword = invert[x[-1]]
    step_a = pronouncing.phones_for_word(endword)

    # No pronunciation available: this line cannot be checked, sample again.
    if not step_a:
        continue

    step_b = rhyme_check(step_a)
    if step_b != rhyme_block[count]:
        continue

    second_poem.append(x)
    count += 1
            
            
            
############################
# Now we have complete poem and need to combine them
############################

def _print_line(states):
    """Print the words for one sequence of word ids, space-separated, on one line."""
    for state in states:
        print(invert[state], end=" ")
    print(" ")


# Three quatrains: two lines from first_poem followed by their two rhyming
# partners from second_poem.
for quatrain in range(0, 3):
    _print_line(first_poem[2 * quatrain])
    _print_line(first_poem[2 * quatrain + 1])
    _print_line(second_poem[2 * quatrain])
    _print_line(second_poem[2 * quatrain + 1])

# Closing couplet: the seventh line and its rhyming partner.
_print_line(first_poem[6])
_print_line(second_poem[6])


another's moiety struck another's white  
richer some debtor well vouchsafe surmount  
o'ergreen wiry wretch unbred delights  
whoever art's indeed abundant drowns  
kill lacked wide give refusest ignorance  
whom please feasts hateth bases counterfeit  
indeed another's showers general  
he's merits please another's counterfeit  
remain decays another's victors thrusts  
tibey hath willing legacy please curse  
another's victors wires merit won  
selfkilled beguiled shook another's curse  
hopes breast another's cheer under doubting  
exceeds alive unkind beyond instinct  
