In [74]:
# Mount Google Drive at /content/gdrive so the notebook can read files stored there.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [79]:
!pip install transformers
!pip install syllabipy
!pip install cmudict



In [80]:
import numpy as np
import pandas as pd

import random
import time
import datetime

import torch
from torch.utils.data import Dataset, random_split, DataLoader, RandomSampler, SequentialSampler

from tqdm import tqdm

from syllabipy.sonoripy import SonoriPy

import cmudict

from tokenizers import ByteLevelBPETokenizer
from tokenizers import BertWordPieceTokenizer

from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config, AdamW, get_linear_schedule_with_warmup
from transformers import BertTokenizer

In [81]:
# EOT = "<|endoftext|>"
# EOL = "\n"

# EOT = "\n"
# EOL = "-"

# Active delimiter pair; the earlier alternatives are kept above, commented out.
# EOT separates whole limericks when the text of an input file is split into samples.
EOT = "$"
# EOL separates the lines ("sentences") inside a single limerick.
EOL = "-"

In [82]:
# Load the CMU pronouncing lexicon (word -> list of pronunciations).
# The lexicon is bound to the same name as the imported module, so the call is
# guarded: once the name holds the lexicon, re-running this cell is a no-op
# instead of failing because a dict has no ``dict`` attribute.
if not isinstance(cmudict, dict):
    cmudict = cmudict.dict() # Compatible with NLTK

In [83]:
# Sample limerick line used to demonstrate sentence_to_syllables below.
sentence = "as a soup bisque is best when served hot"

def sentence_to_syllables(sentence):
    """Split a sentence into one flat list of syllables.

    The sentence is split on whitespace and every word is passed to
    ``SonoriPy``; the pieces it yields for each word are concatenated in
    word order.

    Args:
        sentence: whitespace-separated text.

    Returns:
        A list with the syllables of all words, in order.
    """
    return [
        syllable
        for token in sentence.split()
        for syllable in SonoriPy(token)
    ]

# Show the syllable split produced for the sample sentence.
print(sentence_to_syllables(sentence))

['as', 'a', 'soup', 'bis', 'que', 'is', 'best', 'when', 'ser', 'ved', 'hot']


In [84]:
def syllable_count(word):
    """Estimate the number of syllables in ``word`` with a vowel-group heuristic.

    Every maximal run of vowels (``a e i o u y``) counts as one syllable, a
    trailing ``e`` is treated as silent, and any non-empty word counts as at
    least one syllable. The check is case-insensitive.

    Args:
        word: the word to analyse (a ``str``).

    Returns:
        The estimated syllable count; ``0`` for an empty string, otherwise an
        integer >= 1.
    """
    # Normalise case so that "OVER" and "over" are counted the same way.
    word = word.lower()
    if not word:
        # An empty string has no syllables (and would break indexing below).
        return 0

    vowels = "aeiouy"
    count = 0
    previous_is_vowel = False
    for char in word:
        is_vowel = char in vowels
        # Only the first vowel of a run starts a new syllable.
        if is_vowel and not previous_is_vowel:
            count += 1
        previous_is_vowel = is_vowel

    # Treat a final "e" as silent ("plane", "smile").
    if word.endswith("e"):
        count -= 1

    # Words such as "the" or "hmm" would otherwise come out as zero.
    return max(count, 1)

In [85]:
def legal_syllables_lens(syllables_lens):
    """Check the syllable-length shape of a five-line limerick.

    Args:
        syllables_lens: sequence of exactly five syllable counts, one per
            line, in line order.

    Returns:
        False when the shortest of lines 1, 2 and 5 has fewer syllables than
        the longest of lines 3 and 4; True otherwise.
    """
    assert len(syllables_lens) == 5
    long_lines = (syllables_lens[0], syllables_lens[1], syllables_lens[4])
    short_lines = (syllables_lens[2], syllables_lens[3])

    # Earlier, stricter variant kept for reference:
    # if (abs(len1 - len2) > 2 or abs(len1 - len5) > 2 or abs(len2 - len5) > 2 or abs(len3 - len4) > 2):
    shortest_long = min(long_lines)
    longest_short = max(short_lines)
    return not (shortest_long < longest_short)

def legal_last_phonemes(last_phonemes):
    """Tell whether five line-final phonemes follow the AABBA pattern.

    Args:
        last_phonemes: sequence of exactly five phonemes, one per line, in
            line order.

    Returns:
        True when lines 1, 2 and 5 end in the same phoneme and lines 3 and 4
        end in the same phoneme; False otherwise.
    """
    assert len(last_phonemes) == 5
    first, second, third, fourth, fifth = (last_phonemes[i] for i in range(5))

    # "A" rhyme: lines 1, 2 and 5 must all agree pairwise.
    a_group_matches = first == second and first == fifth and second == fifth
    # "B" rhyme: lines 3 and 4 must agree.
    return a_group_matches and third == fourth

In [86]:
# input = "capn jack was washed over the side\n\
# his crew searched but found not hair nor hide\n\
# no longer the helm\n\
# but the deep benthic realm\n\
# is where jack will forever reside"

# input = "as a soup bisque is best when served hot\n\
# made with lobster it hits the right spot\n\
# i think it tastes dreamy\n\
# its so rich and creamy\n\
# its the soup youd be served on a yacht"

# input = "simply add to the grasp of a rhesus\n\
# the antithesis psychokinesis\n\
# and i guarantee\n\
# its a sure phd\n\
# i wont write it though im antithesis\n"

# Newline-delimited sample limerick. It is overwritten by the assignment
# right below, so only the second sample is ever checked.
# NOTE: the name `input` shadows the Python built-in of the same name.
input = (
    "a smiling young fellow from spain\n"
    "fell asleep while aloft in a plane\n"
    "in spite of his smile it\n"
    "was he who was pilot\n"
    "he never went flying again"
)


# Sample whose five lines are separated by "-"; this is the value in effect
# for the rest of the cell.
input = "0: if theres trouble - and leaving the boy in a bubble - on account of a spouse - whos in love with the house - you can bet him in heat with a bubble"

def legal_limerick(input):
    """Classify a limerick as legal, illegal, or not checkable.

    The text is split into lines on ``EOL``. A limerick is legal when it has
    exactly five lines and the last phonemes of the lines' final words follow
    the AABBA pattern checked by ``legal_last_phonemes``.

    Args:
        input: the limerick text, with its lines separated by ``EOL``.

    Returns:
        1  if the limerick passes every active rule,
        0  if it breaks a rule (wrong number of lines or wrong rhyme pattern),
        -1 if a line's last phoneme cannot be looked up (the line has no
           words, or its last word has no pronunciation in ``cmudict``).
    """
    # Drop fragments that are empty or a single space (e.g. around a trailing EOL).
    sentences = [part for part in input.split(EOL) if part not in ("", " ")]

    # Rule 1: Must have 5 Sentences
    if len(sentences) != 5:
        return 0

    # Rule 2 (currently disabled): Sentence 1/2/5 must not be shorter than 3/4.
    # The per-line syllable totals were computed here but never checked, so the
    # dead computation has been dropped. To re-enable the rule:
    # syllables_lens = [sum(syllable_count(word) for word in sentence.split())
    #                   for sentence in sentences]
    # if not legal_syllables_lens(syllables_lens):
    #     return 0

    # Rule 3: Sentence 1/2/5 and 3/4 have same rhyme (last phoneme)
    last_phonemes = []
    for sentence in sentences:
        try:
            last_word = sentence.split()[-1]
            # First listed pronunciation, last phoneme of that pronunciation.
            last_phoneme = cmudict[last_word][0][-1]
        except (IndexError, KeyError):
            # Only lookup failures mean "cannot judge"; anything else (for
            # example a lexicon that was never loaded) should surface as an
            # error instead of being silently reported as -1.
            return -1
        last_phonemes.append(last_phoneme)

    if not legal_last_phonemes(last_phonemes):
        return 0

    return 1

# Sanity check on the sample bound to `input` above: 1 = legal, 0 = illegal,
# -1 = a line's last phoneme could not be looked up.
print(legal_limerick(input))

1


In [87]:
# Five hand-picked sample limericks; the lines of each one are separated by "\n".
inputs = [
    (
        "capn jack was washed over the side\n"
        "his crew searched but found not hair nor hide\n"
        "no longer the helm\n"
        "but the deep benthic realm\n"
        "is where jack will forever reside"
    ),
    (
        "as a soup bisque is best when served hot\n"
        "made with lobster it hits the right spot\n"
        "i think it tastes dreamy\n"
        "its so rich and creamy\n"
        "its the soup youd be served on a yacht"
    ),
    (
        "simply add to the grasp of a rhesus\n"
        "the antithesis psychokinesis\n"
        "and i guarantee\n"
        "its a sure phd\n"
        "i wont write it though im antithesis"
    ),
    (
        "abeds where you sleep in the night\n"
        "unless you and your wife had a fight\n"
        "then for you its the couch\n"
        "next time dont be a grouch\n"
        "just permit her to think she was right"
    ),
    (
        "a smiling young fellow from spain\n"
        "fell asleep while aloft in a plane\n"
        "in spite of his smile it\n"
        "was he who was pilot\n"
        "he never went flying again"
    ),
]


def limerick_passing_rate(inputs):
    """Compute the share of limericks that pass ``legal_limerick``.

    Limericks for which ``legal_limerick`` returns -1 (not checkable) are left
    out of both the numerator and the denominator. The two counters are
    printed before returning.

    Args:
        inputs: iterable of limerick texts.

    Returns:
        ``legal_num / num`` as a float, where ``num`` is the number of
        limericks judged legal or illegal. Returns ``0.0`` when no limerick
        could be judged (empty input, or every result was -1) instead of
        dividing by zero.
    """
    num = 0
    legal_num = 0
    for limerick in inputs:
        result = legal_limerick(limerick)
        if result == 1:
            legal_num += 1
            num += 1
        elif result == 0:
            num += 1
        # result == -1: not checkable, so it is not counted at all.
    print("legal_num = ", legal_num)
    print("num = ", num)
    if num == 0:
        # Nothing was scorable; avoid ZeroDivisionError.
        return 0.0
    return legal_num / num

# Smoke test on the sample list above.
# NOTE(review): the samples separate their lines with "\n", while legal_limerick
# splits on EOL ("-"). Each sample is therefore seen as a single sentence and
# fails the five-sentence rule, which is why the recorded rate is 0.0.
print(limerick_passing_rate(inputs))

legal_num =  0
num =  5
0.0


In [95]:
# input_path = '/content/gdrive/MyDrive/pr/limericks_input.txt'
# input_path = '/content/gdrive/MyDrive/pr/ours.txt'
# input_path = '/content/gdrive/MyDrive/pr/baseline.txt'
input_path = '/content/gdrive/MyDrive/pr/baseline4.txt'

# Read the whole file first; the scoring below does not need the open handle.
with open(input_path) as f_in:
    all_text = f_in.read()

# Limericks are separated by EOT; a chunk that is exactly the EOL marker is dropped.
# NOTE(review): chunks made only of whitespace (e.g. after a trailing EOT) are
# kept and would be scored as illegal -- confirm against the file format.
inputs = [chunk for chunk in all_text.split(EOT) if chunk != EOL]
# print(inputs)

passing_rate = limerick_passing_rate(inputs)
print(passing_rate)

legal_num =  30
num =  75
0.4


In [None]:
# NOTE: this cell re-defines syllable_count, which is also defined earlier in
# the notebook; whichever cell runs last provides the definition in effect.
def syllable_count(word):
    """Estimate the number of syllables in ``word`` with a vowel-group heuristic.

    Every maximal run of vowels (``a e i o u y``) counts as one syllable, a
    trailing ``e`` is treated as silent, and any non-empty word counts as at
    least one syllable. The check is case-insensitive.

    Args:
        word: the word to analyse (a ``str``).

    Returns:
        The estimated syllable count; ``0`` for an empty string, otherwise an
        integer >= 1.
    """
    # Normalise case so that "OVER" and "over" are counted the same way.
    word = word.lower()
    if not word:
        # An empty string has no syllables (and would break indexing below).
        return 0

    vowels = "aeiouy"
    count = 0
    previous_is_vowel = False
    for char in word:
        is_vowel = char in vowels
        # Only the first vowel of a run starts a new syllable.
        if is_vowel and not previous_is_vowel:
            count += 1
        previous_is_vowel = is_vowel

    # Treat a final "e" as silent ("plane", "smile").
    if word.endswith("e"):
        count -= 1

    # Words such as "the" or "hmm" would otherwise come out as zero.
    return max(count, 1)

In [None]:
# Quick check of the heuristic on a one-syllable word.
print(syllable_count("what"))

1
