In [2]:
import os
import random

from pandas import DataFrame

In [3]:
def create_generated_line_dictionary(in_paths, poem_type):
    """Collect generated five-line poems from text files into column lists.

    Each file is read line by line. A line that splits into exactly two parts
    on ':' is treated as a ``keyword:score`` header. Only the poem under the
    first header of a run of identical keywords is kept; poems under repeated
    headers are skipped until the keyword changes. Kept poem lines (at least
    five characters long) are capitalised, have detached trailing punctuation
    glued back onto the previous word, and are distributed round-robin over
    the 'line1'..'line5' columns.

    Args:
        in_paths: Iterable of paths to UTF-8 encoded text files.
        poem_type: Label stored in the 'type' column for every poem.

    Returns:
        dict mapping 'keyword', 'line1'..'line5', 'score' and 'type' to lists.
    """
    lines = {'keyword': [],
             'line1': [],
             'line2': [],
             'line3': [],
             'line4': [],
             'line5': [],
             'score': []}
    for in_path in in_paths:
        # The context manager closes the handle even if decoding fails.
        with open(in_path, 'rb') as in_txt:
            counter = 0
            curr_keyword = None
            # We only keep a poem if it is the first poem for its keyword.
            should_write_to_csv = False
            for raw_line in in_txt:
                line = raw_line.rstrip().decode('utf-8')
                header_parts = line.split(':')
                # Process keyword
                if len(header_parts) == 2:
                    new_keyword, score = header_parts
                    if new_keyword != curr_keyword:
                        should_write_to_csv = True
                        lines['keyword'].append(new_keyword)
                        lines['score'].append(score)
                    else:
                        should_write_to_csv = False
                    curr_keyword = new_keyword
                # Write line to dictionary
                elif len(line) >= 5 and should_write_to_csv:
                    line = line.strip()
                    line = line[0].upper() + line[1:]
                    if not line[-1].isalpha():
                        words = line.split()
                        # Only a detached punctuation token ("sea ,") is
                        # merged into the previous word. If the last token
                        # still contains letters or digits ("see,") the line
                        # is left intact so no word is dropped.
                        if not any(ch.isalnum() for ch in words[-1]):
                            line = ' '.join(words[:-1]) + line[-1]
                    lines['line' + str(counter + 1)].append(line)
                    counter = (counter + 1) % 5
    poem_count = len(lines['line1'])
    lines['type'] = [poem_type] * poem_count
    print("Dictionary created with " + str(poem_count) + " poems")
    return lines

In [10]:
def create_line_dictionary(in_paths, poem_type):
    """Collect five-line poems from plain text files into column lists.

    Every line that is at least five characters long and contains more than
    one whitespace-separated token is kept; all other lines are skipped.
    Kept lines are distributed round-robin over the 'line1'..'line5' columns,
    restarting at 'line1' for each file.

    Args:
        in_paths: Iterable of paths to UTF-8 encoded text files.
        poem_type: Label stored in the 'type' column for every poem.

    Returns:
        dict mapping 'line1'..'line5' and 'type' to lists.
    """
    lines = {'line1': [],
             'line2': [],
             'line3': [],
             'line4': [],
             'line5': []}
    for in_path in in_paths:
        # The context manager closes the handle even if decoding fails.
        with open(in_path, 'rb') as in_txt:
            counter = 0
            for raw_line in in_txt:
                line = raw_line.rstrip().decode('utf-8')
                # Skip blank or very short lines and single-token lines.
                if len(line) < 5 or len(line.split()) == 1:
                    continue
                lines['line' + str(counter + 1)].append(line)
                counter = (counter + 1) % 5
    poem_count = len(lines['line1'])
    lines['type'] = [poem_type] * poem_count
    print("Dictionary created with " + str(poem_count) + " poems")
    return lines

def print_single_poem(poem_dict, index):
    """Print the entry at position ``index`` of every column in ``poem_dict``.

    One value is printed per line, in the dictionary's key order.
    """
    for column in poem_dict.values():
        print(column[index])

In [15]:
# Load every poem collection into a column-oriented dictionary tagged with its type label.
HUMAN = create_line_dictionary(['py_files/saved_objects/limericks.txt'], 'HUMAN')
DTS_NO_STORY = create_generated_line_dictionary(['new_final_testing_DTS_no_story1/25_30_0.1_multi_True_no_story.txt'], 'DTS_NO_STORY')
DTS_STORY = create_generated_line_dictionary(['new_final_testing_DTS_storyline1/25_30_0.1_multi_True_original.txt'], 'DTS_STORY')
SINGLE_NO_STORY = create_generated_line_dictionary(['final_testing_single_no_story1/20_30_0.1_5_True_no_story.txt'], 'SINGLE_NO_STORY')
# NOTE(review): this collection is labelled SINGLE_STORY but is read from a path named "no_story" — confirm the intended input file.
SINGLE_STORY = create_generated_line_dictionary(['new_final_testing_single_no_story3/25_30_0.1_single_True_no_story.txt'], 'SINGLE_STORY')
BEST_GENERATED_POEM = create_line_dictionary(['py_files/saved_objects/curated_poems.txt'], 'GENERATED')

# Print the poem stored at index 36 of the curated collection as an example.
print("------EXAMPLE------")
print_single_poem(BEST_GENERATED_POEM, 36)

Dictionary created with 87 poems
Dictionary created with 59 poems
Dictionary created with 51 poems
Dictionary created with 58 poems
Dictionary created with 60 poems
Dictionary created with 48 poems
------EXAMPLE------
There was a lively man named Clyde
Who stayed a long time on our side.
He came back from the sea,
He was able to see,
He saw through the clouds and looked outside.
GENERATED


In [16]:
def get_poem_list(dictionary, index, batch):
    """Return ``batch`` consecutive rows of a column-oriented dictionary.

    Row ``i`` holds ``dictionary[key][i]`` for every key, in the dictionary's
    key order. Rows ``index`` through ``index + batch - 1`` are returned.
    """
    rows = []
    for row_number in range(index, index + batch):
        row = []
        for column_name in dictionary:
            row.append(dictionary[column_name][row_number])
        rows.append(row)
    return rows

def export_poetic_measure_experiment(batch, DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY, name=""):
    """Write shuffled poetic-measure CSV files mixing the four generated collections.

    Each output file contains ``batch`` poems from every collection (rows
    ``index`` .. ``index + batch - 1`` of each), shuffled together. Files are
    written to ``csv/poetic_measure_<name><n>.csv`` with ``n`` starting at 1.
    Poems beyond the last full batch of the shortest collection are not
    exported.

    Args:
        batch: Number of poems taken from each collection per file; must be
            a positive integer.
        DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY: Column
            dictionaries whose keys, in order, are 'keyword',
            'line1'..'line5', 'score' and 'type'.
        name: Optional prefix inserted before the file number.

    Raises:
        ValueError: If ``batch`` is not positive (the loop would otherwise
            never terminate or divide by zero).
    """
    if batch <= 0:
        raise ValueError("batch must be a positive integer")
    sources = (DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY)
    length = min(len(source['line1']) for source in sources)
    columns = ['keyword', 'line1', 'line2', 'line3', 'line4', 'line5', 'score', 'type']
    # to_csv does not create missing parent directories.
    os.makedirs('csv', exist_ok=True)
    # Start indices of every full batch: index + batch <= length.
    for file_number, index in enumerate(range(0, length - batch + 1, batch), start=1):
        poem_list = []
        for source in sources:
            poem_list += get_poem_list(source, index, batch)
        random.shuffle(poem_list)
        out_path = 'csv/poetic_measure_' + name + str(file_number) + '.csv'
        df = DataFrame(poem_list, columns=columns)
        df.to_csv(out_path, index=False, header=True)

def export_turing_test_experiment(batch, HUMAN, GENERATED, name=""):
    """Write shuffled Turing-test CSV files mixing human and generated poems.

    Each output file contains ``batch`` poems from each of the two
    collections (rows ``index`` .. ``index + batch - 1``), shuffled together.
    Files are written to ``csv/turing_test_<name><n>.csv`` with ``n``
    starting at 1. Poems beyond the last full batch of the shorter
    collection are not exported.

    Args:
        batch: Number of poems taken from each collection per file; must be
            a positive integer.
        HUMAN, GENERATED: Column dictionaries whose keys, in order, are
            'line1'..'line5' and 'type'.
        name: Optional prefix inserted before the file number.

    Raises:
        ValueError: If ``batch`` is not positive (the loop would otherwise
            never terminate or divide by zero).
    """
    if batch <= 0:
        raise ValueError("batch must be a positive integer")
    sources = (HUMAN, GENERATED)
    length = min(len(source['line1']) for source in sources)
    columns = ['line1', 'line2', 'line3', 'line4', 'line5', 'type']
    # to_csv does not create missing parent directories.
    os.makedirs('csv', exist_ok=True)
    # Start indices of every full batch: index + batch <= length.
    for file_number, index in enumerate(range(0, length - batch + 1, batch), start=1):
        poem_list = []
        for source in sources:
            poem_list += get_poem_list(source, index, batch)
        random.shuffle(poem_list)
        out_path = 'csv/turing_test_' + name + str(file_number) + '.csv'
        df = DataFrame(poem_list, columns=columns)
        df.to_csv(out_path, index=False, header=True)

In [17]:
# Export the four generated collections as shuffled CSV files with 5 poems per collection in each file.
export_poetic_measure_experiment(5, DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY)

In [18]:
# Export shuffled CSV files that each mix 5 human poems with 5 curated generated poems.
export_turing_test_experiment(5, HUMAN, BEST_GENERATED_POEM)

In [None]:
# Use the size of the shortest generated collection as the batch so that a
# single "complete_" file is written.
length = min(
    len(collection['line1'])
    for collection in (DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY)
)
export_poetic_measure_experiment(length, DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY, "complete_")