In [10]:
from pandas import DataFrame
import random
from collections import defaultdict

In [74]:
def create_generated_line_dictionary(in_paths, poem_type):
    """Parse generated-poem text files into a column-oriented dictionary.

    Every file is read line by line (UTF-8, trailing whitespace removed):

    * A line that splits into exactly two parts on ':' is treated as metadata.
      ``prompt:<value>`` appends ``<value>`` to ``'keyword'`` and
      ``score:<value>`` appends ``<value>`` to ``'score'``. Any other
      two-part line is ignored.
    * Any remaining line of at least five characters is a poem line. It is
      stripped, its first character is upper-cased, and it is appended to
      ``'line1'`` .. ``'line5'`` in rotation.
    * Everything else (blank or very short lines) is skipped.

    Args:
        in_paths: Iterable of paths to the text files to parse.
        poem_type: Label stored once per parsed poem under ``'type'``.

    Returns:
        dict with the keys 'keyword', 'line1' .. 'line5', 'score' and 'type',
        each mapping to a list. Scores are kept as the raw strings read from
        the file.
    """
    lines = {'keyword': [],
             'line1': [],
             'line2': [],
             'line3': [],
             'line4': [],
             'line5': [],
             'score': []}
    for in_path in in_paths:
        # The context manager guarantees the handle is closed, even when a
        # line fails to decode part-way through the file.
        with open(in_path, 'rb') as in_txt:
            # Position inside the current five-line poem; restarts per file.
            counter = 0
            for raw_line in in_txt:
                line = raw_line.rstrip().decode('utf-8')
                parts = line.split(':')
                if len(parts) == 2:
                    # Metadata line: only 'prompt' and 'score' are recorded.
                    category, data = parts
                    if category == 'prompt':
                        lines['keyword'].append(data)
                    elif category == 'score':
                        lines['score'].append(data)
                elif len(line) >= 5:
                    # Poem line: normalise whitespace and capitalise the
                    # first character before storing it.
                    line = line.strip()
                    line = line[0].upper() + line[1:]
                    lines['line' + str(counter + 1)].append(line)
                    counter = (counter + 1) % 5
    poem_count = len(lines['line1'])
    lines['type'] = [poem_type] * poem_count
    print("Dictionary created with " + str(poem_count) + " poems")
    return lines

In [7]:
def create_line_dictionary(in_paths, poem_type):
    """Parse plain poem text files into a column-oriented dictionary.

    Every file is read line by line (UTF-8, trailing whitespace removed).
    Lines shorter than five characters, or made of a single
    whitespace-separated token, are skipped. The remaining lines are appended
    unchanged to ``'line1'`` .. ``'line5'`` in rotation.

    Args:
        in_paths: Iterable of paths to the text files to parse.
        poem_type: Label stored once per parsed poem under ``'type'``.

    Returns:
        dict with the keys 'line1' .. 'line5' and 'type', each mapping to a
        list.
    """
    lines = {'line1': [],
             'line2': [],
             'line3': [],
             'line4': [],
             'line5': []}
    for in_path in in_paths:
        # The context manager guarantees the handle is closed, even when a
        # line fails to decode part-way through the file.
        with open(in_path, 'rb') as in_txt:
            # Position inside the current five-line poem; restarts per file.
            counter = 0
            for raw_line in in_txt:
                line = raw_line.rstrip().decode('utf-8')
                if len(line) < 5 or len(line.split()) == 1:
                    continue
                lines['line' + str(counter + 1)].append(line)
                counter = (counter + 1) % 5
    poem_count = len(lines['line1'])
    lines['type'] = [poem_type] * poem_count
    print("Dictionary created with " + str(poem_count) + " poems")
    return lines

def print_single_poem(poem_dict, index):
    """Print the entry at ``index`` of every column in ``poem_dict``.

    One value is printed per line, following the dictionary's key order.
    """
    for column in poem_dict.values():
        print(column[index])

In [76]:
# NOTE(review): HUMAN and BEST_GENERATED_POEM are used by the Turing-test export
# cells further down in this notebook, but their definitions are commented out
# here. On a fresh kernel those cells raise NameError unless these two lines
# are restored.
# HUMAN = create_line_dictionary(['py_files/saved_objects/limericks.txt'], 'HUMAN')
# BEST_GENERATED_POEM = create_line_dictionary(['py_files/saved_objects/curated_poems.txt'], 'GENERATED')

# Parse the four generated-poem files; each call prints how many poems it read.
DTS_NO_STORY = create_generated_line_dictionary(['automatic_no_story.txt'], 'DTS_NO_STORY')
DTS_STORY = create_generated_line_dictionary(['automatic_story.txt'], 'DTS_STORY')
SINGLE_NO_STORY = create_generated_line_dictionary(['baseline_automatic_no_story.txt'], 'SINGLE_NO_STORY')
SINGLE_STORY = create_generated_line_dictionary(['baseline_automatic_story.txt'], 'SINGLE_STORY')

# Sanity check: print every stored field of one parsed poem.
print("------EXAMPLE------")
print_single_poem(DTS_STORY, 10)

Dictionary created with 98 poems
Dictionary created with 79 poems
Dictionary created with 95 poems
Dictionary created with 58 poems
------EXAMPLE------
holiday
There was a bright grandma named Laure
Who spent all her money in a store.
It was for her birthday,
But it was the last day,
So she gave up and drank on the shore.
-2.763656878110134
DTS_STORY


In [33]:
def get_poem_list(dictionary, index, batch):
    """Return ``batch`` consecutive poems from ``dictionary`` as row lists.

    Row ``i`` (for ``i`` in ``index .. index + batch - 1``) contains the
    ``i``-th entry of every column, in the dictionary's key order.
    """
    rows = []
    for position in range(index, index + batch):
        rows.append([column[position] for column in dictionary.values()])
    return rows

def export_poetic_measure_experiment(batch, DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY, name=""):
    """Write shuffled batches of poems from the four systems to CSV files.

    For every complete batch, ``batch`` poems are taken from each of the four
    dictionaries at the same positions, shuffled together, and written to
    ``csv/poetic_measure_<name><n>.csv``, where ``n`` counts batches from 1.
    Poems beyond the last complete batch of the shortest set are not exported.

    Args:
        batch: Number of poems taken from each set per output file; must be
            a positive integer.
        DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY: Dictionaries
            whose values are equal-length lists, in the column order
            keyword, line1 .. line5, score, type.
        name: Optional prefix inserted before the batch number in the file
            name.

    Raises:
        ValueError: If ``batch`` is not positive. Without this check the
            export loop would never advance and would not terminate.
    """
    import os  # local import: only needed to create the output directory

    if batch <= 0:
        raise ValueError("batch must be a positive integer, got " + repr(batch))

    sources = (DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY)
    length = min(len(source['line1']) for source in sources)
    columns = ['keyword', 'line1', 'line2', 'line3', 'line4', 'line5', 'score', 'type']

    # DataFrame.to_csv does not create missing parent directories.
    os.makedirs('csv', exist_ok=True)

    index = 0
    while index + batch <= length:
        poem_list = []
        for source in sources:
            poem_list += get_poem_list(source, index, batch)
        random.shuffle(poem_list)
        index += batch
        # index is always a multiple of batch here, so this is the 1-based
        # batch number.
        out_path = 'csv/poetic_measure_' + name + str(index // batch) + '.csv'
        DataFrame(poem_list, columns=columns).to_csv(out_path, index=False, header=True)

def export_turing_test_experiment(batch, HUMAN, GENERATED, name=""):
    """Write shuffled batches of human and generated poems to CSV files.

    For every complete batch, ``batch`` poems are taken from each dictionary
    at the same positions, shuffled together, and written to
    ``csv/turing_test_<name><n>.csv``, where ``n`` counts batches from 1.
    Poems beyond the last complete batch of the shorter set are not exported.

    Args:
        batch: Number of poems taken from each set per output file; must be
            a positive integer.
        HUMAN, GENERATED: Dictionaries whose values are equal-length lists,
            in the column order line1 .. line5, type.
        name: Optional prefix inserted before the batch number in the file
            name.

    Raises:
        ValueError: If ``batch`` is not positive. Without this check the
            export loop would never advance and would not terminate.
    """
    import os  # local import: only needed to create the output directory

    if batch <= 0:
        raise ValueError("batch must be a positive integer, got " + repr(batch))

    length = min(len(HUMAN['line1']), len(GENERATED['line1']))
    columns = ['line1', 'line2', 'line3', 'line4', 'line5', 'type']

    # DataFrame.to_csv does not create missing parent directories.
    os.makedirs('csv', exist_ok=True)

    index = 0
    while index + batch <= length:
        poem_list = get_poem_list(HUMAN, index, batch) + get_poem_list(GENERATED, index, batch)
        random.shuffle(poem_list)
        index += batch
        # index is always a multiple of batch here, so this is the 1-based
        # batch number.
        out_path = 'csv/turing_test_' + name + str(index // batch) + '.csv'
        DataFrame(poem_list, columns=columns).to_csv(out_path, index=False, header=True)

In [82]:
def export_poetic_measure_experiment_new(DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY, name=""):
    """Write one CSV row per keyword, holding that keyword's four poems.

    Poems from the four dictionaries are grouped by their ``'keyword'`` entry.
    Only keywords that collected exactly four poems are exported. Within a
    row the four poems appear in random order, and the columns are named
    ``<slot>_<field>`` for slots 1 to 4. The file is written to
    ``csv/poetic_measure_<name>.csv``.

    NOTE(review): the filter counts poems per keyword but does not check that
    they came from four different sets; confirm that keywords are unique
    within each input.

    Args:
        DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY: Dictionaries
            whose values are equal-length lists, in the column order
            keyword, line1 .. line5, score, type.
        name: Suffix used in the output file name.
    """
    import os  # local import: only needed to create the output directory

    fields = ['keyword', 'line1', 'line2', 'line3', 'line4', 'line5', 'score', 'type']

    # keyword -> list of poems, each poem being the row of its dictionary.
    poems_by_keyword = defaultdict(list)
    for poem_dict in (DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY):
        for i in range(len(poem_dict['line1'])):
            poem = [poem_dict[key][i] for key in poem_dict]
            poems_by_keyword[poem_dict['keyword'][i]].append(poem)

    output_poem_list = []
    for poems in poems_by_keyword.values():
        if len(poems) != 4:
            continue
        # Sampling the whole group yields a shuffled copy, so the column slot
        # does not reveal which set a poem came from.
        shuffled = random.sample(poems, len(poems))
        output_poem_list.append([value for poem in shuffled for value in poem])

    column_names = [str(slot) + '_' + field for slot in range(1, 5) for field in fields]

    # DataFrame.to_csv does not create missing parent directories.
    os.makedirs('csv', exist_ok=True)
    out_path = 'csv/poetic_measure_' + name + '.csv'
    DataFrame(output_poem_list, columns=column_names).to_csv(out_path, index=False, header=True)

In [83]:
# Write csv/poetic_measure_test.csv: one row for every keyword that collected
# exactly four poems across the four sets.
export_poetic_measure_experiment_new(DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY,'test')

In [17]:
# Export shuffled batches holding 5 poems from each of the four sets to
# csv/poetic_measure_<n>.csv.
export_poetic_measure_experiment(5, DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY)

In [34]:
# Export shuffled batches of 5 human and 5 generated poems to csv/turing_test_<n>.csv.
# NOTE(review): HUMAN and BEST_GENERATED_POEM are only defined in commented-out
# lines earlier in this notebook, so this cell fails on a fresh kernel.
export_turing_test_experiment(5, HUMAN, BEST_GENERATED_POEM)

In [None]:
# Use the size of the smallest set as the batch size, so a single file
# (csv/poetic_measure_complete_1.csv) holds that many poems from every set.
length = min(len(DTS_NO_STORY['line1']), len(DTS_STORY['line1']), len(SINGLE_NO_STORY['line1']), len(SINGLE_STORY['line1']))
export_poetic_measure_experiment(length, DTS_NO_STORY, DTS_STORY, SINGLE_NO_STORY, SINGLE_STORY, "complete_")

In [37]:
# Use the size of the smaller set as the batch size, so a single file
# (csv/turing_test_complete_1.csv) holds that many poems from each set.
# NOTE(review): HUMAN and BEST_GENERATED_POEM are only defined in commented-out
# lines earlier in this notebook, so this cell fails on a fresh kernel.
length = min(len(HUMAN['line1']), len(BEST_GENERATED_POEM['line1']))
export_turing_test_experiment(length, HUMAN, BEST_GENERATED_POEM, "complete_")