In [None]:
import json

In [None]:
# Gather every turn label from the three WoZ split files, each label rendered
# as its parts joined with ':'.
candidates = []

path = '/home/nazar/ParlAI/data/WoZ/'
fnames = ['woz_train_en.json', 'woz_validate_en.json', 'woz_test_en.json']

for fname in fnames:
    with open(path + fname, "r") as f:
        data = json.load(f)

    # Walk dialogue -> turn -> label and flatten everything into one list;
    # duplicates are kept here and removed in a later cell.
    candidates.extend(
        ':'.join(turn_labels)
        for dialogue in data
        for turn in dialogue['dialogue']
        for turn_labels in turn['turn_label']
    )

In [None]:
# Drop duplicate labels; the resulting set has no guaranteed order.
candidates = set(candidates)

In [None]:
# Append one label per line; mode 'a' keeps whatever the file already holds,
# so re-running this cell adds the labels again.
with open('labels_full.dict', 'a') as f:
    f.writelines(el + '\n' for el in candidates)

# Dialog bAbI task 5: checking labels

In [None]:
# Keep the last tab-separated field of every line that contains a tab;
# lines without a tab are skipped.
labels = []
with open('../../../data/dialog-bAbI/dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-trn.txt') as f:
    for line in f:
        if '\t' not in line:
            continue
        labels.append(line.strip().split('\t')[-1])

In [None]:
# Drop duplicate labels; the resulting set has no guaranteed order.
labels = set(labels)

In [None]:
# Cell output: how many labels are currently held in `labels`.
len(labels)

In [None]:
# Same size check as the previous cell, repeated verbatim.
len(labels)

In [None]:
# Append one label per line; mode 'a' keeps whatever the file already holds,
# so re-running this cell adds the labels again.
with open('task5/candidates.txt', 'a') as f:
    f.writelines(el + '\n' for el in labels)

In [None]:
from parlai.scripts.display_data import DisplayData

In [None]:
# Run ParlAI's display_data script on the ParlAI-format file(s) behind
# 'flow_data/testflow'.
# NOTE(review): fromfile_datatype_extension=True presumably makes ParlAI append
# a per-datatype suffix (e.g. '_train.txt') to the path - confirm against the
# ParlAI fromfile teacher.
DisplayData.main(
    task='fromfile:parlaiformat',
    fromfile_datapath='flow_data/testflow',
    fromfile_datatype_extension=True
)

In [None]:
from parlai.scripts.interactive import Interactive

# Run ParlAI's interactive script with the saved model file, taking reply
# candidates from a fixed candidate file (eval_candidates='fixed').
Interactive.main(
    model_file='task5-3/pos_enc/memnn_dialog_babi',
    eval_candidates='fixed',
    fixed_candidates_path='task5-3/test_cand.txt',
    # repeat_blocking_heuristic=False,
)

# Creating test set with greetings

In [None]:
# text:hello	labels:hi, how are you?	episode_done:True

# text:hi	labels:hello hello	episode_done:True

In [None]:
# Pair every greeting with the same canned reply and append each pair to the
# training file as a single-turn episode.
response = 'Good day, [Sir/Madam]! Can I speak with [NAME], please?'

with open('flow_data/responses/greetings.txt') as greetings, \
        open('flow_data/testflow_train.txt', 'a') as dataset:
    for line in greetings:
        dataset.write(f'text:{line.strip()}\tlabels:{response}\tepisode_done:True\n\n')


# Storyline class

In [3]:
import os

# Dialogue flow graph: each key is a node id and its value lists the node ids
# that may follow it. Story starts its traversal at '0'. Ids that never appear
# as keys (e.g. '2c', '3b-1', '5a-2', '5a-3') have no successors and end a path.
test_stucture = {
    '0':['1'],
    '1':['1a', '1b', '1c'],
    '1a':['2a'],
    '1b':['2b'],
    '1c':['2c'],
    '2b':['2b-1', '2b-2', '2b-3'],
    '2b-1':['3b-1'],
    '2b-2':['3b-2'],
    '2b-3':['3b-3'],
    '2a':['2a-2', '2a-3'],
    # '2a':['2a-1', '2a-2', '2a-3'],
    # '2a-1':['3a-1'],
    '2a-2':['5a-2'],
    '2a-3':['3a-3'],
    '3a-3':['4a-3-a', '4a-3-b'],
    '4a-3-a':['5a-2'],
    '4a-3-b':['5a-3']
}

class Story():
    """Enumerate every dialogue allowed by a flow graph and per-node response files.

    The flow graph maps a node id to the ids that may follow it. Every
    root-to-leaf path through the graph (starting at node '0') is a "script".
    For each node of a script the file ``response_<node>.txt`` inside `folder`
    supplies the alternative utterances for that turn, one per line; a "story"
    is one concrete choice of utterance for every node of a script, stored as
    newline-terminated turns. Turns at even positions are written as ``text:``
    fields and turns at odd positions as ``labels:`` fields.

    The graph must be acyclic: paths are expanded recursively with no
    visited-node tracking.
    """

    def __init__(
        self,
        structure=None,
        folder='flow_data/responses',
        dataset_path='flow_data/flow_train_2.txt',
        candidates_outpath='flow_test/candidates.txt'
    ) -> None:
        """Build all scripts and stories up front.

        Args:
            structure: dict mapping node id -> list of successor node ids.
                ``None`` (the default) selects the notebook-level
                ``test_stucture`` graph.
            folder: directory holding the ``response_<node>.txt`` files.
            dataset_path: file written by `build_dataset`.
            candidates_outpath: default file written by `build_candidates`.
        """
        if structure is None:
            # Looked up at call time rather than frozen into the signature, so
            # rebinding `test_stucture` in a later cell is picked up without
            # re-running the class definition.
            structure = test_stucture

        self.folder = folder
        self.structure = structure
        self.dataset_path = dataset_path
        self.candidates_outpath = candidates_outpath

        self.scripts = []
        self.recursive_build_scripts('', '0')

        self.stories = []
        self.build_stories()

    def add_connection(self, story, out_node):
        """Return `story` with `out_node` and a separating space appended."""
        return story + out_node + ' '

    def recursive_build_scripts(self, story, inp):
        """Extend the path `story` with node `inp` and recurse into its successors.

        A node with no successors (absent from the graph, or mapped to an
        empty list) ends the path, which is then stored in `self.scripts` as a
        space-separated string of node ids.
        """
        story = self.add_connection(story, inp)
        connections = self.structure.get(inp)
        if not connections:
            self.scripts.append(story)
            return
        for node in connections:
            self.recursive_build_scripts(story, node)

    def recursive_build_story(self, story, nodes):
        """Expand `story` with every utterance available for the remaining `nodes`.

        The story is stored in `self.stories` once the node list is exhausted,
        or early when the next node's response file is missing or empty.
        """
        if not nodes:
            self.stories.append(story)
            return

        path = os.path.join(self.folder, f'response_{nodes[0]}.txt')
        try:
            # Read everything first so the file is closed before recursing.
            with open(path, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            # A node without a response file simply ends the story here.
            lines = []

        if not lines:
            self.stories.append(story)
            return

        for line in lines:
            # The last line of a file may lack its newline; without one it
            # would be fused with the following turn.
            turn = line if line.endswith('\n') else line + '\n'
            self.recursive_build_story(story + turn, nodes[1:])

    def build_stories(self):
        """Expand every script in `self.scripts` into its concrete stories."""
        for script in self.scripts:
            nodes = script.split()
            self.recursive_build_story('', nodes)

    def __getitem__(self, idx):
        return self.stories[idx]

    def __len__(self):
        return len(self.stories)

    @staticmethod
    def _split_turns(story):
        """Split `story` on newlines, dropping the empty tail after the final one."""
        turns = story.split('\n')
        if turns[-1] == '':
            turns.pop()
        return turns

    def story_to_ds_format(self, story):
        """Render one story as a block of ``text:``/``labels:`` lines.

        Consecutive turns are paired as ``text:<turn>\\tlabels:<turn>`` lines;
        the final turn carries ``episode_done:True`` and is followed by a blank
        line. A final turn at an even position (no reply after it) is written
        as a ``text:`` field. An empty story yields an empty string.
        """
        turns = self._split_turns(story)
        if not turns:
            return ''

        prefix = ['text:', 'labels:']
        last = len(turns) - 1
        string = ''
        for i, turn in enumerate(turns):
            field = prefix[i % 2] + turn
            if i == last:
                string += field + '\tepisode_done:True\n\n'
            elif i % 2 != 0:
                string += field + '\n'
            else:
                string += field + '\t'
        return string

    def build_dataset(self):
        """Write every story, formatted by `story_to_ds_format`, to `self.dataset_path`."""
        with open(self.dataset_path, 'w') as f:
            for story in self.stories:
                f.write(self.story_to_ds_format(story))

    def build_candidates(self, outpath=None):
        """Write the distinct label turns of all stories, one per line.

        Args:
            outpath: destination file; defaults to `self.candidates_outpath`.
        """
        all_labels = set()
        for story in self.stories:
            # Odd positions hold the label turns.
            all_labels.update(self._split_turns(story)[1::2])

        if outpath is None:
            outpath = self.candidates_outpath

        with open(outpath, 'w') as f:
            # Sorted so the file is identical from run to run.
            for cand in sorted(all_labels):
                f.write(cand + '\n')

In [None]:
# Build every script and story from the default graph and response folder;
# the constructor reads the response files immediately.
stories = Story()

In [None]:
# Write all stories to stories.dataset_path, replacing any existing file.
stories.build_dataset()

In [None]:
# Write the distinct label turns to stories.candidates_outpath, replacing any
# existing file.
stories.build_candidates()

In [4]:
# Every node id mentioned in the flow graph, whether as a key or as a successor.
nodes = set(test_stucture)
for successors in test_stucture.values():
    nodes.update(successors)

In [6]:
import os

# Make sure every node of the flow graph has a response file in each subset
# folder; files that already exist are left untouched.
folderpath = 'flow_data/responses'
for subset in ['train', 'valid']:
    subset_dir = os.path.join(folderpath, subset)
    # open(..., 'w') does not create missing parent directories, so create the
    # subset folder first (no-op when it already exists).
    os.makedirs(subset_dir, exist_ok=True)
    for node in nodes:
        filepath = os.path.join(subset_dir, f'response_{node}.txt')
        if not os.path.exists(filepath):
            open(filepath, 'w').close()

In [None]:
from parlai.scripts.build_candidates import BuildCandidates

In [None]:
# Run ParlAI's build_candidates script for the 'testflow:train2' task.
# NOTE(review): the outfile is the same path Story uses by default for
# build_candidates ('flow_test/candidates.txt'); confirm which of the two is
# meant to produce the final file.
BuildCandidates.main(
    datapath='flow_data',
    task='testflow:train2',
    outfile='flow_test/candidates.txt'
)