In [None]:
import json

In [None]:
# Collect every turn label of the three WoZ splits as a ':'-joined string.
# NOTE(review): `path` is an absolute, machine-specific location.
path = '/home/nazar/ParlAI/data/WoZ/'
fnames = ['woz_train_en.json', 'woz_validate_en.json', 'woz_test_en.json']

candidates = []

for fname in fnames:
    with open(path+fname, "r") as fh:
        data = json.load(fh)

    for dlg in data:
        for turn in dlg['dialogue']:
            # Each entry of 'turn_label' is a sequence of strings joined with ':'.
            candidates.extend(
                ':'.join(turn_labels) for turn_labels in turn['turn_label']
            )

In [None]:
# Drop duplicate labels; `candidates` is rebound from a list to a set here.
candidates = set(candidates)

In [None]:
# Append one candidate per line to the label dictionary file.
# Mode 'a' means re-running this cell appends the same entries again.
with open('labels_full.dict', 'a') as f:
    f.writelines(el+'\n' for el in candidates)

# Dialog bAbI task 5: checking labels

In [None]:
# Keep the last tab-separated field of every line that contains a tab.
labels = []
with open('../../../data/dialog-bAbI/dialog-bAbI-tasks/dialog-babi-task5-full-dialogs-trn.txt') as f:
    for line in f:
        if '\t' not in line:
            continue
        labels.append(line.strip().split('\t')[-1])

In [None]:
# Drop duplicate labels; `labels` is rebound from a list to a set here.
labels = set(labels)

In [None]:
# Number of entries currently held in `labels`.
len(labels)

In [None]:
# Same expression as the previous cell (duplicate check of the label count).
len(labels)

In [None]:
# Append one label per line to the task-5 candidate file.
# Mode 'a' means re-running this cell appends the same entries again.
with open('task5/candidates.txt', 'a') as f:
    f.writelines(el+'\n' for el in labels)

In [None]:
from parlai.scripts.display_data import DisplayData

In [None]:
# Run ParlAI's DisplayData script on the 'fromfile:parlaiformat' task, pointing it
# at the relative path 'flow_data/testflow'.
# NOTE(review): fromfile_datatype_extension=True presumably makes ParlAI append a
# datatype suffix (e.g. '_train.txt') to that path — confirm against the ParlAI docs.
DisplayData.main(
    task='fromfile:parlaiformat',
    fromfile_datapath='flow_data/testflow',
    fromfile_datatype_extension=True
)

In [None]:
from parlai.scripts.interactive import Interactive

# Start ParlAI's Interactive script with the model stored under
# 'task5-3/pos_enc/memnn_dialog_babi'.
# NOTE(review): eval_candidates='fixed' presumably restricts replies to the lines
# of fixed_candidates_path — confirm against the ParlAI docs.
Interactive.main(
    model_file='task5-3/pos_enc/memnn_dialog_babi',
    eval_candidates='fixed',
    fixed_candidates_path='task5-3/test_cand.txt',
    # repeat_blocking_heuristic=False,
)

# Creating test set with greetings

In [None]:
# text:hello	labels:hi, how are you?	episode_done:True

# text:hi	labels:hello hello	episode_done:True

In [None]:
# Every greeting becomes a one-turn episode answered with the same response.
response = 'Good day, [Sir/Madam]! Can I speak with [NAME], please?'

# Mode 'a': episodes are appended, so re-running this cell duplicates them.
with open('flow_data/responses/greetings.txt') as greetings, \
        open('flow_data/testflow_train.txt', 'a') as dataset:
    for line in greetings:
        episode = f'text:{line.strip()}\tlabels:{response}\tepisode_done:True\n\n'
        dataset.write(episode)


# storyline class

In [11]:
import os

# Dialogue flow graph: each key is a node id, each value lists the node ids that
# may follow it. Node ids that never appear as a key end a path.
test_stucture = {
    # opening
    '0': ['1'],
    '1': ['1a', '1b', '1c'],
    '1a': ['2a'],
    '1b': ['2b'],
    '1c': ['2c'],

    # branch "b"
    '2b': ['2b-1', '2b-2', '2b-3'],
    '2b-1': ['3b-1'],
    '2b-2': ['3b-2'],
    '2b-3': ['3b-3'],

    # branch "a" (the '2a-1' sub-branch is currently disabled)
    '2a': ['2a-2', '2a-3'],
    # '2a': ['2a-1', '2a-2', '2a-3'],
    # '2a-1': ['3a-1'],
    '2a-2': ['5a-2'],
    '2a-3': ['3a-3'],
    '3a-3': ['4a-3-a', '4a-3-b'],
    '4a-3-a': ['5a-2'],
    '4a-3-b': ['5a-3'],
}

class Story():
    """Enumerate dialogue stories from a node graph and export them as datasets.

    ``structure`` maps a node id to the list of node ids that may follow it.
    Every path that starts at node ``'0'`` and ends at a node without
    successors becomes a *script* (space-separated node ids, kept in
    ``self.scripts``).  For each split in ``self.types`` every script is
    expanded into *stories*: each node is replaced by a line of
    ``<folder>/responses/<split>/response_<node>.txt`` and all combinations of
    lines are generated.  A story is a string of newline-terminated turns; the
    export methods treat even-indexed turns as input text and odd-indexed turns
    as labels.
    """

    def __init__(
        self,
        structure=None,
        folder='flow_data/flow',
        version=3,
        candidates_outpath='flow_data/flow/candidates.txt'
    ) -> None:
        """Build all scripts and stories.

        Args:
            structure: dict mapping node id -> list of successor node ids.
                ``None`` (the default) uses the module-level ``test_stucture``,
                looked up when the instance is created.
            folder: directory holding ``responses/<split>/response_<node>.txt``
                and receiving the generated dataset files.
            version: value used as the last component of dataset file names.
            candidates_outpath: default output file for ``build_candidates``.
        """
        if structure is None:
            # Resolved at call time so the class can be defined before the
            # default graph exists.
            structure = test_stucture

        self.folder = folder
        self.structure = structure

        self.types = ['train', 'valid']
        self.prefix = 'flow'
        self.version = version
        self.candidates_outpath = candidates_outpath

        self.scripts = []
        self.recursive_build_scripts('', '0')

        self.stories = {suffix: [] for suffix in self.types}
        self.build_stories()

    def add_connection(self, story, out_node):
        """Return ``story`` extended with ``out_node`` and a trailing space."""
        return story + out_node + ' '

    def recursive_build_scripts(self, story, inp):
        """Depth-first walk of ``self.structure`` starting at node ``inp``.

        ``story`` is the space-separated path walked so far.  When ``inp`` has
        no successors (missing key or empty list) the completed path is stored
        in ``self.scripts``.
        """
        story = self.add_connection(story, inp)
        connections = self.structure.get(inp)
        if not connections:
            # Leaf node: the path is complete.
            self.scripts.append(story)
            return
        for node in connections:
            self.recursive_build_scripts(story, node)

    def recursive_build_story(self, story, nodes, suffix):
        """Expand the remaining ``nodes`` of a script into concrete stories.

        For the first node the matching response file of split ``suffix`` is
        read and the expansion continues once per line of that file.  The story
        built so far is stored in ``self.stories[suffix]`` when no nodes are
        left, or when the response file is missing, unreadable or empty.
        """
        if not nodes:
            self.stories[suffix].append(story)
            return

        filename = f'response_{nodes[0]}.txt'
        path = os.path.join(self.folder, 'responses', suffix, filename)
        try:
            # Read everything first so the file is closed before recursing.
            with open(path, 'r') as f:
                lines = f.readlines()
        except OSError:
            # No usable response file for this node: the story ends here.
            lines = []

        if not lines:
            self.stories[suffix].append(story)
            return

        for line in lines:
            # The last line of a file may lack its newline; without it two
            # turns would be fused into one.
            turn = line if line.endswith('\n') else line + '\n'
            self.recursive_build_story(story + turn, nodes[1:], suffix)

    def build_stories(self):
        """Expand every script into stories for every split in ``self.types``."""
        for suffix in self.types:
            for script in self.scripts:
                nodes = script.split()
                self.recursive_build_story('', nodes, suffix)

    @staticmethod
    def _turn_pairs(story):
        """Split a story into ``(text, label)`` pairs, dropping an unpaired last turn."""
        turns = story.split('\n')
        if turns and turns[-1] == '':
            # Artifact of the newline that terminates the final turn.
            turns = turns[:-1]
        return list(zip(turns[0::2], turns[1::2]))

    def to_ParlAIDialogTeacher_format(self, story):
        """Render a story as one episode of ``text:...<TAB>labels:...`` lines.

        The last line carries ``episode_done:True`` and is followed by a blank
        line.  A story without a complete (text, label) pair yields ``''``.
        """
        pairs = self._turn_pairs(story)
        if not pairs:
            return ''
        rows = [f'text:{text}\tlabels:{label}' for text, label in pairs]
        return '\n'.join(rows) + '\tepisode_done:True\n\n'

    def to_FbDeprecatedDialogTeacher_format(self, story):
        """Render a story as numbered ``<n> <text><TAB><label>`` lines plus a blank line."""
        rows = ''.join(
            f'{number} {text}\t{label}\n'
            for number, (text, label) in enumerate(self._turn_pairs(story), start=1)
        )
        return rows + '\n'

    def story_to_ds_format(self, story, teacher='fb'):
        """Convert ``story`` with the formatter selected by ``teacher``.

        Args:
            story: newline-separated turns.
            teacher: ``'fb'`` or ``'parlai'``; the historical misspelling
                ``'parali'`` is still accepted.

        Raises:
            ValueError: if ``teacher`` is not one of the supported names.
        """
        if teacher in ('parlai', 'parali'):
            return self.to_ParlAIDialogTeacher_format(story)
        if teacher == 'fb':
            return self.to_FbDeprecatedDialogTeacher_format(story)
        raise ValueError(
            f"unknown teacher {teacher!r}; expected 'fb' or 'parlai'"
        )

    def build_dataset(self, teacher='fb'):
        """Write the stories of each split to ``self.dataset_path(split)``.

        Args:
            teacher: formatter name forwarded to ``story_to_ds_format``.
        """
        for suffix in self.types:
            path = self.dataset_path(suffix)
            with open(path, 'w') as f:
                for story in self.stories[suffix]:
                    f.write(self.story_to_ds_format(story, teacher))

    def build_candidates(self, outpath=None):
        """Write every distinct non-empty label of all stories, one per line.

        Labels are written in sorted order so repeated runs give the same file.

        Args:
            outpath: target file; defaults to ``self.candidates_outpath``.
        """
        all_labels = set()
        for split_stories in self.stories.values():
            for story in split_stories:
                all_labels.update(
                    label for _, label in self._turn_pairs(story) if label
                )

        if outpath is None:
            outpath = self.candidates_outpath

        with open(outpath, 'w') as f:
            for cand in sorted(all_labels):
                f.write(cand+'\n')

    def dataset_path(self, suffix):
        """Return ``<folder>/<prefix>_<suffix>_<version>.txt``."""
        filename = '_'.join([self.prefix, suffix, str(self.version)]) + '.txt'
        return os.path.join(self.folder, filename)




In [12]:
# Build all scripts and stories with the default structure, folder and version.
stories = Story()

In [13]:
# First generated story of the 'train' split.
story = stories.stories['train'][0]

In [14]:
# Show the raw string (escapes visible) produced by the Fb-format converter.
stories.to_FbDeprecatedDialogTeacher_format(story)

"1 hello\tgood day, can I speak with [NAME], please?\n2 yeah, here he is\tmy boss, with whom you spoke about the loan, asked me to call you back and clarify when you plan to make a payment, for how much and in which bank?\n3 i don't know\t[ESCALATING FURTHER]\n\n"

In [15]:
# Same conversion, printed so that tabs and newlines are rendered.
print(stories.to_FbDeprecatedDialogTeacher_format(story))

1 hello	good day, can I speak with [NAME], please?
2 yeah, here he is	my boss, with whom you spoke about the loan, asked me to call you back and clarify when you plan to make a payment, for how much and in which bank?
3 i don't know	[ESCALATING FURTHER]




In [16]:
# Write one dataset file per split; file names come from Story.dataset_path.
stories.build_dataset()

In [39]:
# Write the de-duplicated labels of all stories to the default candidates path.
stories.build_candidates()

In [4]:
# Every node id mentioned in the flow graph, whether as a key or as a successor.
nodes = set()
for parent, children in test_stucture.items():
    nodes.add(parent)
    nodes.update(children)

In [6]:
import os

# Create an empty response file for every node in each split, leaving files
# that already exist untouched.
# NOTE(review): Story's default folder is 'flow_data/flow', so it reads from
# 'flow_data/flow/responses/<split>/' — confirm this location is the intended one.
folderpath = 'flow_data/responses'

for subset in ['train', 'valid']:
    subset_dir = os.path.join(folderpath, subset)
    # open(..., 'w') does not create missing directories, so create them first.
    os.makedirs(subset_dir, exist_ok=True)
    for node in nodes:
        filepath = os.path.join(subset_dir, f'response_{node}.txt')
        if not os.path.exists(filepath):
            # Opening in 'w' mode creates the empty file; `with` closes it.
            with open(filepath, 'w'):
                pass

In [None]:
from parlai.scripts.build_candidates import BuildCandidates

# Run ParlAI's BuildCandidates script for task 'testflow:train2' with data under
# 'flow_data'; the result is requested at 'flow_test/candidates.txt'.
# NOTE(review): how the task string is resolved is not visible here — confirm
# against the task definition.
BuildCandidates.main(
    datapath='flow_data',
    task='testflow:train2',
    outfile='flow_test/candidates.txt'
)

In [17]:
from parlai.scripts.display_data import DisplayData

# DisplayData.main(
#     # task='flow:flow:2:datapath=flow_data,dialog_babi:task:5',
#     # task='dialog_babi:task:5',
#     task='flow:flow:2:candidates=fixed:fcp=flow_data/candidates.txt:datapath=flow_data',
#     init_opts='flow_test/flow_test/model/memnn_prtr.dict.opt',
#     allow_missing_opts=True,
#     # verbose=True,
#     # display_add_fields='label_candidates',
# )

# Run ParlAI's DisplayData script on task 'flow:flow:3' with data under 'flow_data'.
# NOTE(review): the trailing ':3' presumably selects the dataset files written
# with version=3 (flow_<split>_3.txt) — confirm against the task definition.
DisplayData.main(
    task='flow:flow:3',
    verbose=True,
    datapath='flow_data',
    # display_add_fields='label_candidates',
)

18:31:20 | Opt:
18:31:20 |     allow_missing_init_opts: False
18:31:20 |     batchsize: 1
18:31:20 |     datapath: flow_data
18:31:20 |     datatype: train:ordered
18:31:20 |     dict_class: None
18:31:20 |     display_add_fields: 
18:31:20 |     download_path: None
18:31:20 |     dynamic_batching: None
18:31:20 |     hide_labels: False
18:31:20 |     ignore_agent_reply: True
18:31:20 |     image_cropsize: 224
18:31:20 |     image_mode: raw
18:31:20 |     image_size: 256
18:31:20 |     init_model: None
18:31:20 |     init_opt: None
18:31:20 |     is_debug: False
18:31:20 |     loglevel: info
18:31:20 |     max_display_len: 1000
18:31:20 |     model: None
18:31:20 |     model_file: None
18:31:20 |     multitask_weights: [1]
18:31:20 |     mutators: None
18:31:20 |     num_examples: 10
18:31:20 |     override: "{'task': 'flow:flow:3', 'verbose': True, 'datapath': 'flow_data'}"
18:31:20 |     parlai_home: /home/nazar/ParlAI
18:31:20 |     starttime: Jul27_18-31
18:31:20 |     task: flow:f

In [23]:
from parlai.scripts.eval_model import EvalModel

# Run ParlAI's EvalModel script on task 'flow:flow:2', taking options from the
# saved opt file 'model/memnn_prtr.dict.opt'.
# NOTE(review): datapath is '../flow_data' here but 'flow_data' in other cells,
# so this cell presumably ran from a different working directory — confirm.
EvalModel.main(
    # task='flow:flow:2:datapath=flow_data,dialog_babi:task:5',
    # task='dialog_babi:task:5',
    task='flow:flow:2',
    init_opt='model/memnn_prtr.dict.opt',
    allow_missing_init_opts=True,
    datapath='../flow_data',
    # datatype='train:evalmode',
)

17:23:23 | [33mThe "evaltask" key in model/memnn_prtr.dict.opt will not be loaded, because it does not exist in the target opt.[0m
17:23:23 | [33mThe "eval_batchsize" key in model/memnn_prtr.dict.opt will not be loaded, because it does not exist in the target opt.[0m
17:23:23 | [33mThe "eval_dynamic_batching" key in model/memnn_prtr.dict.opt will not be loaded, because it does not exist in the target opt.[0m
17:23:23 | [33mThe "num_workers" key in model/memnn_prtr.dict.opt will not be loaded, because it does not exist in the target opt.[0m
17:23:23 | [33mThe "num_epochs" key in model/memnn_prtr.dict.opt will not be loaded, because it does not exist in the target opt.[0m
17:23:23 | [33mThe "max_train_time" key in model/memnn_prtr.dict.opt will not be loaded, because it does not exist in the target opt.[0m
17:23:23 | [33mThe "max_train_steps" key in model/memnn_prtr.dict.opt will not be loaded, because it does not exist in the target opt.[0m
17:23:23 | [33mThe "log_every_n

{'exs': SumMetric(22),
 'accuracy': ExactMatchMetric(0.3636),
 'f1': F1Metric(0.4208),
 'bleu-4': BleuMetric(0.3182),
 'hits@1': AverageMetric(0.3636),
 'hits@5': AverageMetric(0.5455),
 'hits@10': AverageMetric(0.9545),
 'hits@100': AverageMetric(0.9545),
 'llen': AverageMetric(13.41),
 'ltrunc': AverageMetric(0),
 'ltrunclen': AverageMetric(0),
 'loss': AverageMetric(5.404),
 'rank': AverageMetric(5.133),
 'mrr': AverageMetric(0.3234),
 'exps': GlobalTimerMetric(514.4),
 'ltpb': GlobalAverageMetric(73.75),
 'ltps': GlobalTimerMetric(6913),
 'ctpb': GlobalAverageMetric(21.5),
 'ctps': GlobalTimerMetric(2017),
 'tpb': GlobalAverageMetric(95.25),
 'tps': GlobalTimerMetric(8936),
 'gpu_mem': GlobalAverageMetric(0.000112)}

In [10]:
from parlai.scripts.eval_model import EvalModel

# Run ParlAI's EvalModel script with the saved model 'model/memnn_prtr' on a
# comma-separated task string: the flow task (datapath passed inline) plus
# 'dialog_babi:task:5'.
EvalModel.main(
    task='flow:flow:2:datapath=../flow_data,dialog_babi:task:5',
    # task='dialog_babi:task:5',
    # task='flow:flow:2',
    model_file='model/memnn_prtr',
)

17:39:05 | [33mOverriding opt["datatype"] to valid (previously: train)[0m
17:39:05 | [33mOverriding opt["task"] to flow:flow:2:datapath=../flow_data,dialog_babi:task:5 (previously: flow:flow:train:2)[0m
17:39:05 | Using CUDA
17:39:05 | loading dictionary from model/memnn_prtr.dict
17:39:05 | num words = 148
17:39:05 | Total parameters: 19,968 (19,968 trainable)
17:39:05 | Loading existing model parameters from model/memnn_prtr
17:39:05 | Loading fixed candidate set from candidates.txt
17:39:05 | Loading fixed candidate set vectors from model/memnn_prtr.candidates.vecs
17:39:05 | Loading fixed candidate set encodings from model/memnn_prtr.candidates.encs
17:39:05 | Opt:
17:39:05 |     adafactor_eps: '[1e-30, 0.001]'
17:39:05 |     adam_eps: 1e-08
17:39:05 |     add_p1_after_newln: True
17:39:05 |     aggregate_micro: False
17:39:05 |     allow_missing_init_opts: False
17:39:05 |     area_under_curve_class: None
17:39:05 |     area_under_curve_digits: -1
17:39:05 |     batchsize: 100

{'flow:flow:2:datapath=../flow_data/exs': SumMetric(22),
 'exs': SumMetric(1.848e+04),
 'flow:flow:2:datapath=../flow_data/accuracy': ExactMatchMetric(0.3636),
 'flow:flow:2:datapath=../flow_data/f1': F1Metric(0.4208),
 'flow:flow:2:datapath=../flow_data/bleu-4': BleuMetric(0.3182),
 'flow:flow:2:datapath=../flow_data/hits@1': AverageMetric(0.3636),
 'flow:flow:2:datapath=../flow_data/hits@5': AverageMetric(0.5909),
 'flow:flow:2:datapath=../flow_data/hits@10': AverageMetric(0.9545),
 'flow:flow:2:datapath=../flow_data/hits@100': AverageMetric(0.9545),
 'flow:flow:2:datapath=../flow_data/llen': AverageMetric(13.41),
 'flow:flow:2:datapath=../flow_data/ltrunc': AverageMetric(0),
 'flow:flow:2:datapath=../flow_data/ltrunclen': AverageMetric(0),
 'flow:flow:2:datapath=../flow_data/loss': AverageMetric(3.996),
 'flow:flow:2:datapath=../flow_data/rank': AverageMetric(3.952),
 'flow:flow:2:datapath=../flow_data/mrr': AverageMetric(0.5167),
 'exps': GlobalTimerMetric(34.13),
 'ltpb': GlobalAv