# Practice 2. ConvLab을 이용한 Pipelined 대화 시스템 구축

# 목차

## Step 1. MultiWOZ 데이터셋을 살펴보자

## Step 2. ConvLab을 활용해 Pipelined 대화 시스템을 구축하자

## Step 3. ConvLab에서 제공하는 모듈들로 모델들을 구성 및 진단하고, 평가하자

## Additional. End-to-end Neural Pipeline (ACL 2020) 모델을 사용해보자

# Step 1. MultiWOZ 데이터셋을 살펴보자

## Step 1.0 필요한 module들을 정의합니다

아래 코드를 도와주는 module들 이며, 필요시 수정할 수 있습니다.

In [2]:
import json
import os
import zipfile
import sys
from collections import Counter
from nltk.tokenize import word_tokenize

from textwrap import indent
from pprint import pformat
from pprint import pprint

def read_zipped_json(filepath, filename):
    """Load a JSON document stored as a member of a zip archive.

    Args:
        filepath: Path of the zip archive on disk.
        filename: Name of the JSON member inside the archive.

    Returns:
        The object produced by ``json.load`` for that member.
    """
    # Context managers close both the archive and the member stream once
    # parsing finishes, so no file handle is left open.
    with zipfile.ZipFile(filepath, 'r') as archive:
        with archive.open(filename) as member:
            return json.load(member)


def pprint_manual(user_manual, name):
    """Print the user manual (goal message) under a fixed header.

    When ``name`` contains 'WOZ', ``user_manual`` is printed as one value.
    Otherwise ``user_manual`` is iterated and each entry gets its own line.
    """
    print('    User manual (message) : ')
    pad = " " * 8
    entries = [user_manual] if 'WOZ' in name else user_manual
    for entry in entries:
        print(pad, entry)


def pprint_goal(goal, name):
    """Print a dialogue's user goal, then its user manual.

    For dialogues whose ``name`` does not contain 'WOZ', the emphasis
    ``<span>`` markup is stripped from every entry of ``goal['message']``;
    the cleaned text is written back into ``goal`` in place.  Each non-empty
    entry of ``goal`` other than 'message' is then pretty-printed, and the
    message is handed to ``pprint_manual``.
    """
    if 'WOZ' not in name:
        messages = goal['message']
        for idx, raw in enumerate(messages):
            cleaned = raw.replace("<span class='emphasis'>", '')
            messages[idx] = cleaned.replace('</span>', '')

    print("[Goals]")
    user_manual = None
    for key, value in goal.items():
        if not value:
            # Empty entry: nothing to show.
            continue
        if key == 'message':
            # Kept aside so it is printed last, after the other entries.
            user_manual = value
            continue
        print(indent(pformat({key: value}), ' ' * 4))
    pprint_manual(user_manual, name)
    

def get_valid_domains(goal):
    """Return the keys of ``goal`` that have a non-empty value, excluding 'message'."""
    return [key for key, value in goal.items() if value and key != 'message']


def pprint_turns(log, domains):
    """Pretty-print dialogue turns one at a time, pausing before each turn.

    Before every turn the function waits on ``input()``; pressing Enter shows
    the turn, while any input containing 'stop' ends the printing.

    The turns in ``log`` are not modified: a filtered view of each turn is
    built for display, so the same dialogue can be printed repeatedly.

    Args:
        log: Sequence of turn dicts, each providing the keys 'metadata',
            'dialog_act' and 'text' (a 'span_info' key, if present, is hidden).
        domains: Domain names to keep when showing the turn's 'metadata'.

    Returns:
        True if the user typed 'stop', False otherwise (including when
        ``log`` is empty).
    """
    stopped = False

    for i, log_one in enumerate(log):

        # input() is only used to pause between turns
        print('-' * 20 + '1. Enter to keep going 2. Type \'stop\' and Enter to stop printing ' + '-' * 40)
        if 'stop' in input():
            stopped = True
            break

        metadata = log_one['metadata']

        # a turn with non-empty metadata is treated as a system turn
        is_sys_turn = bool(metadata)

        # keep only the requested domains, without touching the source dict
        dialogue_state = {dom: val for dom, val in metadata.items() if dom in domains}

        # display copy: drop span_info and rename the three main fields so
        # that they sort in reading order when pretty-printed
        hidden = ('span_info', 'metadata', 'dialog_act', 'text')
        view = {key: val for key, val in log_one.items() if key not in hidden}
        view['1. dialogue_state'] = dialogue_state
        view['2. dialogue_act'] = log_one['dialog_act']
        view['3. text'] = log_one['text']

        print("[SYS]" if is_sys_turn else "[USR]", end=" ")
        print("(turn {})".format(i))
        print(indent(pformat(view, width=100), ' ' * 4))

    return stopped

### Step 1.1 MultiWOZ 데이터셋을 불러옵니다.

MultiWOZ 데이터셋은 7개의 domain ('hotel', 'train', 'attraction', 'restaurant', 'taxi', 'police', 'hospital') 으로 구성되어 있으며, 여행정보를 얻고자 하는 'user' 와 이를 도와주는 'system'이 나누는 대화에 대한 데이터셋입니다.
약 10,000개의 대화 뭉치로 구성되어 있으며, train, validation, test용으로 구분되어 있습니다.

아래 코드를 실행하면, MultiWOZ 데이터 셋 내 train용 데이터 이름 100개가 출력 됩니다.

In [3]:
# Resolve the working directory and the directory for processed data.
cur_dir = os.path.abspath(os.curdir)
print("current directory :", cur_dir)
data_dir = "ConvLab-2/data/multiwoz"
processed_data_dir = os.path.join(cur_dir, 'multiwoz_data/all_data')
# exist_ok=True replaces the separate existence check: it is a no-op when the
# directory is already there and avoids the check-then-create race.
os.makedirs(processed_data_dir, exist_ok=True)

# Load each split from its zipped JSON file, keyed by split name.
data_key = ['train', 'val', 'test']
data = {}
for key in data_key:
    data[key] = read_zipped_json(os.path.join(data_dir, key + '.json.zip'), key + '.json')
    print('load {}.json...! '.format(os.path.join(data_dir, key)))
    print('number of dialogues : {}'.format(len(data[key])))
print()

# print the first 100 dialogue names of the train split
print(list(data['train'].keys())[:100])


current directory : /home/donghoon/PycharmProjects/samsung_dialogue_tutorial
load ConvLab-2/data/multiwoz/train.json...! 
number of dialogues : 8434
load ConvLab-2/data/multiwoz/val.json...! 
number of dialogues : 999
load ConvLab-2/data/multiwoz/test.json...! 
number of dialogues : 1000

['SNG01856', 'SNG0129', 'MUL2168', 'SNG01445', 'MUL2105', 'PMUL1690', 'MUL2395', 'SNG0190', 'PMUL1170', 'SNG01741', 'PMUL4899', 'MUL2261', 'SSNG0348', 'MUL0784', 'MUL0886', 'PMUL2512', 'SNG0548', 'MUL1474', 'PMUL4372', 'PMUL4047', 'PMUL0151', 'MUL0586', 'PMUL3552', 'PMUL1539', 'MUL1790', 'PMUL3021', 'SNG0699', 'SNG0228', 'PMUL3296', 'MUL1434', 'PMUL2203', 'PMUL3250', 'PMUL0510', 'MUL1124', 'PMUL3719', 'SNG0297', 'PMUL2049', 'SNG01722', 'PMUL2100', 'MUL1853', 'MUL2694', 'SNG1006', 'SNG1345', 'MUL1299', 'MUL1490', 'PMUL2749', 'MUL1628', 'PMUL2202', 'SNG01450', 'SNG0131', 'SNG0984', 'PMUL1419', 'SNG0798', 'MUL0161', 'PMUL2803', 'MUL0925', 'MUL1005', 'SNG0104', 'SNG1197', 'MUL1265', 'WOZ20259', 'MUL1223',

## Step 1.2 데이터가 어떻게 생겼는지 살펴봅시다.

위에 출력된 데이터 이름들 중 몇 개를 파이썬 리스트 안에 넣어 dialogue 를 확인할 수 있습니다. (ex. dialogue_names = \['SNG0943', 'MUL1801'])

한 dialogue는 크게

1. user의 goal (코드 상에서 'goal')

2. system과 user의 대화 (코드 상에서 'dialogue_turns')

로 구분 됩니다.

***

user (\[USR])는 정의된 goal 및 manual을 읽고, 해당 goal을 달성하기 위해 대화를 수행합니다.

system (\[SYS])은 user의 goal을 알지 못하고, 대화를 통해 (1)user가 원하는 조건을 파악하고, (2)user가 원하는 정보를 제공하며, (3)필요시 예약을 수행합니다.

***

goal 내에서,

`info`는 user 입장에서, system에게 user가 원하는 조건 및 니즈를 알려주고자(inform) 하는 내용이고,

`reqt`는 user 입장에서, system에게 user가 요청하고자(request) 하는 내용입니다.

***

본 데이터셋의 경우 __system model을 만드는 것__을 목표로 합니다.

***

아래 코드를 실행하면 goal 및 발화를 살펴볼 수 있으며, Enter로 넘길 수 있습니다. 

그만 보고 싶다면 stop을 입력 후 Enter 합니다.

In [4]:
# You can handle dialogue_names
dialogue_names = ['SNG0943', 'MUL1801']

for name in dialogue_names:
    
    print()
    print('-' * 125)
    print("[Dialogue name] \'{}\'".format(name))

    # access datum using name
    datum = data['train'][name]
    goal = datum['goal']
    dialogue_turns = datum['log']

    # print goal and dialogue turns
    pprint_goal(goal, name)
    valid_domains = get_valid_domains(goal)
    break_signal = pprint_turns(dialogue_turns, valid_domains)  # If you don't want to see print, please comment!
    # break_signal = pprint_turns(dialogue_turns, valid_domains)    Like this!

    if break_signal:
        break


-----------------------------------------------------------------------------------------------------------------------------
[Dialogue name] 'SNG0943'
[Goals]
    {'hotel': {'fail_info': {},
               'info': {'internet': 'yes',
                        'parking': 'yes',
                        'stars': '4',
                        'type': 'guesthouse'},
               'reqt': ['address', 'pricerange']}}
    User manual (message) : 
         You are looking for a place to stay. The hotel should have a star of 4 and should include free parking
         The hotel should be in the type of guesthouse and should include free wifi
         Make sure you get address and price range
--------------------1. Enter to keep going 2. Type 'stop' and Enter to stop printing ----------------------------------------

[USR] (turn 0)
    {'1. dialogue_state': {},
     '2. dialogue_act': {'Hotel-Inform': [['Parking', 'yes'], ['Stars', '4']]},
     '3. text': "Howdy ! I 'm in town for the night and ne

# Step 2. ConvLab을 활용해 Pipelined 대화 시스템을 구축하자

## Step 2.0 필요한 module들을 정의합니다

아래 코드를 도와주는 module들 이며, 필요시 수정할 수 있습니다.

In [5]:
# common import: convlab2.$module.$model.$dataset
from convlab2.nlu.jointBERT.multiwoz import BERTNLU
from convlab2.nlu.milu.multiwoz import MILU
from convlab2.dst.rule.multiwoz import RuleDST
from convlab2.policy.rule.multiwoz import RulePolicy
from convlab2.nlg.template.multiwoz import TemplateNLG
from convlab2.dialog_agent import BiSession, Agent # , # PipelineAgent
from convlab2.evaluator.multiwoz_eval import MultiWozEvaluator
from pprint import pprint
import random
import numpy as np
import torch
import spacy

import logging 
# Block unnecessary logging: raise the level of several chatty third-party
# loggers to WARNING so their INFO/DEBUG records are dropped.
mpl_logger = logging.getLogger('matplotlib') 
mpl_logger.setLevel(logging.WARNING) 
cntp_logger = logging.getLogger('urllib3.connectionpool')
cntp_logger.setLevel(logging.WARNING)
ttu_logger = logging.getLogger('transformers.tokenization_utils')
ttu_logger.setLevel(logging.WARNING)
tcu_logger = logging.getLogger('transformers.configuration_utils')
tcu_logger.setLevel(logging.WARNING)
tmu_logger = logging.getLogger('transformers.modeling_utils')
tmu_logger.setLevel(logging.WARNING)
# The root logger itself stays at INFO.
logging.getLogger().setLevel(logging.INFO)
import warnings
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Download the small English spaCy model, then load it once; the loaded
# pipeline is not bound to a name here.
spacy.cli.download('en_core_web_sm')
spacy.load('en_core_web_sm')

from convlab2.nlu import NLU
from convlab2.dst import DST
from convlab2.policy import Policy
from convlab2.nlg import NLG
from copy import deepcopy

class PipelineAgent(Agent):
    r"""Pipeline dialog agent base class, including NLU, DST, Policy and NLG.

    The combination modes of pipeline agent modules are flexible. The only thing you have to make sure is that
    the API of agents are matched.

    Example:
        If agent A is (nlu, tracker, policy), then the agent B should be like (tracker, policy, nlg) to ensure API
        matching.
    The valid module combinations are as follows:
           =====   =====    ======  ===     ==      ===
            NLU     DST     Policy  NLG     In      Out
           =====   =====    ======  ===     ==      ===
            \+      \+        \+    \+      nl      nl
             o      \+        \+    \+      da      nl
             o      \+        \+     o      da      da
            \+      \+        \+     o      nl      da
             o       o        \+     o      da      da
           =====   =====    ======  ===     ==      ===
    """

    def __init__(self, nlu: NLU, dst: DST, policy: Policy, nlg: NLG, name: str):
        """The constructor of PipelineAgent class.

        Here are some special combination cases:

            1. If you use word-level DST (such as Neural Belief Tracker), you should set the nlu parameter
               to None. The agent will combine the modules automatically.

            2. If you want to aggregate DST and Policy as a single module, set dst to None.

        Args:
            nlu (NLU):
                The natural language understanding module of agent.

            dst (DST):
                The dialog state tracker of agent.

            policy (Policy):
                The dialog policy module of agent.

            nlg (NLG):
                The natural language generator module of agent.

            name (str):
                Role of the agent; must be 'user' or 'sys'.
        """
        super().__init__(name=name)
        assert self.name in ['user', 'sys']
        # Compare strings by value: `is` tests identity and only appeared to
        # work because of string interning.
        self.opponent_name = 'user' if self.name == 'sys' else 'sys'
        self.nlu = nlu
        self.dst = dst
        self.policy = policy
        self.nlg = nlg
        # Defined up front so get_in_da()/get_out_da() return None instead of
        # raising AttributeError before the first call to response().
        self.input_action = None
        self.output_action = None
        # init_session() also resets self.history to an empty list.
        self.init_session()

        self.print_nlu = False
        self.print_dst = False
        self.print_pol = False

    def response(self, observation, print_nlu=False, print_dst=False, print_pol=False):
        """Generate agent response using the agent modules.

        Args:
            observation: The opponent's latest output, passed to the NLU
                module when there is one and used directly as the input
                action otherwise.
            print_nlu: If True, print the input action after the NLU step.
            print_dst: If True, print the state obtained after the DST step.
            print_pol: If True, print the action returned by the policy.

        Returns:
            The NLG output when an NLG module is set, otherwise the policy's
            output action.
        """
        # Note: If you modify the logic of this function, please ensure that it is consistent with deploy.server.ServerCtrl._turn()

        self.print_nlu = print_nlu
        self.print_dst = print_dst
        self.print_pol = print_pol

        if self.dst is not None:
            self.dst.state['history'].append([self.opponent_name, observation]) # [['sys', sys_utt], ['user', user_utt],...]
        self.history.append([self.opponent_name, observation])
        # get dialog act
        if self.nlu is not None:
            # context is every earlier utterance, excluding the one just appended
            self.input_action = self.nlu.predict(observation, context=[x[1] for x in self.history[:-1]])
        else:
            self.input_action = observation
        self.input_action = deepcopy(self.input_action) # get rid of reference problem
        if self.print_nlu:
            print("nlu predict")
            pprint(self.input_action)
        # get state
        if self.dst is not None:
            if self.name == 'sys':
                self.dst.state['user_action'] = self.input_action
            else:
                self.dst.state['system_action'] = self.input_action
            state = self.dst.update(self.input_action)
        else:
            state = self.input_action
        state = deepcopy(state) # get rid of reference problem
        if self.print_dst:
            print("dialogue state predict")
            if self.dst is not None:
                pprint({'dialogue state': state['belief_state'], 'history': state['history']})
            else:
                # Without a tracker, `state` is just the input action, so it
                # is printed as-is rather than indexed like a tracker state.
                pprint(state)
        # get action
        self.output_action = deepcopy(self.policy.predict(state)) # get rid of reference problem
        if self.print_pol:
            print("dialogue act predict")
            pprint(self.output_action)

        # get model response
        if self.nlg is not None:
            model_response = self.nlg.generate(self.output_action)
        else:
            model_response = self.output_action
        if self.dst is not None:
            self.dst.state['history'].append([self.name, model_response])
            if self.name == 'sys':
                self.dst.state['system_action'] = self.output_action
            else:
                self.dst.state['user_action'] = self.output_action
        self.history.append([self.name, model_response])
        return model_response

    def is_terminated(self):
        """Return the policy's ``is_terminated()`` result, or None if the policy has no such method."""
        if hasattr(self.policy, 'is_terminated'):
            return self.policy.is_terminated()
        return None

    def get_reward(self):
        """Return the policy's ``get_reward()`` result, or None if the policy has no such method."""
        if hasattr(self.policy, 'get_reward'):
            return self.policy.get_reward()
        return None

    def init_session(self):
        """Call ``init_session()`` on every module that is set and clear the agent's history."""
        if self.nlu is not None:
            self.nlu.init_session()
        if self.dst is not None:
            self.dst.init_session()
            if self.name == 'sys':
                # the system side starts its tracker history with a placeholder entry
                self.dst.state['history'].append([self.name, 'null'])
        if self.policy is not None:
            self.policy.init_session()
        if self.nlg is not None:
            self.nlg.init_session()
        self.history = []

    def get_in_da(self):
        """Return the input action of the latest turn (None before the first ``response`` call)."""
        return self.input_action

    def get_out_da(self):
        """Return the output action of the latest turn (None before the first ``response`` call)."""
        return self.output_action


I0810 03:56:16.787091 139710458230592 file_utils.py:35] PyTorch version 1.1.0 available.
I0810 03:56:19.440644 139710458230592 modeling_bert.py:226] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
I0810 03:56:19.444469 139710458230592 modeling_xlnet.py:339] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .
I0810 03:56:19.608417 139710458230592 registrable.py:73] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
I0810 03:56:19.610358 139710458230592 registrable.py:73] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
I0810 03:56:19.611890 139710458230592 registrable.py:73] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>
I0810 03:56:19.613340 139710458230592 registrable.py:73] instantiating registered subclass relu of <class 'allennlp.nn.activations.Activation'>


Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.
[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')


## Step 2.1 pipelined 대화 시스템 예시를 살펴봅시다.

우선, system model을 pipelined 대화 시스템으로 구성했을 때, 대화를 어떻게 수행할 수 있는지 살펴봅시다.

pipelined 대화 모델은 크게 4가지로 구성되어 있습니다.

NLU (Natural Language Understanding (언어 인식 모듈)) : 직전의 상대방 발화를 이해 및 해석 합니다.

DST (Dialogue State Tracking (대화 상태 추적 모듈)) : 현재까지 대화의 맥락을 추적하고, 변경 사항을 업데이트 합니다.

Dialogue Policy (대화 정책 모듈) : 다음 발화를 위해 구조화된 단어 형태로 정책을 결정 합니다. (자연스러운 문장의 형태가 아닌, 의도만을 결정합니다.)

NLG (Natural Language Generation (언어 생성 모듈)) : 결정된 정책을 가지고, 사람이 이해할 수 있는 자연어를 생성합니다.

-----------------

아래는 가장 기본적인 Pipelined 대화 시스템을 구성한 예시입니다.

MILU : 아래 코드에서 사용하는 NLU module (앞선 practice 1에서 다룬 BERT NLU로 교체할 수도 있습니다.)

RuleDST : Rule 기반 DST module

RulePolicy : Rule 기반 Policy module

TemplateNLG : Template 기반 (정해진 템플릿 위에서 단어를 채워넣는 방식) NLG module

4가지 모듈에 대해 정의를 끝마쳤다면, PipelineAgent라는 wrapper 에 씌워 sys_agent를 선언합니다.

In [6]:
# NLU: MILU
sys_nlu = MILU()
# DST: simple rule-based tracker
sys_dst = RuleDST()
# Policy: rule-based
sys_policy = RulePolicy()
# NLG: template-based, system side
sys_nlg = TemplateNLG(is_user=False)
# Wire the four modules together as the system agent.
sys_agent = PipelineAgent(
    nlu=sys_nlu,
    dst=sys_dst,
    policy=sys_policy,
    nlg=sys_nlg,
    name='sys',
)

Load from https://convlab.blob.core.windows.net/convlab-2/milu_multiwoz_all_context.tar.gz


I0810 03:56:31.223443 139710458230592 archival.py:173] loading archive file /home/donghoon/.convlab2/cache/adeae6e5151198de3b1634fcb630a0243cd2e098a75703260c4489c6e92a5f51.4fff038b1c36dbb489d93575f604cf72276b651ac6b370b5a48f8f67df2ed971
I0810 03:56:31.224578 139710458230592 archival.py:182] extracting archive file /home/donghoon/.convlab2/cache/adeae6e5151198de3b1634fcb630a0243cd2e098a75703260c4489c6e92a5f51.4fff038b1c36dbb489d93575f604cf72276b651ac6b370b5a48f8f67df2ed971 to temp dir /tmp/tmpelk1y8yc
I0810 03:56:31.346318 139710458230592 registrable.py:73] instantiating registered subclass milu of <class 'allennlp.models.model.Model'>
I0810 03:56:31.347058 139710458230592 params.py:265] type = default
I0810 03:56:31.347500 139710458230592 registrable.py:73] instantiating registered subclass default of <class 'allennlp.data.vocabulary.Vocabulary'>
I0810 03:56:31.347939 139710458230592 vocabulary.py:306] Loading token dictionary from /tmp/tmpelk1y8yc/vocabulary.
I0810 03:56:31.364222 139

I0810 03:56:31.387293 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.vocab_namespace = tokens
I0810 03:56:31.387852 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.embedding_dim = 50
I0810 03:56:31.388443 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.pretrained_file = None
I0810 03:56:31.388859 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.projection_dim = None
I0810 03:56:31.389339 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.trainable = True
I0810 03:56:31.389777 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.padding_index = None
I0810 03:56:31.391076 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.max_norm = None
I0810 03:56:31.391502 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.norm_type = 2.0
I0810 03:56:31.391897 139

I0810 03:56:31.536191 139710458230592 initializers.py:328]    intent_encoder._module.weight_ih_l0
I0810 03:56:31.536708 139710458230592 initializers.py:328]    intent_encoder._module.weight_ih_l0_reverse
I0810 03:56:31.537280 139710458230592 initializers.py:328]    intent_projection_layer.bias
I0810 03:56:31.538460 139710458230592 initializers.py:328]    intent_projection_layer.weight
I0810 03:56:31.539056 139710458230592 initializers.py:328]    tag_projection_layer._module.bias
I0810 03:56:31.539602 139710458230592 initializers.py:328]    tag_projection_layer._module.weight
I0810 03:56:31.540225 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._embedding._module.weight
I0810 03:56:31.540777 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._encoder._module.conv_layer_0.bias
I0810 03:56:31.541268 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._encoder._modul

sys_agent.response("user의 발화", print_nlu=False, print_dst=False, print_pol=False) 를 하면, user의 발화에 대한 응답을 합니다.

print_nlu, print_dst, print_pol을 True로 변경하면, 해당하는 value를 print 해볼 수 있습니다.

In [7]:
sys_agent.init_session()
sys_agent.response("I want to find a moderate hotel", print_nlu=False, print_dst=False, print_pol=False)

'Would lovell lodge work for you ? I have 3 options for you.'

In [8]:
sys_agent.response("Which type of hotel is it ?")

'It is a hotel .'

In [9]:
sys_agent.response("OK , where is its address ?")

'The address is 74 chesterton road .'

In [10]:
sys_agent.response("Thank you !")

'You are welcome.'

In [11]:
sys_agent.response("Try to find me a Chinese restaurant in south area .")

'The good luck chinese food takeaway matches your description . I have 3 options for you.'

In [12]:
sys_agent.response("Which kind of food it provides ?")

'It is chinese food .'

In [13]:
sys_agent.response("Book a table for 5 , this Sunday .")

'Booking was successful . Reference number is : 00000003 .'

## Step 2.2 system agent와 대화할 user simulator를 구성해봅시다.

system model의 성능을 알아보기 위해서는 user 역할을 할 수 있는 user simulator가 필요합니다. 

사람이 매번 user의 역할을 하여 대화를 주고 받는 것은 많은 노동력을 필요로 하기 때문입니다. 

특히, Dialog Policy를 RL agent로 두었을 때, 다양한 대화 시도를 위해서 user simulator 는 필수적입니다.

ConvLab에서는 RulePolicy(character='usr')로 두었을 때, `Agenda` policy로 변환되며, 이는 user의 goal을 기반으로 하는 user model을 정의할 수 있습니다. 

또한, `Agenda` policy는 dst 모델까지 함께 포함하고 있기 때문에 `user_dst = None` 이 됩니다.


In [14]:
# NLU: MILU
user_nlu = MILU()
# No separate DST on the user side.
user_dst = None
# Policy: rule policy in user mode   # UserPolicyAgendaMultiWoz()
user_policy = RulePolicy(character='usr')
# NLG: template-based, user side
user_nlg = TemplateNLG(is_user=True)
# Alternative NLG: user_nlg = SCLSTM(is_user=True)
# Wire the modules together as the user simulator.
user_agent = PipelineAgent(
    nlu=user_nlu,
    dst=user_dst,
    policy=user_policy,
    nlg=user_nlg,
    name='user',
)

Load from https://convlab.blob.core.windows.net/convlab-2/milu_multiwoz_all_context.tar.gz


I0810 03:57:00.125682 139710458230592 archival.py:173] loading archive file /home/donghoon/.convlab2/cache/adeae6e5151198de3b1634fcb630a0243cd2e098a75703260c4489c6e92a5f51.4fff038b1c36dbb489d93575f604cf72276b651ac6b370b5a48f8f67df2ed971
I0810 03:57:00.126493 139710458230592 archival.py:182] extracting archive file /home/donghoon/.convlab2/cache/adeae6e5151198de3b1634fcb630a0243cd2e098a75703260c4489c6e92a5f51.4fff038b1c36dbb489d93575f604cf72276b651ac6b370b5a48f8f67df2ed971 to temp dir /tmp/tmposxwr65b
I0810 03:57:00.222218 139710458230592 registrable.py:73] instantiating registered subclass milu of <class 'allennlp.models.model.Model'>
I0810 03:57:00.223016 139710458230592 params.py:265] type = default
I0810 03:57:00.223470 139710458230592 registrable.py:73] instantiating registered subclass default of <class 'allennlp.data.vocabulary.Vocabulary'>
I0810 03:57:00.223851 139710458230592 vocabulary.py:306] Loading token dictionary from /tmp/tmposxwr65b/vocabulary.
I0810 03:57:00.237730 139

I0810 03:57:00.255993 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.vocab_namespace = tokens
I0810 03:57:00.256966 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.embedding_dim = 50
I0810 03:57:00.257428 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.pretrained_file = None
I0810 03:57:00.257823 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.projection_dim = None
I0810 03:57:00.258263 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.trainable = True
I0810 03:57:00.258663 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.padding_index = None
I0810 03:57:00.259059 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.max_norm = None
I0810 03:57:00.259510 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.norm_type = 2.0
I0810 03:57:00.259939 139

I0810 03:57:00.393446 139710458230592 initializers.py:328]    intent_encoder._module.weight_ih_l0
I0810 03:57:00.395200 139710458230592 initializers.py:328]    intent_encoder._module.weight_ih_l0_reverse
I0810 03:57:00.395625 139710458230592 initializers.py:328]    intent_projection_layer.bias
I0810 03:57:00.396020 139710458230592 initializers.py:328]    intent_projection_layer.weight
I0810 03:57:00.396461 139710458230592 initializers.py:328]    tag_projection_layer._module.bias
I0810 03:57:00.396895 139710458230592 initializers.py:328]    tag_projection_layer._module.weight
I0810 03:57:00.397929 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._embedding._module.weight
I0810 03:57:00.398365 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._encoder._module.conv_layer_0.bias
I0810 03:57:00.398777 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._encoder._modul

Loading goal model is done


## Step 2.3 user simulator 와 system model 간 대화를 수행해봅시다.

지금까지, 우리는 user simulator와 system model을 정의 했습니다.

`MultiWozEvaluator` 클래스는 성능을 평가하기 위해 사용됩니다. (user의 goal을 정의해줍니다.)

`BiSession` 클래스는 user simulator와 system model의 대화 및 평가를 도와줍니다.

`next_turn` 함수는 한 턴의 대화를 수행합니다.

### 평가 지표

success rate : 예약 성사 + recall == 1, 즉 유저의 조건에 부합하는 예약을 해내고, 물어보는 모든 정보에 대해 알맞은 값을 출력

book rate : 예약 성사율 (= 예약 성공 수 / 예약 정답 수) 

Inform precision : (TP) / (TP + FP), precision이 낮다는 것은 요청한 slot 외 불필요한 정보를 많이 알려주는 것으로 해석할 수 있음.

Inform recall : (TP) / (TP + FN), recall이 낮다는 것은 요청한 slot에 대해 대답하지 못한 것으로 해석 할 수 있음.

Inform F1 : Precision & Recall에 대한 조화 평균



In [15]:
def set_seed(r_seed):
    """Seed Python's ``random``, NumPy and PyTorch with the same value."""
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(r_seed)

# Pair the system agent with the user simulator; the evaluator is handed to
# the session and read back through sess.evaluator below.
evaluator = MultiWozEvaluator()
sess = BiSession(sys_agent=sys_agent, user_agent=user_agent, kb_query=None, evaluator=evaluator)

# Seed the RNGs before the session is initialised.
set_seed(20200804)

# The first turn starts from an empty system response.
sys_response = ''
sess.init_session()
print('init goal:')
pprint(sess.evaluator.goal)
print('-'*100)
# Run at most 20 turns, feeding the previous system response back in and
# stopping early once the session reports it is over.
for i in range(20):
    sys_response, user_response, session_over, reward = sess.next_turn(sys_response)
    print('user:', user_response)
    print('sys:', sys_response)
    print()
    if session_over is True:
        break
# Report the evaluator's metrics for the finished dialogue.
print('task success:', sess.evaluator.task_success())
print('book rate:', sess.evaluator.book_rate())
print('inform precision/recall/f1:', sess.evaluator.inform_F1())
print('-'*50)
print('final goal:')
pprint(sess.evaluator.goal)

print('='*100)

init goal:
{'attraction': {'info': {'area': 'centre', 'type': 'museum'},
                'reqt': {'address': '?', 'phone': '?'}},
 'train': {'info': {'arriveBy': '08:30',
                    'day': 'monday',
                    'departure': 'cambridge',
                    'destination': 'peterborough'},
           'reqt': {'price': '?', 'trainID': '?'}}}
----------------------------------------------------------------------------------------------------
user: I also need a train. I need some information on a train going to peterborough . I would like to leave on monday. I 'd like to arrive by 08:30 .
sys: Where are you departing from ?

user: I ' m also looking for a train from cambridge .
sys: Would you like me to book the 08:30 train ? I can book your tickets for monday . Woudl you like me to book a train to peterborough for you ? Woudl you like me to book a train from cambridge for you ?

user: I need to book a train to peterborough .
sys: I can get you tickets for an arrival time 

# Step 3. ConvLab에서 제공하는 모듈들로 모델들을 구성 및 진단하고, 평가하자

## Step 3.0. ConvLab 에서 지원하는 모델들을 load 합니다.

이용가능한 model들:

- NLU: BERTNLU, MILU, SVMNLU
- DST: RuleDST
- Word-DST: SUMBT, TRADE (set `sys_nlu` to `None`)
- Policy: RulePolicy, Imitation, REINFORCE, PPO, GDPL
- Word-Policy: MDRG, HDSA, LaRL (set `sys_nlg` to `None`)
- NLG: Template, SCLSTM
- End2End: Sequicity, DAMD, RNN_rollout (directly used as `sys_agent`)
- Simulator policy: Agenda, VHUS (for `user_policy`)


In [16]:
# available NLU models
from convlab2.nlu.svm.multiwoz import SVMNLU
from convlab2.nlu.jointBERT.multiwoz import BERTNLU
from convlab2.nlu.milu.multiwoz import MILU
# available DST models
from convlab2.dst.rule.multiwoz import RuleDST
#from convlab2.dst.mdbt.multiwoz import MDBT
from convlab2.dst.sumbt.multiwoz import SUMBT
from convlab2.dst.trade.multiwoz import TRADE
# available Policy models
from convlab2.policy.rule.multiwoz import RulePolicy
from convlab2.policy.ppo.multiwoz import PPOPolicy
from convlab2.policy.pg.multiwoz import PGPolicy
from convlab2.policy.mle.multiwoz import MLEPolicy
from convlab2.policy.gdpl.multiwoz import GDPLPolicy
#from convlab2.policy.vhus.multiwoz import UserPolicyVHUS
from convlab2.policy.mdrg.multiwoz import MDRGWordPolicy
from convlab2.policy.hdsa.multiwoz import HDSA
from convlab2.policy.larl.multiwoz import LaRL
# available NLG models
from convlab2.nlg.template.multiwoz import TemplateNLG
from convlab2.nlg.sclstm.multiwoz import SCLSTM
# available E2E models
from convlab2.e2e.sequicity.multiwoz import Sequicity
from convlab2.e2e.damd.multiwoz import Damd

Downloading from:  https://convlab.blob.core.windows.net/convlab-2/mdrg_model.zip
Load from https://convlab.blob.core.windows.net/convlab-2/mdrg_model.zip


I0810 03:57:21.985568 139710458230592 allennlp_file_utils.py:284] https://convlab.blob.core.windows.net/convlab-2/mdrg_model.zip not found in cache, downloading to /tmp/tmp7lpjyuwq
100%|██████████| 21577107/21577107 [00:07<00:00, 2887981.54B/s]
I0810 03:57:30.093079 139710458230592 allennlp_file_utils.py:297] copying /tmp/tmp7lpjyuwq to cache at /home/donghoon/.convlab2/cache/b0bc758ff68dc79ef5287ddd38b6267f8784df273b2e6f7e496a1e9031c65ca5.ea9a4a5a9034b22be1093ea89deb230956f14487e2c2441b9ee59cef0fc252a2
I0810 03:57:30.110977 139710458230592 allennlp_file_utils.py:301] creating metadata file for /home/donghoon/.convlab2/cache/b0bc758ff68dc79ef5287ddd38b6267f8784df273b2e6f7e496a1e9031c65ca5.ea9a4a5a9034b22be1093ea89deb230956f14487e2c2441b9ee59cef0fc252a2
I0810 03:57:30.111685 139710458230592 allennlp_file_utils.py:307] removing temp file /tmp/tmp7lpjyuwq


Extracting...
Downloading from:  https://convlab.blob.core.windows.net/convlab-2/mdrg_data.zip
Load from https://convlab.blob.core.windows.net/convlab-2/mdrg_data.zip


I0810 03:57:30.809415 139710458230592 allennlp_file_utils.py:284] https://convlab.blob.core.windows.net/convlab-2/mdrg_data.zip not found in cache, downloading to /tmp/tmpqjwx40jc
100%|██████████| 47104409/47104409 [00:18<00:00, 2574017.90B/s]
I0810 03:57:49.786106 139710458230592 allennlp_file_utils.py:297] copying /tmp/tmpqjwx40jc to cache at /home/donghoon/.convlab2/cache/00a406587d87174b74198f14cae25cd2054923a471c59233f27ec80caef23686.da1518d0f3a98f95e2be9aee8474275aa3e182c5b9faccf16e9deac38752afce
I0810 03:57:49.836905 139710458230592 allennlp_file_utils.py:301] creating metadata file for /home/donghoon/.convlab2/cache/00a406587d87174b74198f14cae25cd2054923a471c59233f27ec80caef23686.da1518d0f3a98f95e2be9aee8474275aa3e182c5b9faccf16e9deac38752afce
I0810 03:57:49.837951 139710458230592 allennlp_file_utils.py:307] removing temp file /tmp/tmpqjwx40jc


Extracting...
Downloading from:  https://convlab.blob.core.windows.net/convlab-2/mdrg_db.zip
Load from https://convlab.blob.core.windows.net/convlab-2/mdrg_db.zip


I0810 03:57:53.406880 139710458230592 allennlp_file_utils.py:284] https://convlab.blob.core.windows.net/convlab-2/mdrg_db.zip not found in cache, downloading to /tmp/tmpksyu804s
100%|██████████| 183081/183081 [00:00<00:00, 470357.32B/s]
I0810 03:57:54.337714 139710458230592 allennlp_file_utils.py:297] copying /tmp/tmpksyu804s to cache at /home/donghoon/.convlab2/cache/a9766cc757fb79e7ac5266715dd065c687f884e5dc06840bba4d3b07307eb95b.b7bac7303e20c54957b367fa386215aaa595d5df9fb04341554b2067d458679c
I0810 03:57:54.340874 139710458230592 allennlp_file_utils.py:301] creating metadata file for /home/donghoon/.convlab2/cache/a9766cc757fb79e7ac5266715dd065c687f884e5dc06840bba4d3b07307eb95b.b7bac7303e20c54957b367fa386215aaa595d5df9fb04341554b2067d458679c
I0810 03:57:54.341880 139710458230592 allennlp_file_utils.py:307] removing temp file /tmp/tmpksyu804s


Extracting...


[nltk_data] Downloading package stopwords to
[nltk_data]     /home/donghoon/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Step 3.1. ConvLab에서 지원하는 모델들을 가지고 나만의 대화시스템을 만들어 봅시다.

Word-DST 모델들은 NLU와 DST가 합쳐진 모델을 의미합니다. 따라서, 별도의 NLU 모델 없이 사용할 수 있습니다.

따라서, (1) NLU+RuleDST 또는 (2) Word-DST 로 조합이 가능합니다.

[주의!] Word-DST 의 경우 sys_nlu = None 이어야 합니다.

Word-Policy 모델들은 Dialogue Policy와 NLG가 합쳐진 모델을 의미합니다. 따라서, 별도의 NLG 모델 없이 사용할 수 있습니다.

따라서, (1) Policy+NLG 또는 (2) Word-Policy 로 조합이 가능합니다.

[주의!] Word-Policy 의 경우 sys_nlg = None 이어야 합니다.

`PipelineAgent` class를 이용해 Pipelined 대화 시스템을 만들 수 있습니다. 또는 End-to-End model을 `sys_agent`로 직접 사용할 수도 있습니다.



In [17]:
# Build the system-side agent of the pipelined dialogue system.
# Keep exactly one option active per stage; the commented-out lines are the
# alternative models that can be swapped in.

# Option A - NLU + RuleDST:
sys_nlu = MILU()
# sys_nlu = SVMNLU()
# sys_nlu = BERTNLU()
sys_dst = RuleDST()

# Option B - Word-DST (NLU and DST combined in a single model):
# sys_nlu = None
# sys_dst = SUMBT()
# sys_dst = TRADE()
#### (not working!) sys_dst = MDBT()   # the MDBT import is commented out as well

# [Caution] In the Word-DST case, sys_nlu must be None.

# Option A - Policy + NLG:
sys_policy = RulePolicy()
# sys_policy = PPOPolicy()
# sys_policy = PGPolicy()
# sys_policy = MLEPolicy()
# sys_policy = GDPLPolicy()
sys_nlg = TemplateNLG(is_user=False)
# sys_nlg = SCLSTM(is_user=False)

# Option B - Word-Policy (dialogue policy and NLG combined in a single model):
# sys_policy = LaRL()
# sys_policy = HDSA()
# sys_policy = MDRGWordPolicy()
# sys_nlg = None

# [Caution] In the Word-Policy case, sys_nlg must be None.

# Pass the agent name by keyword, matching how the user-side PipelineAgent
# is constructed elsewhere in this notebook (name='user').
sys_agent = PipelineAgent(sys_nlu, sys_dst, sys_policy, sys_nlg, name='sys')
# Alternatively, use an end-to-end model directly as the system agent:
# sys_agent = Sequicity()
# sys_agent = Damd()

Load from https://convlab.blob.core.windows.net/convlab-2/milu_multiwoz_all_context.tar.gz


I0810 03:58:12.280826 139710458230592 archival.py:173] loading archive file /home/donghoon/.convlab2/cache/adeae6e5151198de3b1634fcb630a0243cd2e098a75703260c4489c6e92a5f51.4fff038b1c36dbb489d93575f604cf72276b651ac6b370b5a48f8f67df2ed971
I0810 03:58:12.281775 139710458230592 archival.py:182] extracting archive file /home/donghoon/.convlab2/cache/adeae6e5151198de3b1634fcb630a0243cd2e098a75703260c4489c6e92a5f51.4fff038b1c36dbb489d93575f604cf72276b651ac6b370b5a48f8f67df2ed971 to temp dir /tmp/tmprlutkp_7
I0810 03:58:12.391796 139710458230592 registrable.py:73] instantiating registered subclass milu of <class 'allennlp.models.model.Model'>
I0810 03:58:12.392410 139710458230592 params.py:265] type = default
I0810 03:58:12.392838 139710458230592 registrable.py:73] instantiating registered subclass default of <class 'allennlp.data.vocabulary.Vocabulary'>
I0810 03:58:12.393239 139710458230592 vocabulary.py:306] Loading token dictionary from /tmp/tmprlutkp_7/vocabulary.
I0810 03:58:12.407484 139

I0810 03:58:12.431257 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.vocab_namespace = tokens
I0810 03:58:12.431625 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.embedding_dim = 50
I0810 03:58:12.431978 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.pretrained_file = None
I0810 03:58:12.432303 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.projection_dim = None
I0810 03:58:12.432699 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.trainable = True
I0810 03:58:12.433143 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.padding_index = None
I0810 03:58:12.433560 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.max_norm = None
I0810 03:58:12.433982 139710458230592 params.py:265] model.text_field_embedder.token_embedders.tokens.norm_type = 2.0
I0810 03:58:12.434392 139

I0810 03:58:12.577107 139710458230592 initializers.py:328]    intent_encoder._module.weight_ih_l0
I0810 03:58:12.577471 139710458230592 initializers.py:328]    intent_encoder._module.weight_ih_l0_reverse
I0810 03:58:12.577841 139710458230592 initializers.py:328]    intent_projection_layer.bias
I0810 03:58:12.578907 139710458230592 initializers.py:328]    intent_projection_layer.weight
I0810 03:58:12.579430 139710458230592 initializers.py:328]    tag_projection_layer._module.bias
I0810 03:58:12.579805 139710458230592 initializers.py:328]    tag_projection_layer._module.weight
I0810 03:58:12.580208 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._embedding._module.weight
I0810 03:58:12.580600 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._encoder._module.conv_layer_0.bias
I0810 03:58:12.580976 139710458230592 initializers.py:328]    text_field_embedder.token_embedder_token_characters._encoder._modul

앞에서 했던 방식대로, user simulator도 정의해줍니다.

(ConvLab에서는 `RulePolicy(character='usr')`로 지정하면 `Agenda` policy가 사용되며, 이를 통해 user의 goal을 기반으로 동작하는 user model을 정의할 수 있습니다.)

In [18]:
# Build the user simulator as a second PipelineAgent (name='user').
# The commented-out lines are alternative models for the same stage.

# NLU used by the simulator to understand system utterances.
user_nlu = BERTNLU()
# user_nlu = MILU()
# user_nlu = SVMNLU()
# No dialogue state tracker is used on the user side.
user_dst = None
# RulePolicy with character='usr'; the notebook text describes this as the
# goal-driven `Agenda` user policy.
user_policy = RulePolicy(character='usr')
# NOTE: the UserPolicyVHUS import is commented out in the import cell, so it
# must be re-enabled there before this alternative can be used.
# user_policy = UserPolicyVHUS(load_from_zip=True)
# is_user=True selects the user-side variant of the NLG.
user_nlg = TemplateNLG(is_user=True)
# user_nlg = SCLSTM(is_user=True)
user_agent = PipelineAgent(user_nlu, user_dst, user_policy, user_nlg, name='user')

load train, size 8434
load val, size 999
load test, size 1000
loaded train, size 113500
loaded val, size 14730
loaded test, size 14744
dialog act num: 36
sentence label num: 137
tag num: 331


I0810 03:58:33.830533 139710458230592 file_utils.py:362] https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt not found in cache or force_download set to True, downloading to /tmp/tmpgoc816tq
I0810 03:58:35.263275 139710458230592 file_utils.py:377] copying /tmp/tmpgoc816tq to cache at /home/donghoon/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
I0810 03:58:35.264796 139710458230592 file_utils.py:381] creating metadata file for /home/donghoon/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
I0810 03:58:35.265638 139710458230592 file_utils.py:390] removing temp file /tmp/tmpgoc816tq


intent num: 137
tag num: 331
Load from model_file param
Load from https://convlab.blob.core.windows.net/convlab-2/bert_multiwoz_all_context.zip


I0810 03:58:35.878661 139710458230592 allennlp_file_utils.py:284] https://convlab.blob.core.windows.net/convlab-2/bert_multiwoz_all_context.zip not found in cache, downloading to /tmp/tmptdc44s1z
100%|██████████| 425713245/425713245 [02:20<00:00, 3037041.82B/s]
I0810 04:00:56.721940 139710458230592 allennlp_file_utils.py:297] copying /tmp/tmptdc44s1z to cache at /home/donghoon/.convlab2/cache/fe2b28201c498bb510ae89111e6bb1710a013c23920583adafc3f2b140376b90.4fbb3a3c9025fd8bc79740ec9ac9f931fb88de0cece837a569fba203fa3df2a0
I0810 04:00:57.098271 139710458230592 allennlp_file_utils.py:301] creating metadata file for /home/donghoon/.convlab2/cache/fe2b28201c498bb510ae89111e6bb1710a013c23920583adafc3f2b140376b90.4fbb3a3c9025fd8bc79740ec9ac9f931fb88de0cece837a569fba203fa3df2a0
I0810 04:00:57.099168 139710458230592 allennlp_file_utils.py:307] removing temp file /tmp/tmptdc44s1z


Load from /home/donghoon/PycharmProjects/ConvLab-2/convlab2/nlu/jointBERT/multiwoz/output/all_context/pytorch_model.bin
bert-base-uncased


I0810 04:01:01.167612 139710458230592 file_utils.py:362] https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json not found in cache or force_download set to True, downloading to /tmp/tmp6v7dyesv
I0810 04:01:01.987807 139710458230592 file_utils.py:377] copying /tmp/tmp6v7dyesv to cache at /home/donghoon/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
I0810 04:01:01.989084 139710458230592 file_utils.py:381] creating metadata file for /home/donghoon/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.7156163d5fdc189c3016baca0775ffce230789d7fa2a42ef516483e4ca884517
I0810 04:01:01.989931 139710458230592 file_utils.py:390] removing temp file /tmp/tmp6v7dyesv
I0810 04:01:02.828496 139710458230592 file_utils.py:362] https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin not found in cache or

BERTNLU loaded
Loading goal model is done


## Step 3.2 분석 툴을 사용해 system model을 진단해봅시다.

ConvLab-2에서는 분석 툴(analysis tool)을 제공하며, 이를 통해 정의한 system model의 성능 및 취약점을 분석하고 진단할 수 있습니다.

뿐만 아니라 HTML report를 작성해주어, 조금 더 풍부한 통계 정보를 얻어낼 수 있습니다. (results/\$model_name\$ 를 참조합니다)

In [19]:
from convlab2.util.analysis_tool.analyzer import Analyzer

# Create the analysis tool around the user simulator built earlier.
# NOTE(review): an earlier comment here said "if sys_nlu!=None, set
# use_nlu=True to collect more information", but no `use_nlu` argument is
# passed anywhere in this cell - confirm against the installed Analyzer API.
analyzer = Analyzer(user_agent=user_agent, dataset='multiwoz')

# Fix the random seed so the simulated dialogues are reproducible.
# (set_seed is not defined in this cell; it must already exist in the session.)
set_seed(20200131)
# Simulate 20 dialogues between user_agent and sys_agent. The call prints the
# metrics and, as the last expression of the cell, its return value is shown:
# in the recorded run a 7-tuple matching (complete, success, precision,
# recall, f1, book rate, average turn over all dialogues).
analyzer.comprehensive_analyze(sys_agent=sys_agent, model_name='sys_agent', total_dialog=20)

dialogue:   5%|▌         | 1/20 [00:01<00:19,  1.04s/it]I0810 04:02:15.939608 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [47932720631] (slot: phone domain: taxi)
I0810 04:02:15.985555 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [57253443034] (slot: phone domain: taxi)
I0810 04:02:16.030598 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [31316650087] (slot: phone domain: taxi)
I0810 04:02:16.074352 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [85878627496] (slot: phone domain: taxi)
I0810 04:02:16.118682 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [07233561887] (slot: phone domain: taxi)
I0810 04:02:16.164804 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [17343656291] (slot: phone domain: taxi)
I0810 04:02:16.216439 139710458230592 policy_agend

I0810 04:02:23.393176 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [73986571951] (slot: phone domain: taxi)
I0810 04:02:23.439137 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [91595639954] (slot: phone domain: taxi)
I0810 04:02:23.485215 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [42110591073] (slot: phone domain: taxi)
I0810 04:02:23.532055 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [20734023996] (slot: phone domain: taxi)
I0810 04:02:23.576474 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [26091778855] (slot: phone domain: taxi)
I0810 04:02:23.621210 139710458230592 policy_agenda_multiwoz.py:242] Value not found in standard value set: [77777216401] (slot: phone domain: taxi)
dialogue: 100%|██████████| 20/20 [00:09<00:00,  1.59it/s]


complete number of dialogs/tot: 0.7
success number of dialogs/tot: 0.65
average precision: 0.7517857142857143
average recall: 0.8025
average f1: 0.7711996336996336
average book rate: 0.9761904761904762
average turn (succ): 14.615384615384615
average turn (all): 18.9


(0.7,
 0.65,
 0.7517857142857143,
 0.8025,
 0.7711996336996336,
 0.9761904761904762,
 18.9)

## Step 3.3 여러 개의 system model 간 성능을 비교해봅시다. 

서로 다른 3가지의 system model에 대한 결과를 아래에 채워봅시다. (vscode 기준, 더블 클릭하면 수정할 수 있습니다.)

NLU       | DST       | Policy    | NLG       | Success rate | Book rate | Inform P | Inform R | Inform F1 | Turn(succ/all) |
--------- | --------- | --------- | --------- | :----------: | :-------: | -------- | -------- | --------- | -------------- |
Content   | Content   | Content   | Content   | Content      | Content   | Content  | Content  | Content   | Content        |
Content   | Content   | Content   | Content   | Content      | Content   | Content  | Content  | Content   | Content        |
Content   | Content   | Content   | Content   | Content      | Content   | Content  | Content  | Content   | Content        |

In [None]:
# Exercise cell: compare several system agents on the same number of
# simulated dialogues.
# Fix the random seed before the comparison.
set_seed(20200805)

# define your own system agent2
# sys_agent2 = PipelineAgent(...)

# define your own system agent3
# sys_agent3 = PipelineAgent(...)

# NOTE: as written, the same `sys_agent` is passed three times as a
# placeholder. After defining sys_agent2 / sys_agent3 above, replace the second
# and third entries of agent_list so that three different models are compared.
# model_name supplies one label per entry of agent_list.
analyzer.compare_models(agent_list=[sys_agent, sys_agent, sys_agent], model_name=['sys_agent1', 'sys_agent2', 'sys_agent3'], total_dialog=100)

# Additional. End-to-end Neural Pipeline (ACL 2020) 모델을 사용해보자

Paper : Donghoon Ham *, Jeong-Gwan Lee *, Youngsoo Jang, and Kee-Eung Kim. 2020. End-to-End Neural Pipeline for Goal-Oriented Dialogue System using GPT-2. ACL 2020

![Model architecture](image/e2e_model.png)

우선, ConvLab-2에 있는 모델을 import 하고 MultiWOZ로 pretrained된 weight를 다운로드 합니다.

In [1]:
# Load the end-to-end neural pipeline agent and rebind `sys_agent` to it.
# NOTE(review): the recorded run of this cell failed with
#   FileNotFoundError: './data/multiwoz/dialog_act_slot.txt'
# The path is relative, so presumably the notebook has to be run from a working
# directory that contains ./data/multiwoz - confirm before running.
from convlab2.e2e.Transformer import Transformer
sys_agent = Transformer()

FileNotFoundError: [Errno 2] No such file or directory: './data/multiwoz/dialog_act_slot.txt'

위에서 사용했던 다른 e2e agent와는 다르게, neural pipeline 모델은 dialogue state와 system action (dialogue policy)을 확인할 수 있습니다.

In [None]:
# Start a fresh dialogue session, then send a single user utterance.
sys_agent.init_session()
# As the last expression of the cell, the value returned by response() is
# displayed by the notebook.
sys_agent.response("I want to find a moderate hotel")

## Neural pipeline 모델과 대화해보고, 성능을 평가해봅시다 !

In [None]:
# Interactive chat with the neural pipeline agent.
#   - enter 'r' to reset the dialogue session and start a new conversation
#   - enter 'q' (or send EOF) to leave the loop; previously the loop had no exit
#     path and could only be stopped by interrupting the kernel
sys_agent.init_session()
while True:
    try:
        raw_text = input(">>> ")
    except EOFError:
        # stdin was closed, so no further input can arrive; stop cleanly
        break
    if not raw_text:
        # empty input is rejected and the prompt is shown again
        print('not empty')
        continue
    if raw_text == 'q':
        break
    if raw_text == 'r':
        sys_agent.init_session()
        continue
    out_text = sys_agent.response(raw_text)
    print('sys: ', out_text)

In [None]:
# Evaluate the neural pipeline agent against the user simulator.
# This cell does not import or define Analyzer, set_seed or user_agent; they
# must already exist in the session.
analyzer = Analyzer(user_agent=user_agent, dataset='multiwoz')
# Fix the random seed before running the simulated dialogues.
set_seed(20200131)
# Simulate 20 dialogues; the return value of the last expression is displayed
# by the notebook.
analyzer.comprehensive_analyze(sys_agent=sys_agent, model_name='sys_agent', total_dialog=20)