# End-to-End Task-Completion Neural Dialogue Systems

Training notebook. Created to visualize results.

In [118]:
from user_simulator import UserSimulator
from error_model_controller import ErrorModelController
from dqn_agent import DQNAgent
from state_tracker import StateTracker
import pickle, argparse, json, math
from utils import remove_empty_slots
from user import User
import numpy as np

# Initialization

## Constants
The file `constants.json` contains the values and paths needed to train and run the network. It is loaded and its contents are mapped to variables.

In [2]:
# Path of the JSON file that holds the configuration used throughout this notebook
# (file paths, run settings, agent settings and error-model settings).
constants_file = 'constants.json'

In [3]:
# Parse the constants file into a dict; the context manager closes the file afterwards.
with open(constants_file) as f:
    constants = json.load(f)
# Bare expression so the loaded configuration is echoed as the cell output.
constants

{'db_file_paths': {'database': 'data/movie_db.pkl',
  'dict': 'data/movie_dict.pkl',
  'user_goals': 'data/movie_user_goals.pkl'},
 'run': {'usersim': True,
  'warmup_mem': 1000,
  'num_ep_run': 40000,
  'train_freq': 100,
  'max_round_num': 20,
  'success_rate_threshold': 0.3},
 'agent': {'save_weights_file_path': '',
  'load_weights_file_path': '',
  'vanilla': True,
  'learning_rate': 0.001,
  'batch_size': 16,
  'dqn_hidden_size': 80,
  'epsilon_init': 0.0,
  'gamma': 0.9,
  'max_mem_size': 500000},
 'emc': {'slot_error_mode': 0,
  'slot_error_prob': 0.05,
  'intent_error_prob': 0.0}}

In [4]:
# Locations of the pickled movie database, movie dictionary and user goals
file_path_dict = constants['db_file_paths']
DATABASE_FILE_PATH, DICT_FILE_PATH, USER_GOALS_FILE_PATH = (
    file_path_dict[name] for name in ('database', 'dict', 'user_goals')
)

# Settings that drive the warmup, training and testing loops
run_dict = constants['run']
(USE_USERSIM, WARMUP_MEM, NUM_EP_TRAIN,
 TRAIN_FREQ, MAX_ROUND_NUM, SUCCESS_RATE_THRESHOLD) = (
    run_dict[name]
    for name in ('usersim', 'warmup_mem', 'num_ep_run',
                 'train_freq', 'max_round_num', 'success_rate_threshold')
)

## Load databases
### Movie database
A dictionary is built from the 990 entries of the file, which together form the full set of options that the chatbot must pick from.

In [6]:
# Load movie DB
# Note: If you get an unpickling error here then run 'pickle_converter.py' and it should fix it
# NOTE: unpickling can execute arbitrary code, so only load database files from a trusted source.
# The context manager closes the file handle as soon as the data has been read.
with open(DATABASE_FILE_PATH, 'rb') as database_file:
    database = pickle.load(database_file, encoding='latin1')

In [7]:
# Preview only the first few entries; echoing the whole database floods the cell output.
{key: database[key] for key in list(database)[:5]}

{0: {'city': 'hamilton',
  'theater': 'manville 12 plex',
  'zip': '08835',
  'critic_rating': 'good',
  'date': 'tomorrow',
  'state': 'nj',
  'starttime': '10:30am',
  'genre': 'comedy',
  'moviename': 'zootopia'},
 1: {'city': 'manville',
  'theater': 'manville 12 plex',
  'zip': '08835',
  'critic_rating': 'good',
  'date': 'tomorrow',
  'state': 'nj',
  'starttime': '10:30am',
  'genre': 'comedy',
  'moviename': 'zootopia'},
 2: {'city': 'bridgewater',
  'theater': 'manville 12 plex',
  'zip': '08835',
  'critic_rating': 'good',
  'date': 'tomorrow',
  'state': 'nj',
  'starttime': '10:30am',
  'genre': 'comedy',
  'moviename': 'zootopia'},
 3: {'city': 'hamilton',
  'theater': 'amc dine-in theatres bridgewater 7',
  'zip': '08835',
  'critic_rating': 'good',
  'date': 'tomorrow',
  'state': 'nj',
  'starttime': '10:30am',
  'genre': 'comedy',
  'moviename': 'zootopia'},
 4: {'city': 'manville',
  'theater': 'amc dine-in theatres bridgewater 7',
  'zip': '08835',
  'critic_rating'

In [8]:
# Clean DB
# The return value is not used, so the helper (imported from utils) presumably
# cleans `database` in place — TODO confirm.
remove_empty_slots(database)

### Movie dictionary
It contains the unique values that can appear in each field of the database items. The categories are: `['city', 'numberofpeople', 'theater', 'description', 'zip', 'numberofkids', 'distanceconstraints', 'critic_rating', 'price', 'greeting', 'actor', 'date', 'state', 'other', 'mpaa_rating', 'starttime', 'theater_chain', 'genre', 'video_format', 'moviename']`.

In [9]:
# Load movie dict
# NOTE: unpickling can execute arbitrary code, so only load dictionary files from a trusted source.
# The context manager closes the file handle as soon as the data has been read.
with open(DICT_FILE_PATH, 'rb') as dict_file:
    db_dict = pickle.load(dict_file, encoding='latin1')

### User goals
A collection of user objectives that are unknown to the chatbot; each one is the final goal of the user during a conversation. If the goal is reached, the conversation is considered a success.

In [10]:
 # Load goal File
# NOTE: unpickling can execute arbitrary code, so only load goal files from a trusted source.
# The context manager closes the file handle as soon as the data has been read.
with open(USER_GOALS_FILE_PATH, 'rb') as user_goals_file:
    user_goals = pickle.load(user_goals_file, encoding='latin1')
user_goals[0] # Example

{'request_slots': {},
 'diaact': 'request',
 'inform_slots': {'city': 'birmingham',
  'numberofpeople': '1',
  'theater': 'carmike summit 16',
  'state': 'al',
  'starttime': 'around 2pm',
  'date': 'today',
  'moviename': 'zootopia'}}

## User Simulator

In [11]:
# Pick the conversation partner: the user simulator (built from the goals, the
# constants and the database) when USE_USERSIM is set, otherwise the User class.
user = UserSimulator(user_goals, constants, database) if USE_USERSIM else User(constants)

In [12]:
# Error model controller: used below to infuse errors into the user's actions
emc = ErrorModelController(db_dict, constants)
# State tracker: receives every agent/user action and produces the state handed to the agent
state_tracker = StateTracker(database, constants)
# The agent's first constructor argument is the state size reported by the state tracker
dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)


# Training

In [21]:
def run_round(state, warmup=False):
    """
    Plays one agent/user exchange and stores the resulting experience in the agent.

    Parameters:
        state: State representation the agent acts on
        warmup (bool): Passed to the agent as use_rule. Default: False

    Returns:
        tuple: (next_state, reward, done, success)

    """

    # The agent chooses an action for the current state and the tracker records it
    action_index, action = dqn_agent.get_action(state, use_rule=warmup)
    state_tracker.update_state_agent(action)

    # The user answers; errors are only infused while the conversation is still running
    user_action, reward, done, success = user.step(action)
    if not done:
        emc.infuse_error(user_action)
    state_tracker.update_state_user(user_action)

    # Fetch the follow-up state and store the transition
    next_state = state_tracker.get_state(done)
    dqn_agent.add_experience(state, action_index, reward, next_state, done)
    return next_state, reward, done, success

def warmup_run():
    """
    Runs the warmup stage of training which is used to fill the agent's memory.

    Every round is run with warmup=True, which run_round forwards to the agent as use_rule. The agent's memory is
    filled as this runs. Episodes are always played to completion, so the loop terminates after the first episode
    that brings the step count to WARMUP_MEM or beyond, or when the memory buffer is full.

    """

    print('Warmup Started...')
    total_step = 0
    # `<` instead of `!=`: the step count is only checked between episodes, so it can jump
    # past WARMUP_MEM; with `!=` the warmup would then keep running until the memory is full.
    while total_step < WARMUP_MEM and not dqn_agent.is_memory_full():
        # Reset episode
        episode_reset()
        done = False
        # Get initial state from state tracker
        state = state_tracker.get_state()
        while not done:
            next_state, _, done, _ = run_round(state, warmup=True)
            total_step += 1
            state = next_state

    print('...Warmup Ended')

In [17]:
def train_run(num_episodes=None):
    """
    Runs the loop that trains the agent.

    Trains the agent on the goal-oriented chatbot task. Training of the agent's neural network occurs every episode that
    TRAIN_FREQ is a multiple of. Terminates when the episode reaches num_episodes.

    Parameters:
        num_episodes (int): Number of episodes to run, e.g. a small number for a quick look at a few
                            conversations. Default: None, which means NUM_EP_TRAIN

    """

    if num_episodes is None:
        num_episodes = NUM_EP_TRAIN

    print('Training Started...')
    period_reward_total = 0
    period_success_total = 0
    success_rate_best = 0.0
    for episode in range(1, num_episodes + 1):
        episode_reset()
        done = False
        state = state_tracker.get_state()
        while not done:
            next_state, reward, done, success = run_round(state)
            period_reward_total += reward
            state = next_state

        # `success` is the flag returned by the last round of the episode
        period_success_total += success

        # Train
        if episode % TRAIN_FREQ == 0:
            # Check success rate (both averages are taken over the last TRAIN_FREQ episodes)
            success_rate = period_success_total / TRAIN_FREQ
            avg_reward = period_reward_total / TRAIN_FREQ
            # Flush the agent's memory when the period is at least as good as the best so far
            # and also reaches the configured threshold
            if success_rate >= success_rate_best and success_rate >= SUCCESS_RATE_THRESHOLD:
                dqn_agent.empty_memory()
            # Update current best success rate (weights are only saved on a strict improvement)
            if success_rate > success_rate_best:
                print('Episode: {} NEW BEST SUCCESS RATE: {} Avg Reward: {}' .format(episode, success_rate, avg_reward))
                success_rate_best = success_rate
                dqn_agent.save_weights()
            # Start a new measuring period
            period_success_total = 0
            period_reward_total = 0
            # Copy
            dqn_agent.copy()
            # Train
            dqn_agent.train()
    print('...Training Ended')

In [18]:
def episode_reset():
    """
    Starts a fresh episode/conversation for the warmup and training loops.

    Resets the state tracker, the user and the agent, and hands the user's error-infused opening action to the
    state tracker.

    """

    state_tracker.reset()
    # The user opens the conversation; its first action has errors infused
    # before the state tracker gets to see it
    initial_user_action = user.reset()
    emc.infuse_error(initial_user_action)
    state_tracker.update_state_user(initial_user_action)
    # The agent is reset last
    dqn_agent.reset()

In [22]:
warmup_run()

Warmup Started...
...Warmup Ended


In [25]:
train_run()

Training Started...
Episode: 1100 NEW BEST SUCCESS RATE: 0.33 Avg Reward: -19.69
Episode: 2000 NEW BEST SUCCESS RATE: 0.36 Avg Reward: -13.82
Episode: 2100 NEW BEST SUCCESS RATE: 0.4 Avg Reward: -11.0
Episode: 2400 NEW BEST SUCCESS RATE: 0.47 Avg Reward: -6.75
Episode: 2500 NEW BEST SUCCESS RATE: 0.51 Avg Reward: -3.35
Episode: 2600 NEW BEST SUCCESS RATE: 0.55 Avg Reward: -1.22
Episode: 2800 NEW BEST SUCCESS RATE: 0.64 Avg Reward: 4.83
Episode: 4200 NEW BEST SUCCESS RATE: 0.74 Avg Reward: 13.56
Episode: 4400 NEW BEST SUCCESS RATE: 0.77 Avg Reward: 16.68
Episode: 5100 NEW BEST SUCCESS RATE: 0.82 Avg Reward: 20.95
Episode: 5500 NEW BEST SUCCESS RATE: 0.86 Avg Reward: 23.3
Episode: 7700 NEW BEST SUCCESS RATE: 0.87 Avg Reward: 24.4
Episode: 8200 NEW BEST SUCCESS RATE: 0.88 Avg Reward: 24.83
Episode: 13600 NEW BEST SUCCESS RATE: 0.9 Avg Reward: 26.35
Episode: 14000 NEW BEST SUCCESS RATE: 0.91 Avg Reward: 27.49
Episode: 20700 NEW BEST SUCCESS RATE: 0.94 Avg Reward: 29.7
Episode: 32100 NEW BE

KeyboardInterrupt: 

# Test


In [28]:
# NOTE(review): the test run reuses the training episode count ('num_ep_run', 40000 in the
# constants echoed at the top of the notebook) — confirm whether a smaller, dedicated count is intended.
NUM_EP_TEST = run_dict['num_ep_run']


In [29]:
def test_run(num_episodes=None):
    """
    Runs the loop that tests the agent.

    Tests the agent on the goal-oriented chatbot task. Only for evaluating a trained agent. Unlike the training
    rounds, no experience is added to the agent's memory here. Terminates when the episode reaches num_episodes.

    Parameters:
        num_episodes (int): Number of test episodes to play. Default: None, which means NUM_EP_TEST

    """

    if num_episodes is None:
        num_episodes = NUM_EP_TEST

    print('Testing Started...')
    for episode in range(1, num_episodes + 1):
        episode_reset()
        ep_reward = 0
        done = False
        # Get initial state from state tracker
        state = state_tracker.get_state()
        while not done:
            # Agent takes action given state tracker's representation of dialogue
            agent_action_index, agent_action = dqn_agent.get_action(state)
            # Update state tracker with the agent's action
            state_tracker.update_state_agent(agent_action)
            # User takes action given agent action
            user_action, reward, done, success = user.step(agent_action)
            ep_reward += reward
            if not done:
                # Infuse error into semantic frame level of user action
                emc.infuse_error(user_action)
            # Update state tracker with user action
            state_tracker.update_state_user(user_action)
            # Grab "next state" as state
            state = state_tracker.get_state(done)
        # `success` is the flag returned by the last round of the episode
        print('Episode: {} Success: {} Reward: {}'.format(episode, success, ep_reward))
    print('...Testing Ended')


def episode_reset():
    """Resets the episode/conversation in the testing loop.

    Performs the same steps as the episode_reset defined earlier in this notebook for the warmup and training
    loops; being defined later, this definition replaces that one once the cell has run.
    """

    # Tracker first, then the user's opening action (with errors infused) goes into the tracker
    state_tracker.reset()
    opening_action = user.reset()
    emc.infuse_error(opening_action)
    state_tracker.update_state_user(opening_action)
    # The agent is reset at the very end
    dqn_agent.reset()


In [31]:
test_run()


Testing Started...
Episode: 1 Success: True Reward: 34
Episode: 2 Success: True Reward: 34
Episode: 3 Success: True Reward: 33
Episode: 4 Success: True Reward: 35
Episode: 5 Success: True Reward: 33
Episode: 6 Success: False Reward: -40
Episode: 7 Success: True Reward: 34
Episode: 8 Success: True Reward: 34
Episode: 9 Success: True Reward: 33
Episode: 10 Success: True Reward: 34
Episode: 11 Success: True Reward: 34
Episode: 12 Success: True Reward: 34
Episode: 13 Success: False Reward: -40
Episode: 14 Success: True Reward: 35
Episode: 15 Success: True Reward: 34
Episode: 16 Success: True Reward: 35
Episode: 17 Success: True Reward: 34
Episode: 18 Success: True Reward: 34
Episode: 19 Success: True Reward: 34
Episode: 20 Success: True Reward: 32
Episode: 21 Success: True Reward: 34
Episode: 22 Success: True Reward: 34
Episode: 23 Success: True Reward: 35
Episode: 24 Success: True Reward: 34
Episode: 25 Success: False Reward: -40
Episode: 26 Success: True Reward: 34
Episode: 27 Success: F

Episode: 225 Success: True Reward: 34
Episode: 226 Success: False Reward: -40
Episode: 227 Success: True Reward: 33
Episode: 228 Success: True Reward: 34
Episode: 229 Success: True Reward: 34
Episode: 230 Success: True Reward: 33
Episode: 231 Success: True Reward: 34
Episode: 232 Success: True Reward: 35
Episode: 233 Success: True Reward: 35
Episode: 234 Success: True Reward: 34
Episode: 235 Success: False Reward: -40
Episode: 236 Success: True Reward: 34
Episode: 237 Success: True Reward: 34
Episode: 238 Success: True Reward: 34
Episode: 239 Success: True Reward: 34
Episode: 240 Success: True Reward: 34
Episode: 241 Success: True Reward: 34
Episode: 242 Success: True Reward: 34
Episode: 243 Success: True Reward: 33
Episode: 244 Success: False Reward: -40
Episode: 245 Success: True Reward: 33
Episode: 246 Success: True Reward: 34
Episode: 247 Success: True Reward: 34
Episode: 248 Success: False Reward: -40
Episode: 249 Success: True Reward: 34
Episode: 250 Success: True Reward: 34
Epis

KeyboardInterrupt: 

# Examples

### episode_reset()

In [93]:
state_tracker.reset()
# Then pick an init user action
user_action = user.reset()

In [99]:
user_action

{'intent': 'request',
 'request_slots': {'ticket': 'UNK'},
 'inform_slots': {'moviename': 'hail caesar'},
 'round': 0,
 'speaker': 'User'}

In [95]:
# Infuse with error
emc.infuse_error(user_action)
# And update state tracker
state_tracker.update_state_user(user_action)
# Finally, reset agent
dqn_agent.reset()

In [89]:
state_tracker.__dict__

{'db_helper': <db_query.DBQuery at 0x24b6949c2c8>,
 'match_key': 'ticket',
 'intents_dict': {'inform': 0,
  'request': 1,
  'done': 2,
  'match_found': 3,
  'thanks': 4,
  'reject': 5},
 'num_intents': 6,
 'slots_dict': {'actor': 0,
  'actress': 1,
  'city': 2,
  'critic_rating': 3,
  'date': 4,
  'description': 5,
  'distanceconstraints': 6,
  'genre': 7,
  'greeting': 8,
  'implicit_value': 9,
  'movie_series': 10,
  'moviename': 11,
  'mpaa_rating': 12,
  'numberofpeople': 13,
  'numberofkids': 14,
  'other': 15,
  'price': 16,
  'seating': 17,
  'starttime': 18,
  'state': 19,
  'theater': 20,
  'theater_chain': 21,
  'video_format': 22,
  'zip': 23,
  'result': 24,
  'ticket': 25,
  'mc_list': 26},
 'num_slots': 27,
 'max_round_num': 20,
 'none_state': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
     

In [57]:
ep_reward = 0
done = False
# Get initial state from state tracker
state = state_tracker.get_state()

In [58]:
state

array([0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.2 , 1.  , 0.  , 0.

In [59]:
state_tracker.__dict__

{'db_helper': <db_query.DBQuery at 0x24b6949c2c8>,
 'match_key': 'ticket',
 'intents_dict': {'inform': 0,
  'request': 1,
  'done': 2,
  'match_found': 3,
  'thanks': 4,
  'reject': 5},
 'num_intents': 6,
 'slots_dict': {'actor': 0,
  'actress': 1,
  'city': 2,
  'critic_rating': 3,
  'date': 4,
  'description': 5,
  'distanceconstraints': 6,
  'genre': 7,
  'greeting': 8,
  'implicit_value': 9,
  'movie_series': 10,
  'moviename': 11,
  'mpaa_rating': 12,
  'numberofpeople': 13,
  'numberofkids': 14,
  'other': 15,
  'price': 16,
  'seating': 17,
  'starttime': 18,
  'state': 19,
  'theater': 20,
  'theater_chain': 21,
  'video_format': 22,
  'zip': 23,
  'result': 24,
  'ticket': 25,
  'mc_list': 26},
 'num_slots': 27,
 'max_round_num': 20,
 'none_state': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
     

### State disambiguation

In [98]:
state_tracker.history[-1]

{'intent': 'request',
 'request_slots': {'ticket': 'UNK'},
 'inform_slots': {'moviename': 'hail caesar'},
 'round': 0,
 'speaker': 'User'}

In [97]:
user_action

{'intent': 'request',
 'request_slots': {'ticket': 'UNK'},
 'inform_slots': {'moviename': 'hail caesar'},
 'round': 0,
 'speaker': 'User'}

In [106]:
db_results_dict = state_tracker.db_helper.get_db_results_for_slots(state_tracker.current_informs)
db_results_dict

{'moviename': 10, 'matching_all_constraints': 10}

In [108]:
last_agent_action = state_tracker.history[-2] if len(state_tracker.history) > 1 else None

In [121]:
# Create one-hot of intents to represent the current user action
user_act_rep = np.zeros((state_tracker.num_intents,))
user_act_rep[state_tracker.intents_dict[user_action['intent']]] = 1.0

In [122]:
user_act_rep

array([0., 1., 0., 0., 0., 0.])

In [124]:
def _slot_bag(slot_names):
    """Return a vector of length num_slots holding 1.0 at the index of every given slot name."""
    bag = np.zeros((state_tracker.num_slots,))
    for slot_name in slot_names:
        bag[state_tracker.slots_dict[slot_name]] = 1.0
    return bag


# Bags of slots for the current user action (informs and requests)
user_inform_slots_rep = _slot_bag(user_action['inform_slots'].keys())
user_request_slots_rep = _slot_bag(user_action['request_slots'].keys())

# Bag of the slots that have been filled in so far
current_slots_rep = _slot_bag(state_tracker.current_informs)

# Last agent action: one-hot intent plus bags of its inform/request slots.
# Everything stays at zero when there is no previous agent action.
agent_act_rep = np.zeros((state_tracker.num_intents,))
if last_agent_action:
    agent_act_rep[state_tracker.intents_dict[last_agent_action['intent']]] = 1.0
agent_inform_slots_rep = _slot_bag(last_agent_action['inform_slots'].keys() if last_agent_action else ())
agent_request_slots_rep = _slot_bag(last_agent_action['request_slots'].keys() if last_agent_action else ())

# Round number, once as a scaled value and once as a one-hot vector
turn_rep = np.zeros((1,)) + state_tracker.round_num / 5.
turn_onehot_rep = np.zeros((state_tracker.max_round_num,))
# NOTE(review): for round_num == 0 the index is -1, i.e. the last position gets set — confirm this is intended
turn_onehot_rep[state_tracker.round_num - 1] = 1.0

# DB query results: every position starts from the overall match count (scaled by 100
# for the count vector, reduced to "any match" for the binary vector) ...
total_matches = db_results_dict['matching_all_constraints']
kb_count_rep = np.zeros((state_tracker.num_slots + 1,)) + total_matches / 100.
kb_binary_rep = np.zeros((state_tracker.num_slots + 1,)) + np.sum(total_matches > 0.)
# ... and the positions of known slots are overwritten with the per-slot results
for slot_name, match_count in db_results_dict.items():
    if slot_name in state_tracker.slots_dict:
        slot_index = state_tracker.slots_dict[slot_name]
        kb_count_rep[slot_index] = match_count / 100.
        kb_binary_rep[slot_index] = np.sum(match_count > 0.)

# Concatenate all parts into one flat state vector (the order matters)
state_parts = [
    user_act_rep, user_inform_slots_rep, user_request_slots_rep,
    agent_act_rep, agent_inform_slots_rep, agent_request_slots_rep,
    current_slots_rep, turn_rep, turn_onehot_rep,
    kb_binary_rep, kb_count_rep,
]
state_representation = np.hstack(state_parts).flatten()

In [134]:
current_slots_rep


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [126]:
state

array([0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.2 , 1.  , 0.  , 0.

In [None]:
# Plays the current episode until `done` is set, mirroring the inner loop of test_run().
# This cell has no execution count; the cells after it perform the same steps one at a time.
# NOTE(review): `episode` is not assigned at notebook level before this cell (only inside
# functions and in the visualisation cell near the end), so on a top-to-bottom run the final
# print would presumably raise NameError — confirm.
while not done:
    # Agent takes action given state tracker's representation of dialogue
    agent_action_index, agent_action = dqn_agent.get_action(state)
    # Update state tracker with the agent's action
    state_tracker.update_state_agent(agent_action)
    # User takes action given agent action
    user_action, reward, done, success = user.step(agent_action)
    ep_reward += reward
    if not done:
        # Infuse error into semantic frame level of user action
        emc.infuse_error(user_action)
    # Update state tracker with user action
    state_tracker.update_state_user(user_action)
    # Grab "next state" as state
    state = state_tracker.get_state(done)
print('Episode: {} Success: {} Reward: {}'.format(episode, success, ep_reward))

In [145]:
agent_action_index, agent_action = dqn_agent.get_action(state)

In [143]:
agent_action

{'intent': 'inform',
 'inform_slots': {'date': 'PLACEHOLDER'},
 'request_slots': {}}

In [149]:
state_tracker.__dict__

{'db_helper': <db_query.DBQuery at 0x24b6949c2c8>,
 'match_key': 'ticket',
 'intents_dict': {'inform': 0,
  'request': 1,
  'done': 2,
  'match_found': 3,
  'thanks': 4,
  'reject': 5},
 'num_intents': 6,
 'slots_dict': {'actor': 0,
  'actress': 1,
  'city': 2,
  'critic_rating': 3,
  'date': 4,
  'description': 5,
  'distanceconstraints': 6,
  'genre': 7,
  'greeting': 8,
  'implicit_value': 9,
  'movie_series': 10,
  'moviename': 11,
  'mpaa_rating': 12,
  'numberofpeople': 13,
  'numberofkids': 14,
  'other': 15,
  'price': 16,
  'seating': 17,
  'starttime': 18,
  'state': 19,
  'theater': 20,
  'theater_chain': 21,
  'video_format': 22,
  'zip': 23,
  'result': 24,
  'ticket': 25,
  'mc_list': 26},
 'num_slots': 27,
 'max_round_num': 20,
 'none_state': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
     

In [148]:
state_tracker.update_state_agent(agent_action)

In [None]:
agent_action

In [141]:
user_action

{'intent': 'request',
 'request_slots': {'ticket': 'UNK'},
 'inform_slots': {'moviename': 'hail caesar'},
 'round': 0,
 'speaker': 'User'}

In [161]:
user_action, reward, done, success = user.step(agent_action)

In [159]:
# NOTE(review): according to the recorded output this call raises AttributeError because
# UserSimulator has no `_return_response` attribute; leftover exploration — confirm whether
# the cell can be dropped so the notebook runs top to bottom.
user._return_response()

AttributeError: 'UserSimulator' object has no attribute '_return_response'

In [None]:
# Update state tracker with the agent's action
state_tracker.update_state_agent(agent_action)
# User takes action given agent action
user_action, reward, done, success = user.step(agent_action)

In [162]:
ep_reward += reward
if not done:
    # Infuse error into semantic frame level of user action
    emc.infuse_error(user_action)
# Update state tracker with user action
state_tracker.update_state_user(user_action)
# Grab "next state" as state
state = state_tracker.get_state(done)

In [163]:
state

array([0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 1.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.4 , 0.  , 1.  , 0.

# Visualizing conversations

The training loop is copied here, and a few convenient `print` operations are added in order to visualize the possible conversation between simulated user and agent.

In [164]:
NUM_EP_TRAIN

40000

In [166]:
episode

10

In [165]:
"""
Runs the loop that trains the agent.

Trains the agent on the goal-oriented chatbot task. Training of the agent's neural network occurs every episode that
TRAIN_FREQ is a multiple of. Terminates when the episode reaches NUM_EP_TRAIN.

"""

# Inline copy of the train_run() body, limited to 10 episodes.
# With the train_freq of 100 shown in the constants output, `episode % TRAIN_FREQ == 0`
# cannot hold within 10 episodes, so the training branch below does not execute here.
print('Training Started...')
episode = 0
period_reward_total = 0
period_success_total = 0
success_rate_best = 0.0
while episode < 10:
    episode_reset()
    episode += 1
    done = False
    state = state_tracker.get_state()
    # Play one full conversation; run_round also stores each transition in the agent
    while not done:
        next_state, reward, done, success = run_round(state)
        period_reward_total += reward
        state = next_state

    # `success` is the flag returned by the last round of the episode
    period_success_total += success

    # Train
    if episode % TRAIN_FREQ == 0:
        # Check success rate
        success_rate = period_success_total / TRAIN_FREQ
        avg_reward = period_reward_total / TRAIN_FREQ
        # Flush
        if success_rate >= success_rate_best and success_rate >= SUCCESS_RATE_THRESHOLD:
            dqn_agent.empty_memory()
        # Update current best success rate
        if success_rate > success_rate_best:
            print('Episode: {} NEW BEST SUCCESS RATE: {} Avg Reward: {}' .format(episode, success_rate, avg_reward))
            success_rate_best = success_rate
            dqn_agent.save_weights()
        period_success_total = 0
        period_reward_total = 0
        # Copy
        dqn_agent.copy()
        # Train
        dqn_agent.train()

Training Started...
