In [1]:
# Mount Google Drive into the Colab runtime at /content/gdrive so the project
# files stored on Drive can be reached from the notebook.
from google.colab import drive
drive.mount('/content/gdrive')
# Alternative call that passes force_remount=True, kept for reference:
#drive.mount("/content/gdrive", force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [8]:
%reload_ext autoreload
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [0]:
import sys

# Project folder on the mounted Drive; change this to your own project folder.
root_path = '/content/gdrive/My Drive/Proyectos/Conferences/2020/Russia-AspireToScience/Resources/GO-Bot-DRL_Fork'  #change dir to your project folder
# Put the project folder first on the import path so its modules can be imported.
# Guarded so that re-running this cell does not pile up duplicate entries.
if root_path not in sys.path:
    sys.path.insert(0, root_path)

In [0]:
from user_simulator import UserSimulator
from error_model_controller import ErrorModelController
from dqn_agent import DQNAgent
from state_tracker import StateTracker
import pickle, argparse, json, math
from utils import remove_empty_slots, timeprint
from user import User
from time import time
import datetime

In [27]:
%cd "gdrive/My Drive/Proyectos/Conferences/2020/Russia-AspireToScience/Resources/GO-Bot-DRL_Fork"

/content/gdrive/My Drive/Proyectos/Conferences/2020/Russia-AspireToScience/Resources/GO-Bot-DRL_Fork


In [34]:
# The constants file path can be provided in two ways:
# 1) In terminal: python train.py --constants_path "constants.json"
# 2) Run this cell as is and change 'constants_file' below
#parser = argparse.ArgumentParser()
#parser.add_argument('--constants_path', dest='constants_path', type=str, default='')
#args = parser.parse_args()
#params = vars(args)


def _load_pickle(path):
    """Unpickle the file at `path` (latin1 encoding) and close the file handle afterwards."""
    # NOTE(review): unpickling can execute arbitrary code; only load data files
    # that come from a trusted source.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file, encoding='latin1')


# Load constants json into dict

constants_file = 'constants.json'

with open(constants_file) as f:
    constants = json.load(f)

print("CONSTANTS:")
print(json.dumps(constants, indent=4, sort_keys=True))


# Load file path constants
file_path_dict = constants['db_file_paths']
DATABASE_FILE_PATH = file_path_dict['database']
DICT_FILE_PATH = file_path_dict['dict']
USER_GOALS_FILE_PATH = file_path_dict['user_goals']

# Load run constants
run_dict = constants['run']
USE_USERSIM = run_dict['usersim']
WARMUP_MEM = run_dict['warmup_mem']
NUM_EP_TRAIN = run_dict['num_ep_run']
TRAIN_FREQ = run_dict['train_freq']
MAX_ROUND_NUM = run_dict['max_round_num']
SUCCESS_RATE_THRESHOLD = run_dict['success_rate_threshold']

# Load movie DB
# Note: If you get an unpickling error here then run 'pickle_converter.py' and it should fix it
database = _load_pickle(DATABASE_FILE_PATH)

# Clean DB
remove_empty_slots(database)

# Load movie dict
db_dict = _load_pickle(DICT_FILE_PATH)

# Load goal File
user_goals = _load_pickle(USER_GOALS_FILE_PATH)

# Init. Objects: simulated user or the User class, depending on the 'usersim' run constant
if USE_USERSIM:
    user = UserSimulator(user_goals, constants, database)
else:
    user = User(constants)
emc = ErrorModelController(db_dict, constants)
state_tracker = StateTracker(database, constants)

# Initialize agent with the state size reported by the state tracker
dqn_agent = DQNAgent(state_tracker.get_state_size(), constants)

CONSTANTS:
{
    "agent": {
        "batch_size": 16,
        "dqn_hidden_size": 80,
        "epsilon_init": 0.0,
        "gamma": 0.9,
        "learning_rate": 0.001,
        "load_weights_file_path": "",
        "max_mem_size": 500000,
        "save_weights_file_path": "",
        "vanilla": true
    },
    "db_file_paths": {
        "database": "data/movie_db.pkl",
        "dict": "data/movie_dict.pkl",
        "user_goals": "data/movie_user_goals.pkl"
    },
    "emc": {
        "intent_error_prob": 0.0,
        "slot_error_mode": 0,
        "slot_error_prob": 0.05
    },
    "run": {
        "max_round_num": 20,
        "num_ep_run": 40000,
        "success_rate_threshold": 0.3,
        "train_freq": 100,
        "usersim": true,
        "warmup_mem": 1000
    }
}


In [0]:
def run_round(state, warmup=False):
    """
    Plays one agent/user exchange of the current dialogue and stores it as an experience.

    Parameters:
        state: The state tracker's representation of the dialogue before the agent acts
        warmup (bool): Forwarded to the agent as `use_rule` when it chooses its action

    Returns:
        tuple: (next_state, reward, done, success)
    """

    # The agent chooses its action from the current state
    action_idx, action = dqn_agent.get_action(state, use_rule=warmup)
    # The state tracker records the agent's move
    state_tracker.update_state_agent(action)
    # The user responds to the agent's action
    user_reply, reward, done, success = user.step(action)
    # Error is infused into the user's action only while the dialogue is still running
    if not done:
        emc.infuse_error(user_reply)
    # The state tracker records the user's move
    state_tracker.update_state_user(user_reply)
    # Build the outcome of the round and remember the transition
    outcome = (state_tracker.get_state(done), reward, done, success)
    dqn_agent.add_experience(state, action_idx, reward, outcome[0], done)

    return outcome


def warmup_run():
    """
    Runs the warmup stage of training which is used to fill the agent's memory.

    The agent uses its rule-based policy to make actions. The agent's memory is filled as this runs.
    Loop terminates once at least WARMUP_MEM steps have been taken or when the memory buffer is full.
    The check happens between episodes, so a started episode is always played to its end.

    """

    print('Warmup Started...')
    total_step = 0
    # '<' rather than '!=': total_step grows by a whole episode between checks, so it can step
    # over WARMUP_MEM without ever being equal to it; with '!=' the warmup would then keep
    # running until the memory buffer is completely full.
    while total_step < WARMUP_MEM and not dqn_agent.is_memory_full():
        # Reset episode
        episode_reset()
        done = False
        # Get initial state from state tracker
        state = state_tracker.get_state()
        while not done:
            next_state, _, done, _ = run_round(state, warmup=True)
            total_step += 1
            state = next_state

    print('...Warmup Ended')

In [0]:
def train_run():
    """
    Runs the main training loop of the agent.

    Plays episodes of the goal-oriented chatbot task until NUM_EP_TRAIN episodes are reached. After every
    TRAIN_FREQ-th episode the success rate and average reward of the finished period are computed, the
    memory is flushed and/or the weights are saved depending on the success rate, and then the agent's
    copy and train steps are run.

    """

    print('Training Started...\n')
    print('at ', str(datetime.datetime.now()), '\n')
    best_rate = 0.0
    reward_sum = 0
    success_sum = 0
    episode = 0
    t_train_start = time()
    t_ep_start = time()

    while episode < NUM_EP_TRAIN:
        episode_reset()
        episode += 1
        state = state_tracker.get_state()

        # Play the episode to its end, counting the rounds
        steps = 0
        while True:
            steps += 1
            state, reward, done, success = run_round(state)
            reward_sum += reward
            if done:
                break
        success_sum += success

        # Everything below only happens on every TRAIN_FREQ-th episode
        if episode % TRAIN_FREQ != 0:
            continue

        # Progress report for the finished period
        t_ep = time() - t_ep_start
        report = (episode, steps, dqn_agent.memory_index, timeprint(t_ep), timeprint(time() - t_train_start))
        print("EP: %d \tSteps: %d \tMemoryIndex: %d \tTime (ep_freq): %s \tTime (total): %s" % report)

        # Period statistics
        success_rate = success_sum / TRAIN_FREQ
        avg_reward = reward_sum / TRAIN_FREQ

        # Flush the memory when the period at least matches the best rate and reaches the threshold
        if best_rate <= success_rate and SUCCESS_RATE_THRESHOLD <= success_rate:
            dqn_agent.empty_memory()

        # A strictly better period becomes the new best and its weights are saved
        if success_rate > best_rate:
            print('Episode: {} NEW BEST SUCCESS RATE: {} Avg Reward: {}' .format(episode, success_rate, avg_reward))
            best_rate = success_rate
            dqn_agent.save_weights()

        # Start a new period
        reward_sum = 0
        success_sum = 0

        # Copy, then train
        dqn_agent.copy()
        dqn_agent.train()
        t_ep_start = time()

    print('...Training Ended')
    print('at ', str(datetime.datetime.now()), '\n')

def episode_reset():
    """
    Starts a fresh episode/conversation for the warmup and training loops.

    Resets the state tracker, the user and the agent, and feeds the user's initial action (after
    error infusion) into the state tracker.

    """

    # The state tracker is reset before anything else
    state_tracker.reset()
    # Resetting the user yields its initial action, which gets error infused
    # before the state tracker is updated with it
    opening_action = user.reset()
    emc.infuse_error(opening_action)
    state_tracker.update_state_user(opening_action)
    # The agent is reset last
    dqn_agent.reset()



In [37]:
# Fill the agent's memory with the warmup stage first, then run the training loop.
warmup_run()
train_run()

Warmup Started...
...Warmup Ended
Training Started...

at  2020-05-01 18:09:28.499610 

EP: 100 	Steps: 20 	MemoryIndex: 3000 	Time (ep_freq): 0:00:02.93 	Time (total): 0:00:02.93
EP: 200 	Steps: 20 	MemoryIndex: 5000 	Time (ep_freq): 0:00:02.46 	Time (total): 0:00:06.47
EP: 300 	Steps: 20 	MemoryIndex: 7000 	Time (ep_freq): 0:00:02.17 	Time (total): 0:00:09.58
EP: 400 	Steps: 20 	MemoryIndex: 8893 	Time (ep_freq): 0:00:02.64 	Time (total): 0:00:13.49
EP: 500 	Steps: 20 	MemoryIndex: 10573 	Time (ep_freq): 0:00:02.01 	Time (total): 0:00:17.09
EP: 600 	Steps: 20 	MemoryIndex: 12561 	Time (ep_freq): 0:00:01.79 	Time (total): 0:00:20.79
EP: 700 	Steps: 20 	MemoryIndex: 14561 	Time (ep_freq): 0:00:03.25 	Time (total): 0:00:26.31
EP: 800 	Steps: 20 	MemoryIndex: 16561 	Time (ep_freq): 0:00:02.52 	Time (total): 0:00:31.50
EP: 900 	Steps: 20 	MemoryIndex: 17995 	Time (ep_freq): 0:00:01.91 	Time (total): 0:00:36.36
EP: 1000 	Steps: 20 	MemoryIndex: 19995 	Time (ep_freq): 0:00:01.78 	Time (tota