# Simulation 1.1 - Training the agents


## Importing libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter as SGfilter
from IPython.display import clear_output
import datetime
import joblib
from tqdm import tqdm

import const
import utilities as ut
import mockSQLenv as SQLenv
import agent as agn

## Defining the parameters of the simulations

In [2]:
n_simulations = 10
n_episodes_training = 10**6
n_episodes_test = 10**2

exploration_train = 0.1
exploration_test = 0
learningrate = 0.1
discount = 0.9
max_steps = 1000

flag_reward = 10
query_reward = -1

## Running the simulations

In [None]:
train_data = np.zeros((n_simulations,3,n_episodes_training))
test_data = np.zeros((n_simulations,3,n_episodes_test))

for i in tqdm(range(n_simulations)):
    agt = agn.Agent(const.actions,verbose=False)
    agt.set_learning_options(exploration=exploration_train, learningrate=learningrate, discount=discount, max_step = max_steps)
    
    for e in tqdm(range(n_episodes_training)):
        env = SQLenv.mockSQLenv(verbose=False, flag_reward=flag_reward, query_reward=query_reward)

        agt.reset(env)
        agt.run_episode()

        train_data[i,0,e] = agt.steps
        train_data[i,1,e] = agt.rewards
        train_data[i,2,e] = ut.getdictshape(agt.Q)[0]
    
    agt.set_learning_options(exploration=exploration_test)
    
    for e in tqdm(range(n_episodes_test)):
        env = SQLenv.mockSQLenv(verbose=False, flag_reward=flag_reward, query_reward=query_reward)

        agt.reset(env)
        agt.run_episode()

        test_data[i,0,e] = agt.steps
        test_data[i,1,e] = agt.rewards
        test_data[i,2,e] = ut.getdictshape(agt.Q)[0]

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]
  0%|                                                                                      | 0/1000000 [00:00<?, ?it/s][A
  0%|                                                                            | 7/1000000 [00:00<4:14:04, 65.60it/s][A
  0%|                                                                           | 14/1000000 [00:00<4:44:13, 58.64it/s][A
  0%|                                                                           | 17/1000000 [00:00<7:20:49, 37.81it/s][A
  0%|                                                                           | 20/1000000 [00:00<8:31:43, 32.57it/s][A
  0%|                                                                           | 28/1000000 [00:00<7:24:23, 37.50it/s][A
  0%|                                                                           | 34/1000000 [00:00<6:41:52, 41.47it/s][A
  0%|              

  0%|                                                                          | 539/1000000 [00:08<3:22:09, 82.40it/s][A
  0%|                                                                          | 548/1000000 [00:08<3:29:53, 79.36it/s][A
  0%|                                                                          | 557/1000000 [00:08<3:44:27, 74.21it/s][A
  0%|                                                                          | 565/1000000 [00:08<4:40:41, 59.34it/s][A
  0%|                                                                          | 573/1000000 [00:08<4:29:22, 61.84it/s][A
  0%|                                                                          | 583/1000000 [00:08<4:13:50, 65.62it/s][A
  0%|                                                                          | 592/1000000 [00:09<4:00:49, 69.17it/s][A
  0%|                                                                          | 600/1000000 [00:09<3:57:07, 70.25it/s][A
  0%|           

  0%|                                                                         | 1119/1000000 [00:16<3:43:42, 74.42it/s][A
  0%|                                                                         | 1128/1000000 [00:16<3:38:18, 76.26it/s][A
  0%|                                                                         | 1137/1000000 [00:16<3:38:57, 76.03it/s][A
  0%|                                                                         | 1145/1000000 [00:16<4:03:32, 68.35it/s][A
  0%|                                                                         | 1153/1000000 [00:17<4:15:46, 65.08it/s][A
  0%|                                                                         | 1160/1000000 [00:17<4:42:13, 58.99it/s][A
  0%|                                                                         | 1168/1000000 [00:17<4:31:01, 61.42it/s][A
  0%|                                                                         | 1175/1000000 [00:17<4:24:25, 62.96it/s][A
  0%|           

  0%|▏                                                                        | 1743/1000000 [00:24<2:49:23, 98.22it/s][A
  0%|▏                                                                        | 1754/1000000 [00:24<3:15:03, 85.30it/s][A
  0%|▏                                                                        | 1769/1000000 [00:24<2:57:49, 93.56it/s][A
  0%|▏                                                                        | 1780/1000000 [00:25<3:14:51, 85.38it/s][A
  0%|▏                                                                        | 1790/1000000 [00:25<3:47:13, 73.22it/s][A
  0%|▏                                                                        | 1799/1000000 [00:25<4:20:49, 63.78it/s][A
  0%|▏                                                                        | 1807/1000000 [00:25<4:28:26, 61.97it/s][A
  0%|▏                                                                        | 1816/1000000 [00:25<4:10:57, 66.29it/s][A
  0%|▏          

  0%|▏                                                                        | 2304/1000000 [00:33<3:25:33, 80.89it/s][A
  0%|▏                                                                        | 2316/1000000 [00:33<3:18:11, 83.90it/s][A
  0%|▏                                                                        | 2325/1000000 [00:33<3:30:03, 79.16it/s][A
  0%|▏                                                                        | 2336/1000000 [00:33<3:12:54, 86.19it/s][A
  0%|▏                                                                        | 2346/1000000 [00:34<3:30:39, 78.93it/s][A
  0%|▏                                                                        | 2355/1000000 [00:34<3:40:25, 75.43it/s][A
  0%|▏                                                                        | 2366/1000000 [00:34<3:24:30, 81.30it/s][A
  0%|▏                                                                        | 2375/1000000 [00:34<3:57:59, 69.86it/s][A
  0%|▏          

  0%|▏                                                                        | 2980/1000000 [00:41<4:37:29, 59.88it/s][A
  0%|▏                                                                        | 2988/1000000 [00:41<4:21:21, 63.58it/s][A
  0%|▏                                                                        | 2996/1000000 [00:42<5:05:43, 54.35it/s][A
  0%|▏                                                                        | 3003/1000000 [00:42<6:11:24, 44.74it/s][A
  0%|▏                                                                        | 3009/1000000 [00:42<6:21:47, 43.52it/s][A
  0%|▏                                                                        | 3015/1000000 [00:42<5:55:54, 46.69it/s][A
  0%|▏                                                                        | 3026/1000000 [00:42<5:06:10, 54.27it/s][A
  0%|▏                                                                        | 3040/1000000 [00:42<4:11:46, 66.00it/s][A
  0%|▏          

  0%|▎                                                                        | 3545/1000000 [00:50<3:41:38, 74.93it/s][A
  0%|▎                                                                        | 3553/1000000 [00:50<3:46:34, 73.30it/s][A
  0%|▎                                                                        | 3562/1000000 [00:50<3:34:21, 77.47it/s][A
  0%|▎                                                                        | 3570/1000000 [00:50<4:06:34, 67.35it/s][A
  0%|▎                                                                        | 3579/1000000 [00:50<3:51:46, 71.65it/s][A
  0%|▎                                                                        | 3587/1000000 [00:51<4:41:57, 58.90it/s][A
  0%|▎                                                                        | 3594/1000000 [00:51<4:40:45, 59.15it/s][A
  0%|▎                                                                        | 3601/1000000 [00:51<4:33:08, 60.80it/s][A
  0%|▎          

  0%|▎                                                                        | 4097/1000000 [00:58<4:56:44, 55.94it/s][A
  0%|▎                                                                        | 4104/1000000 [00:58<4:43:37, 58.52it/s][A
  0%|▎                                                                        | 4113/1000000 [00:58<4:23:38, 62.96it/s][A
  0%|▎                                                                        | 4120/1000000 [00:59<4:38:54, 59.51it/s][A
  0%|▎                                                                        | 4127/1000000 [00:59<4:34:41, 60.42it/s][A
  0%|▎                                                                        | 4134/1000000 [00:59<5:31:44, 50.03it/s][A
  0%|▎                                                                        | 4140/1000000 [00:59<5:28:13, 50.57it/s][A
  0%|▎                                                                        | 4146/1000000 [00:59<5:30:29, 50.22it/s][A
  0%|▎          

  0%|▎                                                                        | 4621/1000000 [01:07<3:54:30, 70.74it/s][A
  0%|▎                                                                        | 4629/1000000 [01:07<3:54:52, 70.63it/s][A
  0%|▎                                                                        | 4637/1000000 [01:07<5:02:29, 54.84it/s][A
  0%|▎                                                                        | 4644/1000000 [01:07<5:23:03, 51.35it/s][A
  0%|▎                                                                        | 4650/1000000 [01:07<5:56:27, 46.54it/s][A
  0%|▎                                                                        | 4660/1000000 [01:07<5:03:37, 54.64it/s][A
  0%|▎                                                                        | 4673/1000000 [01:07<4:11:28, 65.97it/s][A
  0%|▎                                                                        | 4684/1000000 [01:08<3:42:57, 74.40it/s][A
  0%|▎          

  1%|▍                                                                        | 5149/1000000 [01:15<3:50:24, 71.96it/s][A
  1%|▍                                                                        | 5157/1000000 [01:15<3:48:54, 72.43it/s][A
  1%|▍                                                                        | 5169/1000000 [01:15<3:32:06, 78.17it/s][A
  1%|▍                                                                        | 5179/1000000 [01:16<3:19:34, 83.08it/s][A
  1%|▍                                                                        | 5188/1000000 [01:16<3:22:31, 81.87it/s][A
  1%|▍                                                                        | 5197/1000000 [01:16<3:48:35, 72.53it/s][A
  1%|▍                                                                        | 5210/1000000 [01:16<3:27:19, 79.97it/s][A
  1%|▍                                                                        | 5219/1000000 [01:16<3:58:00, 69.66it/s][A
  1%|▍          

  1%|▍                                                                        | 5718/1000000 [01:24<5:04:17, 54.46it/s][A
  1%|▍                                                                        | 5724/1000000 [01:24<5:19:03, 51.94it/s][A
  1%|▍                                                                        | 5730/1000000 [01:24<5:24:22, 51.09it/s][A
  1%|▍                                                                        | 5737/1000000 [01:24<5:17:56, 52.12it/s][A
  1%|▍                                                                        | 5743/1000000 [01:24<5:22:46, 51.34it/s][A
  1%|▍                                                                        | 5751/1000000 [01:24<5:11:37, 53.18it/s][A
  1%|▍                                                                        | 5757/1000000 [01:24<5:27:24, 50.61it/s][A
  1%|▍                                                                        | 5763/1000000 [01:25<5:33:52, 49.63it/s][A
  1%|▍          

  1%|▍                                                                        | 6267/1000000 [01:32<3:38:08, 75.92it/s][A
  1%|▍                                                                        | 6276/1000000 [01:32<4:11:15, 65.92it/s][A
  1%|▍                                                                        | 6288/1000000 [01:33<3:37:34, 76.12it/s][A
  1%|▍                                                                        | 6300/1000000 [01:33<3:21:01, 82.38it/s][A
  1%|▍                                                                        | 6310/1000000 [01:33<3:34:18, 77.28it/s][A
  1%|▍                                                                        | 6319/1000000 [01:33<4:06:22, 67.22it/s][A
  1%|▍                                                                        | 6327/1000000 [01:33<4:23:15, 62.91it/s][A
  1%|▍                                                                        | 6334/1000000 [01:33<4:29:55, 61.36it/s][A
  1%|▍          

  1%|▌                                                                       | 6981/1000000 [01:41<2:40:25, 103.16it/s][A
  1%|▌                                                                       | 6992/1000000 [01:41<2:40:28, 103.13it/s][A
  1%|▌                                                                       | 7003/1000000 [01:41<2:43:25, 101.27it/s][A
  1%|▌                                                                        | 7014/1000000 [01:41<3:30:58, 78.45it/s][A
  1%|▌                                                                        | 7028/1000000 [01:41<3:05:10, 89.38it/s][A
  1%|▌                                                                        | 7040/1000000 [01:41<2:54:11, 95.00it/s][A
  1%|▌                                                                        | 7051/1000000 [01:41<2:55:03, 94.54it/s][A
  1%|▌                                                                       | 7064/1000000 [01:41<2:41:45, 102.30it/s][A
  1%|▌          

  1%|▌                                                                       | 7702/1000000 [01:49<2:33:01, 108.08it/s][A
  1%|▌                                                                       | 7714/1000000 [01:49<2:45:15, 100.08it/s][A
  1%|▌                                                                        | 7725/1000000 [01:49<3:05:51, 88.98it/s][A
  1%|▌                                                                        | 7735/1000000 [01:49<3:08:58, 87.51it/s][A
  1%|▌                                                                        | 7745/1000000 [01:49<3:33:55, 77.31it/s][A
  1%|▌                                                                        | 7754/1000000 [01:49<3:36:49, 76.27it/s][A
  1%|▌                                                                        | 7764/1000000 [01:49<3:21:44, 81.97it/s][A
  1%|▌                                                                        | 7773/1000000 [01:50<3:27:11, 79.81it/s][A
  1%|▌          

  1%|▌                                                                        | 8328/1000000 [01:57<3:26:38, 79.98it/s][A
  1%|▌                                                                        | 8339/1000000 [01:57<3:21:17, 82.11it/s][A
  1%|▌                                                                        | 8348/1000000 [01:57<3:35:02, 76.86it/s][A
  1%|▌                                                                        | 8358/1000000 [01:57<3:26:53, 79.89it/s][A
  1%|▌                                                                        | 8369/1000000 [01:57<3:22:08, 81.76it/s][A
  1%|▌                                                                        | 8378/1000000 [01:58<3:26:18, 80.11it/s][A
  1%|▌                                                                        | 8387/1000000 [01:58<3:37:39, 75.93it/s][A
  1%|▌                                                                        | 8395/1000000 [01:58<3:42:23, 74.31it/s][A
  1%|▌          

  1%|▋                                                                        | 9053/1000000 [02:05<3:05:06, 89.22it/s][A
  1%|▋                                                                        | 9063/1000000 [02:05<3:13:19, 85.43it/s][A
  1%|▋                                                                        | 9073/1000000 [02:06<3:05:43, 88.92it/s][A
  1%|▋                                                                        | 9083/1000000 [02:06<3:31:18, 78.16it/s][A
  1%|▋                                                                        | 9093/1000000 [02:06<3:24:24, 80.80it/s][A
  1%|▋                                                                        | 9105/1000000 [02:06<3:13:07, 85.51it/s][A
  1%|▋                                                                        | 9114/1000000 [02:06<3:23:56, 80.98it/s][A
  1%|▋                                                                        | 9123/1000000 [02:06<3:20:20, 82.43it/s][A
  1%|▋          

  1%|▋                                                                        | 9671/1000000 [02:13<3:07:55, 87.83it/s][A
  1%|▋                                                                        | 9681/1000000 [02:14<3:18:42, 83.07it/s][A
  1%|▋                                                                        | 9693/1000000 [02:14<3:09:16, 87.20it/s][A
  1%|▋                                                                        | 9705/1000000 [02:14<3:00:13, 91.58it/s][A
  1%|▋                                                                        | 9715/1000000 [02:14<3:11:20, 86.26it/s][A
  1%|▋                                                                        | 9730/1000000 [02:14<2:47:08, 98.75it/s][A
  1%|▋                                                                        | 9741/1000000 [02:14<3:06:27, 88.52it/s][A
  1%|▋                                                                        | 9751/1000000 [02:14<3:36:38, 76.18it/s][A
  1%|▋          

  1%|▋                                                                       | 10350/1000000 [02:22<4:08:37, 66.34it/s][A
  1%|▋                                                                       | 10358/1000000 [02:22<4:20:24, 63.34it/s][A
  1%|▋                                                                       | 10368/1000000 [02:22<3:57:33, 69.43it/s][A
  1%|▋                                                                       | 10376/1000000 [02:22<4:10:11, 65.93it/s][A
  1%|▋                                                                       | 10391/1000000 [02:22<3:31:28, 77.99it/s][A
  1%|▋                                                                       | 10400/1000000 [02:22<4:08:02, 66.50it/s][A
  1%|▋                                                                       | 10408/1000000 [02:23<4:04:52, 67.35it/s][A
  1%|▊                                                                       | 10418/1000000 [02:23<3:48:55, 72.05it/s][A
  1%|▊          

  1%|▊                                                                       | 11079/1000000 [02:30<2:45:34, 99.54it/s][A
  1%|▊                                                                      | 11092/1000000 [02:30<2:35:44, 105.82it/s][A
  1%|▊                                                                      | 11104/1000000 [02:30<2:34:12, 106.87it/s][A
  1%|▊                                                                       | 11116/1000000 [02:30<2:51:39, 96.02it/s][A
  1%|▊                                                                       | 11127/1000000 [02:31<3:26:12, 79.92it/s][A
  1%|▊                                                                       | 11136/1000000 [02:31<3:46:31, 72.75it/s][A
  1%|▊                                                                       | 11144/1000000 [02:31<3:57:34, 69.37it/s][A
  1%|▊                                                                       | 11155/1000000 [02:31<3:33:59, 77.02it/s][A
  1%|▊          

  1%|▊                                                                       | 11724/1000000 [02:39<3:40:34, 74.67it/s][A
  1%|▊                                                                       | 11733/1000000 [02:39<4:03:33, 67.63it/s][A
  1%|▊                                                                       | 11741/1000000 [02:39<4:19:48, 63.39it/s][A
  1%|▊                                                                       | 11748/1000000 [02:39<4:28:27, 61.35it/s][A
  1%|▊                                                                       | 11761/1000000 [02:39<3:51:53, 71.03it/s][A
  1%|▊                                                                       | 11769/1000000 [02:39<3:59:51, 68.67it/s][A
  1%|▊                                                                       | 11777/1000000 [02:39<4:01:26, 68.21it/s][A
  1%|▊                                                                       | 11785/1000000 [02:40<4:07:14, 66.62it/s][A
  1%|▊          

  1%|▉                                                                       | 12375/1000000 [02:47<2:58:49, 92.04it/s][A
  1%|▉                                                                       | 12388/1000000 [02:47<2:49:59, 96.83it/s][A
  1%|▉                                                                       | 12398/1000000 [02:47<3:05:11, 88.88it/s][A
  1%|▉                                                                       | 12411/1000000 [02:48<2:48:23, 97.75it/s][A
  1%|▉                                                                       | 12422/1000000 [02:48<2:51:25, 96.01it/s][A
  1%|▉                                                                       | 12432/1000000 [02:48<3:23:48, 80.76it/s][A
  1%|▉                                                                       | 12441/1000000 [02:48<3:42:16, 74.05it/s][A
  1%|▉                                                                       | 12455/1000000 [02:48<3:11:28, 85.96it/s][A
  1%|▉          

  1%|▉                                                                       | 13087/1000000 [02:56<6:12:37, 44.14it/s][A
  1%|▉                                                                       | 13096/1000000 [02:56<5:18:06, 51.71it/s][A
  1%|▉                                                                       | 13104/1000000 [02:56<4:47:24, 57.23it/s][A
  1%|▉                                                                       | 13111/1000000 [02:56<4:44:19, 57.85it/s][A
  1%|▉                                                                       | 13118/1000000 [02:56<4:47:05, 57.29it/s][A
  1%|▉                                                                       | 13127/1000000 [02:56<4:19:03, 63.49it/s][A
  1%|▉                                                                       | 13137/1000000 [02:56<3:52:31, 70.73it/s][A
  1%|▉                                                                       | 13146/1000000 [02:57<3:40:43, 74.52it/s][A
  1%|▉          

  1%|▉                                                                      | 13771/1000000 [03:04<2:29:36, 109.87it/s][A
  1%|▉                                                                      | 13783/1000000 [03:04<2:31:43, 108.33it/s][A
  1%|▉                                                                      | 13795/1000000 [03:04<2:30:03, 109.53it/s][A
  1%|▉                                                                       | 13807/1000000 [03:04<3:28:35, 78.80it/s][A
  1%|▉                                                                       | 13817/1000000 [03:05<3:28:15, 78.92it/s][A
  1%|▉                                                                       | 13829/1000000 [03:05<3:17:27, 83.24it/s][A
  1%|▉                                                                       | 13839/1000000 [03:05<3:22:08, 81.31it/s][A
  1%|▉                                                                       | 13848/1000000 [03:05<3:33:37, 76.94it/s][A
  1%|▉          

  1%|█                                                                       | 14367/1000000 [03:13<4:13:34, 64.78it/s][A
  1%|█                                                                       | 14377/1000000 [03:13<4:01:52, 67.91it/s][A
  1%|█                                                                       | 14390/1000000 [03:13<3:32:36, 77.26it/s][A
  1%|█                                                                       | 14399/1000000 [03:13<4:02:52, 67.64it/s][A
  1%|█                                                                       | 14407/1000000 [03:13<3:59:48, 68.50it/s][A
  1%|█                                                                       | 14417/1000000 [03:13<3:37:30, 75.52it/s][A
  1%|█                                                                       | 14429/1000000 [03:14<3:19:46, 82.22it/s][A
  1%|█                                                                       | 14438/1000000 [03:14<3:42:20, 73.88it/s][A
  1%|█          

  2%|█                                                                       | 15121/1000000 [03:21<3:01:34, 90.41it/s][A
  2%|█                                                                       | 15131/1000000 [03:21<3:10:57, 85.96it/s][A
  2%|█                                                                       | 15143/1000000 [03:21<2:57:20, 92.56it/s][A
  2%|█                                                                       | 15156/1000000 [03:21<2:52:20, 95.24it/s][A
  2%|█                                                                      | 15172/1000000 [03:21<2:38:23, 103.63it/s][A
  2%|█                                                                       | 15183/1000000 [03:22<3:07:54, 87.35it/s][A
  2%|█                                                                       | 15193/1000000 [03:22<3:03:05, 89.64it/s][A
  2%|█                                                                       | 15206/1000000 [03:22<2:50:05, 96.49it/s][A
  2%|█          

  2%|█▏                                                                      | 15872/1000000 [03:29<3:26:10, 79.55it/s][A
  2%|█▏                                                                      | 15881/1000000 [03:29<3:48:17, 71.84it/s][A
  2%|█▏                                                                      | 15889/1000000 [03:30<4:19:47, 63.13it/s][A
  2%|█▏                                                                      | 15900/1000000 [03:30<3:51:49, 70.75it/s][A
  2%|█▏                                                                      | 15914/1000000 [03:30<3:21:52, 81.24it/s][A
  2%|█▏                                                                      | 15926/1000000 [03:30<3:12:02, 85.41it/s][A
  2%|█▏                                                                      | 15940/1000000 [03:30<2:54:23, 94.05it/s][A
  2%|█▏                                                                      | 15951/1000000 [03:30<3:01:23, 90.41it/s][A
  2%|█▏         

  2%|█▏                                                                      | 16627/1000000 [03:38<3:30:21, 77.91it/s][A
  2%|█▏                                                                      | 16638/1000000 [03:38<3:16:08, 83.56it/s][A
  2%|█▏                                                                      | 16654/1000000 [03:38<2:49:10, 96.88it/s][A
  2%|█▏                                                                      | 16665/1000000 [03:38<2:58:13, 91.95it/s][A
  2%|█▏                                                                      | 16675/1000000 [03:38<2:59:26, 91.33it/s][A
  2%|█▏                                                                      | 16686/1000000 [03:38<2:50:22, 96.19it/s][A
  2%|█▏                                                                      | 16698/1000000 [03:38<2:44:13, 99.80it/s][A
  2%|█▏                                                                     | 16709/1000000 [03:38<2:40:40, 101.99it/s][A
  2%|█▏         

  2%|█▏                                                                      | 17359/1000000 [03:46<3:39:56, 74.46it/s][A
  2%|█▎                                                                      | 17370/1000000 [03:46<3:24:27, 80.10it/s][A
  2%|█▎                                                                      | 17379/1000000 [03:46<3:38:15, 75.04it/s][A
  2%|█▎                                                                      | 17388/1000000 [03:46<3:31:11, 77.54it/s][A
  2%|█▎                                                                      | 17399/1000000 [03:46<3:23:38, 80.42it/s][A
  2%|█▎                                                                      | 17408/1000000 [03:47<3:27:31, 78.91it/s][A
  2%|█▎                                                                      | 17419/1000000 [03:47<3:10:08, 86.13it/s][A
  2%|█▎                                                                      | 17428/1000000 [03:47<3:17:13, 83.03it/s][A
  2%|█▎         

  2%|█▎                                                                      | 18081/1000000 [03:54<3:25:53, 79.49it/s][A
  2%|█▎                                                                      | 18093/1000000 [03:54<3:10:18, 85.99it/s][A
  2%|█▎                                                                      | 18103/1000000 [03:54<3:06:02, 87.96it/s][A
  2%|█▎                                                                     | 18123/1000000 [03:55<2:34:59, 105.58it/s][A
  2%|█▎                                                                     | 18138/1000000 [03:55<2:21:28, 115.67it/s][A
  2%|█▎                                                                     | 18152/1000000 [03:55<2:18:57, 117.76it/s][A
  2%|█▎                                                                      | 18165/1000000 [03:55<2:49:47, 96.38it/s][A
  2%|█▎                                                                     | 18177/1000000 [03:55<2:40:10, 102.16it/s][A
  2%|█▎         

  2%|█▎                                                                      | 18847/1000000 [04:02<3:02:54, 89.40it/s][A
  2%|█▎                                                                      | 18857/1000000 [04:03<2:58:15, 91.74it/s][A
  2%|█▎                                                                      | 18867/1000000 [04:03<3:18:39, 82.31it/s][A
  2%|█▎                                                                      | 18876/1000000 [04:03<3:25:17, 79.65it/s][A
  2%|█▎                                                                      | 18885/1000000 [04:03<3:25:57, 79.40it/s][A
  2%|█▎                                                                      | 18894/1000000 [04:03<4:07:26, 66.08it/s][A
  2%|█▎                                                                      | 18904/1000000 [04:03<4:02:41, 67.38it/s][A
  2%|█▎                                                                      | 18916/1000000 [04:03<3:31:42, 77.24it/s][A
  2%|█▎         

  2%|█▍                                                                      | 19607/1000000 [04:11<3:30:47, 77.52it/s][A
  2%|█▍                                                                      | 19621/1000000 [04:11<3:15:51, 83.43it/s][A
  2%|█▍                                                                      | 19630/1000000 [04:11<3:24:36, 79.86it/s][A
  2%|█▍                                                                      | 19639/1000000 [04:11<4:09:21, 65.53it/s][A
  2%|█▍                                                                      | 19647/1000000 [04:11<4:11:04, 65.08it/s][A
  2%|█▍                                                                      | 19661/1000000 [04:11<3:31:58, 77.08it/s][A
  2%|█▍                                                                      | 19670/1000000 [04:12<3:41:20, 73.82it/s][A
  2%|█▍                                                                      | 19682/1000000 [04:12<3:17:37, 82.68it/s][A
  2%|█▍         

  2%|█▍                                                                      | 20326/1000000 [04:19<3:58:39, 68.42it/s][A
  2%|█▍                                                                      | 20339/1000000 [04:19<3:33:00, 76.65it/s][A
  2%|█▍                                                                      | 20357/1000000 [04:20<3:00:23, 90.51it/s][A
  2%|█▍                                                                     | 20375/1000000 [04:20<2:35:56, 104.70it/s][A
  2%|█▍                                                                      | 20388/1000000 [04:20<2:44:55, 99.00it/s][A
  2%|█▍                                                                      | 20400/1000000 [04:20<3:07:25, 87.11it/s][A
  2%|█▍                                                                      | 20410/1000000 [04:20<3:03:48, 88.82it/s][A
  2%|█▍                                                                      | 20420/1000000 [04:20<3:04:00, 88.73it/s][A
  2%|█▍         

  2%|█▌                                                                      | 21003/1000000 [04:28<4:52:28, 55.79it/s][A
  2%|█▌                                                                      | 21010/1000000 [04:28<4:58:13, 54.71it/s][A
  2%|█▌                                                                      | 21017/1000000 [04:28<4:40:35, 58.15it/s][A
  2%|█▌                                                                      | 21024/1000000 [04:28<4:56:50, 54.97it/s][A
  2%|█▌                                                                      | 21030/1000000 [04:28<4:53:00, 55.69it/s][A
  2%|█▌                                                                      | 21036/1000000 [04:28<4:48:34, 56.54it/s][A
  2%|█▌                                                                      | 21044/1000000 [04:29<4:38:53, 58.50it/s][A
  2%|█▌                                                                      | 21054/1000000 [04:29<4:14:17, 64.16it/s][A
  2%|█▌         

  2%|█▌                                                                      | 21655/1000000 [04:36<3:17:07, 82.72it/s][A
  2%|█▌                                                                      | 21666/1000000 [04:36<3:24:30, 79.73it/s][A
  2%|█▌                                                                      | 21676/1000000 [04:36<3:19:02, 81.92it/s][A
  2%|█▌                                                                      | 21686/1000000 [04:36<3:54:27, 69.55it/s][A
  2%|█▌                                                                      | 21695/1000000 [04:37<3:57:38, 68.61it/s][A
  2%|█▌                                                                      | 21703/1000000 [04:37<4:01:40, 67.47it/s][A
  2%|█▌                                                                      | 21711/1000000 [04:37<3:55:38, 69.19it/s][A
  2%|█▌                                                                      | 21719/1000000 [04:37<3:56:13, 69.02it/s][A
  2%|█▌         

  2%|█▌                                                                      | 22328/1000000 [04:44<3:06:51, 87.20it/s][A
  2%|█▌                                                                      | 22337/1000000 [04:44<3:38:50, 74.45it/s][A
  2%|█▌                                                                      | 22346/1000000 [04:44<3:27:54, 78.37it/s][A
  2%|█▌                                                                      | 22358/1000000 [04:44<3:07:46, 86.77it/s][A
  2%|█▌                                                                      | 22371/1000000 [04:45<2:57:11, 91.95it/s][A
  2%|█▌                                                                      | 22381/1000000 [04:45<3:00:08, 90.45it/s][A
  2%|█▌                                                                      | 22391/1000000 [04:45<3:23:19, 80.14it/s][A
  2%|█▌                                                                      | 22400/1000000 [04:45<3:50:58, 70.54it/s][A
  2%|█▌         

  2%|█▋                                                                      | 23039/1000000 [04:52<3:26:34, 78.82it/s][A
  2%|█▋                                                                      | 23048/1000000 [04:52<3:21:26, 80.83it/s][A
  2%|█▋                                                                      | 23061/1000000 [04:53<3:05:23, 87.82it/s][A
  2%|█▋                                                                      | 23075/1000000 [04:53<2:46:39, 97.70it/s][A
  2%|█▋                                                                      | 23086/1000000 [04:53<2:57:46, 91.59it/s][A
  2%|█▋                                                                      | 23096/1000000 [04:53<2:56:34, 92.21it/s][A
  2%|█▋                                                                      | 23106/1000000 [04:53<3:28:21, 78.14it/s][A
  2%|█▋                                                                      | 23117/1000000 [04:53<3:14:07, 83.87it/s][A
  2%|█▋         

  2%|█▋                                                                      | 23627/1000000 [05:01<3:20:37, 81.11it/s][A
  2%|█▋                                                                      | 23636/1000000 [05:01<3:34:32, 75.85it/s][A
  2%|█▋                                                                      | 23645/1000000 [05:01<3:24:49, 79.45it/s][A
  2%|█▋                                                                      | 23654/1000000 [05:01<3:27:44, 78.33it/s][A
  2%|█▋                                                                      | 23663/1000000 [05:01<3:22:13, 80.47it/s][A
  2%|█▋                                                                      | 23674/1000000 [05:02<3:06:15, 87.36it/s][A
  2%|█▋                                                                      | 23683/1000000 [05:02<3:12:28, 84.54it/s][A
  2%|█▋                                                                      | 23694/1000000 [05:02<3:07:23, 86.83it/s][A
  2%|█▋         

  2%|█▊                                                                      | 24319/1000000 [05:09<3:38:50, 74.31it/s][A
  2%|█▊                                                                      | 24328/1000000 [05:09<3:29:40, 77.56it/s][A
  2%|█▊                                                                      | 24339/1000000 [05:09<3:15:27, 83.19it/s][A
  2%|█▊                                                                      | 24350/1000000 [05:09<3:10:35, 85.32it/s][A
  2%|█▊                                                                      | 24359/1000000 [05:10<3:30:13, 77.35it/s][A
  2%|█▊                                                                      | 24376/1000000 [05:10<3:02:44, 88.98it/s][A
  2%|█▊                                                                      | 24388/1000000 [05:10<2:51:48, 94.64it/s][A
  2%|█▊                                                                      | 24399/1000000 [05:10<3:19:43, 81.42it/s][A
  2%|█▊         

  2%|█▊                                                                      | 24882/1000000 [05:17<3:31:53, 76.70it/s][A
  2%|█▊                                                                      | 24891/1000000 [05:18<3:32:37, 76.44it/s][A
  2%|█▊                                                                      | 24900/1000000 [05:18<3:46:05, 71.88it/s][A
  2%|█▊                                                                      | 24908/1000000 [05:18<3:40:15, 73.78it/s][A
  2%|█▊                                                                      | 24916/1000000 [05:18<3:36:16, 75.14it/s][A
  2%|█▊                                                                      | 24924/1000000 [05:18<3:33:31, 76.11it/s][A
  2%|█▊                                                                      | 24934/1000000 [05:18<3:27:00, 78.50it/s][A
  2%|█▊                                                                      | 24948/1000000 [05:18<3:03:13, 88.69it/s][A
  2%|█▊         

  3%|█▊                                                                      | 25568/1000000 [05:25<3:42:29, 73.00it/s][A
  3%|█▊                                                                      | 25578/1000000 [05:25<3:25:03, 79.20it/s][A
  3%|█▊                                                                      | 25591/1000000 [05:26<3:04:06, 88.21it/s][A
  3%|█▊                                                                      | 25605/1000000 [05:26<2:46:52, 97.32it/s][A
  3%|█▊                                                                      | 25616/1000000 [05:26<2:51:35, 94.64it/s][A
  3%|█▊                                                                      | 25626/1000000 [05:26<3:25:08, 79.16it/s][A
  3%|█▊                                                                      | 25635/1000000 [05:26<3:22:26, 80.22it/s][A
  3%|█▊                                                                      | 25647/1000000 [05:26<3:13:56, 83.73it/s][A
  3%|█▊         

  3%|█▉                                                                      | 26269/1000000 [05:34<3:16:30, 82.58it/s][A
  3%|█▉                                                                      | 26281/1000000 [05:34<3:01:15, 89.53it/s][A
  3%|█▉                                                                      | 26295/1000000 [05:34<2:48:32, 96.28it/s][A
  3%|█▊                                                                     | 26312/1000000 [05:34<2:27:49, 109.77it/s][A
  3%|█▊                                                                     | 26324/1000000 [05:34<2:39:43, 101.60it/s][A
  3%|█▊                                                                     | 26338/1000000 [05:34<2:36:32, 103.66it/s][A
  3%|█▉                                                                      | 26349/1000000 [05:34<2:51:48, 94.45it/s][A
  3%|█▉                                                                      | 26361/1000000 [05:35<2:44:25, 98.69it/s][A
  3%|█▉         

  3%|█▉                                                                      | 27002/1000000 [05:42<3:06:27, 86.97it/s][A
  3%|█▉                                                                      | 27015/1000000 [05:42<2:56:30, 91.87it/s][A
  3%|█▉                                                                      | 27025/1000000 [05:42<2:56:45, 91.74it/s][A
  3%|█▉                                                                      | 27037/1000000 [05:42<2:44:33, 98.54it/s][A
  3%|█▉                                                                     | 27050/1000000 [05:42<2:37:01, 103.27it/s][A
  3%|█▉                                                                     | 27065/1000000 [05:42<2:27:57, 109.59it/s][A
  3%|█▉                                                                     | 27077/1000000 [05:43<2:32:20, 106.44it/s][A
  3%|█▉                                                                     | 27095/1000000 [05:43<2:14:07, 120.89it/s][A
  3%|█▉         

  3%|██                                                                      | 27830/1000000 [05:50<2:45:39, 97.81it/s][A
  3%|█▉                                                                     | 27842/1000000 [05:50<2:37:34, 102.83it/s][A
  3%|██                                                                      | 27853/1000000 [05:50<2:46:42, 97.19it/s][A
  3%|██                                                                      | 27864/1000000 [05:50<3:22:37, 79.96it/s][A
  3%|██                                                                      | 27873/1000000 [05:51<3:49:38, 70.55it/s][A
  3%|██                                                                      | 27882/1000000 [05:51<3:45:54, 71.72it/s][A
  3%|██                                                                      | 27898/1000000 [05:51<3:08:44, 85.84it/s][A
  3%|██                                                                      | 27912/1000000 [05:51<2:50:53, 94.80it/s][A
  3%|██         

  3%|██                                                                      | 28782/1000000 [05:58<3:06:59, 86.57it/s][A
  3%|██                                                                      | 28792/1000000 [05:58<3:03:11, 88.36it/s][A
  3%|██                                                                     | 28812/1000000 [05:59<2:34:30, 104.76it/s][A
  3%|██                                                                      | 28824/1000000 [05:59<3:12:06, 84.26it/s][A
  3%|██                                                                      | 28835/1000000 [05:59<3:05:06, 87.44it/s][A
  3%|██                                                                      | 28845/1000000 [05:59<3:32:53, 76.03it/s][A
  3%|██                                                                      | 28854/1000000 [05:59<3:37:38, 74.37it/s][A
  3%|██                                                                      | 28863/1000000 [05:59<4:05:10, 66.02it/s][A
  3%|██         

  3%|██▏                                                                     | 29625/1000000 [06:07<2:50:02, 95.11it/s][A
  3%|██▏                                                                     | 29636/1000000 [06:07<2:57:01, 91.36it/s][A
  3%|██▏                                                                     | 29647/1000000 [06:07<2:49:20, 95.50it/s][A
  3%|██▏                                                                     | 29657/1000000 [06:07<3:10:39, 84.82it/s][A
  3%|██▏                                                                     | 29671/1000000 [06:07<2:49:07, 95.62it/s][A
  3%|██▏                                                                     | 29682/1000000 [06:07<2:52:08, 93.95it/s][A
  3%|██▏                                                                     | 29692/1000000 [06:08<2:49:39, 95.32it/s][A
  3%|██▏                                                                     | 29703/1000000 [06:08<2:44:10, 98.51it/s][A
  3%|██         

  3%|██▏                                                                    | 30480/1000000 [06:15<2:27:28, 109.57it/s][A
  3%|██▏                                                                    | 30492/1000000 [06:15<2:25:07, 111.34it/s][A
  3%|██▏                                                                    | 30504/1000000 [06:15<2:28:31, 108.80it/s][A
  3%|██▏                                                                    | 30516/1000000 [06:16<2:26:40, 110.17it/s][A
  3%|██▏                                                                    | 30528/1000000 [06:16<2:34:02, 104.89it/s][A
  3%|██▏                                                                    | 30546/1000000 [06:16<2:15:49, 118.96it/s][A
  3%|██▏                                                                    | 30559/1000000 [06:16<2:32:24, 106.01it/s][A
  3%|██▏                                                                    | 30574/1000000 [06:16<2:22:13, 113.60it/s][A
  3%|██▏        

  3%|██▏                                                                    | 31448/1000000 [06:24<2:35:31, 103.79it/s][A
  3%|██▏                                                                    | 31477/1000000 [06:24<2:05:48, 128.31it/s][A
  3%|██▏                                                                    | 31495/1000000 [06:24<1:56:30, 138.55it/s][A
  3%|██▏                                                                    | 31513/1000000 [06:24<1:57:46, 137.06it/s][A
  3%|██▏                                                                    | 31532/1000000 [06:24<1:49:22, 147.57it/s][A
  3%|██▏                                                                    | 31549/1000000 [06:24<2:11:23, 122.84it/s][A
  3%|██▎                                                                     | 31564/1000000 [06:24<2:43:53, 98.48it/s][A
  3%|██▎                                                                     | 31576/1000000 [06:25<2:52:20, 93.66it/s][A
  3%|██▏        

  3%|██▎                                                                     | 32306/1000000 [06:32<3:04:29, 87.42it/s][A
  3%|██▎                                                                     | 32315/1000000 [06:32<3:05:38, 86.88it/s][A
  3%|██▎                                                                     | 32326/1000000 [06:32<2:55:53, 91.69it/s][A
  3%|██▎                                                                     | 32336/1000000 [06:32<3:30:34, 76.59it/s][A
  3%|██▎                                                                     | 32345/1000000 [06:32<3:39:14, 73.56it/s][A
  3%|██▎                                                                     | 32354/1000000 [06:32<3:28:09, 77.48it/s][A
  3%|██▎                                                                     | 32364/1000000 [06:33<3:21:41, 79.96it/s][A
  3%|██▎                                                                     | 32374/1000000 [06:33<3:10:23, 84.70it/s][A
  3%|██▎        

  3%|██▎                                                                    | 33252/1000000 [06:40<1:52:52, 142.74it/s][A
  3%|██▎                                                                    | 33268/1000000 [06:40<1:55:34, 139.41it/s][A
  3%|██▎                                                                    | 33289/1000000 [06:40<1:47:21, 150.07it/s][A
  3%|██▎                                                                    | 33305/1000000 [06:40<1:51:05, 145.02it/s][A
  3%|██▎                                                                    | 33321/1000000 [06:40<2:11:11, 122.80it/s][A
  3%|██▎                                                                    | 33335/1000000 [06:41<2:18:24, 116.40it/s][A
  3%|██▎                                                                    | 33348/1000000 [06:41<2:22:23, 113.14it/s][A
  3%|██▍                                                                     | 33360/1000000 [06:41<2:44:51, 97.72it/s][A
  3%|██▎        

  3%|██▍                                                                     | 34042/1000000 [06:48<2:58:14, 90.32it/s][A
  3%|██▍                                                                    | 34056/1000000 [06:48<2:40:25, 100.35it/s][A
  3%|██▍                                                                     | 34067/1000000 [06:48<2:44:47, 97.69it/s][A
  3%|██▍                                                                     | 34078/1000000 [06:48<2:43:26, 98.50it/s][A
  3%|██▍                                                                     | 34089/1000000 [06:49<2:52:11, 93.49it/s][A
  3%|██▍                                                                     | 34099/1000000 [06:49<2:56:32, 91.19it/s][A
  3%|██▍                                                                     | 34109/1000000 [06:49<3:06:15, 86.43it/s][A
  3%|██▍                                                                     | 34123/1000000 [06:49<2:46:31, 96.67it/s][A
  3%|██▍        

  3%|██▍                                                                     | 34701/1000000 [06:57<3:48:01, 70.55it/s][A
  3%|██▍                                                                     | 34709/1000000 [06:57<3:59:38, 67.13it/s][A
  3%|██▍                                                                     | 34717/1000000 [06:57<4:03:00, 66.21it/s][A
  3%|██▌                                                                     | 34726/1000000 [06:57<3:51:42, 69.43it/s][A
  3%|██▌                                                                     | 34734/1000000 [06:57<4:10:53, 64.12it/s][A
  3%|██▌                                                                     | 34741/1000000 [06:57<4:18:09, 62.32it/s][A
  3%|██▌                                                                     | 34750/1000000 [06:57<3:57:05, 67.85it/s][A
  3%|██▌                                                                     | 34764/1000000 [06:57<3:23:48, 78.93it/s][A
  3%|██▌        

  4%|██▌                                                                    | 36035/1000000 [07:05<1:50:28, 145.43it/s][A
  4%|██▌                                                                    | 36052/1000000 [07:05<1:53:58, 140.96it/s][A
  4%|██▌                                                                    | 36081/1000000 [07:05<1:40:17, 160.20it/s][A
  4%|██▌                                                                    | 36099/1000000 [07:06<2:07:36, 125.89it/s][A
  4%|██▌                                                                    | 36114/1000000 [07:06<2:15:58, 118.15it/s][A
  4%|██▌                                                                    | 36136/1000000 [07:06<1:58:40, 135.37it/s][A
  4%|██▌                                                                    | 36152/1000000 [07:06<1:55:40, 138.86it/s][A
  4%|██▌                                                                    | 36179/1000000 [07:06<1:39:07, 162.04it/s][A
  4%|██▌        

## Saving the statistics

In [None]:
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
joblib.dump(train_data,'ignore_simul1_traindata_'+timestamp+'.pkl')
joblib.dump(test_data,'ignore_simul1_testdata_'+timestamp+'.pkl')

## Analyzing the agent

We plot the variation in the number of states in the Q-matrix

In [None]:
plt.errorbar(range(n_episodes_training),np.mean(train_data[:,0,:],axis=0),yerr=np.std(train_data[:,0,:],axis=0))
plt.xlabel('episodes')
plt.ylabel('number of states')

In [None]:
plt.errorbar(range(n_episodes_test),np.mean(test_data[:,0,:],axis=0),yerr=np.std(test_data[:,0,:],axis=0))
plt.xlabel('episodes')
plt.ylabel('number of states')
plt.axhline(np.mean(test_data[:,0,:]),c='red',ls='--',label='mean')
plt.legend()