## Importing Libraries

We import the basic libraries and modules for our simulation.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter as SGfilter
from IPython.display import clear_output
import datetime
import joblib
from tqdm import tqdm

import const
import utilities as ut
import mockSQLenv as SQLenv
import agent as agn

## Setting up the model

We set up our environment.

In [2]:
# Build the mock SQL environment with verbose logging enabled.
# NOTE(review): flag_reward / query_reward are presumably the reward for
# retrieving the flag and the reward (cost) of each query - confirm in mockSQLenv.
env = SQLenv.mockSQLenv(
    verbose=True,
    flag_reward=10,
    query_reward=-1,
)

Game setup with a random query


We instantiate an agent and look at its starting Q-table.

In [3]:
# Create an agent over the action set in const.actions and configure its
# learning hyper-parameters.
agt = agn.Agent(const.actions)
agt.set_learning_options(
    exploration=0.1,
    learningrate=0.1,
    discount=0.9,
    max_step=1000,
)

# Inspect the untouched agent: its Q-table and its success/trial counters.
print(agt.Q)
print(agt.total_successes, "/", agt.total_trials)

{(): array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])}
0 / 0


## Running the model

We run an agent on our environment.

In [4]:
# Attach the agent to the environment created above and play one episode.
# The value returned by run_episode() is the last expression of the cell and
# is therefore displayed as the cell output (True in the recorded run).
agt.reset(env)
agt.run_episode()

Game reset (but not reinitialized with a new random query!)
Game reset
I received action 0: " and "1"="1
Query is syntactically wrong. I return -1
Server response is -1
I received action 2: " union select 1#
Query is syntactically wrong. I return -1
Server response is -1
I received action 0: " and "1"="1
Query is syntactically wrong. I return -1
Server response is -1
I received action 1: " and "1"="2
Query is syntactically wrong. I return -1
Server response is -1
I received action 0: " and "1"="1
Query is syntactically wrong. I return -1
Server response is -1
I received action 1: " and "1"="2
Query is syntactically wrong. I return -1
Server response is -1
I received action 2: " union select 1#
Query is syntactically wrong. I return -1
Server response is -1
I received action 3: " union select 1 limit 1 offset 1#
Query is syntactically wrong. I return -1
Server response is -1
I received action 0: " and "1"="1
Query is syntactically wrong. I return -1
Server response is -1
I received acti

I received action 19: ' union select 1#
Query is syntactically wrong. I return -1
Server response is -1
I received action 21: ' union select 1,2#
Query is syntactically wrong. I return -1
Server response is -1
I received action 22: ' union select 1,2 limit 1 offset 1#
Query is syntactically wrong. I return -1
Server response is -1
I received action 23: ' union select 1,2,3#
Query is syntactically wrong. I return -1
Server response is -1
I received action 24: ' union select 1,2,3 limit 1 offset 1#
Query is syntactically wrong. I return -1
Server response is -1
I received action 25: ' union select 1,2,3,4#
Query is syntactically wrong. I return -1
Server response is -1
I received action 26: ' union select 1,2,3,4 limit 1 offset 1#
Query is syntactically wrong. I return -1
Server response is -1
I received action 27: ' union select 1,2,3,4,5#
Query is syntactically wrong. I return -1
Server response is -1
I received action 28: ' union select 1,2,3,4,5 limit 1 offset 1#
Query is syntactical

True

We then take a look at some statistics of the agent: its Q-table, the number of its entries, the number of steps taken, and the amount of reward.

In [5]:
# Display the Q-table after the episode: a dict mapping state tuples to an
# array with one Q-value per action.
agt.Q

{(): array([0.89, 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ]),
 (-1,): array([1.  , 1.  , 0.89, 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ]),
 (-3,
  -1): array([0.89, 0.89, 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  ,
        1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1.  , 1. 

In [6]:
# Summarize the size of the Q-table.
# NOTE(review): judging by the recorded output, getdictshape returns
# (number of entries, shape of each entry) - confirm in utilities.
print(ut.getdictshape(agt.Q))

(52, (51,))


In [7]:
# Number of steps the agent took in the episode it just ran.
print(agt.steps)

769


In [8]:
# Amount of reward the agent collected in the episode it just ran.
print(agt.rewards)

-758


## Training the agent

We train the agent over $10^6$ episodes.

In [None]:
# Number of training episodes.
nepisodes = 10**6

# Start from a fresh, non-verbose agent.
agt = agn.Agent(const.actions, verbose=False)
agt.set_learning_options(
    exploration=0.02,
    learningrate=0.1,
    discount=0.9,
    max_step=1000,
)

# Per-episode training history, consumed by the analysis cells further down.
steps = []
rewards = []
states = []

for _ in tqdm(range(nepisodes)):
    # A new environment instance is created for every episode.
    # NOTE(review): no flag_reward / query_reward are passed here, so the
    # mockSQLenv defaults apply - confirm they match the intended rewards.
    env = SQLenv.mockSQLenv(verbose=False)

    agt.reset(env)
    agt.run_episode()

    steps.append(agt.steps)
    rewards.append(agt.rewards)
    # The first element of getdictshape(Q) is recorded as the number of states.
    states.append(ut.getdictshape(agt.Q)[0])

  1%|▋                                                                        | 8742/1000000 [03:05<7:40:02, 35.91it/s]

We save its Q-matrix.

In [None]:
# Persist the learned Q-table under a file name made unique by a
# microsecond-resolution timestamp.
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
qtable_path = 'ignore_simul1_' + timestamp + '.pkl'
joblib.dump(agt.Q, qtable_path)

## Analyzing the agent

We plot the variation in the number of states in the Q-matrix

In [None]:
# Number of states recorded in the Q-table after each training episode.
fig, ax = plt.subplots()
ax.plot(states)
ax.set_xlabel('episodes')
ax.set_ylabel('number of states')

We plot the number of steps per episode (once averaged over blocks of 100 consecutive episodes, and once smoothed with a Savitzky–Golay filter).

In [None]:
# Mean number of steps over non-overlapping blocks of 100 episodes.
# Trailing episodes that do not fill a whole block are dropped, so the reshape
# also works when training stopped at an episode count that is not a multiple
# of 100 (the original reshape(-1, 100) raised ValueError in that case).
block = 100
steps_arr = np.asarray(steps)
n_full = (len(steps_arr) // block) * block
plt.plot(steps_arr[:n_full].reshape(-1, block).mean(axis=1))
# Each point on the x-axis is one block of 100 episodes, not a single episode.
plt.xlabel('episodes (blocks of 100)')
plt.ylabel('number of steps')

In [None]:
# Per-episode step counts smoothed with a Savitzky-Golay filter
# (window of 101 samples, polynomial of order 3).
smoothed_steps = SGfilter(steps, window_length=101, polyorder=3)
plt.plot(smoothed_steps)
plt.xlabel('episodes')
plt.ylabel('number of steps')

We plot the amount of reward per episode (averaged over 100 consecutive episodes)

In [None]:
# Mean reward over non-overlapping blocks of 100 episodes.
# Trailing episodes that do not fill a whole block are dropped, so the reshape
# also works when training stopped at an episode count that is not a multiple
# of 100 (the original reshape(-1, 100) raised ValueError in that case).
block = 100
rewards_arr = np.asarray(rewards)
n_full = (len(rewards_arr) // block) * block
plt.plot(rewards_arr[:n_full].reshape(-1, block).mean(axis=1))
# Each point on the x-axis is one block of 100 episodes, not a single episode.
plt.xlabel('episodes (blocks of 100)')
plt.ylabel('reward')

We now run the agent in a test setting for a further $100$ episodes, making its policy deterministic by setting the exploration parameter $\epsilon$ to 0.

In [None]:
# Number of test episodes.
nepisodes = 100

# Switch exploration off for the test run.
# NOTE(review): only `exploration` is passed; whether the other learning
# options keep their previous values depends on set_learning_options - confirm.
agt.set_learning_options(exploration=0.0)

# Per-episode test history (prefixed with T to keep the training history intact).
Tsteps = []
Trewards = []
Tstates = []

for _ in tqdm(range(nepisodes)):
    # A new environment instance is created for every episode.
    env = SQLenv.mockSQLenv(verbose=False)

    agt.reset(env)
    agt.run_episode()

    Tsteps.append(agt.steps)
    Trewards.append(agt.rewards)
    # The first element of getdictshape(Q) is recorded as the number of states.
    Tstates.append(ut.getdictshape(agt.Q)[0])

We plot the number of steps taken and their average.

In [None]:
# Steps taken in each test episode; the dashed red line marks their mean.
mean_steps = np.average(Tsteps)
plt.plot(Tsteps)
plt.axhline(mean_steps, c='red', ls='--', label='mean')
plt.xlabel('episodes')
plt.ylabel('number of steps')
plt.legend()

We plot the rewards and their average.

In [None]:
# Reward collected in each test episode; the dashed red line marks the mean.
# The axis labels are corrected: the x-axis indexes test episodes and the
# y-axis shows reward (the original labelled them 'rewards' and
# 'number of steps').
plt.plot(Trewards)
plt.xlabel('episodes')
plt.ylabel('reward')
plt.axhline(np.average(Trewards), c='red', ls='--', label='mean')
plt.legend()

In [None]:
# Q-values of every action for the state keyed by the empty tuple (the only
# state present in a freshly created agent's Q-table).
# Bar positions are derived from the plotted array itself; the original sized
# them from the unrelated entry Q[(1,)], which raises KeyError if that state
# was never visited.
q_values = agt.Q[()]
plt.bar(np.arange(len(q_values)), q_values)
plt.xlabel('actions')
plt.ylabel('Q-value')

In [None]:
# Q-values of every action for the state keyed (-1,).
q_values = agt.Q[(-1,)]
action_index = np.arange(len(q_values))
plt.bar(action_index, q_values)
plt.xlabel('actions')
plt.ylabel('Q-value')

In [None]:
# Q-values of every action for the state keyed (1,).
q_values = agt.Q[(1,)]
action_index = np.arange(len(q_values))
plt.bar(action_index, q_values)
plt.xlabel('actions')
plt.ylabel('Q-value')

In [None]:
# Q-values of every action for the state keyed (-12, 1).
# Bar positions are derived from the plotted array itself; the original sized
# them from the unrelated entry Q[(1,)], which raises KeyError if that state
# was never visited.
q_values = agt.Q[(-12, 1)]
plt.bar(np.arange(len(q_values)), q_values)
plt.xlabel('actions')
plt.ylabel('Q-value')

In [None]:
# Q-values of every action for the state keyed (-12, -1).
# Bar positions are derived from the plotted array itself; the original sized
# them from the unrelated entry Q[(1,)], which raises KeyError if that state
# was never visited.
q_values = agt.Q[(-12, -1)]
plt.bar(np.arange(len(q_values)), q_values)
plt.xlabel('actions')
plt.ylabel('Q-value')

In [None]:
# Q-values of every action for the state keyed (-18, -1).
# Bar positions are derived from the plotted array itself; the original sized
# them from the unrelated entry Q[(1,)], which raises KeyError if that state
# was never visited.
q_values = agt.Q[(-18, -1)]
plt.bar(np.arange(len(q_values)), q_values)
plt.xlabel('actions')
plt.ylabel('Q-value')

In [None]:
# Q-values of every action for the state keyed (-35, -1).
# Bar positions are derived from the plotted array itself; the original sized
# them from the unrelated entry Q[(1,)], which raises KeyError if that state
# was never visited.
q_values = agt.Q[(-35, -1)]
plt.bar(np.arange(len(q_values)), q_values)
plt.xlabel('actions')
plt.ylabel('Q-value')