# 0. Install Dependencies

In [1]:
!pip install tensorflow==2.3.0
!pip install gym
!pip install keras
!pip install keras-rl2

Collecting tensorflow==2.3.0
  Downloading tensorflow-2.3.0-cp37-cp37m-manylinux2010_x86_64.whl (320.4 MB)
[K     |████████████████████████████████| 320.4 MB 43 kB/s 
[?25hCollecting h5py<2.11.0,>=2.10.0
  Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 46.6 MB/s 
Collecting gast==0.3.3
  Downloading gast-0.3.3-py2.py3-none-any.whl (9.7 kB)
Collecting numpy<1.19.0,>=1.16.0
  Downloading numpy-1.18.5-cp37-cp37m-manylinux1_x86_64.whl (20.1 MB)
[K     |████████████████████████████████| 20.1 MB 1.2 MB/s 
Collecting tensorflow-estimator<2.4.0,>=2.3.0
  Downloading tensorflow_estimator-2.3.0-py2.py3-none-any.whl (459 kB)
[K     |████████████████████████████████| 459 kB 49.0 MB/s 
Installing collected packages: numpy, tensorflow-estimator, h5py, gast, tensorflow
  Attempting uninstall: numpy
    Found existing installation: numpy 1.21.5
    Uninstalling numpy-1.21.5:
      Successfully uninstalled numpy-1.21.5
  Attemptin

Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[K     |████████████████████████████████| 52 kB 855 kB/s 
Installing collected packages: keras-rl2
Successfully installed keras-rl2-1.0.5


# 1. Test Random Environment with OpenAI Gym

In [2]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import math
import random
import re

In [3]:
sample_string = "2x-11y+3sin(x)-12"

# Give the first term an explicit sign so that every term starts with '+' or '-',
# which is what the term-splitting regex below relies on.
if not sample_string.startswith(('+', '-')):
    sample_string = '+' + sample_string

print(sample_string)

# Raw strings keep the backslashes for the regex engine (a plain '\w' / '\+' is an
# invalid string escape), and '[-+]' matches only a sign: inside a character class
# '|' is a literal pipe, not an alternation.
print(re.findall(r'[-+][\w()]+', sample_string))

# First run of digits in a term is its coefficient.
print(re.search(r'\d+', '-11y').group())


+2x-11y+3sin(x)-12
['+2x', '-11y', '+3sin(x)', '-12']
11


In [4]:
# Split every signed term of sample_string into sign, coefficient and variable part
# and print them as an aligned table.
print("{: >10}     ==> {: >8} {: >8} {: >10}".format('element', 'sign', 'coeff', 'variable'))
# Raw-string patterns: '[-+]' matches the sign only ('|' inside a character class
# would be a literal pipe), '\d+' is the coefficient.
for element in re.findall(r'[-+][\w()]+', sample_string):
    coeff = re.search(r'\d+', element).group()
    # partition() splits at the first occurrence only, so the unpacking stays valid
    # even if the same digits appear again later in the term.
    sign, _, variable = element.partition(coeff)
    print("{: >10}     ==> {: >8} {: >8} {: >10}".format(element, sign, coeff, variable))

   element     ==>     sign    coeff   variable
       +2x     ==>        +        2          x
      -11y     ==>        -       11          y
  +3sin(x)     ==>        +        3     sin(x)
       -12     ==>        -       12           


In [5]:
var_to_index = {
    '': 0,
    '(': 1,
    ')': 2,
    '+' : 3,
    '-' : 4,
    '+x': 5,
    '-x': 6,
    '+y': 7,
    '-y': 8,
    '+xx': 9,
    '-xx': 10,
    '+yy': 11,
    '-yy': 12,
    '+sin(x)': 13,
    '-sin(x)': 14
}

index_to_var = {
    0: '',
    1: '(',
    2: ')',
    3: '+',
    4: '-',
    5: '+x',
    6: '-x',
    7: '+y',
    8: '-y',
    9: '+xx',
    10: '-xx',
    11: '+yy',
    12: '-yy',
    13: '+sin(x)',
    14: '-sin(x)'
}

# add two elements between every element (to add zeros) 
def intersperse(lst, item):
    """Return a new list in which every element of ``lst`` is followed by ``item`` twice.

    The very last filler is dropped, so the result has ``3 * len(lst) - 1`` entries
    (and is empty for an empty ``lst``).
    """
    spaced = []
    for element in lst:
        spaced.extend((element, item, item))
    # Drop the final filler: the last element keeps only one trailing ``item``.
    return spaced[:-1]

def equation_to_state(equation: str, state_length: int = 100):
    """Encode an equation string as a padded list of token ids.

    Every signed term (e.g. ``'-11y'``) is expanded into as many copies of its unit
    token (``'-y'``) as its coefficient says. The tokens are shuffled, two blank
    slots are inserted after each token via ``intersperse`` and the list is padded
    with the blank token up to ``state_length``.

    Args:
        equation: terms written as sign, coefficient, variable, e.g.
            ``'+2x-11y+3sin(x)-12'``. The first term needs an explicit sign. A term
            without digits (``'+x'``) is treated as having coefficient 1.
        state_length: minimum length of the returned list. If the encoded equation
            is already longer, nothing is padded and the result is longer than this.

    Returns:
        list of int token ids from ``var_to_index``.

    Raises:
        KeyError: if a term's sign + variable is not a key of ``var_to_index``.
    """
    state = []
    # Raw string: '\w' in a plain literal is an invalid string escape. '[-+]' matches
    # the sign only; '|' inside a character class would be a literal pipe.
    for element in re.findall(r'[-+][\w()]+', equation):
        digits = re.search(r'\d+', element)
        if digits is None:
            # No explicit coefficient: read '+x' as '+1x' instead of failing on .group().
            sign, variable, count = element[0], element[1:], 1
        else:
            coeff = digits.group()
            # Split at the first occurrence of the coefficient only.
            sign, _, variable = element.partition(coeff)
            count = int(coeff)
        for _ in range(count):
            state.append(var_to_index[sign + variable])

    random.shuffle(state)
    state = intersperse(state, 0)
    # A negative repeat count yields an empty list, so over-long states are left as is.
    state.extend([var_to_index['']] * (state_length - len(state)))

    return state


# Encode the sample equation and print the resulting padded state list.
print(sample_string)
new_state = equation_to_state(sample_string)
print(new_state)

+2x-11y+3sin(x)-12
[4, 0, 0, 8, 0, 0, 4, 0, 0, 8, 0, 0, 8, 0, 0, 4, 0, 0, 8, 0, 0, 8, 0, 0, 8, 0, 0, 8, 0, 0, 4, 0, 0, 4, 0, 0, 4, 0, 0, 4, 0, 0, 4, 0, 0, 13, 0, 0, 8, 0, 0, 13, 0, 0, 8, 0, 0, 4, 0, 0, 8, 0, 0, 4, 0, 0, 13, 0, 0, 8, 0, 0, 5, 0, 0, 5, 0, 0, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [6]:
def random_equation():
    """Return a random equation string such as ``'-2+4x-4xx-6y-3yy+3sin(x)'``.

    One integer coefficient is drawn per term (constant, x, xx, y, yy, sin(x));
    terms whose coefficient is 0 are left out, positive ones get an explicit '+'.
    The result is an empty string when every coefficient is 0.
    """
    variables = ['', 'x', 'xx', 'y', 'yy', 'sin(x)']
    # NOTE(review): range(-6, 6) yields -6..5, so +6 is never drawn -- confirm intended.
    drawn = random.choices(range(-6, 6), k=len(variables))
    terms = []
    for coefficient, variable in zip(drawn, variables):
        if coefficient == 0:
            continue
        # str() of a negative number already carries its '-' sign.
        prefix = '+' if coefficient > 0 else ''
        terms.append(prefix + str(coefficient) + variable)
    return ''.join(terms)

# Print a handful of random equations to inspect the generator's output format.
for _ in range(6):
    print(random_equation())

+1-1xx+2y-4yy+1sin(x)
-2+4x-4xx-6y-3yy+3sin(x)
+3-4x-2xx+4y+4yy+1sin(x)
-4-2y+4yy+1sin(x)
-5-4x-2xx+5y+1yy+1sin(x)
+4-5x-1xx+5y+4yy+5sin(x)


In [7]:
# Draw one random equation and encode it; rand_eq and eq_state are reused by the
# state_to_equation demo further down.
rand_eq = random_equation()
print(rand_eq)
eq_state = equation_to_state(rand_eq)
print(eq_state)

-6+2x+1xx-2y+4yy
[8, 0, 0, 5, 0, 0, 4, 0, 0, 11, 0, 0, 11, 0, 0, 4, 0, 0, 5, 0, 0, 4, 0, 0, 4, 0, 0, 8, 0, 0, 4, 0, 0, 9, 0, 0, 11, 0, 0, 11, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [9]:
def state_to_equation(state):
    """Summarise a state as a string of ``<token><count>`` pairs.

    Blank (0) and bracket (1, 2) tokens are ignored. Every other token id is counted
    and the tokens that occur are written in ascending id order, each followed by
    its count, e.g. ``'-6+x2-y2+xx1+yy4'``.

    Raises:
        KeyError: if ``state`` contains an id outside 0..14.
    """
    occurrences = dict.fromkeys(range(15), 0)
    for token in state:
        if token in (0, 1, 2):
            continue
        occurrences[token] += 1

    return ''.join(
        index_to_var[token] + str(count)
        for token, count in occurrences.items()
        if count > 0
    )

# Show the original equation next to the summary recovered from its encoded state.
# The summary writes each token followed by its count (e.g. '+x2'), so the two
# strings are not textually identical.
print(rand_eq)
state_to_equation(eq_state)

-6+2x+1xx-2y+4yy


'-6+x2-y2+xx1+yy4'

In [41]:
class MathEquationEnv(Env):
    """Gym environment in which the agent writes brackets into an encoded equation.

    The state is a list of ``STATE_LENGTH`` token ids taken from the module-level
    ``var_to_index`` table. Every term token is followed by two initially blank
    slots; an action writes an opening bracket into the first of these slots or a
    closing bracket into the second one (see :meth:`step`).

    Attributes:
        action_space: ``Discrete(66)``.
        observation_space: ``Discrete(100)``.
        state: current list of token ids (mutated in place by :meth:`step`).
        generator_length: number of steps left in the current episode.
        brackets_number: highest number of closed bracket pairs rewarded so far in
            the current episode.
    """

    STATE_LENGTH = 100   # number of token slots in every observation
    EPISODE_LENGTH = 60  # number of steps before an episode is reported as done

    def __init__(self):
        self.action_space = Discrete(66)
        # NOTE(review): reset()/step() return a list of STATE_LENGTH token ids, not a
        # single integer as Discrete declares. Left unchanged because other cells
        # read `observation_space.n` to size the network.
        self.observation_space = Discrete(self.STATE_LENGTH)
        self.state = self.random_state()

        self.generator_length = self.EPISODE_LENGTH

        self.brackets_number = 0

    def random_equation(self):
        """Return a random equation string such as ``'-4-2y+4yy+1sin(x)'``.

        One coefficient is drawn per term (constant, x, xx, y, yy, sin(x)); terms
        with coefficient 0 are omitted and positive ones get an explicit '+'.
        """
        variables = ['', 'x', 'xx', 'y', 'yy', 'sin(x)']
        # NOTE(review): range(-6, 6) yields -6..5, so +6 is never drawn -- confirm intended.
        coeffs = random.choices(range(-6, 6), k=len(variables))
        random_eq = ''
        for coeff, variable in zip(coeffs, variables):
            if coeff > 0:
                random_eq += '+' + str(coeff) + variable
            elif coeff < 0:
                # str() of a negative number already carries the '-' sign.
                random_eq += str(coeff) + variable
        return random_eq

    def intersperse(self, lst, item):
        """Return ``lst`` with two copies of ``item`` after each element except the
        last one, which gets a single copy (``3 * len(lst) - 1`` entries in total)."""
        result = [item] * (len(lst) * 3 - 1)
        result[0::3] = lst
        return result

    def equation_to_state(self, equation: str):
        """Encode an equation string as a list of token ids padded to ``STATE_LENGTH``.

        Each signed term is expanded into ``coefficient`` copies of its unit token,
        the tokens are shuffled, spaced out with :meth:`intersperse` and padded with
        the blank token. A term without digits (``'+x'``) counts as coefficient 1.
        If the equation has too many terms the result is longer than ``STATE_LENGTH``.

        Raises:
            KeyError: if a term's sign + variable is not a key of ``var_to_index``.
        """
        state = []
        # Raw string: '\w' in a plain literal is an invalid string escape. '[-+]'
        # matches the sign only; '|' inside a character class is a literal pipe.
        for element in re.findall(r'[-+][\w()]+', equation):
            digits = re.search(r'\d+', element)
            if digits is None:
                # No explicit coefficient: read '+x' as '+1x'.
                sign, variable, count = element[0], element[1:], 1
            else:
                coeff = digits.group()
                sign, _, variable = element.partition(coeff)
                count = int(coeff)
            for _ in range(count):
                state.append(var_to_index[sign + variable])
        random.shuffle(state)
        state = self.intersperse(state, 0)
        # A negative repeat count yields an empty list, so nothing is cut off here.
        state.extend([var_to_index['']] * (self.STATE_LENGTH - len(state)))
        return state

    def random_state(self):
        """Return the encoded state of a fresh random equation.

        An equation with 34 or more unit terms encodes to more than ``STATE_LENGTH``
        tokens, which would not match the fixed observation size; such draws are
        discarded and a new equation is drawn.
        """
        while True:
            state = self.equation_to_state(self.random_equation())
            if len(state) == self.STATE_LENGTH:
                return state

    def state_to_equation(self, state):
        """Summarise ``state`` as ``<token><count>`` pairs, e.g. ``'-6+x2-y2+xx1+yy4'``.

        Blank and bracket tokens (ids 0, 1, 2) are ignored; the remaining token ids
        are counted and written in ascending id order, each followed by its count.
        """
        state_counter = dict.fromkeys(range(15), 0)
        for token in state:
            if token not in (0, 1, 2):
                state_counter[token] += 1
        return ''.join(
            index_to_var[token] + str(count)
            for token, count in state_counter.items()
            if count > 0
        )

    def evaluate_brackets(self, state):
        """Score the bracket tokens of ``state``.

        Returns:
            -1 if an opening bracket (1) appears while another one is still open, or
            a closing bracket (2) appears with none open; 10 if the number of closed
            pairs exceeds ``self.brackets_number`` (which is then updated); else 0.
        """
        is_open = False
        brackets = 0
        for token in state:
            if token == 1:
                if is_open:
                    return -1
                is_open = True
            elif token == 2:
                if not is_open:
                    return -1
                is_open = False
                brackets += 1

        if brackets > self.brackets_number:
            self.brackets_number = brackets
            return 10

        return 0

    def step(self, action):
        """Write one bracket into the state and return ``(state, reward, done, info)``.

        Even action ``2k`` writes '(' (token 1) at index ``3k + 1``; odd action
        ``2k + 1`` writes ')' (token 2) at index ``3k + 2``. The reward comes from
        :meth:`evaluate_brackets`. When the step budget is used up the final state
        is printed and ``done`` is True.
        """
        self.generator_length -= 1

        slot = action + action // 2 + 1
        self.state[slot] = 1 if action % 2 == 0 else 2

        reward = self.evaluate_brackets(self.state)

        done = self.generator_length <= 0
        if done:
            print("Final state:")
            print(self.state_to_equation(self.state))
            print(self.state)
            print()

        return self.state, reward, done, {}

    def render(self, mode='human'):
        """No visualisation yet. ``mode`` is accepted so that callers which pass
        ``render(mode=...)`` do not fail with a TypeError."""
        # Implement viz
        pass

    def reset(self):
        """Start a new episode with a fresh random equation and return its state."""
        self.state = self.random_state()
        print("Initial state:")
        print(self.state_to_equation(self.state))
        self.generator_length = self.EPISODE_LENGTH
        # Reset the per-episode bracket record; otherwise the +10 reward threshold
        # reached in earlier episodes would carry over into this one.
        self.brackets_number = 0
        return self.state

In [42]:
# Create the environment. __init__ takes no arguments; it builds its own random state.
env = MathEquationEnv()
# NOTE(review): the line below is an older call form that the current __init__ does not
# accept. The original note attached to it read: "Do not make additional spaces in the
# () brackets just the variables".
#env = MathEquationEnv("x + y + z", 3)

In [59]:
# Draw one sample from the declared observation space (Discrete(100), so a single integer).
env.observation_space.sample()

6

In [15]:
# Sanity check: run a few episodes with randomly sampled actions and report the
# summed reward of each episode.
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    # score accumulates the per-step rewards returned by env.step
    score = 0 
    
    while not done:
        #env.render()
        #print('hmm')
        action = env.action_space.sample()
        
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

Initial state:
+2+y3-xx1-yy2-sin(x)3
Final state:
+2+y3-xx1-yy2-sin(x)3
[14, 1, 0, 14, 0, 0, 7, 0, 2, 14, 1, 2, 12, 1, 2, 12, 1, 2, 7, 1, 2, 10, 1, 0, 3, 1, 2, 7, 1, 2, 3, 0, 2, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 2, 0, 1, 2, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 1, 2, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0]

Episode:1 Score:-49
Initial state:
+3+x4+y3+xx3+sin(x)4
Final state:
+3+x4+y3+xx3+sin(x)4
[5, 0, 2, 3, 1, 2, 5, 1, 0, 13, 1, 0, 7, 0, 0, 5, 1, 2, 13, 0, 0, 3, 1, 0, 9, 0, 0, 9, 1, 0, 13, 1, 2, 5, 1, 0, 13, 1, 2, 9, 1, 2, 7, 0, 2, 3, 0, 2, 7, 1, 2, 0, 1, 2, 0, 0, 2, 0, 0, 2, 0, 1, 0, 0, 1, 2, 0, 0, 0, 0, 1, 2, 0, 0, 2, 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 1, 0, 0, 1, 2, 0, 1, 2, 0, 0, 2, 0, 1, 0, 0]

Episode:2 Score:-48
Initial state:
-6+x5+y1-xx2+yy1+sin(x)5
Final state:
-6+x5+y1-xx2+yy1+sin(x)5
[10, 1, 2, 13, 1, 2, 4, 1, 2, 13, 0, 0, 4, 0, 0, 11, 0, 2, 13, 1, 2, 5, 0, 2, 10, 0, 2, 5, 1, 0, 5, 0, 2, 13, 1, 0, 4, 0, 0, 7

# 2. Create a Deep Learning Model with Keras

In [16]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [17]:
# Size of the Discrete observation space declared in MathEquationEnv.__init__.
env.observation_space.n

100

In [18]:
# `states` is the observation space's shape attribute; for this Discrete space it prints as ().
states = env.observation_space.shape
print(states)
# Number of discrete actions of the environment.
actions = env.action_space.n

()


In [19]:
# Display the environment's action space.
env.action_space

Discrete(66)

In [20]:
def build_model(observations, actions):
    """Build the fully connected network used as the agent's model.

    Args:
        observations: width of the first Dense layer and size of the last input axis;
            the declared input shape is ``(1, observations)``.
        actions: number of units of the final linear layer.

    Returns:
        An uncompiled ``Sequential`` model whose output is flattened after the last
        Dense layer.
    """
    layers = [
        Dense(observations, activation='relu', input_shape=(1, observations)),
        Dense(48, activation='relu'),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
        # Flatten last, after the final Dense layer.
        Flatten(),
    ]
    return Sequential(layers)

In [43]:
# Discard a model left over from an earlier run so the following cells build a new one.
# Guarded because a bare `del model` raises NameError on a fresh kernel, where no
# `model` has been defined yet.
if 'model' in globals():
    del model

In [44]:
def quick_building(env):
    """Build a model sized from ``env``: ``observation_space.n`` inputs and
    ``action_space.n`` outputs, via ``build_model``."""
    return build_model(env.observation_space.n, env.action_space.n)

In [45]:
# Create a fresh environment and a network sized from its observation and action spaces.
env = MathEquationEnv()
model = quick_building(env)

In [46]:
# Check the input shape the network was built with.
model.input_shape

(None, 1, 100)

In [47]:
# Print the layer-by-layer summary of the network.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 1, 100)            10100     
_________________________________________________________________
dense_9 (Dense)              (None, 1, 48)             4848      
_________________________________________________________________
dense_10 (Dense)             (None, 1, 24)             1176      
_________________________________________________________________
dense_11 (Dense)             (None, 1, 66)             1650      
_________________________________________________________________
flatten_2 (Flatten)          (None, 66)                0         
Total params: 17,774
Trainable params: 17,774
Non-trainable params: 0
_________________________________________________________________


# 3. Build Agent with Keras-RL

In [48]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [49]:
def build_agent(model, actions):
    """Create a (not yet compiled) ``DQNAgent`` around ``model``.

    Args:
        model: the network passed to the agent as ``model``.
        actions: number of actions, passed to the agent as ``nb_actions``.

    Returns:
        A ``DQNAgent`` using a ``BoltzmannQPolicy`` and a ``SequentialMemory`` with
        limit 50000 and window length 1.
    """
    agent_kwargs = dict(
        model=model,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )
    return DQNAgent(**agent_kwargs)

In [50]:
def quick_building_agent(model, env):
    """Build the agent for ``model`` with as many actions as ``env.action_space.n``."""
    return build_agent(model, env.action_space.n)

In [51]:
# Show the shape of the model's input tensor as a plain tuple.
tuple(model.input.shape)

(None, 1, 100)

In [52]:
# Build the agent, compile it with Adam and train it on the environment.
dqn = quick_building_agent(model, env)
# `learning_rate` is the supported argument name; `lr` is only a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)

Training for 10000 steps ...
Initial state:
+3-x3-y6-xx5-yy6-sin(x)2
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 18:49 - reward: 0.0000e+00



   55/10000 [..............................] - ETA: 3:38 - reward: -0.9818Final state:
+3-x3-y6-xx5-yy6-sin(x)2
[10, 0, 0, 12, 0, 0, 10, 1, 2, 8, 0, 0, 6, 0, 2, 10, 1, 0, 3, 1, 0, 12, 0, 0, 12, 1, 2, 6, 1, 0, 3, 1, 2, 10, 1, 0, 14, 0, 0, 8, 1, 0, 14, 1, 2, 8, 0, 0, 12, 0, 2, 6, 0, 0, 12, 0, 2, 12, 0, 2, 8, 0, 2, 8, 0, 0, 8, 0, 0, 3, 0, 0, 10, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0]

Initial state:
+1-y1+xx4+yy5-sin(x)1
  115/10000 [..............................] - ETA: 2:30 - reward: -0.9826Final state:
+1-y1+xx4+yy5-sin(x)1
[9, 0, 2, 8, 0, 0, 11, 1, 2, 14, 0, 2, 11, 1, 2, 9, 1, 2, 3, 1, 2, 11, 0, 0, 9, 1, 0, 11, 1, 0, 9, 1, 0, 11, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 1, 2, 0]

Initial state:
+2-x1-y6-xx5+yy5+sin(x)1
  173/10000 [..............................] - ETA: 2:10 - reward: -0.9191Final

<tensorflow.python.keras.callbacks.History at 0x7f117c43c8d0>

In [53]:
# Evaluate the trained agent for 10 episodes and print the mean of the recorded
# per-episode rewards.
fscores = dqn.test(env, nb_episodes=10, visualize=False)
print(np.mean(fscores.history['episode_reward']))

Testing for 10 episodes ...
Initial state:
+4+x4-xx1-yy1+sin(x)3
Final state:
+4+x4-xx1-yy1+sin(x)3
[3, 0, 0, 3, 0, 0, 13, 0, 0, 3, 0, 0, 5, 0, 0, 13, 0, 0, 5, 0, 0, 5, 0, 0, 12, 0, 0, 3, 0, 0, 10, 0, 0, 13, 0, 0, 5, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Episode 1: reward: -60.000, steps: 60
Initial state:
-3+x3-y5-xx3+yy4-sin(x)4
Final state:
-3+x3-y5-xx3+yy4-sin(x)4
[8, 0, 0, 5, 0, 0, 14, 0, 0, 5, 0, 0, 10, 0, 0, 8, 0, 0, 5, 0, 0, 4, 0, 0, 4, 0, 0, 10, 0, 0, 8, 0, 0, 14, 0, 0, 11, 0, 0, 11, 0, 0, 11, 0, 0, 11, 0, 0, 14, 0, 0, 4, 0, 0, 10, 1, 0, 14, 0, 0, 8, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

Episode 2: reward: 0.000, steps: 60
Initial state:
-4-x5-y4-xx3-yy6
Final state:
-4-x5-y4-xx3-yy6
[6, 0, 0, 12, 0, 0, 4, 1, 0, 6, 0, 0, 8, 0, 0, 12, 0, 0, 8, 0, 0, 