# 0. Install Dependencies

In [None]:
!pip install tensorflow==2.3.0
!pip install gym
!pip install keras
!pip install keras-rl2

# 1. Test Random Environment with OpenAI Gym

In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import math
import random
import re

In [2]:
sample_string = "2x-11y+3sin(x)-12"

# if first term is positive, add '+' at the beginning (easier regex parse then)
if sample_string[0] != '-' and sample_string[0] != '+':
    sample_string = '+' + sample_string

print(sample_string)

print(re.findall('([-|\+][\w()]+)', sample_string))

print(re.search(r'[\d]+','-11y').group())


+2x-11y+3sin(x)-12
['+2x', '-11y', '+3sin(x)', '-12']
11


In [3]:
print("{: >10}     ==> {: >8} {: >8} {: >10}".format('element', 'sign', 'coeff', 'variable'))
for element in re.findall('([-|\+][\w()]+)', sample_string):
    coeff = re.search(r'[\d]+',element).group()
    sign, variable = element.split(coeff)
    print("{: >10}     ==> {: >8} {: >8} {: >10}".format(element, sign, coeff, variable))

   element     ==>     sign    coeff   variable
       +2x     ==>        +        2          x
      -11y     ==>        -       11          y
  +3sin(x)     ==>        +        3     sin(x)
       -12     ==>        -       12           


In [4]:
var_to_index = {
    '': 0,
    '(': 1,
    ')': 2,
    '+' : 3,
    '-' : 4,
    '+x': 5,
    '-x': 6,
    '+y': 7,
    '-y': 8,
    '+xx': 9,
    '-xx': 10,
    '+yy': 11,
    '-yy': 12,
    '+sin(x)': 13,
    '-sin(x)': 14
}

index_to_var = {
    0: '',
    1: '(',
    2: ')',
    3: '+',
    4: '-',
    5: '+x',
    6: '-x',
    7: '+y',
    8: '-y',
    9: '+xx',
    10: '-xx',
    11: '+yy',
    12: '-yy',
    13: '+sin(x)',
    14: '-sin(x)'
}

# add two elements between every element (to add zeros) 
def intersperse(lst, item):
    result = [item] * (len(lst) * 3 - 1)
    result[0::3] = lst
    return result

def equation_to_state(equation: str):
    state = []
    for element in re.findall('([-|\+][\w()]+)', equation):
        coeff = re.search(r'[\d]+',element).group()
        sign, variable = element.split(coeff)
        for i in range(int(coeff)):
            state.append(var_to_index[sign+variable])

    random.shuffle(state)
    state = intersperse(state, 0)
    while len(state) < 100:
       state.append(var_to_index[''])

    return state


print(sample_string)
new_state = equation_to_state(sample_string)
print(new_state)

+2x-11y+3sin(x)-12
[4, 0, 0, 8, 0, 0, 4, 0, 0, 8, 0, 0, 8, 0, 0, 8, 0, 0, 4, 0, 0, 8, 0, 0, 4, 0, 0, 8, 0, 0, 13, 0, 0, 4, 0, 0, 4, 0, 0, 8, 0, 0, 13, 0, 0, 4, 0, 0, 8, 0, 0, 4, 0, 0, 8, 0, 0, 13, 0, 0, 8, 0, 0, 4, 0, 0, 4, 0, 0, 4, 0, 0, 4, 0, 0, 5, 0, 0, 8, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [5]:
def random_equation():
    vars = ['','x','xx','y','yy','sin(x)']
    coeffs = random.choices(range(-6, 6), k=len(vars))
    random_eq = ''
    for i in range(len(coeffs)):
        if coeffs[i] > 0:
            random_eq += ('+' + str(coeffs[i]) + vars[i])
        elif coeffs[i] < 0:
            random_eq += (str(coeffs[i]) + vars[i])
    return random_eq

for _ in range(6):
    print(random_equation())

-4-5x-2xx+1yy-4sin(x)
-4+5x-1xx-1y-2yy+1sin(x)
-1x+3xx+4y+2yy+1sin(x)
-4x+3xx+5y-1yy-3sin(x)
+5-3x+4xx+4yy-6sin(x)
+3-5x-6xx+3y+3yy-3sin(x)


In [6]:
rand_eq = random_equation()
print(rand_eq)
eq_state = equation_to_state(rand_eq)
print(eq_state)

-3-3x-1xx-2y+5yy-4sin(x)
[11, 0, 0, 14, 0, 0, 4, 0, 0, 14, 0, 0, 14, 0, 0, 11, 0, 0, 8, 0, 0, 14, 0, 0, 8, 0, 0, 11, 0, 0, 6, 0, 0, 4, 0, 0, 6, 0, 0, 4, 0, 0, 11, 0, 0, 10, 0, 0, 6, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [34]:
def state_to_equation(state):
    state_counter = {
        0: 0,
        1: 0,
        2: 0,
        3: 0,
        4: 0,
        5: 0,
        6: 0,
        7: 0,
        8: 0,
        9: 0,
        10: 0,
        11: 0,
        12: 0,
        13: 0,
        14: 0
    }
    for x in state:
        if x != 0 and x != 1 and x != 2:
            state_counter[x]+=1
    
    equation = ''
    for x in state_counter.keys():
        if state_counter[x] > 0:
            equation+=(index_to_var[x]+str(state_counter[x]))
    return equation

print(rand_eq)
state_to_equation(eq_state)

-3-3x-1xx-2y+5yy-4sin(x)


'-3-x3-y2-xx1+yy5-sin(x)4'

In [40]:
def state_to_dict(state):
    index_to_var = {
    0: '',
    1: '(',
    2: ')',
    3: '+',
    4: '-',
    5: '+x',
    6: '-x',
    7: '+y',
    8: '-y',
    9: '+xx',
    10: '-xx',
    11: '+yy',
    12: '-yy',
    13: '+sin(x)',
    14: '-sin(x)'
    }
    out = {}
    for x in state:
        if x == 1 or x == 2:
            return None
        if x == 0:
            continue
            
        key = index_to_var[x][1:]
        if key not in out.keys():
            out[key] = 0
        if x % 2:
            out[key] += 1
        else:
            out[key] -= 1
    return out

def aggregate_parenthesis(state):
    open_idx = None
    is_open = False
    out = []
    for i, x in enumerate(state):
        if x == 1:
            if is_open:
                return -1
            else:
                is_open = True
                open_idx = i
        if x == 2:
            if is_open:
                out.append(state_to_dict(state[open_idx+1:i]))
                print(state_to_equation(state[open_idx+1:i]))
                is_open = False
            else:
                return -1
    return out

In [41]:
# Kinda obsolete, it is basically a copy of aggregate_parenthesis but it calls state_to_equation() instead of state_to_dict()
def obsolete_state_to_string(state):
    open_idx = None
    is_open = False
    out = ""
    for i, x in enumerate(state):
        if x == 1:
            if is_open:
                return -1
            else:
                is_open = True
                open_idx = i
        if x == 2:
            if is_open:
                out += '('
                out += state_to_equation(state[open_idx+1:i])
                out += ') + '
                is_open = False
            else:
                return -1
    return out[:-3]

In [42]:
def dictlist_to_string(eq_list):
    '''
    For state -> string you can do: 
        dictlist_to_string( aggregate_parenthesis( state ) )
    '''
    out = ""
    for x in eq_list:
        out += '('
        for sign in x.keys():
            if x[sign] > 0:
                out += ' +'
            else:
                out += ' '
            out += str(x[sign]) + sign
        out += ') + '
    return out[:-3]

In [43]:
eq_state[2] = 1
eq_state[22] = 2
eq_state[23] = 1
eq_state[60] = 2

out = aggregate_parenthesis(eq_state)
print(out)
print(obsolete_state_to_string(eq_state))
print(dictlist_to_string(out))

-1-y1+yy1-sin(x)4
-2-x3-y1-xx1+yy3
[{'sin(x)': -4, '': -1, 'yy': 1, 'y': -1}, {'y': -1, 'yy': 3, 'x': -3, '': -2, 'xx': -1}]
(-1-y1+yy1-sin(x)4) + (-2-x3-y1-xx1+yy3)
( -4sin(x) -1 +1yy -1y) + ( -1y +3yy -3x -2 -1xx)


In [44]:
class MathEquationEnv(Env):
    
    def __init__(self):
        self.action_space = Discrete(66)
        self.observation_space = Discrete(100)
        self.state = self.random_state()
        
        self.generator_length = 60

        self.brackets_number = 0

    def random_equation(self):
        vars = ['','x','xx','y','yy','sin(x)']
        coeffs = random.choices(range(-6, 6), k=len(vars))
        random_eq = ''
        for i in range(len(coeffs)):
            if coeffs[i] > 0:
                random_eq += ('+' + str(coeffs[i]) + vars[i])
            elif coeffs[i] < 0:
                random_eq += (str(coeffs[i]) + vars[i])
        return random_eq

    def intersperse(self, lst, item):
        result = [item] * (len(lst) * 3 - 1)
        result[0::3] = lst
        return result
    
    def state_to_dict(self, state):
        index_to_var = {
        0: '',
        1: '(',
        2: ')',
        3: '+',
        4: '-',
        5: '+x',
        6: '-x',
        7: '+y',
        8: '-y',
        9: '+xx',
        10: '-xx',
        11: '+yy',
        12: '-yy',
        13: '+sin(x)',
        14: '-sin(x)'
        }
        out = {}
        for x in state:
            if x == 1 or x == 2:
                return None
            if x == 0:
                continue

            key = index_to_var[x][1:]
            if key not in out.keys():
                out[key] = 0
            if x % 2:
                out[key] += 1
            else:
                out[key] -= 1
        return out

    def aggregate_parenthesis(self):
        open_idx = None
        is_open = False
        out = []
        for i, x in enumerate(self.state):
            if x == 1:
                if is_open:
                    return -1
                else:
                    is_open = True
                    open_idx = i
            if x == 2:
                if is_open:
                    out.append(state_to_dict(self.state[open_idx+1:i]))
                    is_open = False
                else:
                    return -1
        return out

    def equation_to_state(self ,equation: str):
        state = []
        for element in re.findall('([-|\+][\w()]+)', equation):
            coeff = re.search(r'[\d]+',element).group()
            sign, variable = element.split(coeff)
            for i in range(int(coeff)):
                state.append(var_to_index[sign+variable])
        random.shuffle(state)
        state = self.intersperse(state, 0)
        while len(state) < 100:
            state.append(var_to_index[''])
        return state

    def random_state(self):
        return self.equation_to_state(self.random_equation())

    def state_to_equation(self, state):
        state_counter = {
            0: 0,
            1: 0,
            2: 0,
            3: 0,
            4: 0,
            5: 0,
            6: 0,
            7: 0,
            8: 0,
            9: 0,
            10: 0,
            11: 0,
            12: 0,
            13: 0,
            14: 0
        }
        for x in state:
            if x != 0 and x != 1 and x != 2:
                state_counter[x]+=1
        equation = ''
        for x in state_counter.keys():
            if state_counter[x] > 0:
                equation+=(index_to_var[x]+str(state_counter[x]))
        return equation


    def evaluate_brackets(self, state):
        '''
        Outcomes of aggregate_parenthesis():
            -1   : wrong bracket syntax
            []   : no brackets or only one open bracket and no close
            [{}] : empty dict means that there are correct brackets but with nothing inside (only 0's)
            else : probably correct brackets
        '''
        brackets = self.aggregate_parenthesis()
        if brackets == -1:
            return -5
        if not brackets:
            return -2
        score = 0
        for x in brackets:
            score += 1
            if x == {}:
                score -= 1
                continue
            numbers = np.absolute(list(x.values()))
            for i in range(2, min(numbers)+1):
                if all(num % i == 0 for num in numbers):
                    print("Divisor: ", i)
                    score += 10
        return score
#         left = False
#         brackets = 0
#         for i in range(len(state)):
#             if state[i] == 1 and left == False:
#                 left = True
#             elif state[i] == 1 and left == True:
#                 return -1
#             elif state[i] == 2 and left == True:
#                 left = False
#                 brackets+=1
#             elif state[i] == 2 and left == False:
#                 return -1

#         if brackets > self.brackets_number:
#             self.brackets_number = brackets
#             return 10

#         return 0
            
    def step(self, action):
        self.generator_length-=1

        info = {}
        reward = 0
        done = False      

        if action % 2 == 1: # (
            self.state[action + action//2 + 1] = 1
        elif action % 2 == 0: # )
            self.state[action + action//2 + 1] = 2

        reward = self.evaluate_brackets(self.state)


        if self.generator_length <= 0:
#             print("Final state:")
#             print(self.state_to_equation(self.state))
#             print(self.state)
#             print()
            done = True

        return self.state, reward, done, {}
        
    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        self.state = self.random_state()
        #print("Initial state:")
        #print(self.state_to_equation(self.state))
        self.generator_length = 60
        return self.state

In [45]:
## Do not make additional spaces in the () brackets just the variables like:
env = MathEquationEnv()
#env = MathEquationEnv("x + y + z", 3)

In [46]:
env.observation_space.sample()

54

In [72]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        #print('hmm')
        action = env.action_space.sample()
        
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:-297
Episode:2 Score:-297
Episode:3 Score:-291
Episode:4 Score:-294
Episode:5 Score:-300


# 2. Create a Deep Learning Model with Keras

In [73]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [74]:
env.observation_space.n

100

In [75]:
states = env.observation_space.shape
print(states)
actions = env.action_space.n

()


In [76]:
env.action_space

Discrete(66)

In [77]:
def build_model(observations, actions):
    model = Sequential()    
    model.add(Dense(observations, activation='relu', input_shape=(1, observations) )  )
    model.add(Dense(48, activation='relu'))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    model.add(Flatten())
    return model

In [78]:
del model

NameError: name 'model' is not defined

In [80]:
def quick_building(env):
    observation_space = env.observation_space.n
    action_space = env.action_space.n
    return build_model(observation_space, action_space)

In [81]:
env = MathEquationEnv()
model = quick_building(env)

In [82]:
model.input_shape

(None, 1, 100)

In [83]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 1, 100)            10100     
                                                                 
 dense_1 (Dense)             (None, 1, 48)             4848      
                                                                 
 dense_2 (Dense)             (None, 1, 24)             1176      
                                                                 
 dense_3 (Dense)             (None, 1, 66)             1650      
                                                                 
 flatten (Flatten)           (None, 66)                0         
                                                                 
Total params: 17,774
Trainable params: 17,774
Non-trainable params: 0
_________________________________________________________________


# 3. Build Agent with Keras-RL

In [84]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [85]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [86]:
def quick_building_agent(model, env):
    actions = env.action_space.n
    return build_agent(model, actions)

In [87]:
tuple(model.input.shape)

(None, 1, 100)

In [88]:
dqn = quick_building_agent(model, env)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)

  super(Adam, self).__init__(name, **kwargs)


Training for 10000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,


    1/10000 [..............................] - ETA: 50:15 - reward: -5.0000



 2281/10000 [=====>........................] - ETA: 1:54 - reward: -4.9303Divisor:  2
Divisor:  2
Divisor:  2
Divisor:  2
Divisor:  4
done, took 149.116 seconds


<keras.callbacks.History at 0x7f8dd87a2e80>

In [93]:
fscores = dqn.test(env, nb_episodes=10, visualize=False)
print(np.mean(fscores.history['episode_reward']))

Testing for 10 episodes ...
Episode 1: reward: -120.000, steps: 60
Episode 2: reward: 57.000, steps: 60
Episode 3: reward: -120.000, steps: 60
Episode 4: reward: -120.000, steps: 60
Episode 5: reward: -120.000, steps: 60
Episode 6: reward: -300.000, steps: 60
Episode 7: reward: -120.000, steps: 60
Episode 8: reward: 57.000, steps: 60
Episode 9: reward: -120.000, steps: 60
Episode 10: reward: -120.000, steps: 60
-102.6
