# FUNCTION GENERATOR using Policy Gradient

Useful links:
Policy Gradient Explanation: http://karpathy.github.io/2016/05/31/rl/ <br>
Example of Policy Gradient: https://github.com/keon/policy-gradient

In [1]:
import numpy as np
from keras.models import Sequential, Model
from keras.layers import TimeDistributed, Dense, Reshape, Flatten, GRU, Input, Embedding
from keras.optimizers import Adam
from keras.layers.convolutional import Convolution2D
from PolicyGradientModel import PolicyGradientModel
from RewardCalculator import RewardCalculator

Using TensorFlow backend.


In [2]:
# Free variables that may appear in a generated expression.
ALLOWED_PARAMETERS = ['X', 'Y']
# Full vocabulary: the parameters first, then digits, operators and '#'.
ALLOWED_SYMBOLS = [*ALLOWED_PARAMETERS, *'0123456789+-*/#']
NUM_SYMBOLS = len(ALLOWED_SYMBOLS)
# Max length of the output expression
MAX_LENGTH = 30
# Target expression handed to the reward calculator.
CORRECT_EXPRESSION = "3*X+2*Y"

## DEFINE MODEL

In [3]:
def getModel():
    """Build and compile the policy network over the symbol vocabulary.

    The network receives a placeholder input of shape (1, 1) (one timestep,
    one feature), passes it through a 32-unit GRU and produces one
    probability per entry of ALLOWED_SYMBOLS.

    Returns:
        A compiled Keras ``Model`` with NUM_SYMBOLS outputs, using the Adam
        optimizer with default settings and categorical cross-entropy loss.
    """
    # The input is only a placeholder; the intent is for the network to
    # effectively ignore it.
    placeholderInput = Input(shape=(1, 1))
    # TODO: Add noise layer to make output vary
    hidden = GRU(32)(placeholderInput)
    # softmax rather than sigmoid: categorical cross-entropy (and sampling a
    # single symbol from the output) needs a distribution that sums to 1,
    # whereas sigmoid yields independent per-symbol scores.
    symbolProbabilities = Dense(NUM_SYMBOLS, activation='softmax')(hidden)
    model = Model(inputs=placeholderInput, outputs=symbolProbabilities)
    model.compile(optimizer=Adam(), loss='categorical_crossentropy')
    return model

In [5]:
# Settings for the reward computation, gathered in one place so they are easy
# to tune. NOTE(review): the meaning of each weight is defined inside
# RewardCalculator; the negative length weight presumably penalises long
# expressions -- confirm against that module.
rewardSettings = {
    'correctExpression': CORRECT_EXPRESSION,
    'parameters': ALLOWED_PARAMETERS,
    'functionDifferenceRewardWeight': 1.0,
    'compilableRewardWeight': 1.0,
    'lengthRewardWeight': -0.02,
    'foundSymbolWeight': 0.1,
    'rewardOffset': 0.0,
    'usingFile': False,
}
rewardCalculator = RewardCalculator(**rewardSettings)

In [6]:
# Build the Keras network, then wrap it together with the vocabulary, the
# length limit and the reward calculator in the policy-gradient trainer.
model = getModel()
pgModel = PolicyGradientModel(
    model=model,
    allowedSymbol=ALLOWED_SYMBOLS,
    numSymbol=NUM_SYMBOLS,
    maxLength=MAX_LENGTH,
    rewardCalculator=rewardCalculator,
    # NOTE(review): getModel() compiles with Adam() defaults; whether this
    # rate overrides the optimizer's is decided in PolicyGradientModel.
    learningRate=0.0001,
    fileName="Model1.hdf5",
)

In [None]:
# Optional step: restore previously saved weights before training.
# NOTE(review): presumably reads the fileName given to PolicyGradientModel
# ("Model1.hdf5") and may fail if that file does not exist yet -- confirm in
# PolicyGradientModel.loadWeight.
pgModel.loadWeight()

## TRAINING

In [None]:
# Constant placeholder input: a batch of one sample matching the model's
# Input(shape=(1, 1)).
placeholderBatch = np.ones((1, 1, 1))
pgModel.train(input=placeholderBatch)